1#![deny(warnings)]
2
3pub mod find_enrichment_table_records;
4pub mod get_enrichment_table_record;
5pub mod tables;
6
7#[cfg(test)]
8mod test_util;
9mod vrl_util;
10
11use dyn_clone::DynClone;
12use indoc::indoc;
13use snafu::Snafu;
14pub use tables::{TableRegistry, TableSearch};
15use vrl::{
16 compiler::Function,
17 value::{ObjectMap, Value},
18};
19
/// Newtype around a `usize` that identifies an index on a [`Table`].
///
/// A handle is produced by [`Table::add_index`] and may later be passed back
/// to [`Table::find_table_row`] / [`Table::find_table_rows`] through their
/// `index` parameter.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct IndexHandle(pub usize);
22
23#[derive(Clone, Debug, PartialEq, Eq)]
24pub enum Condition<'a> {
25 Equals { field: &'a str, value: Value },
27 BetweenDates {
29 field: &'a str,
30 from: chrono::DateTime<chrono::Utc>,
31 to: chrono::DateTime<chrono::Utc>,
32 },
33 FromDate {
35 field: &'a str,
36 from: chrono::DateTime<chrono::Utc>,
37 },
38 ToDate {
40 field: &'a str,
41 to: chrono::DateTime<chrono::Utc>,
42 },
43}
44
/// Selects whether a lookup or index distinguishes letter case.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Case {
    /// Comparisons distinguish upper and lower case.
    Sensitive,
    /// Comparisons ignore letter case.
    Insensitive,
}
50
51#[derive(Clone, Debug, PartialEq, Eq, Snafu)]
52#[snafu(visibility(pub(crate)))]
53pub enum Error {
54 #[snafu(display("No rows found"))]
55 NoRowsFound,
56 #[snafu(display("More than one row found"))]
57 MoreThanOneRowFound,
58 #[snafu(display("Field(s) '{}' missing from dataset", fields.join(", ")))]
59 MissingDatasetFields { fields: Vec<String> },
60 #[snafu(display("Column contains invalid UTF-8: {source}"))]
61 InvalidUtfInColumn { source: std::str::Utf8Error },
62 #[snafu(display("Failed to encode value: {details}"))]
63 FailedToEncodeValue { details: String },
64 #[snafu(display("Only one condition is allowed"))]
65 OnlyOneConditionAllowed,
66 #[snafu(display("Only equality condition is allowed"))]
67 OnlyEqualityConditionAllowed,
68 #[snafu(display("{kind} condition must be specified"))]
69 MissingCondition { kind: &'static str },
70 #[snafu(display("{field} field is required"))]
71 MissingRequiredField { field: &'static str },
72 #[snafu(display("Only one field is allowed"))]
73 OnlyOneFieldAllowed,
74 #[snafu(display("Invalid address: {source}"))]
75 InvalidAddress { source: std::net::AddrParseError },
76 #[snafu(transparent)]
77 Internal { source: InternalError },
78 #[snafu(display("Table {table} not loaded"))]
79 TableNotLoaded { table: String },
80}
81
82#[derive(Clone, Debug, PartialEq, Eq, Snafu)]
83pub enum InternalError {
84 #[snafu(display("finish_load called prematurely"))]
85 FinishLoadCalled,
86 #[snafu(display("finish_load not called"))]
87 FinishLoadNotCalled,
88 #[snafu(display("Failed to decode value from memory table: {details}"))]
90 FailedToDecode { details: String },
91}
92
93impl From<Error> for vrl::prelude::ExpressionError {
94 fn from(error: Error) -> Self {
95 vrl::prelude::ExpressionError::Error {
96 message: error.to_string(),
97 labels: vec![],
98 notes: vec![],
99 }
100 }
101}
102
/// Interface implemented by enrichment table backends.
///
/// The [`DynClone`] supertrait, together with the `clone_trait_object!`
/// invocation below the trait, makes boxed `dyn Table` values cloneable.
pub trait Table: DynClone {
    /// Searches the table for a single row satisfying `condition`.
    ///
    /// # Parameters
    /// * `case` - whether matching distinguishes letter case.
    /// * `condition` - the search criteria.
    /// * `select` - optional list of field names.
    ///   NOTE(review): presumably restricts which fields are returned — confirm.
    /// * `wildcard` - optional value.
    ///   NOTE(review): presumably an alternative value that also counts as a
    ///   match, per the explainer text in this file — confirm.
    /// * `index` - optional handle previously obtained from [`Table::add_index`].
    ///
    /// # Errors
    /// Returns an [`Error`] when the lookup fails.
    /// NOTE(review): presumably `NoRowsFound` / `MoreThanOneRowFound` when the
    /// match count is not exactly one — confirm against implementations.
    fn find_table_row<'a>(
        &self,
        case: Case,
        condition: &'a [Condition<'a>],
        select: Option<&[String]>,
        wildcard: Option<&Value>,
        index: Option<IndexHandle>,
    ) -> Result<ObjectMap, Error>;

    /// Searches the table for rows satisfying `condition`, returning them as
    /// a vector.
    ///
    /// Takes the same parameters as [`Table::find_table_row`].
    ///
    /// # Errors
    /// Returns an [`Error`] when the lookup fails.
    fn find_table_rows<'a>(
        &self,
        case: Case,
        condition: &'a [Condition<'a>],
        select: Option<&[String]>,
        wildcard: Option<&Value>,
        index: Option<IndexHandle>,
    ) -> Result<Vec<ObjectMap>, Error>;

    /// Registers an index over `fields` with the given case handling and
    /// returns a handle to it.
    ///
    /// # Errors
    /// Returns an [`Error`] when the index cannot be added.
    /// NOTE(review): presumably `MissingDatasetFields` when a field is absent
    /// from the table — confirm against implementations.
    fn add_index(&mut self, case: Case, fields: &[&str]) -> Result<IndexHandle, Error>;

    /// Returns, for each index, its case handling and field names.
    fn index_fields(&self) -> Vec<(Case, Vec<String>)>;

    /// Reports whether the table needs to be reloaded.
    fn needs_reload(&self) -> bool;

    /// Returns implementation-defined state as a type-erased box.
    ///
    /// The default implementation returns `None`.
    // NOTE(review): presumably used to carry state across a table reload —
    // confirm with the callers.
    fn extract_state(&self) -> Option<Box<dyn std::any::Any + Send + Sync>> {
        None
    }
}

// Implements `Clone` for `Box<dyn Table>` via `dyn_clone`.
dyn_clone::clone_trait_object!(Table);
152
153pub fn vrl_functions() -> Vec<Box<dyn Function>> {
154 vec![
155 Box::new(get_enrichment_table_record::GetEnrichmentTableRecord) as _,
156 Box::new(find_enrichment_table_records::FindEnrichmentTableRecords) as _,
157 ]
158}
159
160pub(crate) const ENRICHMENT_TABLE_EXPLAINER: &str = indoc! {r#"
161 For `file` enrichment tables, this condition needs to be a VRL object in which
162 the key-value pairs indicate a field to search mapped to a value to search in that field.
163 This function returns the rows that match the provided condition(s). _All_ fields need to
164 match for rows to be returned; if any fields do not match, then no rows are returned.
165
166 There are three forms of search criteria:
167
168 1. **Exact match search**. The given field must match the value exactly. Case sensitivity
169 can be specified using the `case_sensitive` argument. An exact match search can use an
170 index directly into the dataset, which should make this search fairly "cheap" from a
171 performance perspective.
172
173 2. **Wildcard match search**. The given fields specified by the exact match search may also
174 be matched exactly to the value provided to the `wildcard` parameter.
175 A wildcard match search can also use an index directly into the dataset.
176
177 3. **Date range search**. The given field must be greater than or equal to the `from` date
178 and/or less than or equal to the `to` date. A date range search involves
179 sequentially scanning through the rows that have been located using any exact match
180 criteria. This can be an expensive operation if there are many rows returned by any exact
181 match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment
182 data set is very small.
183
184 For `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair
185 whose value needs to be a valid IP address. Example: `{"ip": .ip }`. If a return field is expected
186 and without a value, `null` is used. This table can return the following fields:
187
188 * ISP databases:
189 * `autonomous_system_number`
190 * `autonomous_system_organization`
191 * `isp`
192 * `organization`
193 * `network`
194
195 * City databases:
196 * `city_name`
197 * `continent_code`
198 * `country_code`
199 * `country_name`
200 * `region_code`
201 * `region_name`
202 * `metro_code`
203 * `latitude`
204 * `longitude`
205 * `postal_code`
206 * `timezone`
207 * `network`
208
209 * Connection-Type databases:
210 * `connection_type`
211 * `network`
212
213 * Anonymous-IP databases:
214 * `is_anonymous`
215 * `is_anonymous_vpn`
216 * `is_hosting_provider`
217 * `is_public_proxy`
218 * `is_residential_proxy`
219 * `is_tor_exit_node`
220 * `network`
221
222 To use this function, you need to update your configuration to
223 include an
224 [`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables)
225 parameter.
226"#};