Skip to main content

enrichment/
lib.rs

1#![deny(warnings)]
2
3pub mod find_enrichment_table_records;
4pub mod get_enrichment_table_record;
5pub mod tables;
6
7#[cfg(test)]
8mod test_util;
9mod vrl_util;
10
11use dyn_clone::DynClone;
12use indoc::indoc;
13use snafu::Snafu;
14pub use tables::{TableRegistry, TableSearch};
15use vrl::{
16    compiler::Function,
17    value::{ObjectMap, Value},
18};
19
/// Handle identifying an index that was registered on a [`Table`].
///
/// [`Table::add_index`] hands one of these back, and the table search methods accept it through
/// their optional `index` argument. The wrapped `usize` is public; what it refers to is decided
/// by the table implementation that produced it.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct IndexHandle(pub usize);
22
23#[derive(Clone, Debug, PartialEq, Eq)]
24pub enum Condition<'a> {
25    /// Condition exactly matches the field value.
26    Equals { field: &'a str, value: Value },
27    /// The date in the field is between from and to (inclusive).
28    BetweenDates {
29        field: &'a str,
30        from: chrono::DateTime<chrono::Utc>,
31        to: chrono::DateTime<chrono::Utc>,
32    },
33    /// The date in the field is greater than or equal to `from`.
34    FromDate {
35        field: &'a str,
36        from: chrono::DateTime<chrono::Utc>,
37    },
38    /// The date in the field is less than or equal to `to`.
39    ToDate {
40        field: &'a str,
41        to: chrono::DateTime<chrono::Utc>,
42    },
43}
44
/// Case sensitivity to apply when searching a [`Table`] or adding an index to it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Case {
    /// Letter case is significant when comparing.
    Sensitive,
    /// Letter case is ignored when comparing.
    Insensitive,
}
50
51#[derive(Clone, Debug, PartialEq, Eq, Snafu)]
52#[snafu(visibility(pub(crate)))]
53pub enum Error {
54    #[snafu(display("No rows found"))]
55    NoRowsFound,
56    #[snafu(display("More than one row found"))]
57    MoreThanOneRowFound,
58    #[snafu(display("Field(s) '{}' missing from dataset", fields.join(", ")))]
59    MissingDatasetFields { fields: Vec<String> },
60    #[snafu(display("Column contains invalid UTF-8: {source}"))]
61    InvalidUtfInColumn { source: std::str::Utf8Error },
62    #[snafu(display("Failed to encode value: {details}"))]
63    FailedToEncodeValue { details: String },
64    #[snafu(display("Only one condition is allowed"))]
65    OnlyOneConditionAllowed,
66    #[snafu(display("Only equality condition is allowed"))]
67    OnlyEqualityConditionAllowed,
68    #[snafu(display("{kind} condition must be specified"))]
69    MissingCondition { kind: &'static str },
70    #[snafu(display("{field} field is required"))]
71    MissingRequiredField { field: &'static str },
72    #[snafu(display("Only one field is allowed"))]
73    OnlyOneFieldAllowed,
74    #[snafu(display("Invalid address: {source}"))]
75    InvalidAddress { source: std::net::AddrParseError },
76    #[snafu(transparent)]
77    Internal { source: InternalError },
78    #[snafu(display("Table {table} not loaded"))]
79    TableNotLoaded { table: String },
80}
81
82#[derive(Clone, Debug, PartialEq, Eq, Snafu)]
83pub enum InternalError {
84    #[snafu(display("finish_load called prematurely"))]
85    FinishLoadCalled,
86    #[snafu(display("finish_load not called"))]
87    FinishLoadNotCalled,
88    // Unreachable in normal operation: we only decode values that were serialized by us.
89    #[snafu(display("Failed to decode value from memory table: {details}"))]
90    FailedToDecode { details: String },
91}
92
93impl From<Error> for vrl::prelude::ExpressionError {
94    fn from(error: Error) -> Self {
95        vrl::prelude::ExpressionError::Error {
96            message: error.to_string(),
97            labels: vec![],
98            notes: vec![],
99        }
100    }
101}
102
103/// Enrichment tables represent additional data sources that can be used to enrich the event data
104/// passing through Vector.
105pub trait Table: DynClone {
106    /// Search the enrichment table data with the given condition.
107    /// All conditions must match (AND).
108    ///
109    /// # Errors
110    /// Errors if no rows, or more than 1 row is found.
111    fn find_table_row<'a>(
112        &self,
113        case: Case,
114        condition: &'a [Condition<'a>],
115        select: Option<&[String]>,
116        wildcard: Option<&Value>,
117        index: Option<IndexHandle>,
118    ) -> Result<ObjectMap, Error>;
119
120    /// Search the enrichment table data with the given condition.
121    /// All conditions must match (AND).
122    /// Can return multiple matched records
123    fn find_table_rows<'a>(
124        &self,
125        case: Case,
126        condition: &'a [Condition<'a>],
127        select: Option<&[String]>,
128        wildcard: Option<&Value>,
129        index: Option<IndexHandle>,
130    ) -> Result<Vec<ObjectMap>, Error>;
131
132    /// Hints to the enrichment table what data is going to be searched to allow it to index the
133    /// data in advance.
134    ///
135    /// # Errors
136    /// Errors if the fields are not in the table.
137    fn add_index(&mut self, case: Case, fields: &[&str]) -> Result<IndexHandle, Error>;
138
139    /// Returns a list of the field names that are in each index
140    fn index_fields(&self) -> Vec<(Case, Vec<String>)>;
141
142    /// Returns true if the underlying data has changed and the table needs reloading.
143    fn needs_reload(&self) -> bool;
144
145    /// Extracts state from this table
146    fn extract_state(&self) -> Option<Box<dyn std::any::Any + Send + Sync>> {
147        None
148    }
149}
150
151dyn_clone::clone_trait_object!(Table);
152
153pub fn vrl_functions() -> Vec<Box<dyn Function>> {
154    vec![
155        Box::new(get_enrichment_table_record::GetEnrichmentTableRecord) as _,
156        Box::new(find_enrichment_table_records::FindEnrichmentTableRecords) as _,
157    ]
158}
159
160pub(crate) const ENRICHMENT_TABLE_EXPLAINER: &str = indoc! {r#"
161    For `file` enrichment tables, this condition needs to be a VRL object in which
162    the key-value pairs indicate a field to search mapped to a value to search in that field.
163    This function returns the rows that match the provided condition(s). _All_ fields need to
164    match for rows to be returned; if any fields do not match, then no rows are returned.
165
166    There are three forms of search criteria:
167
168    1. **Exact match search**. The given field must match the value exactly. Case sensitivity
169       can be specified using the `case_sensitive` argument. An exact match search can use an
170       index directly into the dataset, which should make this search fairly "cheap" from a
171       performance perspective.
172
173    2. **Wildcard match search**. The given fields specified by the exact match search may also
174        be matched exactly to the value provided to the `wildcard` parameter.
175        A wildcard match search can also use an index directly into the dataset.
176
177    3. **Date range search**. The given field must be greater than or equal to the `from` date
178       and/or less than or equal to the `to` date. A date range search involves
179       sequentially scanning through the rows that have been located using any exact match
180       criteria. This can be an expensive operation if there are many rows returned by any exact
181       match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment
182       data set is very small.
183
184    For `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair
185    whose value needs to be a valid IP address. Example: `{"ip": .ip }`. If a return field is expected
186    and without a value, `null` is used. This table can return the following fields:
187
188    * ISP databases:
189        * `autonomous_system_number`
190        * `autonomous_system_organization`
191        * `isp`
192        * `organization`
193        * `network`
194
195    * City databases:
196        * `city_name`
197        * `continent_code`
198        * `country_code`
199        * `country_name`
200        * `region_code`
201        * `region_name`
202        * `metro_code`
203        * `latitude`
204        * `longitude`
205        * `postal_code`
206        * `timezone`
207        * `network`
208
209    * Connection-Type databases:
210        * `connection_type`
211        * `network`
212
213    * Anonymous-IP databases:
214        * `is_anonymous`
215        * `is_anonymous_vpn`
216        * `is_hosting_provider`
217        * `is_public_proxy`
218        * `is_residential_proxy`
219        * `is_tor_exit_node`
220        * `network`
221
222    To use this function, you need to update your configuration to
223    include an
224    [`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables)
225    parameter.
226"#};