Skip to main content

vector/enrichment_tables/
geoip.rs

1//! Handles enrichment tables for `type = geoip`.
2//! Enrichment data is loaded from one of the MaxMind GeoIP databases,
3//! [MaxMind GeoIP2][maxmind] or [GeoLite2 binary city database][geolite].
4//!
5//! [maxmind]: https://dev.maxmind.com/geoip/geoip2/downloadable
6//! [geolite]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access
7use std::{fs, net::IpAddr, path::PathBuf, sync::Arc, time::SystemTime};
8
9use maxminddb::{
10    Reader,
11    geoip2::{AnonymousIp, City, ConnectionType, Isp, Names},
12};
13use ordered_float::NotNan;
14use serde::Deserialize;
15use vector_lib::{
16    configurable::configurable_component,
17    enrichment::{Case, Condition, Error, IndexHandle, Table},
18};
19use vrl::value::{ObjectMap, Value};
20
21use crate::config::{EnrichmentTableConfig, GenerateConfig};
22
/// The MaxMind products this enrichment table can decode.
///
/// Every MaxMind database file carries a `database_type` string in its metadata. That string is
/// mapped onto one of these variants; a type string that maps to none of them is rejected with
/// an error when the table is created.
#[derive(Clone, Copy, Debug)]
#[allow(missing_docs)]
pub enum DatabaseKind {
    /// `GeoLite2-ASN` databases.
    Asn,
    /// `GeoIP2-ISP` databases.
    Isp,
    /// `GeoIP2-Connection-Type` databases.
    ConnectionType,
    /// `GeoIP2-City` and `GeoLite2-City` databases.
    City,
    /// `GeoIP2-Anonymous-IP` databases.
    AnonymousIp,
}
34
35impl TryFrom<&str> for DatabaseKind {
36    type Error = ();
37
38    fn try_from(value: &str) -> Result<Self, Self::Error> {
39        match value {
40            "GeoLite2-ASN" => Ok(Self::Asn),
41            "GeoIP2-ISP" => Ok(Self::Isp),
42            "GeoIP2-Connection-Type" => Ok(Self::ConnectionType),
43            "GeoIP2-City" | "GeoLite2-City" => Ok(Self::City),
44            "GeoIP2-Anonymous-IP" => Ok(Self::AnonymousIp),
45            _ => Err(()),
46        }
47    }
48}
49
50/// Configuration for the `geoip` enrichment table.
51#[derive(Clone, Debug, Eq, PartialEq)]
52#[configurable_component(enrichment_table("geoip"))]
53pub struct GeoipConfig {
54    /// Path to the [MaxMind GeoIP2][geoip2] or [GeoLite2 binary city database file][geolite2]
55    /// (**GeoLite2-City.mmdb**).
56    ///
57    /// Other databases, such as the country database, are not supported.
58    /// `mmdb` enrichment table can be used for other databases.
59    ///
60    /// [geoip2]: https://dev.maxmind.com/geoip/geoip2/downloadable
61    /// [geolite2]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access
62    pub path: PathBuf,
63
64    /// The locale to use when querying the database.
65    ///
66    /// MaxMind includes localized versions of some of the fields within their database, such as
67    /// country name. This setting can control which of those localized versions are returned by the
68    /// transform.
69    ///
70    /// More information on which portions of the geolocation data are localized, and what languages
71    /// are available, can be found [here][locale_docs].
72    ///
73    /// [locale_docs]: https://support.maxmind.com/hc/en-us/articles/4414877149467-IP-Geolocation-Data#h_01FRRGRYTGZB29ERDBZCX3MR8Q
74    #[serde(default = "default_locale")]
75    pub locale: String,
76}
77
/// Returns the locale used when the configuration does not specify one: `"en"`.
fn default_locale() -> String {
    // At the time of writing MaxMind ships localized names for "de", "en", "es", "fr", "ja",
    // "pt-BR", "ru", and "zh-CN".
    //
    // The up-to-date list of locales, along with further details, is published at
    // https://dev.maxmind.com/geoip/docs/databases/city-and-country?lang=en.

    // TODO: could we detect the system locale and use that as the default locale if it matches one
    // of the available locales in the dataset, and then fallback to "en" otherwise?
    String::from("en")
}
89
90impl GenerateConfig for GeoipConfig {
91    fn generate_config() -> toml::Value {
92        toml::Value::try_from(Self {
93            path: "/path/to/GeoLite2-City.mmdb".into(),
94            locale: default_locale(),
95        })
96        .unwrap()
97    }
98}
99
100impl EnrichmentTableConfig for GeoipConfig {
101    async fn build(
102        &self,
103        _: &crate::config::GlobalOptions,
104        _: Option<Box<dyn std::any::Any + Send + Sync>>,
105    ) -> crate::Result<Box<dyn Table + Send + Sync>> {
106        Ok(Box::new(Geoip::new(self.clone())?))
107    }
108}
109
110#[derive(Clone)]
111/// A struct that implements [vector_lib::enrichment::Table] to handle loading enrichment data from a GeoIP database.
112pub struct Geoip {
113    config: GeoipConfig,
114    dbreader: Arc<maxminddb::Reader<Vec<u8>>>,
115    dbkind: DatabaseKind,
116    last_modified: SystemTime,
117}
118
119fn lookup_value<'de, A: Deserialize<'de>>(
120    dbreader: &'de Reader<Vec<u8>>,
121    address: IpAddr,
122) -> crate::Result<Option<(A, String)>> {
123    let result = dbreader.lookup(address)?;
124    match result.decode::<A>()? {
125        Some(data) => {
126            let network = result.network()?.to_string();
127            Ok(Some((data, network)))
128        }
129        None => Ok(None),
130    }
131}
132
133impl Geoip {
134    /// Creates a new GeoIP struct from the provided config.
135    pub fn new(config: GeoipConfig) -> crate::Result<Self> {
136        let dbreader = Arc::new(Reader::open_readfile(&config.path)?);
137        let dbkind =
138            DatabaseKind::try_from(dbreader.metadata.database_type.as_str()).map_err(|_| {
139                format!(
140                    "Unsupported MMDB database type ({}). Use `mmdb` enrichment table instead.",
141                    dbreader.metadata.database_type
142                )
143            })?;
144
145        // Check if we can read database with dummy Ip.
146        let ip = IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED);
147        match dbkind {
148            // Isp
149            DatabaseKind::Asn | DatabaseKind::Isp => lookup_value::<Isp>(&dbreader, ip).map(|_| ()),
150            DatabaseKind::ConnectionType => {
151                lookup_value::<ConnectionType>(&dbreader, ip).map(|_| ())
152            }
153            DatabaseKind::City => lookup_value::<City>(&dbreader, ip).map(|_| ()),
154            DatabaseKind::AnonymousIp => lookup_value::<AnonymousIp>(&dbreader, ip).map(|_| ()),
155        }?;
156
157        Ok(Geoip {
158            last_modified: fs::metadata(&config.path)?.modified()?,
159            dbreader,
160            dbkind,
161            config,
162        })
163    }
164
165    fn lookup(&self, ip: IpAddr, select: Option<&[String]>) -> Option<ObjectMap> {
166        let mut map = ObjectMap::new();
167        let mut add_field = |key: &str, value: Option<Value>| {
168            if select
169                .map(|fields| fields.iter().any(|field| field == key))
170                .unwrap_or(true)
171            {
172                map.insert(key.into(), value.unwrap_or(Value::Null));
173            }
174        };
175
176        macro_rules! add_field {
177            ($k:expr_2021, $v:expr_2021) => {
178                add_field($k, $v.map(Into::into))
179            };
180        }
181
182        match self.dbkind {
183            DatabaseKind::Asn | DatabaseKind::Isp => {
184                let (data, network) = lookup_value::<Isp>(&self.dbreader, ip).ok()??;
185
186                add_field!("autonomous_system_number", data.autonomous_system_number);
187                add_field!(
188                    "autonomous_system_organization",
189                    data.autonomous_system_organization
190                );
191                add_field!("isp", data.isp);
192                add_field!("organization", data.organization);
193                add_field!("network", Some(network));
194            }
195            DatabaseKind::City => {
196                let (data, network): (City, String) =
197                    lookup_value::<City>(&self.dbreader, ip).ok()??;
198
199                add_field!("city_name", self.take_translation(&data.city.names));
200
201                add_field!("continent_code", data.continent.code);
202
203                let country = data.country;
204                add_field!("country_code", country.iso_code);
205                add_field!("country_name", self.take_translation(&country.names));
206
207                let location = data.location;
208                add_field!("timezone", location.time_zone);
209                add_field!(
210                    "latitude",
211                    location.latitude.map(|latitude| Value::Float(
212                        NotNan::new(latitude).expect("latitude cannot be Nan")
213                    ))
214                );
215                add_field!(
216                    "longitude",
217                    location
218                        .longitude
219                        .map(|longitude| NotNan::new(longitude).expect("longitude cannot be Nan"))
220                );
221                add_field!("metro_code", location.metro_code);
222
223                // last subdivision is most specific per https://github.com/maxmind/GeoIP2-java/blob/39385c6ce645374039450f57208b886cf87ade47/src/main/java/com/maxmind/geoip2/model/AbstractCityResponse.java#L96-L107
224                let subdivision = data.subdivisions.last();
225                add_field!(
226                    "region_name",
227                    subdivision.map(|s| self.take_translation(&s.names))
228                );
229
230                add_field!(
231                    "region_code",
232                    subdivision.and_then(|subdivision| subdivision.iso_code)
233                );
234                add_field!("postal_code", data.postal.code);
235                add_field!("network", Some(network));
236            }
237            DatabaseKind::ConnectionType => {
238                let (data, network) = lookup_value::<ConnectionType>(&self.dbreader, ip).ok()??;
239
240                add_field!("connection_type", data.connection_type);
241                add_field!("network", Some(network));
242            }
243            DatabaseKind::AnonymousIp => {
244                let (data, network) = lookup_value::<AnonymousIp>(&self.dbreader, ip).ok()??;
245
246                add_field!("is_anonymous", data.is_anonymous);
247                add_field!("is_anonymous_vpn", data.is_anonymous_vpn);
248                add_field!("is_hosting_provider", data.is_hosting_provider);
249                add_field!("is_public_proxy", data.is_public_proxy);
250                add_field!("is_residential_proxy", data.is_residential_proxy);
251                add_field!("is_tor_exit_node", data.is_tor_exit_node);
252                add_field!("network", Some(network));
253            }
254        }
255
256        Some(map)
257    }
258
259    fn take_translation<'a>(&self, translations: &'a Names<'a>) -> Option<&'a str> {
260        match self.config.locale.as_ref() {
261            "en" => translations.english,
262            "de" => translations.german,
263            "es" => translations.spanish,
264            "fr" => translations.french,
265            "ja" => translations.japanese,
266            "pt-BR" => translations.brazilian_portuguese,
267            "ru" => translations.russian,
268            "zh-CN" => translations.simplified_chinese,
269            _ => None,
270        }
271    }
272}
273
274impl Table for Geoip {
275    /// Search the enrichment table data with the given condition.
276    /// All conditions must match (AND).
277    ///
278    /// # Errors
279    /// Errors if no rows, or more than 1 row is found.
280    fn find_table_row<'a>(
281        &self,
282        case: Case,
283        condition: &'a [Condition<'a>],
284        select: Option<&[String]>,
285        wildcard: Option<&Value>,
286        index: Option<IndexHandle>,
287    ) -> Result<ObjectMap, Error> {
288        let mut rows = self.find_table_rows(case, condition, select, wildcard, index)?;
289
290        match rows.pop() {
291            Some(row) if rows.is_empty() => Ok(row),
292            Some(_) => Err(Error::MoreThanOneRowFound),
293            None => Err(Error::NoRowsFound),
294        }
295    }
296
297    /// Search the enrichment table data with the given condition.
298    /// All conditions must match (AND).
299    /// Can return multiple matched records
300    fn find_table_rows<'a>(
301        &self,
302        _: Case,
303        condition: &'a [Condition<'a>],
304        select: Option<&[String]>,
305        _wildcard: Option<&Value>,
306        _: Option<IndexHandle>,
307    ) -> Result<Vec<ObjectMap>, Error> {
308        match condition.first() {
309            Some(_) if condition.len() > 1 => Err(Error::OnlyOneConditionAllowed),
310            Some(Condition::Equals { value, .. }) => {
311                let ip = value
312                    .to_string_lossy()
313                    .parse::<IpAddr>()
314                    .map_err(|source| Error::InvalidAddress { source })?;
315                Ok(self
316                    .lookup(ip, select)
317                    .map(|values| vec![values])
318                    .unwrap_or_default())
319            }
320            Some(_) => Err(Error::OnlyEqualityConditionAllowed),
321            None => Err(Error::MissingCondition { kind: "IP" }),
322        }
323    }
324
325    /// Hints to the enrichment table what data is going to be searched to allow it to index the
326    /// data in advance.
327    ///
328    /// # Errors
329    /// Errors if the fields are not in the table.
330    fn add_index(&mut self, _: Case, fields: &[&str]) -> Result<IndexHandle, Error> {
331        match fields.len() {
332            0 => Err(Error::MissingRequiredField { field: "IP" }),
333            1 => Ok(IndexHandle(0)),
334            _ => Err(Error::OnlyOneFieldAllowed),
335        }
336    }
337
338    /// Returns a list of the field names that are in each index
339    fn index_fields(&self) -> Vec<(Case, Vec<String>)> {
340        Vec::new()
341    }
342
343    /// Returns true if the underlying data has changed and the table needs reloading.
344    fn needs_reload(&self) -> bool {
345        matches!(fs::metadata(&self.config.path)
346            .and_then(|metadata| metadata.modified()),
347            Ok(modified) if modified > self.last_modified)
348    }
349}
350
351impl std::fmt::Debug for Geoip {
352    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
353        write!(
354            f,
355            "Geoip {} database {})",
356            self.config.locale,
357            self.config.path.display()
358        )
359    }
360}
361
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected row from `(field name, value)` pairs.
    fn object<const N: usize>(entries: [(&str, Value); N]) -> ObjectMap {
        entries
            .into_iter()
            .map(|(key, value)| (key.into(), value))
            .collect()
    }

    /// Looks up `ip` in `database` without restricting the returned fields.
    fn find(ip: &str, database: &str) -> Option<ObjectMap> {
        find_select(ip, database, None)
    }

    /// Looks up `ip` in `database` with the default locale, returning the last row found (if
    /// any) limited to the `select`ed fields.
    fn find_select(ip: &str, database: &str, select: Option<&[String]>) -> Option<ObjectMap> {
        let table = Geoip::new(GeoipConfig {
            path: database.into(),
            locale: default_locale(),
        })
        .unwrap();

        let condition = Condition::Equals {
            field: "ip",
            value: ip.into(),
        };

        table
            .find_table_rows(Case::Insensitive, &[condition], select, None, None)
            .unwrap()
            .pop()
    }

    #[test]
    fn city_lookup() {
        let values = find("2.125.160.216", "tests/data/GeoIP2-City-Test.mmdb").unwrap();

        let expected = object([
            ("city_name", "Boxford".into()),
            ("country_code", "GB".into()),
            ("continent_code", "EU".into()),
            ("country_name", "United Kingdom".into()),
            ("region_code", "WBK".into()),
            ("region_name", "West Berkshire".into()),
            ("timezone", "Europe/London".into()),
            ("latitude", Value::from(51.75)),
            ("longitude", Value::from(-1.25)),
            ("postal_code", "OX1".into()),
            ("metro_code", Value::Null),
            ("network", "2.125.160.216/29".into()),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn city_partial_lookup() {
        let select = ["latitude".to_string(), "longitude".to_string()];
        let values = find_select(
            "2.125.160.216",
            "tests/data/GeoIP2-City-Test.mmdb",
            Some(select.as_slice()),
        )
        .unwrap();

        let expected = object([
            ("latitude", Value::from(51.75)),
            ("longitude", Value::from(-1.25)),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn city_lookup_partial_results() {
        let values = find("67.43.156.9", "tests/data/GeoIP2-City-Test.mmdb").unwrap();

        let expected = object([
            ("city_name", Value::Null),
            ("country_code", "BT".into()),
            ("country_name", "Bhutan".into()),
            ("continent_code", "AS".into()),
            ("region_code", Value::Null),
            ("region_name", Value::Null),
            ("timezone", "Asia/Thimphu".into()),
            ("latitude", Value::from(27.5)),
            ("longitude", Value::from(90.5)),
            ("postal_code", Value::Null),
            ("metro_code", Value::Null),
            ("network", "67.43.156.0/24".into()),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn city_lookup_no_results() {
        let values = find("10.1.12.1", "tests/data/GeoIP2-City-Test.mmdb");

        assert!(values.is_none());
    }

    #[test]
    fn isp_lookup() {
        let values = find("208.192.1.2", "tests/data/GeoIP2-ISP-Test.mmdb").unwrap();

        let expected = object([
            ("autonomous_system_number", 701i64.into()),
            (
                "autonomous_system_organization",
                "MCI Communications Services, Inc. d/b/a Verizon Business".into(),
            ),
            ("isp", "Verizon Business".into()),
            ("organization", "Verizon Business".into()),
            ("network", "208.192.0.0/10".into()),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn isp_lookup_partial_results() {
        let values = find("2600:7000::1", "tests/data/GeoLite2-ASN-Test.mmdb").unwrap();

        let expected = object([
            ("autonomous_system_number", 6939i64.into()),
            (
                "autonomous_system_organization",
                "Hurricane Electric, Inc.".into(),
            ),
            ("isp", Value::Null),
            ("organization", Value::Null),
            ("network", "2600:7000::/24".into()),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn isp_lookup_no_results() {
        let values = find("10.1.12.1", "tests/data/GeoLite2-ASN-Test.mmdb");

        assert!(values.is_none());
    }

    #[test]
    fn connection_type_lookup_success() {
        let values = find(
            "201.243.200.1",
            "tests/data/GeoIP2-Connection-Type-Test.mmdb",
        )
        .unwrap();

        let expected = object([
            ("connection_type", "Corporate".into()),
            ("network", "201.243.200.0/24".into()),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn connection_type_lookup_missing() {
        let values = find("10.1.12.1", "tests/data/GeoIP2-Connection-Type-Test.mmdb");

        assert!(values.is_none());
    }

    #[test]
    fn custom_mmdb_type_error() {
        // A database whose type is not one of the supported products must be rejected.
        let result = Geoip::new(GeoipConfig {
            path: "tests/data/custom-type.mmdb".into(),
            locale: default_locale(),
        });

        assert!(result.is_err());
    }

    #[test]
    fn anonymous_ip_lookup() {
        let values = find("101.99.92.179", "tests/data/GeoIP2-Anonymous-IP-Test.mmdb").unwrap();

        let expected = object([
            ("is_anonymous", true.into()),
            ("is_anonymous_vpn", true.into()),
            ("is_hosting_provider", true.into()),
            ("is_tor_exit_node", true.into()),
            ("is_public_proxy", Value::Null),
            ("is_residential_proxy", Value::Null),
            ("network", "101.99.92.179/32".into()),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn anonymous_ip_lookup_no_results() {
        let values = find("10.1.12.1", "tests/data/GeoIP2-Anonymous-IP-Test.mmdb");

        assert!(values.is_none());
    }
}