Skip to main content

vector/sources/host_metrics/
temperature.rs

1use vector_lib::metric_tags;
2
3use super::HostMetrics;
4
/// Tag key naming the sensor a temperature gauge was read from.
const COMPONENT: &str = "component";
/// Gauge name for a component's current reading (`Component::temperature()`).
const TEMPERATURE_CELSIUS: &str = "temperature_celsius";
/// Gauge name for a component's maximum reading (`Component::max()`).
const TEMPERATURE_MAX_CELSIUS: &str = "temperature_max_celsius";
/// Gauge name for a component's critical threshold (`Component::critical()`).
const TEMPERATURE_CRITICAL_CELSIUS: &str = "temperature_critical_celsius";
9
10impl HostMetrics {
11    pub async fn temperature_metrics(&mut self, output: &mut super::MetricsBuffer) {
12        output.name = "temperature";
13        // Refresh the long-lived component list in place. `Component::max()` is
14        // derived by sysinfo from successive refreshes when the sensor does not
15        // expose a hardware maximum, so recreating the list every scrape (as a
16        // fresh `Components::new_with_refreshed_list()` would) resets that
17        // history and makes the reported max equal the latest sample.
18        //
19        // Pass `false` so components that are not re-listed on a refresh are
20        // kept rather than dropped: sysinfo only sets its internal "updated"
21        // flag for `/sys/class/hwmon` sensors, so `refresh(true)` would prune
22        // the `/sys/class/thermal` fallback sensors (used e.g. on Raspberry Pi)
23        // on every scrape and drop their series entirely.
24        self.components.refresh(false);
25        for component in &self.components {
26            // Some sensors expose an empty label (for example when sysinfo falls
27            // back to `/sys/class/thermal`); use the component id as a fallback
28            // so distinct sensors are not collapsed into a single series.
29            let label = if component.label().is_empty() {
30                component.id().unwrap_or_default()
31            } else {
32                component.label()
33            };
34            let tags = || metric_tags!(COMPONENT => label);
35            // Skip non-finite readings: sysinfo can return `NaN` when a sensor
36            // file exists but the read fails, and downstream sinks reject NaN.
37            if let Some(temperature) = component.temperature().filter(|t| t.is_finite()) {
38                output.gauge(TEMPERATURE_CELSIUS, temperature as f64, tags());
39            }
40            if let Some(max) = component.max().filter(|m| m.is_finite()) {
41                output.gauge(TEMPERATURE_MAX_CELSIUS, max as f64, tags());
42            }
43            if let Some(critical) = component.critical().filter(|c| c.is_finite()) {
44                output.gauge(TEMPERATURE_CRITICAL_CELSIUS, critical as f64, tags());
45            }
46        }
47    }
48}
49
#[cfg(test)]
mod tests {
    use super::{
        super::{HostMetrics, HostMetricsConfig, MetricsBuffer, tests::all_gauges},
        COMPONENT,
    };

    /// Runs one temperature scrape with the default configuration and checks
    /// the shape of whatever metrics come back.
    #[tokio::test]
    async fn generates_temperature_metrics() {
        let mut buffer = MetricsBuffer::new(None);
        HostMetrics::new(HostMetricsConfig::default())
            .temperature_metrics(&mut buffer)
            .await;
        let metrics = buffer.metrics;

        // Temperature sensors are not exposed in many environments (containers,
        // virtual machines, CI runners), so the component list can legitimately
        // be empty. When metrics are produced, they must all be gauges named
        // `temperature*` and carry the `component` tag.
        assert!(all_gauges(&metrics));
        for metric in &metrics {
            assert!(
                metric.name().starts_with("temperature"),
                "unexpected metric name: {}",
                metric.name()
            );
            // Bind the tags first so a metric with no tags at all fails with
            // its own message rather than the missing-key one.
            let tags = metric.tags().expect("temperature metric is missing tags");
            assert!(
                tags.contains_key(COMPONENT),
                "temperature metric is missing the `component` tag"
            );
        }
    }
}