vector/sources/host_metrics/temperature.rs
1use vector_lib::metric_tags;
2
3use super::HostMetrics;
4
/// Tag key that identifies which sensor a temperature sample belongs to.
const COMPONENT: &str = "component";

/// Gauge name for a sensor's current temperature reading.
const TEMPERATURE_CELSIUS: &str = "temperature_celsius";

/// Gauge name for a sensor's maximum temperature reading.
const TEMPERATURE_MAX_CELSIUS: &str = "temperature_max_celsius";

/// Gauge name for a sensor's critical temperature threshold.
const TEMPERATURE_CRITICAL_CELSIUS: &str = "temperature_critical_celsius";
9
10impl HostMetrics {
11 pub async fn temperature_metrics(&mut self, output: &mut super::MetricsBuffer) {
12 output.name = "temperature";
13 // Refresh the long-lived component list in place. `Component::max()` is
14 // derived by sysinfo from successive refreshes when the sensor does not
15 // expose a hardware maximum, so recreating the list every scrape (as a
16 // fresh `Components::new_with_refreshed_list()` would) resets that
17 // history and makes the reported max equal the latest sample.
18 //
19 // Pass `false` so components that are not re-listed on a refresh are
20 // kept rather than dropped: sysinfo only sets its internal "updated"
21 // flag for `/sys/class/hwmon` sensors, so `refresh(true)` would prune
22 // the `/sys/class/thermal` fallback sensors (used e.g. on Raspberry Pi)
23 // on every scrape and drop their series entirely.
24 self.components.refresh(false);
25 for component in &self.components {
26 // Some sensors expose an empty label (for example when sysinfo falls
27 // back to `/sys/class/thermal`); use the component id as a fallback
28 // so distinct sensors are not collapsed into a single series.
29 let label = if component.label().is_empty() {
30 component.id().unwrap_or_default()
31 } else {
32 component.label()
33 };
34 let tags = || metric_tags!(COMPONENT => label);
35 // Skip non-finite readings: sysinfo can return `NaN` when a sensor
36 // file exists but the read fails, and downstream sinks reject NaN.
37 if let Some(temperature) = component.temperature().filter(|t| t.is_finite()) {
38 output.gauge(TEMPERATURE_CELSIUS, temperature as f64, tags());
39 }
40 if let Some(max) = component.max().filter(|m| m.is_finite()) {
41 output.gauge(TEMPERATURE_MAX_CELSIUS, max as f64, tags());
42 }
43 if let Some(critical) = component.critical().filter(|c| c.is_finite()) {
44 output.gauge(TEMPERATURE_CRITICAL_CELSIUS, critical as f64, tags());
45 }
46 }
47 }
48}
49
#[cfg(test)]
mod tests {
    use super::{
        super::{HostMetrics, HostMetricsConfig, MetricsBuffer, tests::all_gauges},
        COMPONENT,
    };

    /// Runs one temperature scrape with the default configuration and checks
    /// the shape of whatever metrics come back.
    #[tokio::test]
    async fn generates_temperature_metrics() {
        let mut host = HostMetrics::new(HostMetricsConfig::default());
        let mut collected = MetricsBuffer::new(None);
        host.temperature_metrics(&mut collected).await;
        let gauges = collected.metrics;

        // Many environments (containers, virtual machines, CI runners) expose
        // no temperature sensors, so an empty result is acceptable. Anything
        // that is produced must be a gauge whose name starts with
        // `temperature` and which carries the `component` tag.
        assert!(all_gauges(&gauges));
        for gauge in &gauges {
            let name = gauge.name();
            assert!(
                name.starts_with("temperature"),
                "unexpected metric name: {}",
                name
            );

            let tags = gauge.tags().expect("temperature metric is missing tags");
            assert!(
                tags.contains_key(COMPONENT),
                "temperature metric is missing the `component` tag"
            );
        }
    }
}