# Metric definitions for the `system.*` namespace.
#
# Each entry under `groups` declares one metric:
#   id / metric_name  - registry identifier and emitted metric name
#   annotations       - code-generation hint for the value type (int / double)
#   stability         - all entries here are marked `development`
#   brief / note      - short and extended Markdown descriptions
#   instrument / unit - instrument kind and unit string
#   attributes        - referenced attributes, optionally with a refined
#                       brief/examples for this metric
#   entity_associations - all entries here are associated with `host`
#
# NOTE(review): several metrics below carry an empty `brief` (""). They are
# left untouched here; wording should come from the convention owners.
groups:
  # system.* metrics
  - id: metric.system.uptime
    type: metric
    metric_name: system.uptime
    annotations:
      code_generation:
        metric_value_type: double
    stability: development
    brief: "The time the system has been running."
    note: |
      Instrumentations SHOULD use a gauge with type `double` and measure uptime in seconds as a floating point number with the highest precision available.
      The actual accuracy would depend on the instrumentation and operating system.
    instrument: gauge
    unit: "s"
    entity_associations:
      - host

  # system.cpu.* metrics
  - id: metric.system.cpu.physical.count
    type: metric
    metric_name: system.cpu.physical.count
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Reports the number of actual physical processor cores on the hardware."
    note: "Calculated by multiplying the number of sockets by the number of cores per socket"
    instrument: updowncounter
    unit: "{cpu}"
    attributes: []
    entity_associations:
      - host

  - id: metric.system.cpu.logical.count
    type: metric
    metric_name: system.cpu.logical.count
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Reports the number of logical (virtual) processor cores created by the operating system to manage multitasking."
    note: "Calculated by multiplying the number of sockets by the number of cores per socket, and then by the number of threads per core"
    instrument: updowncounter
    unit: "{cpu}"
    attributes: []
    entity_associations:
      - host

  - id: metric.system.cpu.time
    type: metric
    metric_name: system.cpu.time
    annotations:
      code_generation:
        metric_value_type: double
    stability: development
    brief: "Seconds each logical CPU spent on each mode."
    instrument: counter
    unit: "s"
    attributes:
      - ref: cpu.mode
        note: "Following states SHOULD be used: `user`, `system`, `nice`, `idle`, `iowait`, `interrupt`, `steal`"
      - ref: cpu.logical_number
    entity_associations:
      - host

  - id: metric.system.cpu.utilization
    type: metric
    metric_name: system.cpu.utilization
    annotations:
      code_generation:
        metric_value_type: double
    stability: development
    brief: "For each logical CPU, the utilization is calculated as the change in cumulative CPU time (cpu.time) over a measurement interval, divided by the elapsed time."
    instrument: gauge
    unit: "1"
    attributes:
      - ref: cpu.mode
        note: "Following modes SHOULD be used: `user`, `system`, `nice`, `idle`, `iowait`, `interrupt`, `steal`"
      - ref: cpu.logical_number
    entity_associations:
      - host

  - id: metric.system.cpu.frequency
    type: metric
    metric_name: system.cpu.frequency
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Operating frequency of the logical CPU in Hertz."
    instrument: gauge
    unit: "Hz"
    attributes:
      - ref: cpu.logical_number
    entity_associations:
      - host

  # system.memory.* metrics
  - id: metric.system.memory.usage
    type: metric
    metric_name: system.memory.usage
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Reports memory in use by state."
    note: |
      The sum over all `system.memory.state` values SHOULD equal the total memory
      available on the system, that is `system.memory.limit`.
    instrument: updowncounter
    unit: "By"
    attributes:
      - ref: system.memory.state
    entity_associations:
      - host

  - id: metric.system.memory.limit
    type: metric
    metric_name: system.memory.limit
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Total memory available in the system."
    note: |
      Its value SHOULD equal the sum of `system.memory.state` over all states.
    instrument: updowncounter
    unit: "By"
    attributes: []
    entity_associations:
      - host

  - id: metric.system.memory.shared
    type: metric
    metric_name: system.memory.shared
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Shared memory used (mostly by tmpfs)."
    note: |
      Equivalent of `shared` from [`free` command](https://man7.org/linux/man-pages/man1/free.1.html) or
      `Shmem` from [`/proc/meminfo`](https://man7.org/linux/man-pages/man5/proc.5.html)
    instrument: updowncounter
    unit: "By"
    entity_associations:
      - host

  - id: metric.system.memory.utilization
    type: metric
    metric_name: system.memory.utilization
    annotations:
      code_generation:
        metric_value_type: double
    stability: development
    brief: ""
    instrument: gauge
    unit: "1"
    attributes:
      - ref: system.memory.state
    entity_associations:
      - host

  # system.paging.* metrics
  - id: metric.system.paging.usage
    type: metric
    metric_name: system.paging.usage
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Unix swap or windows pagefile usage."
    instrument: updowncounter
    unit: "By"
    attributes:
      - ref: system.paging.state
      - ref: system.device
        brief: Unique identifier for the device responsible for managing paging operations.
        examples: ["/dev/dm-0"]
    entity_associations:
      - host

  - id: metric.system.paging.utilization
    type: metric
    metric_name: system.paging.utilization
    annotations:
      code_generation:
        metric_value_type: double
    stability: development
    brief: ""
    instrument: gauge
    unit: "1"
    attributes:
      - ref: system.paging.state
      - ref: system.device
        brief: Unique identifier for the device responsible for managing paging operations.
        examples: ["/dev/dm-0"]
    entity_associations:
      - host

  - id: metric.system.paging.faults
    type: metric
    metric_name: system.paging.faults
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: ""
    instrument: counter
    unit: "{fault}"
    attributes:
      - ref: system.paging.type
    entity_associations:
      - host

  - id: metric.system.paging.operations
    type: metric
    metric_name: system.paging.operations
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: ""
    instrument: counter
    unit: "{operation}"
    attributes:
      - ref: system.paging.type
      - ref: system.paging.direction
    entity_associations:
      - host

  # system.disk.* metrics
  - id: metric.system.disk.io
    type: metric
    metric_name: system.disk.io
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: ""
    instrument: counter
    unit: "By"
    attributes:
      - ref: system.device
      - ref: disk.io.direction
    entity_associations:
      - host

  - id: metric.system.disk.operations
    type: metric
    metric_name: system.disk.operations
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: ""
    instrument: counter
    unit: "{operation}"
    attributes:
      - ref: system.device
      - ref: disk.io.direction
    entity_associations:
      - host

  - id: metric.system.disk.io_time
    type: metric
    metric_name: system.disk.io_time
    annotations:
      code_generation:
        metric_value_type: double
    stability: development
    brief: "Time disk spent activated."
    instrument: counter
    unit: "s"
    note: |
      The real elapsed time ("wall clock") used in the I/O path (time from operations running in parallel are not counted). Measured as:

      - Linux: Field 13 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats)
      - Windows: The complement of
        ["Disk\% Idle Time"](https://learn.microsoft.com/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained)
        performance counter: `uptime * (100 - "Disk\% Idle Time") / 100`
    attributes:
      - ref: system.device
    entity_associations:
      - host

  - id: metric.system.disk.operation_time
    type: metric
    metric_name: system.disk.operation_time
    annotations:
      code_generation:
        metric_value_type: double
    stability: development
    brief: "Sum of the time each operation took to complete."
    instrument: counter
    unit: "s"
    note: |
      Because it is the sum of time each request took, parallel-issued requests each contribute to make the count grow. Measured as:

      - Linux: Fields 7 & 11 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats)
      - Windows: "Avg. Disk sec/Read" perf counter multiplied by "Disk Reads/sec" perf counter (similar for Writes)
    attributes:
      - ref: system.device
      - ref: disk.io.direction
    entity_associations:
      - host

  - id: metric.system.disk.merged
    type: metric
    metric_name: system.disk.merged
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: ""
    instrument: counter
    unit: "{operation}"
    attributes:
      - ref: system.device
      - ref: disk.io.direction
    entity_associations:
      - host

  - id: metric.system.disk.limit
    type: metric
    metric_name: system.disk.limit
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "The total storage capacity of the disk."
    instrument: updowncounter
    unit: "By"
    attributes:
      - ref: system.device
    entity_associations:
      - host

  # system.filesystem.* metrics
  - id: metric.system.filesystem.usage
    type: metric
    metric_name: system.filesystem.usage
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Reports a filesystem's space usage across different states."
    note: |
      The sum of all `system.filesystem.usage` values over the different `system.filesystem.state` attributes
      SHOULD equal the total storage capacity of the filesystem, that is `system.filesystem.limit`.
    instrument: updowncounter
    unit: "By"
    attributes:
      - ref: system.device
        brief: Identifier for the device where the filesystem resides.
        examples: ["/dev/sda", "\\network-drive"]
      - ref: system.filesystem.state
      - ref: system.filesystem.type
      - ref: system.filesystem.mode
      - ref: system.filesystem.mountpoint
    entity_associations:
      - host

  - id: metric.system.filesystem.utilization
    type: metric
    metric_name: system.filesystem.utilization
    annotations:
      code_generation:
        metric_value_type: double
    stability: development
    brief: ""
    instrument: gauge
    unit: "1"
    attributes:
      - ref: system.device
        brief: Identifier for the device where the filesystem resides.
        examples: ["/dev/sda", "\\network-drive"]
      - ref: system.filesystem.state
      - ref: system.filesystem.type
      - ref: system.filesystem.mode
      - ref: system.filesystem.mountpoint
    entity_associations:
      - host

  - id: metric.system.filesystem.limit
    type: metric
    metric_name: system.filesystem.limit
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "The total storage capacity of the filesystem."
    instrument: updowncounter
    unit: "By"
    attributes:
      - ref: system.device
        brief: Identifier for the device where the filesystem resides.
        examples: ["/dev/sda", "\\network-drive"]
      - ref: system.filesystem.type
      - ref: system.filesystem.mode
      - ref: system.filesystem.mountpoint
    entity_associations:
      - host

  # system.network.* metrics
  - id: metric.system.network.dropped
    type: metric
    metric_name: system.network.dropped
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Count of packets that are dropped or discarded even though there was no error."
    instrument: counter
    unit: "{packet}"
    note: |
      Measured as:

      - Linux: the `drop` column in `/proc/net/dev` ([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html))
      - Windows: [`InDiscards`/`OutDiscards`](https://docs.microsoft.com/windows/win32/api/netioapi/ns-netioapi-mib_if_row2)
        from [`GetIfEntry2`](https://docs.microsoft.com/windows/win32/api/netioapi/nf-netioapi-getifentry2)
    attributes:
      - ref: network.interface.name
      - ref: network.io.direction
    entity_associations:
      - host

  - id: metric.system.network.packets
    type: metric
    metric_name: system.network.packets
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: ""
    instrument: counter
    unit: "{packet}"
    attributes:
      # NOTE(review): the other system.network.* metrics in this file identify
      # the interface with `network.interface.name`; this one uses
      # `system.device`. Left unchanged because switching the attribute alters
      # emitted telemetry - confirm whether the difference is intentional.
      - ref: system.device
      - ref: network.io.direction
    entity_associations:
      - host

  - id: metric.system.network.errors
    type: metric
    metric_name: system.network.errors
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Count of network errors detected."
    instrument: counter
    unit: "{error}"
    note: |
      Measured as:

      - Linux: the `errs` column in `/proc/net/dev` ([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)).
      - Windows: [`InErrors`/`OutErrors`](https://docs.microsoft.com/windows/win32/api/netioapi/ns-netioapi-mib_if_row2)
        from [`GetIfEntry2`](https://docs.microsoft.com/windows/win32/api/netioapi/nf-netioapi-getifentry2).
    attributes:
      - ref: network.interface.name
      - ref: network.io.direction
    entity_associations:
      - host

  - id: metric.system.network.io
    type: metric
    metric_name: system.network.io
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: ""
    instrument: counter
    unit: "By"
    attributes:
      - ref: network.interface.name
      - ref: network.io.direction
    entity_associations:
      - host

  - id: metric.system.network.connection.count
    type: metric
    metric_name: system.network.connection.count
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: ""
    instrument: updowncounter
    unit: "{connection}"
    attributes:
      - ref: network.interface.name
      - ref: network.connection.state
      - ref: network.transport
    entity_associations:
      - host

  # system.process.* metrics
  - id: metric.system.process.count
    type: metric
    metric_name: system.process.count
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Total number of processes in each state."
    instrument: updowncounter
    unit: "{process}"
    attributes:
      - ref: system.process.status
    entity_associations:
      - host

  - id: metric.system.process.created
    type: metric
    metric_name: system.process.created
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Total number of processes created over uptime of the host."
    instrument: counter
    unit: "{process}"
    attributes: []
    entity_associations:
      - host

  # system.linux.* metrics
  - id: metric.system.linux.memory.available
    type: metric
    metric_name: system.linux.memory.available
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "An estimate of how much memory is available for starting new applications, without causing swapping."
    note: |
      This is an alternative to `system.memory.usage` metric with `state=free`.
      Linux starting from 3.14 exports "available" memory. It takes "free" memory as a baseline, and then factors in kernel-specific values.
      This is supposed to be more accurate than just "free" memory.
      For reference, see the calculations [here](https://superuser.com/a/980821).
      See also `MemAvailable` in [/proc/meminfo](https://man7.org/linux/man-pages/man5/proc.5.html).
    instrument: updowncounter
    unit: "By"
    attributes: []
    entity_associations:
      - host

  - id: metric.system.linux.memory.slab.usage
    type: metric
    metric_name: system.linux.memory.slab.usage
    annotations:
      code_generation:
        metric_value_type: int
    stability: development
    brief: "Reports the memory used by the Linux kernel for managing caches of frequently used objects."
    note: |
      The sum over the `reclaimable` and `unreclaimable` state values in `system.linux.memory.slab.usage` SHOULD be equal to the total slab memory available on the system.
      Note that the total slab memory is not constant and may vary over time.
      See also the [Slab allocator](https://blogs.oracle.com/linux/post/understanding-linux-kernel-memory-statistics) and `Slab` in [/proc/meminfo](https://man7.org/linux/man-pages/man5/proc.5.html).
    instrument: updowncounter
    unit: "By"
    attributes:
      - ref: linux.memory.slab.state
    entity_associations:
      - host