groups:
# Each list item below declares one metric: its id, metric name, stability,
# human-readable brief/note, instrument kind and unit.

# k8s.pod.* metrics
- id: metric.k8s.pod.uptime
  type: metric
  metric_name: k8s.pod.uptime
  stability: development
  brief: "The time the Pod has been running"
  note: |
    Instrumentations SHOULD use a gauge with type `double` and measure uptime in seconds as
    a floating point number with the highest precision available.
    The actual accuracy would depend on the instrumentation and operating system.
  instrument: gauge
  unit: "s"

# k8s.pod.cpu.* metrics
- id: metric.k8s.pod.cpu.time
  type: metric
  metric_name: k8s.pod.cpu.time
  stability: development
  brief: "Total CPU time consumed"
  note: >
    Total CPU time consumed by the specific Pod on all available CPU cores
  instrument: counter
  unit: "s"
- id: metric.k8s.pod.cpu.usage
  type: metric
  metric_name: k8s.pod.cpu.usage
  stability: development
  brief: "Pod's CPU usage, measured in cpus. Range from 0 to the number of allocatable CPUs"
  note: >
    CPU usage of the specific Pod on all available CPU cores, averaged over the sample window
  instrument: gauge
  unit: "{cpu}"

# k8s.pod.memory.* metrics
- id: metric.k8s.pod.memory.usage
  type: metric
  metric_name: k8s.pod.memory.usage
  stability: development
  brief: "Memory usage of the Pod"
  note: >
    Total memory usage of the Pod
  instrument: gauge
  unit: "By"

# k8s.pod.network.* metrics
# Both network metrics are broken down by interface name and I/O direction.
- id: metric.k8s.pod.network.io
  type: metric
  metric_name: k8s.pod.network.io
  stability: development
  brief: "Network bytes for the Pod"
  instrument: counter
  unit: "By"
  attributes:
  - ref: network.interface.name
  - ref: network.io.direction
- id: metric.k8s.pod.network.errors
  type: metric
  metric_name: k8s.pod.network.errors
  stability: development
  brief: "Pod network errors"
  instrument: counter
  unit: "{error}"
  attributes:
  - ref: network.interface.name
  - ref: network.io.direction

# k8s.node.* metrics
- id: metric.k8s.node.uptime
  type: metric
  metric_name: k8s.node.uptime
  stability: development
  brief: "The time the Node has been running"
  note: |
    Instrumentations SHOULD use a gauge with type `double` and measure uptime in seconds as
    a floating point number with the highest precision available.
    The actual accuracy would depend on the instrumentation and operating system.
  instrument: gauge
  unit: "s"

# k8s.node.cpu.* metrics
- id: metric.k8s.node.cpu.time
  type: metric
  metric_name: k8s.node.cpu.time
  stability: development
  brief: "Total CPU time consumed"
  note: >
    Total CPU time consumed by the specific Node on all available CPU cores
  instrument: counter
  unit: "s"
- id: metric.k8s.node.cpu.usage
  type: metric
  metric_name: k8s.node.cpu.usage
  stability: development
  brief: "Node's CPU usage, measured in cpus. Range from 0 to the number of allocatable CPUs"
  note: >
    CPU usage of the specific Node on all available CPU cores, averaged over the sample window
  instrument: gauge
  unit: "{cpu}"

# k8s.node.memory.* metrics
- id: metric.k8s.node.memory.usage
  type: metric
  metric_name: k8s.node.memory.usage
  stability: development
  brief: "Memory usage of the Node"
  note: >
    Total memory usage of the Node
  instrument: gauge
  unit: "By"

# k8s.node.network.* metrics
# Same attribute breakdown as the Pod network metrics.
- id: metric.k8s.node.network.io
  type: metric
  metric_name: k8s.node.network.io
  stability: development
  brief: "Network bytes for the Node"
  instrument: counter
  unit: "By"
  attributes:
  - ref: network.interface.name
  - ref: network.io.direction
- id: metric.k8s.node.network.errors
  type: metric
  metric_name: k8s.node.network.errors
  stability: development
  brief: "Node network errors"
  instrument: counter
  unit: "{error}"
  attributes:
  - ref: network.interface.name
  - ref: network.io.direction

# k8s.deployment.* metrics
# Each workload metric from here on names, in its note, the K8s API field it
# mirrors and the resource it should be reported against.
- id: metric.k8s.deployment.desired_pods
  type: metric
  metric_name: k8s.deployment.desired_pods
  stability: development
  brief: "Number of desired replica pods in this deployment"
  note: |
    This metric aligns with the `replicas` field of the
    [K8s DeploymentSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#deploymentspec-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.deployment`](../resource/k8s.md#deployment) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.deployment.available_pods
  type: metric
  metric_name: k8s.deployment.available_pods
  stability: development
  brief: "Total number of available replica pods (ready for at least minReadySeconds) targeted by this deployment"
  note: |
    This metric aligns with the `availableReplicas` field of the
    [K8s DeploymentStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#deploymentstatus-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.deployment`](../resource/k8s.md#deployment) resource.
  instrument: updowncounter
  unit: "{pod}"

# k8s.replicaset.* metrics
- id: metric.k8s.replicaset.desired_pods
  type: metric
  metric_name: k8s.replicaset.desired_pods
  stability: development
  brief: "Number of desired replica pods in this replicaset"
  note: |
    This metric aligns with the `replicas` field of the
    [K8s ReplicaSetSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#replicasetspec-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.replicaset`](../resource/k8s.md#replicaset) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.replicaset.available_pods
  type: metric
  metric_name: k8s.replicaset.available_pods
  stability: development
  brief: "Total number of available replica pods (ready for at least minReadySeconds) targeted by this replicaset"
  note: |
    This metric aligns with the `availableReplicas` field of the
    [K8s ReplicaSetStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#replicasetstatus-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.replicaset`](../resource/k8s.md#replicaset) resource.
  instrument: updowncounter
  unit: "{pod}"

# k8s.replicationcontroller.* metrics
- id: metric.k8s.replicationcontroller.desired_pods
  type: metric
  metric_name: k8s.replicationcontroller.desired_pods
  stability: development
  brief: "Number of desired replica pods in this replication controller"
  note: |
    This metric aligns with the `replicas` field of the
    [K8s ReplicationControllerSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#replicationcontrollerspec-v1-core).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.replicationcontroller`](../resource/k8s.md#replicationcontroller) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.replicationcontroller.available_pods
  type: metric
  metric_name: k8s.replicationcontroller.available_pods
  stability: development
  brief: "Total number of available replica pods (ready for at least minReadySeconds) targeted by this replication controller"
  note: |
    This metric aligns with the `availableReplicas` field of the
    [K8s ReplicationControllerStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#replicationcontrollerstatus-v1-core).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.replicationcontroller`](../resource/k8s.md#replicationcontroller) resource.
  instrument: updowncounter
  unit: "{pod}"

# k8s.statefulset.* metrics
- id: metric.k8s.statefulset.desired_pods
  type: metric
  metric_name: k8s.statefulset.desired_pods
  stability: development
  brief: "Number of desired replica pods in this statefulset"
  note: |
    This metric aligns with the `replicas` field of the
    [K8s StatefulSetSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#statefulsetspec-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.statefulset`](../resource/k8s.md#statefulset) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.statefulset.ready_pods
  type: metric
  metric_name: k8s.statefulset.ready_pods
  stability: development
  brief: "The number of replica pods created for this statefulset with a Ready Condition"
  note: |
    This metric aligns with the `readyReplicas` field of the
    [K8s StatefulSetStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#statefulsetstatus-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.statefulset`](../resource/k8s.md#statefulset) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.statefulset.current_pods
  type: metric
  metric_name: k8s.statefulset.current_pods
  stability: development
  brief: "The number of replica pods created by the statefulset controller from the statefulset version indicated by currentRevision"
  note: |
    This metric aligns with the `currentReplicas` field of the
    [K8s StatefulSetStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#statefulsetstatus-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.statefulset`](../resource/k8s.md#statefulset) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.statefulset.updated_pods
  type: metric
  metric_name: k8s.statefulset.updated_pods
  stability: development
  brief: "Number of replica pods created by the statefulset controller from the statefulset version indicated by updateRevision"
  note: |
    This metric aligns with the `updatedReplicas` field of the
    [K8s StatefulSetStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#statefulsetstatus-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.statefulset`](../resource/k8s.md#statefulset) resource.
  instrument: updowncounter
  unit: "{pod}"

# k8s.hpa.* metrics
- id: metric.k8s.hpa.desired_pods
  type: metric
  metric_name: k8s.hpa.desired_pods
  stability: development
  brief: "Desired number of replica pods managed by this horizontal pod autoscaler, as last calculated by the autoscaler"
  note: |
    This metric aligns with the `desiredReplicas` field of the
    [K8s HorizontalPodAutoscalerStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#horizontalpodautoscalerstatus-v2-autoscaling).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.hpa`](../resource/k8s.md#horizontalpodautoscaler) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.hpa.current_pods
  type: metric
  metric_name: k8s.hpa.current_pods
  stability: development
  brief: "Current number of replica pods managed by this horizontal pod autoscaler, as last seen by the autoscaler"
  note: |
    This metric aligns with the `currentReplicas` field of the
    [K8s HorizontalPodAutoscalerStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#horizontalpodautoscalerstatus-v2-autoscaling).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.hpa`](../resource/k8s.md#horizontalpodautoscaler) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.hpa.max_pods
  type: metric
  metric_name: k8s.hpa.max_pods
  stability: development
  brief: "The upper limit for the number of replica pods to which the autoscaler can scale up"
  note: |
    This metric aligns with the `maxReplicas` field of the
    [K8s HorizontalPodAutoscalerSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#horizontalpodautoscalerspec-v2-autoscaling).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.hpa`](../resource/k8s.md#horizontalpodautoscaler) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.hpa.min_pods
  type: metric
  metric_name: k8s.hpa.min_pods
  stability: development
  brief: "The lower limit for the number of replica pods to which the autoscaler can scale down"
  note: |
    This metric aligns with the `minReplicas` field of the
    [K8s HorizontalPodAutoscalerSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#horizontalpodautoscalerspec-v2-autoscaling).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.hpa`](../resource/k8s.md#horizontalpodautoscaler) resource.
  instrument: updowncounter
  unit: "{pod}"

# k8s.daemonset.* metrics
# These count nodes rather than pods, hence the "{node}" unit.
- id: metric.k8s.daemonset.current_scheduled_nodes
  type: metric
  metric_name: k8s.daemonset.current_scheduled_nodes
  stability: development
  brief: "Number of nodes that are running at least 1 daemon pod and are supposed to run the daemon pod"
  note: |
    This metric aligns with the `currentNumberScheduled` field of the
    [K8s DaemonSetStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#daemonsetstatus-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.daemonset`](../resource/k8s.md#daemonset) resource.
  instrument: updowncounter
  unit: "{node}"
- id: metric.k8s.daemonset.desired_scheduled_nodes
  type: metric
  metric_name: k8s.daemonset.desired_scheduled_nodes
  stability: development
  brief: "Number of nodes that should be running the daemon pod (including nodes currently running the daemon pod)"
  note: |
    This metric aligns with the `desiredNumberScheduled` field of the
    [K8s DaemonSetStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#daemonsetstatus-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.daemonset`](../resource/k8s.md#daemonset) resource.
  instrument: updowncounter
  unit: "{node}"
- id: metric.k8s.daemonset.misscheduled_nodes
  type: metric
  metric_name: k8s.daemonset.misscheduled_nodes
  stability: development
  brief: "Number of nodes that are running the daemon pod, but are not supposed to run the daemon pod"
  note: |
    This metric aligns with the `numberMisscheduled` field of the
    [K8s DaemonSetStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#daemonsetstatus-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.daemonset`](../resource/k8s.md#daemonset) resource.
  instrument: updowncounter
  unit: "{node}"
- id: metric.k8s.daemonset.ready_nodes
  type: metric
  metric_name: k8s.daemonset.ready_nodes
  stability: development
  brief: "Number of nodes that should be running the daemon pod and have one or more of the daemon pod running and ready"
  note: |
    This metric aligns with the `numberReady` field of the
    [K8s DaemonSetStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#daemonsetstatus-v1-apps).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.daemonset`](../resource/k8s.md#daemonset) resource.
  instrument: updowncounter
  unit: "{node}"

# k8s.job.* metrics
- id: metric.k8s.job.active_pods
  type: metric
  metric_name: k8s.job.active_pods
  stability: development
  brief: "The number of pending and actively running pods for a job"
  note: |
    This metric aligns with the `active` field of the
    [K8s JobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobstatus-v1-batch).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.job`](../resource/k8s.md#job) resource.
  instrument: updowncounter
  unit: "{pod}"
# k8s.job.* pod-count metrics; each note names the JobStatus or JobSpec field
# the metric mirrors.
- id: metric.k8s.job.failed_pods
  type: metric
  metric_name: k8s.job.failed_pods
  stability: development
  brief: "The number of pods which reached phase Failed for a job"
  note: |
    This metric aligns with the `failed` field of the
    [K8s JobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobstatus-v1-batch).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.job`](../resource/k8s.md#job) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.job.successful_pods
  type: metric
  metric_name: k8s.job.successful_pods
  stability: development
  brief: "The number of pods which reached phase Succeeded for a job"
  note: |
    This metric aligns with the `succeeded` field of the
    [K8s JobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobstatus-v1-batch).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.job`](../resource/k8s.md#job) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.job.desired_successful_pods
  type: metric
  metric_name: k8s.job.desired_successful_pods
  stability: development
  brief: "The desired number of successfully finished pods the job should be run with"
  note: |
    This metric aligns with the `completions` field of the
    [K8s JobSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobspec-v1-batch).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.job`](../resource/k8s.md#job) resource.
  instrument: updowncounter
  unit: "{pod}"
- id: metric.k8s.job.max_parallel_pods
  type: metric
  metric_name: k8s.job.max_parallel_pods
  stability: development
  brief: "The max desired number of pods the job should run at any given time"
  note: |
    This metric aligns with the `parallelism` field of the
    [K8s JobSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#jobspec-v1-batch).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.job`](../resource/k8s.md#job) resource.
  instrument: updowncounter
  unit: "{pod}"
# k8s.cronjob.* metrics
- id: metric.k8s.cronjob.active_jobs
  type: metric
  metric_name: k8s.cronjob.active_jobs
  stability: development
  brief: "The number of actively running jobs for a cronjob"
  note: |
    This metric aligns with the `active` field of the
    [K8s CronJobStatus](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#cronjobstatus-v1-batch).

    This metric SHOULD, at a minimum, be reported against a
    [`k8s.cronjob`](../resource/k8s.md#cronjob) resource.
  instrument: updowncounter
  unit: "{job}"

# k8s.namespace.* metrics
- id: metric.k8s.namespace.phase
  type: metric
  metric_name: k8s.namespace.phase
  stability: development
  brief: "Describes number of K8s namespaces that are currently in a given phase."
  note: |
    This metric SHOULD, at a minimum, be reported against a
    [`k8s.namespace`](../resource/k8s.md#namespace) resource.
  instrument: updowncounter
  unit: "{namespace}"
  attributes:
  # The phase attribute is mandatory on every data point of this metric.
  - ref: k8s.namespace.phase
    requirement_level: required

# k8s.container.* metrics
# Every container metric is associated with the `k8s.container` entity.
# The limit/request pairs all point at the same ResourceRequirements reference.
- id: metric.k8s.container.cpu.limit
  type: metric
  metric_name: k8s.container.cpu.limit
  stability: development
  brief: "Maximum CPU resource limit set for the container"
  entity_associations:
  - k8s.container
  note: |
    See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#resourcerequirements-v1-core for details.
  instrument: gauge
  unit: "{cpu}"
- id: metric.k8s.container.cpu.request
  type: metric
  metric_name: k8s.container.cpu.request
  stability: development
  brief: "CPU resource requested for the container"
  entity_associations:
  - k8s.container
  note: |
    See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#resourcerequirements-v1-core for details.
  instrument: gauge
  unit: "{cpu}"
- id: metric.k8s.container.memory.limit
  type: metric
  metric_name: k8s.container.memory.limit
  stability: development
  brief: "Maximum memory resource limit set for the container"
  entity_associations:
  - k8s.container
  note: |
    See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#resourcerequirements-v1-core for details.
  instrument: gauge
  unit: "By"
- id: metric.k8s.container.memory.request
  type: metric
  metric_name: k8s.container.memory.request
  stability: development
  brief: "Memory resource requested for the container"
  entity_associations:
  - k8s.container
  note: |
    See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#resourcerequirements-v1-core for details.
  instrument: gauge
  unit: "By"
- id: metric.k8s.container.storage.limit
  type: metric
  metric_name: k8s.container.storage.limit
  stability: development
  brief: "Maximum storage resource limit set for the container"
  entity_associations:
  - k8s.container
  note: |
    See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#resourcerequirements-v1-core for details.
  instrument: gauge
  unit: "By"
- id: metric.k8s.container.storage.request
  type: metric
  metric_name: k8s.container.storage.request
  stability: development
  brief: "Storage resource requested for the container"
  entity_associations:
  - k8s.container
  note: |
    See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#resourcerequirements-v1-core for details.
  instrument: gauge
  unit: "By"
- id: metric.k8s.container.ephemeral_storage.limit
  type: metric
  metric_name: k8s.container.ephemeral_storage.limit
  stability: development
  brief: "Maximum ephemeral storage resource limit set for the container"
  entity_associations:
  - k8s.container
  note: |
    See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#resourcerequirements-v1-core for details.
  instrument: gauge
  unit: "By"
- id: metric.k8s.container.ephemeral_storage.request
  type: metric
  metric_name: k8s.container.ephemeral_storage.request
  stability: development
  brief: "Ephemeral storage resource requested for the container"
  entity_associations:
  - k8s.container
  note: |
    See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#resourcerequirements-v1-core for details.
  instrument: gauge
  unit: "By"
- id: metric.k8s.container.restart.count
  type: metric
  metric_name: k8s.container.restart.count
  stability: development
  brief: "Describes how many times the container has restarted (since the last counter reset)"
  entity_associations:
  - k8s.container
  note: |
    This value is pulled directly from the K8s API and the value can go indefinitely high and be reset to 0
    at any time depending on how your kubelet is configured to prune dead containers.
    It is best to not depend too much on the exact value but rather look at it as either == 0, in which case
    you can conclude there were no restarts in the recent past, or > 0, in which case
    you can conclude there were restarts in the recent past, and not try and analyze the value beyond that.
  # The note above says the value may be reset to 0 at any time.
  instrument: updowncounter
  unit: "{restart}"