From fb7b4209ef5b383b45034dcfc9d69ff9842f32db Mon Sep 17 00:00:00 2001 From: Povilas Versockas Date: Thu, 29 May 2025 18:16:47 +0300 Subject: [PATCH] add k8s.container.status.state and k8s.container.status.reason metrics (#1784) Co-authored-by: Christos Markou Co-authored-by: Jina Jain Co-authored-by: Liudmila Molkova --- .chloggen/container-status.yaml | 22 +++++++++ docs/registry/attributes/k8s.md | 28 +++++++++++ docs/system/k8s-metrics.md | 87 +++++++++++++++++++++++++++++++++ model/k8s/metrics.yaml | 28 +++++++++++ model/k8s/registry.yaml | 66 +++++++++++++++++++++++++ 5 files changed, 231 insertions(+) create mode 100755 .chloggen/container-status.yaml diff --git a/.chloggen/container-status.yaml b/.chloggen/container-status.yaml new file mode 100755 index 000000000..2733a8c20 --- /dev/null +++ b/.chloggen/container-status.yaml @@ -0,0 +1,22 @@ +# Use this changelog template to create an entry for release notes. +# +# If your change doesn't affect end users you should instead start +# your pull request title with [chore] or use the "Skip Changelog" label. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the area of concern in the attributes-registry, (e.g. http, cloud, db) +component: k8s + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Add k8s.container.status.state and k8s.container.status.reason metrics" + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +# The values here must be integers. +issues: [1672] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. 
+subtext: diff --git a/docs/registry/attributes/k8s.md b/docs/registry/attributes/k8s.md index f46c66cab..c2d9097bf 100644 --- a/docs/registry/attributes/k8s.md +++ b/docs/registry/attributes/k8s.md @@ -17,6 +17,8 @@ Kubernetes resource attributes. | `k8s.container.name` | string | The name of the Container from Pod specification, must be unique within a Pod. Container runtime usually uses different globally unique name (`container.name`). | `redis` | ![Development](https://img.shields.io/badge/-development-blue) | | `k8s.container.restart_count` | int | Number of times the container was restarted. This attribute can be used to identify a particular container (running or stopped) within a container spec. | | ![Development](https://img.shields.io/badge/-development-blue) | | `k8s.container.status.last_terminated_reason` | string | Last terminated reason of the Container. | `Evicted`; `Error` | ![Development](https://img.shields.io/badge/-development-blue) | +| `k8s.container.status.reason` | string | The reason for the container state. Corresponds to the `reason` field of the: [K8s ContainerStateWaiting](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#containerstatewaiting-v1-core) or [K8s ContainerStateTerminated](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#containerstateterminated-v1-core) | `ContainerCreating`; `CrashLoopBackOff`; `CreateContainerConfigError`; `ErrImagePull`; `ImagePullBackOff`; `OOMKilled`; `Completed`; `Error`; `ContainerCannotRun` | ![Development](https://img.shields.io/badge/-development-blue) | +| `k8s.container.status.state` | string | The state of the container. 
[K8s ContainerState](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#containerstate-v1-core) | `terminated`; `running`; `waiting` | ![Development](https://img.shields.io/badge/-development-blue) | | `k8s.cronjob.annotation.` | string | The cronjob annotation placed on the CronJob, the `` being the annotation name, the value being the annotation value. [2] | `4`; `` | ![Development](https://img.shields.io/badge/-development-blue) | | `k8s.cronjob.label.` | string | The label placed on the CronJob, the `` being the label name, the value being the label value. [3] | `weekly`; `` | ![Development](https://img.shields.io/badge/-development-blue) | | `k8s.cronjob.name` | string | The name of the CronJob. | `opentelemetry` | ![Development](https://img.shields.io/badge/-development-blue) | @@ -160,6 +162,32 @@ conflict. --- +`k8s.container.status.reason` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. + +| Value | Description | Stability | +|---|---|---| +| `Completed` | The container has completed execution. | ![Development](https://img.shields.io/badge/-development-blue) | +| `ContainerCannotRun` | The container cannot run. | ![Development](https://img.shields.io/badge/-development-blue) | +| `ContainerCreating` | The container is being created. | ![Development](https://img.shields.io/badge/-development-blue) | +| `CrashLoopBackOff` | The container is in a crash loop back off state. | ![Development](https://img.shields.io/badge/-development-blue) | +| `CreateContainerConfigError` | There was an error creating the container configuration. | ![Development](https://img.shields.io/badge/-development-blue) | +| `ErrImagePull` | There was an error pulling the container image. | ![Development](https://img.shields.io/badge/-development-blue) | +| `Error` | There was an error with the container. 
| ![Development](https://img.shields.io/badge/-development-blue) | +| `ImagePullBackOff` | The container image pull is in back off state. | ![Development](https://img.shields.io/badge/-development-blue) | +| `OOMKilled` | The container was killed due to out of memory. | ![Development](https://img.shields.io/badge/-development-blue) | + +--- + +`k8s.container.status.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. + +| Value | Description | Stability | +|---|---|---| +| `running` | The container is running. | ![Development](https://img.shields.io/badge/-development-blue) | +| `terminated` | The container has terminated. | ![Development](https://img.shields.io/badge/-development-blue) | +| `waiting` | The container is waiting. | ![Development](https://img.shields.io/badge/-development-blue) | + +--- + `k8s.namespace.phase` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. | Value | Description | Stability | diff --git a/docs/system/k8s-metrics.md b/docs/system/k8s-metrics.md index 3f7a45cd1..43f55d461 100644 --- a/docs/system/k8s-metrics.md +++ b/docs/system/k8s-metrics.md @@ -24,6 +24,9 @@ and therefore inherit its attributes, like `k8s.pod.name` and `k8s.pod.uid`. - [Metric: `k8s.pod.memory.usage`](#metric-k8spodmemoryusage) - [Metric: `k8s.pod.network.io`](#metric-k8spodnetworkio) - [Metric: `k8s.pod.network.errors`](#metric-k8spodnetworkerrors) +- [Container metrics](#container-metrics) + - [Metric: `k8s.container.status.state`](#metric-k8scontainerstatusstate) + - [Metric: `k8s.container.status.reason`](#metric-k8scontainerstatusreason) - [Node metrics](#node-metrics) - [Metric: `k8s.node.uptime`](#metric-k8snodeuptime) - [Metric: `k8s.node.cpu.time`](#metric-k8snodecputime) @@ -239,6 +242,90 @@ This metric is [recommended][MetricRecommended]. 
+## Container metrics + +**Description:** Container level metrics captured under the namespace `k8s.container`. + +### Metric: `k8s.container.status.state` + +This metric is [recommended][MetricRecommended]. + + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | Entity Associations | +| -------- | --------------- | ----------- | -------------- | --------- | ------ | +| `k8s.container.status.state` | UpDownCounter | `{container}` | Describes the number of K8s containers that are currently in a given state [1] | ![Development](https://img.shields.io/badge/-development-blue) | | + +**[1]:** All possible container states will be reported at each time interval to avoid missing metrics. +Only the value corresponding to the current state will be non-zero. + +| Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | +|---|---|---|---|---|---| +| [`k8s.container.status.state`](/docs/registry/attributes/k8s.md) | string | The state of the container. [K8s ContainerState](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#containerstate-v1-core) | `terminated`; `running`; `waiting` | `Required` | ![Development](https://img.shields.io/badge/-development-blue) | + +--- + +`k8s.container.status.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. + +| Value | Description | Stability | +|---|---|---| +| `running` | The container is running. | ![Development](https://img.shields.io/badge/-development-blue) | +| `terminated` | The container has terminated. | ![Development](https://img.shields.io/badge/-development-blue) | +| `waiting` | The container is waiting. | ![Development](https://img.shields.io/badge/-development-blue) | + + + + + + +### Metric: `k8s.container.status.reason` + +This metric is [recommended][MetricRecommended]. 
+ + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | Entity Associations | +| -------- | --------------- | ----------- | -------------- | --------- | ------ | +| `k8s.container.status.reason` | UpDownCounter | `{container}` | Describes the number of K8s containers that are currently in a state for a given reason [1] | ![Development](https://img.shields.io/badge/-development-blue) | | + +**[1]:** All possible container state reasons will be reported at each time interval to avoid missing metrics. +Only the value corresponding to the current state reason will be non-zero. + +| Attribute | Type | Description | Examples | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability | +|---|---|---|---|---|---| +| [`k8s.container.status.reason`](/docs/registry/attributes/k8s.md) | string | The reason for the container state. Corresponds to the `reason` field of the: [K8s ContainerStateWaiting](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#containerstatewaiting-v1-core) or [K8s ContainerStateTerminated](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#containerstateterminated-v1-core) | `ContainerCreating`; `CrashLoopBackOff`; `CreateContainerConfigError`; `ErrImagePull`; `ImagePullBackOff`; `OOMKilled`; `Completed`; `Error`; `ContainerCannotRun` | `Required` | ![Development](https://img.shields.io/badge/-development-blue) | + +--- + +`k8s.container.status.reason` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. + +| Value | Description | Stability | +|---|---|---| +| `Completed` | The container has completed execution. | ![Development](https://img.shields.io/badge/-development-blue) | +| `ContainerCannotRun` | The container cannot run. 
| ![Development](https://img.shields.io/badge/-development-blue) | +| `ContainerCreating` | The container is being created. | ![Development](https://img.shields.io/badge/-development-blue) | +| `CrashLoopBackOff` | The container is in a crash loop back off state. | ![Development](https://img.shields.io/badge/-development-blue) | +| `CreateContainerConfigError` | There was an error creating the container configuration. | ![Development](https://img.shields.io/badge/-development-blue) | +| `ErrImagePull` | There was an error pulling the container image. | ![Development](https://img.shields.io/badge/-development-blue) | +| `Error` | There was an error with the container. | ![Development](https://img.shields.io/badge/-development-blue) | +| `ImagePullBackOff` | The container image pull is in back off state. | ![Development](https://img.shields.io/badge/-development-blue) | +| `OOMKilled` | The container was killed due to out of memory. | ![Development](https://img.shields.io/badge/-development-blue) | + + + + + + ## Node metrics **Description:** Node level metrics captured under the namespace `k8s.node`. diff --git a/model/k8s/metrics.yaml b/model/k8s/metrics.yaml index d95832466..73e48c668 100644 --- a/model/k8s/metrics.yaml +++ b/model/k8s/metrics.yaml @@ -63,6 +63,34 @@ groups: - ref: network.interface.name - ref: network.io.direction + # k8s.container.* metrics + - id: metric.k8s.container.status.state + type: metric + metric_name: k8s.container.status.state + stability: development + brief: "Describes the number of K8s containers that are currently in a given state" + note: | + All possible container states will be reported at each time interval to avoid missing metrics. + Only the value corresponding to the current state will be non-zero. 
+ instrument: updowncounter + unit: "{container}" + attributes: + - ref: k8s.container.status.state + requirement_level: required + - id: metric.k8s.container.status.reason + type: metric + metric_name: k8s.container.status.reason + stability: development + brief: "Describes the number of K8s containers that are currently in a state for a given reason" + instrument: updowncounter + unit: "{container}" + note: | + All possible container state reasons will be reported at each time interval to avoid missing metrics. + Only the value corresponding to the current state reason will be non-zero. + attributes: + - ref: k8s.container.status.reason + requirement_level: required + # k8s.node.* metrics - id: metric.k8s.node.uptime type: metric diff --git a/model/k8s/registry.yaml b/model/k8s/registry.yaml index 04f503328..0fcfb38d6 100644 --- a/model/k8s/registry.yaml +++ b/model/k8s/registry.yaml @@ -434,3 +434,69 @@ groups: value: 'terminating' brief: "Terminating namespace phase as described by [K8s API](https://pkg.go.dev/k8s.io/api@v0.31.3/core/v1#NamespacePhase)" stability: development + - id: k8s.container.status.state + stability: development + brief: > + The state of the container. + [K8s ContainerState](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#containerstate-v1-core) + type: + members: + - id: terminated + value: 'terminated' + brief: 'The container has terminated.' + stability: development + - id: running + value: 'running' + brief: 'The container is running.' + stability: development + - id: waiting + value: 'waiting' + brief: 'The container is waiting.' + stability: development + examples: ['terminated', 'running', 'waiting'] + - id: k8s.container.status.reason + type: + members: + - id: container_creating + value: 'ContainerCreating' + brief: 'The container is being created.' + stability: development + - id: crash_loop_back_off + value: 'CrashLoopBackOff' + brief: 'The container is in a crash loop back off state.' 
+ stability: development + - id: create_container_config_error + value: 'CreateContainerConfigError' + brief: 'There was an error creating the container configuration.' + stability: development + - id: err_image_pull + value: 'ErrImagePull' + brief: 'There was an error pulling the container image.' + stability: development + - id: image_pull_back_off + value: 'ImagePullBackOff' + brief: 'The container image pull is in back off state.' + stability: development + - id: oom_killed + value: 'OOMKilled' + brief: 'The container was killed due to out of memory.' + stability: development + - id: completed + value: 'Completed' + brief: 'The container has completed execution.' + stability: development + - id: error + value: 'Error' + brief: 'There was an error with the container.' + stability: development + - id: container_cannot_run + value: 'ContainerCannotRun' + brief: 'The container cannot run.' + stability: development + stability: development + brief: > + The reason for the container state. Corresponds to the `reason` field of the: + [K8s ContainerStateWaiting](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#containerstatewaiting-v1-core) + or + [K8s ContainerStateTerminated](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#containerstateterminated-v1-core) + examples: ['ContainerCreating', 'CrashLoopBackOff', 'CreateContainerConfigError', 'ErrImagePull', 'ImagePullBackOff', 'OOMKilled', 'Completed', 'Error', 'ContainerCannotRun']