From 96dd7f2c3fec1ec2704b5587c2b7eed41e39f93d Mon Sep 17 00:00:00 2001 From: Christos Markou Date: Thu, 29 May 2025 18:02:42 +0300 Subject: [PATCH] Add k8s.container.restart.count metric (#2191) Signed-off-by: ChrsMark --- .chloggen/add_c_restarts.yaml | 22 ++++++++++++++++++++++ docs/non-normative/k8s-migration.md | 9 ++++++--- docs/system/k8s-metrics.md | 27 +++++++++++++++++++++++++++ model/k8s/metrics.yaml | 15 +++++++++++++++ 4 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 .chloggen/add_c_restarts.yaml diff --git a/.chloggen/add_c_restarts.yaml b/.chloggen/add_c_restarts.yaml new file mode 100644 index 000000000..b61779d23 --- /dev/null +++ b/.chloggen/add_c_restarts.yaml @@ -0,0 +1,22 @@ +# Use this changelog template to create an entry for release notes. +# +# If your change doesn't affect end users you should instead start +# your pull request title with [chore] or use the "Skip Changelog" label. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the area of concern in the attributes-registry, (e.g. http, cloud, db) +component: k8s + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add k8s.container.restart.count metric + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +# The values here must be integers. +issues: [2191] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. 
+subtext: diff --git a/docs/non-normative/k8s-migration.md b/docs/non-normative/k8s-migration.md index 566c8034f..03ce623c5 100644 --- a/docs/non-normative/k8s-migration.md +++ b/docs/non-normative/k8s-migration.md @@ -287,9 +287,11 @@ The changes are the following: The K8s Container metrics implemented by the Collector and specifically the [k8scluster](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.115.0/receiver/k8sclusterreceiver/documentation.md) -receiver were introduced as semantic conventions in -[#2178](https://github.com/open-telemetry/semantic-conventions/pull/2178) (TODO: replace with SemConv version once -available). +receiver were introduced as semantic conventions in: + +- [#2178](https://github.com/open-telemetry/semantic-conventions/pull/2178) (TODO: replace with SemConv version once +available) +- [#2074](https://github.com/open-telemetry/semantic-conventions/issues/2074) The changes in their metrics are the following: @@ -305,5 +307,6 @@ The changes in their metrics are the following: | `k8s.container.storage_request` | `k8s.container.storage.request` | | `k8s.container.ephemeralstorage_limit` | `k8s.container.ephemeral_storage.limit` | | `k8s.container.ephemeralstorage_request` | `k8s.container.ephemeral_storage.request` | +| `k8s.container.restarts` (type: `gauge`) | `k8s.container.restart.count` (type: `updowncounter`) | diff --git a/docs/system/k8s-metrics.md b/docs/system/k8s-metrics.md index dd3e2097e..3f7a45cd1 100644 --- a/docs/system/k8s-metrics.md +++ b/docs/system/k8s-metrics.md @@ -74,6 +74,7 @@ and therefore inherit its attributes, like `k8s.pod.name` and `k8s.pod.uid`. 
- [Metric: `k8s.container.storage.request`](#metric-k8scontainerstoragerequest) - [Metric: `k8s.container.ephemeral_storage.limit`](#metric-k8scontainerephemeral_storagelimit) - [Metric: `k8s.container.ephemeral_storage.request`](#metric-k8scontainerephemeral_storagerequest) + - [Metric: `k8s.container.restart.count`](#metric-k8scontainerrestartcount) @@ -1278,5 +1279,31 @@ This metric is [recommended][MetricRecommended]. +### Metric: `k8s.container.restart.count` + +This metric is [recommended][MetricRecommended]. + + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | Entity Associations | +| -------- | --------------- | ----------- | -------------- | --------- | ------ | +| `k8s.container.restart.count` | UpDownCounter | `{restart}` | Describes how many times the container has restarted (since the last counter reset) [1] | ![Development](https://img.shields.io/badge/-development-blue) | `k8s.container` | + +**[1]:** This value is pulled directly from the K8s API and the value can go indefinitely high and be reset to 0 +at any time depending on how your kubelet is configured to prune dead containers. +It is best to not depend too much on the exact value but rather look at it as +either == 0, in which case you can conclude there were no restarts in the recent past, or > 0, in which case +you can conclude there were restarts in the recent past, and not try to analyze the value beyond that. + + + + + + [DocumentStatus]: https://opentelemetry.io/docs/specs/otel/document-status [MetricRecommended]: /docs/general/metric-requirement-level.md#recommended diff --git a/model/k8s/metrics.yaml b/model/k8s/metrics.yaml index 4730cec88..d95832466 100644 --- a/model/k8s/metrics.yaml +++ b/model/k8s/metrics.yaml @@ -559,3 +559,18 @@ groups: See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#resourcerequirements-v1-core for details. 
instrument: gauge unit: "By" + - id: metric.k8s.container.restart.count + type: metric + metric_name: k8s.container.restart.count + stability: development + brief: "Describes how many times the container has restarted (since the last counter reset)" + instrument: updowncounter + unit: "{restart}" + entity_associations: + - k8s.container + note: | + This value is pulled directly from the K8s API and the value can go indefinitely high and be reset to 0 + at any time depending on how your kubelet is configured to prune dead containers. + It is best to not depend too much on the exact value but rather look at it as + either == 0, in which case you can conclude there were no restarts in the recent past, or > 0, in which case + you can conclude there were restarts in the recent past, and not try to analyze the value beyond that.