kube-state-metrics/internal/store/job.go

482 lines
12 KiB
Go

/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package store
import (
"context"
"strconv"
basemetrics "k8s.io/component-base/metrics"
"k8s.io/kube-state-metrics/v2/pkg/metric"
generator "k8s.io/kube-state-metrics/v2/pkg/metric_generator"
v1batch "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
)
// Names, help texts and shared label sets used by the Job metric families.
var (
	// kube_job_annotations descriptor.
	descJobAnnotationsName = "kube_job_annotations"
	descJobAnnotationsHelp = "Kubernetes annotations converted to Prometheus labels."

	// kube_job_labels descriptor.
	descJobLabelsName = "kube_job_labels"
	descJobLabelsHelp = "Kubernetes labels converted to Prometheus labels."

	// descJobLabelsDefaultLabels are the label keys wrapJobFunc merges into
	// every Job sample.
	descJobLabelsDefaultLabels = []string{
		"namespace",
		"job_name",
	}

	// jobFailureReasons are the failure reasons reported individually by
	// kube_job_status_failed.
	jobFailureReasons = []string{
		"BackoffLimitExceeded",
		"DeadlineExceeded",
		"Evicted",
	}
)
// jobMetricFamilies returns the family generators for every Job metric defined
// in this file. allowAnnotationsList and allowLabelsList are passed to
// createPrometheusLabelKeysValues for kube_job_annotations and kube_job_labels
// respectively; when a list is empty the matching family carries no samples.
// Every generator function is wrapped with wrapJobFunc.
func jobMetricFamilies(allowAnnotationsList, allowLabelsList []string) []generator.FamilyGenerator {
	// single builds a family holding exactly one unlabelled sample.
	single := func(v float64) *metric.Family {
		return &metric.Family{
			Metrics: []*metric.Metric{{Value: v}},
		}
	}

	// none builds a family whose sample list is empty but non-nil.
	none := func() *metric.Family {
		return &metric.Family{Metrics: []*metric.Metric{}}
	}

	// labelled builds a family with one sample of value 1 carrying the given
	// label keys and values.
	labelled := func(keys, values []string) *metric.Family {
		return &metric.Family{
			Metrics: []*metric.Metric{{
				LabelKeys:   keys,
				LabelValues: values,
				Value:       1,
			}},
		}
	}

	// conditionFamily collects the samples addConditionMetrics yields for each
	// condition of the wanted type, labelling them with the "condition" key.
	conditionFamily := func(j *v1batch.Job, want v1batch.JobConditionType) *metric.Family {
		samples := []*metric.Metric{}
		for _, cond := range j.Status.Conditions {
			if cond.Type != want {
				continue
			}
			for _, sample := range addConditionMetrics(cond.Status) {
				sample.LabelKeys = []string{"condition"}
				samples = append(samples, sample)
			}
		}
		return &metric.Family{Metrics: samples}
	}

	return []generator.FamilyGenerator{
		*generator.NewFamilyGeneratorWithStability(
			descJobAnnotationsName,
			descJobAnnotationsHelp,
			metric.Gauge,
			basemetrics.ALPHA,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				if len(allowAnnotationsList) == 0 {
					return &metric.Family{}
				}
				keys, values := createPrometheusLabelKeysValues("annotation", j.Annotations, allowAnnotationsList)
				return labelled(keys, values)
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			descJobLabelsName,
			descJobLabelsHelp,
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				if len(allowLabelsList) == 0 {
					return &metric.Family{}
				}
				keys, values := createPrometheusLabelKeysValues("label", j.Labels, allowLabelsList)
				return labelled(keys, values)
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_info",
			"Information about job.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(_ *v1batch.Job) *metric.Family {
				return single(1)
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_created",
			"Unix creation timestamp",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				// No sample is emitted while the creation timestamp is unset.
				if j.CreationTimestamp.IsZero() {
					return none()
				}
				return single(float64(j.CreationTimestamp.Unix()))
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_spec_parallelism",
			"The maximum desired number of pods the job should run at any given time.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				if j.Spec.Parallelism == nil {
					return none()
				}
				return single(float64(*j.Spec.Parallelism))
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_spec_completions",
			"The desired number of successfully finished pods the job should be run with.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				if j.Spec.Completions == nil {
					return none()
				}
				return single(float64(*j.Spec.Completions))
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_spec_active_deadline_seconds",
			"The duration in seconds relative to the startTime that the job may be active before the system tries to terminate it.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				if j.Spec.ActiveDeadlineSeconds == nil {
					return none()
				}
				return single(float64(*j.Spec.ActiveDeadlineSeconds))
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_status_succeeded",
			"The number of pods which reached Phase Succeeded.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				return single(float64(j.Status.Succeeded))
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_status_failed",
			"The number of pods which reached Phase Failed and the reason for failure.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				failed := float64(j.Status.Failed)
				if failed == 0 {
					return single(failed)
				}

				var (
					samples     []*metric.Metric
					knownReason bool
				)
				for i := range j.Status.Conditions {
					cond := &j.Status.Conditions[i]
					if cond.Type != v1batch.JobFailed {
						continue
					}
					// One sample per known reason, set to 1 only for the
					// reason recorded on this condition.
					for _, reason := range jobFailureReasons {
						matched := failureReason(cond, reason)
						if matched {
							knownReason = true
						}
						samples = append(samples, &metric.Metric{
							LabelKeys:   []string{"reason"},
							LabelValues: []string{reason},
							Value:       boolFloat64(matched),
						})
					}
				}

				// No known reason matched: report the failed count under an
				// empty reason label.
				if !knownReason {
					samples = append(samples, &metric.Metric{
						LabelKeys:   []string{"reason"},
						LabelValues: []string{""},
						Value:       failed,
					})
				}
				return &metric.Family{Metrics: samples}
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_status_active",
			"The number of actively running pods.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				return single(float64(j.Status.Active))
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_status_ready",
			"The number of ready pods that belong to this Job.",
			metric.Gauge,
			basemetrics.ALPHA,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				// A nil Ready field is reported as 0.
				var ready float64
				if j.Status.Ready != nil {
					ready = float64(*j.Status.Ready)
				}
				return single(ready)
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_complete",
			"The job has completed its execution.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				return conditionFamily(j, v1batch.JobComplete)
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_failed",
			"The job has failed its execution.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				return conditionFamily(j, v1batch.JobFailed)
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_status_start_time",
			"StartTime represents time when the job was acknowledged by the Job Manager.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				if j.Status.StartTime == nil {
					return none()
				}
				return single(float64(j.Status.StartTime.Unix()))
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_status_completion_time",
			"CompletionTime represents time when the job was completed.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				if j.Status.CompletionTime == nil {
					return none()
				}
				return single(float64(j.Status.CompletionTime.Unix()))
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_status_suspended",
			"The number of pods which reached Phase Suspended.",
			metric.Gauge,
			basemetrics.ALPHA,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				samples := []*metric.Metric{}
				for _, cond := range j.Status.Conditions {
					if cond.Type != v1batch.JobSuspended {
						continue
					}
					// 1 when the Suspended condition is true, 0 otherwise.
					samples = append(samples, &metric.Metric{
						Value: boolFloat64(cond.Status == v1.ConditionTrue),
					})
				}
				return &metric.Family{Metrics: samples}
			}),
		),
		*generator.NewFamilyGeneratorWithStability(
			"kube_job_owner",
			"Information about the Job's owner.",
			metric.Gauge,
			basemetrics.STABLE,
			"",
			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
				labelKeys := []string{"owner_kind", "owner_name", "owner_is_controller"}

				owners := j.GetOwnerReferences()
				if len(owners) == 0 {
					// A Job without owners still gets one sample, with empty
					// owner label values.
					return labelled(labelKeys, []string{"", "", ""})
				}

				samples := make([]*metric.Metric, 0, len(owners))
				for _, owner := range owners {
					// An unset Controller flag is reported as "false".
					isController := "false"
					if owner.Controller != nil {
						isController = strconv.FormatBool(*owner.Controller)
					}
					samples = append(samples, &metric.Metric{
						LabelKeys:   labelKeys,
						LabelValues: []string{owner.Kind, owner.Name, isController},
						Value:       1,
					})
				}
				return &metric.Family{Metrics: samples}
			}),
		),
	}
}
// wrapJobFunc adapts a Job-specific family function to the generic
// func(interface{}) form. The returned function asserts obj to *v1batch.Job
// (the unchecked assertion panics for any other type), calls f, and then
// rewrites the labels of every produced sample by passing the default Job
// label keys with the Job's namespace and name, followed by the sample's own
// keys and values, to mergeKeyValues.
func wrapJobFunc(f func(*v1batch.Job) *metric.Family) func(interface{}) *metric.Family {
	return func(obj interface{}) *metric.Family {
		job := obj.(*v1batch.Job)

		family := f(job)
		for i := range family.Metrics {
			sample := family.Metrics[i]
			sample.LabelKeys, sample.LabelValues = mergeKeyValues(
				descJobLabelsDefaultLabels,
				[]string{job.Namespace, job.Name},
				sample.LabelKeys,
				sample.LabelValues,
			)
		}

		return family
	}
}
// createJobListWatch returns a ListerWatcher that lists and watches batch/v1
// Jobs in namespace ns through kubeClient. Both calls overwrite the incoming
// options' FieldSelector with fieldSelector and use context.TODO().
func createJobListWatch(kubeClient clientset.Interface, ns string, fieldSelector string) cache.ListerWatcher {
	listJobs := func(opts metav1.ListOptions) (runtime.Object, error) {
		opts.FieldSelector = fieldSelector
		return kubeClient.BatchV1().Jobs(ns).List(context.TODO(), opts)
	}

	watchJobs := func(opts metav1.ListOptions) (watch.Interface, error) {
		opts.FieldSelector = fieldSelector
		return kubeClient.BatchV1().Jobs(ns).Watch(context.TODO(), opts)
	}

	return &cache.ListWatch{
		ListFunc:  listJobs,
		WatchFunc: watchJobs,
	}
}
// failureReason reports whether jc is non-nil and its Reason equals reason.
func failureReason(jc *v1batch.JobCondition, reason string) bool {
	return jc != nil && jc.Reason == reason
}