482 lines
12 KiB
Go
482 lines
12 KiB
Go
/*
Copyright 2016 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package store
|
|
|
|
import (
|
|
"context"
|
|
"strconv"
|
|
|
|
basemetrics "k8s.io/component-base/metrics"
|
|
|
|
"k8s.io/kube-state-metrics/v2/pkg/metric"
|
|
generator "k8s.io/kube-state-metrics/v2/pkg/metric_generator"
|
|
|
|
v1batch "k8s.io/api/batch/v1"
|
|
v1 "k8s.io/api/core/v1"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
"k8s.io/apimachinery/pkg/watch"
|
|
clientset "k8s.io/client-go/kubernetes"
|
|
"k8s.io/client-go/tools/cache"
|
|
)
|
|
|
|
var (
	// Name and help text of the family that exposes Job annotations.
	descJobAnnotationsName = "kube_job_annotations"
	descJobAnnotationsHelp = "Kubernetes annotations converted to Prometheus labels."

	// Name and help text of the family that exposes Job labels.
	descJobLabelsName = "kube_job_labels"
	descJobLabelsHelp = "Kubernetes labels converted to Prometheus labels."

	// descJobLabelsDefaultLabels are the label keys wrapJobFunc merges into
	// every Job sample; their values are the Job's namespace and name.
	descJobLabelsDefaultLabels = []string{"namespace", "job_name"}

	// jobFailureReasons lists the condition reasons that
	// kube_job_status_failed reports as individual "reason" label values.
	jobFailureReasons = []string{"BackoffLimitExceeded", "DeadlineExceeded", "Evicted"}
)
|
|
|
|
func jobMetricFamilies(allowAnnotationsList, allowLabelsList []string) []generator.FamilyGenerator {
|
|
return []generator.FamilyGenerator{
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
descJobAnnotationsName,
|
|
descJobAnnotationsHelp,
|
|
metric.Gauge,
|
|
basemetrics.ALPHA,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
if len(allowAnnotationsList) == 0 {
|
|
return &metric.Family{}
|
|
}
|
|
annotationKeys, annotationValues := createPrometheusLabelKeysValues("annotation", j.Annotations, allowAnnotationsList)
|
|
return &metric.Family{
|
|
Metrics: []*metric.Metric{
|
|
{
|
|
LabelKeys: annotationKeys,
|
|
LabelValues: annotationValues,
|
|
Value: 1,
|
|
},
|
|
},
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
descJobLabelsName,
|
|
descJobLabelsHelp,
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
if len(allowLabelsList) == 0 {
|
|
return &metric.Family{}
|
|
}
|
|
labelKeys, labelValues := createPrometheusLabelKeysValues("label", j.Labels, allowLabelsList)
|
|
return &metric.Family{
|
|
Metrics: []*metric.Metric{
|
|
{
|
|
LabelKeys: labelKeys,
|
|
LabelValues: labelValues,
|
|
Value: 1,
|
|
},
|
|
},
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_info",
|
|
"Information about job.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(_ *v1batch.Job) *metric.Family {
|
|
return &metric.Family{
|
|
Metrics: []*metric.Metric{
|
|
{
|
|
Value: 1,
|
|
},
|
|
},
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_created",
|
|
"Unix creation timestamp",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
ms := []*metric.Metric{}
|
|
|
|
if !j.CreationTimestamp.IsZero() {
|
|
ms = append(ms, &metric.Metric{
|
|
Value: float64(j.CreationTimestamp.Unix()),
|
|
})
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_spec_parallelism",
|
|
"The maximum desired number of pods the job should run at any given time.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
ms := []*metric.Metric{}
|
|
|
|
if j.Spec.Parallelism != nil {
|
|
ms = append(ms, &metric.Metric{
|
|
Value: float64(*j.Spec.Parallelism),
|
|
})
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_spec_completions",
|
|
"The desired number of successfully finished pods the job should be run with.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
ms := []*metric.Metric{}
|
|
|
|
if j.Spec.Completions != nil {
|
|
ms = append(ms, &metric.Metric{
|
|
Value: float64(*j.Spec.Completions),
|
|
})
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_spec_active_deadline_seconds",
|
|
"The duration in seconds relative to the startTime that the job may be active before the system tries to terminate it.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
ms := []*metric.Metric{}
|
|
|
|
if j.Spec.ActiveDeadlineSeconds != nil {
|
|
ms = append(ms, &metric.Metric{
|
|
Value: float64(*j.Spec.ActiveDeadlineSeconds),
|
|
})
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_status_succeeded",
|
|
"The number of pods which reached Phase Succeeded.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
return &metric.Family{
|
|
Metrics: []*metric.Metric{
|
|
{
|
|
Value: float64(j.Status.Succeeded),
|
|
},
|
|
},
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_status_failed",
|
|
"The number of pods which reached Phase Failed and the reason for failure.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
var ms []*metric.Metric
|
|
|
|
if float64(j.Status.Failed) == 0 {
|
|
return &metric.Family{
|
|
Metrics: []*metric.Metric{
|
|
{
|
|
Value: float64(j.Status.Failed),
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
reasonKnown := false
|
|
for _, c := range j.Status.Conditions {
|
|
condition := c
|
|
if condition.Type == v1batch.JobFailed {
|
|
for _, reason := range jobFailureReasons {
|
|
reasonKnown = reasonKnown || failureReason(&condition, reason)
|
|
|
|
// for known reasons
|
|
ms = append(ms, &metric.Metric{
|
|
LabelKeys: []string{"reason"},
|
|
LabelValues: []string{reason},
|
|
Value: boolFloat64(failureReason(&condition, reason)),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
// for unknown reasons
|
|
if !reasonKnown {
|
|
ms = append(ms, &metric.Metric{
|
|
LabelKeys: []string{"reason"},
|
|
LabelValues: []string{""},
|
|
Value: float64(j.Status.Failed),
|
|
})
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_status_active",
|
|
"The number of actively running pods.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
return &metric.Family{
|
|
Metrics: []*metric.Metric{
|
|
{
|
|
Value: float64(j.Status.Active),
|
|
},
|
|
},
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_status_ready",
|
|
"The number of ready pods that belong to this Job.",
|
|
metric.Gauge,
|
|
basemetrics.ALPHA,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
value := float64(0)
|
|
if j.Status.Ready != nil {
|
|
value = float64(*j.Status.Ready)
|
|
}
|
|
return &metric.Family{
|
|
Metrics: []*metric.Metric{
|
|
{
|
|
Value: value,
|
|
},
|
|
},
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_complete",
|
|
"The job has completed its execution.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
ms := []*metric.Metric{}
|
|
for _, c := range j.Status.Conditions {
|
|
if c.Type == v1batch.JobComplete {
|
|
metrics := addConditionMetrics(c.Status)
|
|
for _, m := range metrics {
|
|
metric := m
|
|
metric.LabelKeys = []string{"condition"}
|
|
ms = append(ms, metric)
|
|
}
|
|
}
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_failed",
|
|
"The job has failed its execution.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
ms := []*metric.Metric{}
|
|
|
|
for _, c := range j.Status.Conditions {
|
|
if c.Type == v1batch.JobFailed {
|
|
metrics := addConditionMetrics(c.Status)
|
|
for _, m := range metrics {
|
|
metric := m
|
|
metric.LabelKeys = []string{"condition"}
|
|
ms = append(ms, metric)
|
|
}
|
|
}
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_status_start_time",
|
|
"StartTime represents time when the job was acknowledged by the Job Manager.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
ms := []*metric.Metric{}
|
|
|
|
if j.Status.StartTime != nil {
|
|
ms = append(ms, &metric.Metric{
|
|
|
|
Value: float64(j.Status.StartTime.Unix()),
|
|
})
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_status_completion_time",
|
|
"CompletionTime represents time when the job was completed.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
ms := []*metric.Metric{}
|
|
if j.Status.CompletionTime != nil {
|
|
ms = append(ms, &metric.Metric{
|
|
|
|
Value: float64(j.Status.CompletionTime.Unix()),
|
|
})
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_status_suspended",
|
|
"The number of pods which reached Phase Suspended.",
|
|
metric.Gauge,
|
|
basemetrics.ALPHA,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
ms := []*metric.Metric{}
|
|
for _, c := range j.Status.Conditions {
|
|
if c.Type == v1batch.JobSuspended {
|
|
ms = append(ms, &metric.Metric{
|
|
Value: boolFloat64(c.Status == v1.ConditionTrue),
|
|
})
|
|
}
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
*generator.NewFamilyGeneratorWithStability(
|
|
"kube_job_owner",
|
|
"Information about the Job's owner.",
|
|
metric.Gauge,
|
|
basemetrics.STABLE,
|
|
"",
|
|
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
|
|
labelKeys := []string{"owner_kind", "owner_name", "owner_is_controller"}
|
|
|
|
owners := j.GetOwnerReferences()
|
|
|
|
if len(owners) == 0 {
|
|
return &metric.Family{
|
|
Metrics: []*metric.Metric{
|
|
{
|
|
LabelKeys: labelKeys,
|
|
LabelValues: []string{"", "", ""},
|
|
Value: 1,
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
ms := make([]*metric.Metric, len(owners))
|
|
|
|
for i, owner := range owners {
|
|
if owner.Controller != nil {
|
|
ms[i] = &metric.Metric{
|
|
LabelKeys: labelKeys,
|
|
LabelValues: []string{owner.Kind, owner.Name, strconv.FormatBool(*owner.Controller)},
|
|
Value: 1,
|
|
}
|
|
} else {
|
|
ms[i] = &metric.Metric{
|
|
LabelKeys: labelKeys,
|
|
LabelValues: []string{owner.Kind, owner.Name, "false"},
|
|
Value: 1,
|
|
}
|
|
}
|
|
}
|
|
|
|
return &metric.Family{
|
|
Metrics: ms,
|
|
}
|
|
}),
|
|
),
|
|
}
|
|
}
|
|
|
|
func wrapJobFunc(f func(*v1batch.Job) *metric.Family) func(interface{}) *metric.Family {
|
|
return func(obj interface{}) *metric.Family {
|
|
job := obj.(*v1batch.Job)
|
|
|
|
metricFamily := f(job)
|
|
|
|
for _, m := range metricFamily.Metrics {
|
|
m.LabelKeys, m.LabelValues = mergeKeyValues(descJobLabelsDefaultLabels, []string{job.Namespace, job.Name}, m.LabelKeys, m.LabelValues)
|
|
}
|
|
|
|
return metricFamily
|
|
}
|
|
}
|
|
|
|
func createJobListWatch(kubeClient clientset.Interface, ns string, fieldSelector string) cache.ListerWatcher {
|
|
return &cache.ListWatch{
|
|
ListFunc: func(opts metav1.ListOptions) (runtime.Object, error) {
|
|
opts.FieldSelector = fieldSelector
|
|
return kubeClient.BatchV1().Jobs(ns).List(context.TODO(), opts)
|
|
},
|
|
WatchFunc: func(opts metav1.ListOptions) (watch.Interface, error) {
|
|
opts.FieldSelector = fieldSelector
|
|
return kubeClient.BatchV1().Jobs(ns).Watch(context.TODO(), opts)
|
|
},
|
|
}
|
|
}
|
|
|
|
func failureReason(jc *v1batch.JobCondition, reason string) bool {
|
|
if jc == nil {
|
|
return false
|
|
}
|
|
return jc.Reason == reason
|
|
}
|