Merge pull request #330 from andyxning/add_metrics_for_kube-state-metrics

add kube-state-metrics own metrics
Frederic Branczyk 2018-01-03 16:45:27 +01:00 committed by GitHub
commit 310ce6c4b1
21 changed files with 140 additions and 28 deletions

View File

@ -24,6 +24,7 @@ the raw metrics.
- [Resource group version compatibility](#resource-group-version-compatibility)
- [Container Image](#container-image)
- [Metrics Documentation](#metrics-documentation)
- [Kube-state-metrics self metrics](#kube-state-metrics-self-metrics)
- [Resource recommendation](#resource-recommendation)
- [kube-state-metrics vs. Heapster](#kube-state-metrics-vs-heapster)
- [Setup](#setup)
@ -100,6 +101,14 @@ additional metrics!
See the [`Documentation`](Documentation) directory for documentation of the exposed metrics.
### Kube-state-metrics self metrics
kube-state-metrics exposes its own metrics on the address configured via the `--telemetry-host` and `--telemetry-port` flags (default `0.0.0.0:81`).
| Metric name | Metric type | Description | Labels/tags |
| ----------- | ----------- | ----------- | ----------- |
| ksm_scrape_error_total | Counter | Total scrape errors encountered when scraping a resource | `resource`=<resource name> |
| ksm_resources_per_scrape | Summary | Number of resources returned per scrape | `resource`=<resource name> |
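
As a quick sanity check that the telemetry endpoint is serving these series, a small Go program along the following lines can scrape it and print only the self metrics. This is a sketch that assumes the defaults added here (`--telemetry-host=0.0.0.0`, `--telemetry-port=81`), the standard `/metrics` path, and a kube-state-metrics instance reachable on localhost:

```go
package main

import (
	"bufio"
	"fmt"
	"log"
	"net/http"
	"strings"
)

func main() {
	// Default telemetry address from this change: --telemetry-host=0.0.0.0, --telemetry-port=81.
	resp, err := http.Get("http://localhost:81/metrics")
	if err != nil {
		log.Fatalf("scraping telemetry endpoint failed: %v", err)
	}
	defer resp.Body.Close()

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		// Print only the kube-state-metrics self metrics.
		if line := scanner.Text(); strings.HasPrefix(line, "ksm_") {
			fmt.Println(line)
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("reading telemetry response failed: %v", err)
	}
}
```

Note that `ksm_resources_per_scrape` series only appear once the main metrics endpoint has been scraped at least once, and `ksm_scrape_error_total` series only appear after a resource has actually failed to list.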
### Resource recommendation
Resource usage changes with the size of the cluster. As a general rule, you should allocate

View File

@ -18,8 +18,26 @@ package collectors
import (
"time"
"github.com/prometheus/client_golang/prometheus"
)
var (
resyncPeriod = 5 * time.Minute
ScrapeErrorTotalMetric = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "ksm_scrape_error_total",
Help: "Total scrape errors encountered when scraping a resource",
},
[]string{"resource"},
)
ResourcesPerScrapeMetric = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "ksm_resources_per_scrape",
Help: "Number of resources returned per scrape",
},
[]string{"resource"},
)
)
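
Every collector touched below applies the same pattern in its Collect method: a failed List increments ScrapeErrorTotalMetric for that resource, a successful one feeds the item count into ResourcesPerScrapeMetric. The following is a minimal, self-contained sketch of that pattern; the fooStore type and the "foo" resource label are hypothetical, only the two exported metric vectors above come from this change:

```go
package main

import (
	"errors"
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	kcollectors "k8s.io/kube-state-metrics/collectors"
)

// fooStore stands in for the cache-backed stores the real collectors wrap.
type fooStore struct {
	items []string
	err   error
}

func (s fooStore) List() ([]string, error) { return s.items, s.err }

// collectFoo mirrors the pattern added to every collector's Collect method:
// a failed List increments the per-resource error counter, a successful one
// records how many objects were returned.
func collectFoo(store fooStore) {
	items, err := store.List()
	if err != nil {
		kcollectors.ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "foo"}).Inc()
		fmt.Printf("listing foos failed: %s\n", err)
		return
	}
	kcollectors.ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "foo"}).Observe(float64(len(items)))
}

func main() {
	collectFoo(fooStore{items: []string{"a", "b"}})         // observes 2 resources
	collectFoo(fooStore{err: errors.New("list timed out")}) // increments the error counter
}
```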

View File

@ -75,9 +75,12 @@ func (csc *componentStatusCollector) Describe(ch chan<- *prometheus.Desc) {
func (csc *componentStatusCollector) Collect(ch chan<- prometheus.Metric) {
csl, err := csc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "componentstatus"}).Inc()
glog.Errorf("listing component status failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "componentstatus"}).Observe(float64(len(csl.Items)))
for _, s := range csl.Items {
csc.collectComponentStatus(ch, s)
}

View File

@ -128,9 +128,12 @@ func (dc *cronJobCollector) Describe(ch chan<- *prometheus.Desc) {
func (cjc *cronJobCollector) Collect(ch chan<- prometheus.Metric) {
cronjobs, err := cjc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "cronjob"}).Inc()
glog.Errorf("listing cronjobs failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "cronjob"}).Observe(float64(len(cronjobs)))
for _, cj := range cronjobs {
cjc.collectCronJob(ch, cj)
}

View File

@ -123,9 +123,12 @@ func (dc *daemonsetCollector) Describe(ch chan<- *prometheus.Desc) {
func (dc *daemonsetCollector) Collect(ch chan<- prometheus.Metric) {
dss, err := dc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "daemonset"}).Inc()
glog.Errorf("listing daemonsets failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "daemonset"}).Observe(float64(len(dss)))
for _, d := range dss {
dc.collectDaemonSet(ch, d)
}

View File

@ -147,9 +147,12 @@ func (dc *deploymentCollector) Describe(ch chan<- *prometheus.Desc) {
func (dc *deploymentCollector) Collect(ch chan<- prometheus.Metric) {
ds, err := dc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "deployment"}).Inc()
glog.Errorf("listing deployments failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "deployment"}).Observe(float64(len(ds)))
for _, d := range ds {
dc.collectDeployment(ch, d)
}

View File

@ -107,9 +107,12 @@ func (pc *endpointCollector) Describe(ch chan<- *prometheus.Desc) {
func (ec *endpointCollector) Collect(ch chan<- prometheus.Metric) {
endpoints, err := ec.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "endpoint"}).Inc()
glog.Errorf("listing endpoints failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "endpoint"}).Observe(float64(len(endpoints)))
for _, e := range endpoints {
ec.collectEndpoints(ch, e)
}

View File

@ -111,9 +111,12 @@ func (hc *hpaCollector) Describe(ch chan<- *prometheus.Desc) {
func (hc *hpaCollector) Collect(ch chan<- prometheus.Metric) {
hpas, err := hc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "horizontalpodautoscaler"}).Inc()
glog.Errorf("listing HorizontalPodAutoscalers failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "horizontalpodautoscaler"}).Observe(float64(len(hpas.Items)))
for _, h := range hpas.Items {
hc.collectHPA(ch, h)
}

View File

@ -152,9 +152,12 @@ func (dc *jobCollector) Describe(ch chan<- *prometheus.Desc) {
func (jc *jobCollector) Collect(ch chan<- prometheus.Metric) {
jobs, err := jc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "job"}).Inc()
glog.Errorf("listing jobs failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "job"}).Observe(float64(len(jobs)))
for _, j := range jobs {
jc.collectJob(ch, j)
}

View File

@ -88,10 +88,12 @@ func (lrc *limitRangeCollector) Describe(ch chan<- *prometheus.Desc) {
func (lrc *limitRangeCollector) Collect(ch chan<- prometheus.Metric) {
limitRangeCollector, err := lrc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "limitrange"}).Inc()
glog.Errorf("listing limit ranges failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "limitrange"}).Observe(float64(len(limitRangeCollector.Items)))
for _, rq := range limitRangeCollector.Items {
lrc.collectLimitRange(ch, rq)
}

View File

@ -100,10 +100,12 @@ func (nsc *namespaceCollector) Describe(ch chan<- *prometheus.Desc) {
func (nsc *namespaceCollector) Collect(ch chan<- prometheus.Metric) {
nsls, err := nsc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "namespace"}).Inc()
glog.Errorf("listing namespace failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "namespace"}).Observe(float64(len(nsls)))
for _, rq := range nsls {
nsc.collectNamespace(ch, rq)
}

View File

@ -173,9 +173,12 @@ func (nc *nodeCollector) Describe(ch chan<- *prometheus.Desc) {
func (nc *nodeCollector) Collect(ch chan<- prometheus.Metric) {
nodes, err := nc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "node"}).Inc()
glog.Errorf("listing nodes failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "node"}).Observe(float64(len(nodes.Items)))
for _, n := range nodes.Items {
nc.collectNode(ch, n)
}

View File

@ -86,10 +86,12 @@ func (collector *persistentVolumeCollector) Describe(ch chan<- *prometheus.Desc)
func (collector *persistentVolumeCollector) Collect(ch chan<- prometheus.Metric) {
persistentVolumeCollector, err := collector.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "persistentvolume"}).Inc()
glog.Errorf("listing persistentVolume failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "persistentvolume"}).Observe(float64(len(persistentVolumeCollector.Items)))
for _, pv := range persistentVolumeCollector.Items {
collector.collectPersistentVolume(ch, pv)
}

View File

@ -99,10 +99,12 @@ func (collector *persistentVolumeClaimCollector) Describe(ch chan<- *prometheus.
func (collector *persistentVolumeClaimCollector) Collect(ch chan<- prometheus.Metric) {
persistentVolumeClaimCollector, err := collector.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "persistentvolumeclaim"}).Inc()
glog.Errorf("listing persistent volume claims failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "persistentvolumeclaim"}).Observe(float64(len(persistentVolumeClaimCollector.Items)))
for _, pvc := range persistentVolumeClaimCollector.Items {
collector.collectPersistentVolumeClaim(ch, pvc)
}

View File

@ -233,9 +233,12 @@ func (pc *podCollector) Describe(ch chan<- *prometheus.Desc) {
func (pc *podCollector) Collect(ch chan<- prometheus.Metric) {
pods, err := pc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "pod"}).Inc()
glog.Errorf("listing pods failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "pod"}).Observe(float64(len(pods)))
for _, p := range pods {
pc.collectPod(ch, p)
}

View File

@ -111,9 +111,12 @@ func (dc *replicasetCollector) Describe(ch chan<- *prometheus.Desc) {
func (dc *replicasetCollector) Collect(ch chan<- prometheus.Metric) {
rss, err := dc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "replicaset"}).Inc()
glog.Errorf("listing replicasets failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "replicaset"}).Observe(float64(len(rss)))
for _, d := range rss {
dc.collectReplicaSet(ch, d)
}

View File

@ -117,9 +117,12 @@ func (dc *replicationcontrollerCollector) Describe(ch chan<- *prometheus.Desc) {
func (dc *replicationcontrollerCollector) Collect(ch chan<- prometheus.Metric) {
rcs, err := dc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "replicationcontroller"}).Inc()
glog.Errorf("listing replicationcontrollers failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "replicationcontroller"}).Observe(float64(len(rcs)))
for _, d := range rcs {
dc.collectReplicationController(ch, d)
}

View File

@ -86,10 +86,12 @@ func (rqc *resourceQuotaCollector) Describe(ch chan<- *prometheus.Desc) {
func (rqc *resourceQuotaCollector) Collect(ch chan<- prometheus.Metric) {
resourceQuota, err := rqc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "resourcequota"}).Inc()
glog.Errorf("listing resource quotas failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "resourcequota"}).Observe(float64(len(resourceQuota.Items)))
for _, rq := range resourceQuota.Items {
rqc.collectResourceQuota(ch, rq)
}

View File

@ -100,9 +100,12 @@ func (pc *serviceCollector) Describe(ch chan<- *prometheus.Desc) {
func (sc *serviceCollector) Collect(ch chan<- prometheus.Metric) {
services, err := sc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "service"}).Inc()
glog.Errorf("listing services failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "service"}).Observe(float64(len(services)))
for _, s := range services {
sc.collectService(ch, s)
}

View File

@ -134,9 +134,12 @@ func (dc *statefulSetCollector) Describe(ch chan<- *prometheus.Desc) {
func (sc *statefulSetCollector) Collect(ch chan<- prometheus.Metric) {
sss, err := sc.store.List()
if err != nil {
ScrapeErrorTotalMetric.With(prometheus.Labels{"resource": "statefulset"}).Inc()
glog.Errorf("listing statefulsets failed: %s", err)
return
}
ResourcesPerScrapeMetric.With(prometheus.Labels{"resource": "statefulset"}).Observe(float64(len(sss)))
for _, d := range sss {
sc.collectStatefulSet(ch, d)
}

main.go
View File

@ -38,7 +38,7 @@ import (
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/kube-state-metrics/collectors"
kcollectors "k8s.io/kube-state-metrics/collectors"
"k8s.io/kube-state-metrics/version"
)
@ -69,24 +69,24 @@ var (
"endpoints": struct{}{},
}
availableCollectors = map[string]func(registry prometheus.Registerer, kubeClient clientset.Interface, namespace string){
"componentstatuses": collectors.RegisterComponentStatusCollector,
"cronjobs": collectors.RegisterCronJobCollector,
"daemonsets": collectors.RegisterDaemonSetCollector,
"deployments": collectors.RegisterDeploymentCollector,
"jobs": collectors.RegisterJobCollector,
"limitranges": collectors.RegisterLimitRangeCollector,
"nodes": collectors.RegisterNodeCollector,
"pods": collectors.RegisterPodCollector,
"replicasets": collectors.RegisterReplicaSetCollector,
"replicationcontrollers": collectors.RegisterReplicationControllerCollector,
"resourcequotas": collectors.RegisterResourceQuotaCollector,
"services": collectors.RegisterServiceCollector,
"statefulsets": collectors.RegisterStatefulSetCollector,
"persistentvolumes": collectors.RegisterPersistentVolumeCollector,
"persistentvolumeclaims": collectors.RegisterPersistentVolumeClaimCollector,
"namespaces": collectors.RegisterNamespaceCollector,
"horizontalpodautoscalers": collectors.RegisterHorizontalPodAutoScalerCollector,
"endpoints": collectors.RegisterEndpointCollector,
"componentstatuses": kcollectors.RegisterComponentStatusCollector,
"cronjobs": kcollectors.RegisterCronJobCollector,
"daemonsets": kcollectors.RegisterDaemonSetCollector,
"deployments": kcollectors.RegisterDeploymentCollector,
"jobs": kcollectors.RegisterJobCollector,
"limitranges": kcollectors.RegisterLimitRangeCollector,
"nodes": kcollectors.RegisterNodeCollector,
"pods": kcollectors.RegisterPodCollector,
"replicasets": kcollectors.RegisterReplicaSetCollector,
"replicationcontrollers": kcollectors.RegisterReplicationControllerCollector,
"resourcequotas": kcollectors.RegisterResourceQuotaCollector,
"services": kcollectors.RegisterServiceCollector,
"statefulsets": kcollectors.RegisterStatefulSetCollector,
"persistentvolumes": kcollectors.RegisterPersistentVolumeCollector,
"persistentvolumeclaims": kcollectors.RegisterPersistentVolumeClaimCollector,
"namespaces": kcollectors.RegisterNamespaceCollector,
"horizontalpodautoscalers": kcollectors.RegisterHorizontalPodAutoScalerCollector,
"endpoints": kcollectors.RegisterEndpointCollector,
}
)
@ -129,15 +129,17 @@ func (c *collectorSet) Type() string {
}
type options struct {
inCluster bool
apiserver string
kubeconfig string
help bool
port int
host string
collectors collectorSet
namespace string
version bool
inCluster bool
apiserver string
kubeconfig string
help bool
port int
host string
telemetryPort int
telemetryHost string
collectors collectorSet
namespace string
version bool
}
func main() {
@ -154,6 +156,8 @@ func main() {
flags.BoolVarP(&options.help, "help", "h", false, "Print help text")
flags.IntVar(&options.port, "port", 80, `Port to expose metrics on.`)
flags.StringVar(&options.host, "host", "0.0.0.0", `Host to expose metrics on.`)
flags.IntVar(&options.telemetryPort, "telemetry-port", 81, `Port to expose kube-state-metrics self metrics on.`)
flags.StringVar(&options.telemetryHost, "telemetry-host", "0.0.0.0", `Host to expose kube-state-metrics self metrics on.`)
flags.Var(&options.collectors, "collectors", fmt.Sprintf("Comma-separated list of collectors to be enabled. Defaults to %q", &defaultCollectors))
flags.StringVar(&options.namespace, "namespace", metav1.NamespaceAll, "namespace to be enabled for collecting resources")
flags.BoolVarP(&options.version, "version", "", false, "kube-state-metrics build version information")
@ -206,6 +210,13 @@ func main() {
glog.Fatalf("Failed to create client: %v", err)
}
ksmMetricsRegistry := prometheus.NewRegistry()
ksmMetricsRegistry.Register(kcollectors.ResourcesPerScrapeMetric)
ksmMetricsRegistry.Register(kcollectors.ScrapeErrorTotalMetric)
ksmMetricsRegistry.Register(prometheus.NewProcessCollector(os.Getpid(), ""))
ksmMetricsRegistry.Register(prometheus.NewGoCollector())
go telemetryServer(ksmMetricsRegistry, options.telemetryHost, options.telemetryPort)
registry := prometheus.NewRegistry()
registerCollectors(registry, kubeClient, collectors, options.namespace)
metricsServer(registry, options.host, options.port)
@ -273,6 +284,31 @@ func createKubeClient(inCluster bool, apiserver string, kubeconfig string) (kube
return kubeClient, nil
}
func telemetryServer(registry prometheus.Gatherer, host string, port int) {
// Address to listen on for web interface and telemetry
listenAddress := net.JoinHostPort(host, strconv.Itoa(port))
glog.Infof("Starting kube-state-metrics self metrics server: %s", listenAddress)
mux := http.NewServeMux()
// Add metricsPath
mux.Handle(metricsPath, promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))
// Add index
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(`<html>
<head><title>Kube-State-Metrics Metrics Server</title></head>
<body>
<h1>Kube-State-Metrics Metrics</h1>
<ul>
<li><a href='` + metricsPath + `'>metrics</a></li>
</ul>
</body>
</html>`))
})
log.Fatal(http.ListenAndServe(listenAddress, mux))
}
func metricsServer(registry prometheus.Gatherer, host string, port int) {
// Address to listen on for web interface and telemetry
listenAddress := net.JoinHostPort(host, strconv.Itoa(port))