diff --git a/README.md b/README.md index 8b299292..58dbf31b 100644 --- a/README.md +++ b/README.md @@ -346,11 +346,11 @@ After running the above, if you see `Clusterrolebinding "cluster-admin-binding" #### Healthcheck Endpoints -The following healthcheck endpoints are available, some of which are used to determine the result of the aforementioned probes: +The following healthcheck endpoints are available (`self` refers to the telemetry port, while `main` refers to the exposition port): -* `/healthz`: Returns a 200 status code if the application is running. We recommend to use this for the startup probe. -* `/livez`: Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend to using this for the liveness probe. -* `/readyz`: Returns a 200 status code if the application is ready to accept traffic. We recommend using this for the readiness probe. +* `/healthz` (exposed on `main`): Returns a 200 status code if the application is running. We recommend using this for the startup probe. +* `/livez` (exposed on `main`): Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend using this for the liveness probe. +* `/readyz` (exposed on `self`): Returns a 200 status code if the application is ready to accept traffic. We recommend using this for the readiness probe. Note that it is discouraged to use the telemetry metrics endpoint for any probe when proxying the exposition data. 
diff --git a/README.md.tpl b/README.md.tpl index 67facfae..2c57c999 100644 --- a/README.md.tpl +++ b/README.md.tpl @@ -347,11 +347,11 @@ After running the above, if you see `Clusterrolebinding "cluster-admin-binding" #### Healthcheck Endpoints -The following healthcheck endpoints are available, some of which are used to determine the result of the aforementioned probes: +The following healthcheck endpoints are available (`self` refers to the telemetry port, while `main` refers to the exposition port): -* `/healthz`: Returns a 200 status code if the application is running. We recommend to use this for the startup probe. -* `/livez`: Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend to using this for the liveness probe. -* `/readyz`: Returns a 200 status code if the application is ready to accept traffic. We recommend using this for the readiness probe. +* `/healthz` (exposed on `main`): Returns a 200 status code if the application is running. We recommend using this for the startup probe. +* `/livez` (exposed on `main`): Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend using this for the liveness probe. +* `/readyz` (exposed on `self`): Returns a 200 status code if the application is ready to accept traffic. We recommend using this for the readiness probe. Note that it is discouraged to use the telemetry metrics endpoint for any probe when proxying the exposition data. 
diff --git a/examples/autosharding/statefulset.yaml b/examples/autosharding/statefulset.yaml index 059832e8..a000c2dd 100644 --- a/examples/autosharding/statefulset.yaml +++ b/examples/autosharding/statefulset.yaml @@ -50,7 +50,7 @@ spec: readinessProbe: httpGet: path: /readyz - port: http-metrics + port: telemetry initialDelaySeconds: 5 timeoutSeconds: 5 securityContext: diff --git a/examples/daemonsetsharding/daemonset.yaml b/examples/daemonsetsharding/daemonset.yaml index f5f8c090..6f5d59a6 100644 --- a/examples/daemonsetsharding/daemonset.yaml +++ b/examples/daemonsetsharding/daemonset.yaml @@ -45,7 +45,7 @@ spec: readinessProbe: httpGet: path: /readyz - port: http-metrics + port: telemetry initialDelaySeconds: 5 timeoutSeconds: 5 securityContext: diff --git a/examples/daemonsetsharding/deployment-no-node-pods.yaml b/examples/daemonsetsharding/deployment-no-node-pods.yaml index dc484a61..268f8fb4 100644 --- a/examples/daemonsetsharding/deployment-no-node-pods.yaml +++ b/examples/daemonsetsharding/deployment-no-node-pods.yaml @@ -40,7 +40,7 @@ spec: readinessProbe: httpGet: path: /readyz - port: http-metrics + port: telemetry initialDelaySeconds: 5 timeoutSeconds: 5 securityContext: diff --git a/examples/daemonsetsharding/deployment.yaml b/examples/daemonsetsharding/deployment.yaml index f9ad51f3..d54a215b 100644 --- a/examples/daemonsetsharding/deployment.yaml +++ b/examples/daemonsetsharding/deployment.yaml @@ -39,7 +39,7 @@ spec: readinessProbe: httpGet: path: /readyz - port: http-metrics + port: telemetry initialDelaySeconds: 5 timeoutSeconds: 5 securityContext: diff --git a/examples/standard/deployment.yaml b/examples/standard/deployment.yaml index b34a4337..9130ae74 100644 --- a/examples/standard/deployment.yaml +++ b/examples/standard/deployment.yaml @@ -37,7 +37,7 @@ spec: readinessProbe: httpGet: path: /readyz - port: http-metrics + port: telemetry initialDelaySeconds: 5 timeoutSeconds: 5 securityContext: diff --git 
a/jsonnet/kube-state-metrics/kube-state-metrics.libsonnet b/jsonnet/kube-state-metrics/kube-state-metrics.libsonnet index 6a961126..1b2a157d 100644 --- a/jsonnet/kube-state-metrics/kube-state-metrics.libsonnet +++ b/jsonnet/kube-state-metrics/kube-state-metrics.libsonnet @@ -196,7 +196,7 @@ path: '/livez', } }, readinessProbe: { timeoutSeconds: 5, initialDelaySeconds: 5, httpGet: { - port: "http-metrics", + port: "telemetry", path: '/readyz', } }, }; diff --git a/pkg/app/server.go b/pkg/app/server.go index eee48009..a9f1e7c2 100644 --- a/pkg/app/server.go +++ b/pkg/app/server.go @@ -42,6 +42,7 @@ import ( versionCollector "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/version" "github.com/prometheus/exporter-toolkit/web" @@ -377,6 +378,18 @@ func buildTelemetryServer(registry prometheus.Gatherer) *http.ServeMux { // Add metricsPath mux.Handle(metricsPath, promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorLog: promLogger{}})) + // Add readyzPath + mux.Handle(readyzPath, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + count, err := testutil.GatherAndCount(registry) + if err != nil || count == 0 { + w.WriteHeader(http.StatusServiceUnavailable) + w.Write([]byte(http.StatusText(http.StatusServiceUnavailable))) + return + } + w.WriteHeader(http.StatusOK) + w.Write([]byte(http.StatusText(http.StatusOK))) + })) + // Add index landingConfig := web.LandingConfig{ Name: "kube-state-metrics", @@ -426,9 +439,6 @@ func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prome // Add livezPath mux.Handle(livezPath, handleClusterDelegationForProber(client, livezPath)) - // Add readyzPath - mux.Handle(readyzPath, handleClusterDelegationForProber(client, readyzPath)) - // Add healthzPath mux.Handle(healthzPath, 
handleClusterDelegationForProber(client, healthzPath))