enhancement: add `livez` endpoint
Add a `livez` endpoint to identify network outages. This helps in restarting the binary if such a case is observed. Signed-off-by: Pranshu Srivastava <rexagod@gmail.com> Signed-off-by: Pranshu Srivastava <rexagod@gmail.com>
This commit is contained in:
parent
086af0cbaf
commit
eb80c09755
|
|
@ -342,6 +342,14 @@ Note that your GCP identity is case sensitive but `gcloud info` as of Google Clo
|
||||||
|
|
||||||
After running the above, if you see `Clusterrolebinding "cluster-admin-binding" created`, then you are able to continue with the setup of this service.
|
After running the above, if you see `Clusterrolebinding "cluster-admin-binding" created`, then you are able to continue with the setup of this service.
|
||||||
|
|
||||||
|
#### Healthcheck Endpoints
|
||||||
|
|
||||||
|
The following healthcheck endpoints are available, some of which are used to determine the result of the aforementioned probes:
|
||||||
|
|
||||||
|
* `/livez`: Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend using this as a liveness probe.
|
||||||
|
* `/metrics`: Returns a 200 status code if the application is able to serve metrics. While this is available for both ports, we recommend using the telemetry metrics endpoint as a readiness probe.
|
||||||
|
* `/healthz`: Returns a 200 status code if the application is running. We recommend using this as a startup probe.
|
||||||
|
|
||||||
#### Limited privileges environment
|
#### Limited privileges environment
|
||||||
|
|
||||||
If you want to run kube-state-metrics in an environment where you don't have cluster-reader role, you can:
|
If you want to run kube-state-metrics in an environment where you don't have cluster-reader role, you can:
|
||||||
|
|
|
||||||
|
|
@ -343,6 +343,14 @@ Note that your GCP identity is case sensitive but `gcloud info` as of Google Clo
|
||||||
|
|
||||||
After running the above, if you see `Clusterrolebinding "cluster-admin-binding" created`, then you are able to continue with the setup of this service.
|
After running the above, if you see `Clusterrolebinding "cluster-admin-binding" created`, then you are able to continue with the setup of this service.
|
||||||
|
|
||||||
|
#### Healthcheck Endpoints
|
||||||
|
|
||||||
|
The following healthcheck endpoints are available, some of which are used to determine the result of the aforementioned probes:
|
||||||
|
|
||||||
|
* `/livez`: Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend using this as a liveness probe.
|
||||||
|
* `/metrics`: Returns a 200 status code if the application is able to serve metrics. While this is available for both ports, we recommend using the telemetry metrics endpoint as a readiness probe.
|
||||||
|
* `/healthz`: Returns a 200 status code if the application is running. We recommend using this as a startup probe.
|
||||||
|
|
||||||
#### Limited privileges environment
|
#### Limited privileges environment
|
||||||
|
|
||||||
If you want to run kube-state-metrics in an environment where you don't have cluster-reader role, you can:
|
If you want to run kube-state-metrics in an environment where you don't have cluster-reader role, you can:
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ spec:
|
||||||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /healthz
|
path: /livez
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 5
|
initialDelaySeconds: 5
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ spec:
|
||||||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /healthz
|
path: /livez
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 5
|
initialDelaySeconds: 5
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ spec:
|
||||||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /healthz
|
path: /livez
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 5
|
initialDelaySeconds: 5
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ spec:
|
||||||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /healthz
|
path: /livez
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 5
|
initialDelaySeconds: 5
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ spec:
|
||||||
- image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
- image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /healthz
|
path: /livez
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 5
|
initialDelaySeconds: 5
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
|
|
|
||||||
|
|
@ -193,7 +193,7 @@
|
||||||
},
|
},
|
||||||
livenessProbe: { timeoutSeconds: 5, initialDelaySeconds: 5, httpGet: {
|
livenessProbe: { timeoutSeconds: 5, initialDelaySeconds: 5, httpGet: {
|
||||||
port: 8080,
|
port: 8080,
|
||||||
path: '/healthz',
|
path: '/livez',
|
||||||
} },
|
} },
|
||||||
readinessProbe: { timeoutSeconds: 5, initialDelaySeconds: 5, httpGet: {
|
readinessProbe: { timeoutSeconds: 5, initialDelaySeconds: 5, httpGet: {
|
||||||
port: 8081,
|
port: 8081,
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,12 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"gopkg.in/yaml.v3"
|
||||||
|
"k8s.io/client-go/kubernetes"
|
||||||
|
_ "k8s.io/client-go/plugin/pkg/client/auth" // Initialize common client auth plugins.
|
||||||
|
"k8s.io/client-go/tools/clientcmd"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
|
|
||||||
"github.com/oklog/run"
|
"github.com/oklog/run"
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||||
|
|
@ -38,10 +44,6 @@ import (
|
||||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
"github.com/prometheus/common/version"
|
"github.com/prometheus/common/version"
|
||||||
"github.com/prometheus/exporter-toolkit/web"
|
"github.com/prometheus/exporter-toolkit/web"
|
||||||
"gopkg.in/yaml.v3"
|
|
||||||
_ "k8s.io/client-go/plugin/pkg/client/auth" // Initialize common client auth plugins.
|
|
||||||
"k8s.io/client-go/tools/clientcmd"
|
|
||||||
"k8s.io/klog/v2"
|
|
||||||
|
|
||||||
"k8s.io/kube-state-metrics/v2/internal/discovery"
|
"k8s.io/kube-state-metrics/v2/internal/discovery"
|
||||||
"k8s.io/kube-state-metrics/v2/internal/store"
|
"k8s.io/kube-state-metrics/v2/internal/store"
|
||||||
|
|
@ -59,6 +61,7 @@ import (
|
||||||
const (
|
const (
|
||||||
metricsPath = "/metrics"
|
metricsPath = "/metrics"
|
||||||
healthzPath = "/healthz"
|
healthzPath = "/healthz"
|
||||||
|
livezPath = "/livez"
|
||||||
)
|
)
|
||||||
|
|
||||||
// promLogger implements promhttp.Logger
|
// promLogger implements promhttp.Logger
|
||||||
|
|
@ -321,7 +324,7 @@ func RunKubeStateMetrics(ctx context.Context, opts *options.Options) error {
|
||||||
WebConfigFile: &tlsConfig,
|
WebConfigFile: &tlsConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
metricsMux := buildMetricsServer(m, durationVec)
|
metricsMux := buildMetricsServer(m, durationVec, kubeClient)
|
||||||
metricsServerListenAddress := net.JoinHostPort(opts.Host, strconv.Itoa(opts.Port))
|
metricsServerListenAddress := net.JoinHostPort(opts.Host, strconv.Itoa(opts.Port))
|
||||||
metricsServer := http.Server{
|
metricsServer := http.Server{
|
||||||
Handler: metricsMux,
|
Handler: metricsMux,
|
||||||
|
|
@ -393,7 +396,7 @@ func buildTelemetryServer(registry prometheus.Gatherer) *http.ServeMux {
|
||||||
return mux
|
return mux
|
||||||
}
|
}
|
||||||
|
|
||||||
func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prometheus.ObserverVec) *http.ServeMux {
|
func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prometheus.ObserverVec, client kubernetes.Interface) *http.ServeMux {
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
|
|
||||||
// TODO: This doesn't belong into serveMetrics
|
// TODO: This doesn't belong into serveMetrics
|
||||||
|
|
@ -403,7 +406,23 @@ func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prome
|
||||||
mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
|
mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
|
||||||
mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
|
mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
|
||||||
|
|
||||||
|
// Add metricsPath
|
||||||
mux.Handle(metricsPath, promhttp.InstrumentHandlerDuration(durationObserver, m))
|
mux.Handle(metricsPath, promhttp.InstrumentHandlerDuration(durationObserver, m))
|
||||||
|
|
||||||
|
// Add livezPath
|
||||||
|
mux.Handle(livezPath, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
|
||||||
|
// Query the Kube API to make sure we are not affected by a network outage.
|
||||||
|
got := client.CoreV1().RESTClient().Get().AbsPath("/livez").Do(context.Background())
|
||||||
|
if got.Error() != nil {
|
||||||
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
|
w.Write([]byte(http.StatusText(http.StatusServiceUnavailable)))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(http.StatusText(http.StatusOK)))
|
||||||
|
}))
|
||||||
|
|
||||||
// Add healthzPath
|
// Add healthzPath
|
||||||
mux.HandleFunc(healthzPath, func(w http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc(healthzPath, func(w http.ResponseWriter, _ *http.Request) {
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
|
|
@ -424,6 +443,10 @@ func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prome
|
||||||
Address: healthzPath,
|
Address: healthzPath,
|
||||||
Text: "Healthz",
|
Text: "Healthz",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Address: livezPath,
|
||||||
|
Text: "Livez",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
landingPage, err := web.NewLandingPage(landingConfig)
|
landingPage, err := web.NewLandingPage(landingConfig)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue