enhancement: add `livez` endpoint
Add a `livez` endpoint to identify network outages. This helps in restarting the binary if such a case is observed. Signed-off-by: Pranshu Srivastava <rexagod@gmail.com> Signed-off-by: Pranshu Srivastava <rexagod@gmail.com>
This commit is contained in:
parent
086af0cbaf
commit
eb80c09755
|
|
@ -342,6 +342,14 @@ Note that your GCP identity is case sensitive but `gcloud info` as of Google Clo
|
|||
|
||||
After running the above, if you see `Clusterrolebinding "cluster-admin-binding" created`, then you are able to continue with the setup of this service.
|
||||
|
||||
#### Healthcheck Endpoints
|
||||
|
||||
The following healthcheck endpoints are available, some of which are used to determine the result of the aforementioned probes:
|
||||
|
||||
* `/livez`: Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend using this as a liveness probe.
|
||||
* `/metrics`: Returns a 200 status code if the application is able to serve metrics. While this is available for both ports, we recommend using the telemetry metrics endpoint as a readiness probe.
|
||||
* `/healthz`: Returns a 200 status code if the application is running. We recommend using this as a startup probe.
|
||||
|
||||
#### Limited privileges environment
|
||||
|
||||
If you want to run kube-state-metrics in an environment where you don't have cluster-reader role, you can:
|
||||
|
|
|
|||
|
|
@ -343,6 +343,14 @@ Note that your GCP identity is case sensitive but `gcloud info` as of Google Clo
|
|||
|
||||
After running the above, if you see `Clusterrolebinding "cluster-admin-binding" created`, then you are able to continue with the setup of this service.
|
||||
|
||||
#### Healthcheck Endpoints
|
||||
|
||||
The following healthcheck endpoints are available, some of which are used to determine the result of the aforementioned probes:
|
||||
|
||||
* `/livez`: Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend using this as a liveness probe.
|
||||
* `/metrics`: Returns a 200 status code if the application is able to serve metrics. While this is available for both ports, we recommend using the telemetry metrics endpoint as a readiness probe.
|
||||
* `/healthz`: Returns a 200 status code if the application is running. We recommend using this as a startup probe.
|
||||
|
||||
#### Limited privileges environment
|
||||
|
||||
If you want to run kube-state-metrics in an environment where you don't have cluster-reader role, you can:
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ spec:
|
|||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
path: /livez
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ spec:
|
|||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
path: /livez
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ spec:
|
|||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
path: /livez
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ spec:
|
|||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
path: /livez
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ spec:
|
|||
- image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.12.0
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
path: /livez
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@
|
|||
},
|
||||
livenessProbe: { timeoutSeconds: 5, initialDelaySeconds: 5, httpGet: {
|
||||
port: 8080,
|
||||
path: '/healthz',
|
||||
path: '/livez',
|
||||
} },
|
||||
readinessProbe: { timeoutSeconds: 5, initialDelaySeconds: 5, httpGet: {
|
||||
port: 8081,
|
||||
|
|
|
|||
|
|
@ -30,6 +30,12 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
_ "k8s.io/client-go/plugin/pkg/client/auth" // Initialize common client auth plugins.
|
||||
"k8s.io/client-go/tools/clientcmd"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"github.com/oklog/run"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
|
|
@ -38,10 +44,6 @@ import (
|
|||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/prometheus/common/version"
|
||||
"github.com/prometheus/exporter-toolkit/web"
|
||||
"gopkg.in/yaml.v3"
|
||||
_ "k8s.io/client-go/plugin/pkg/client/auth" // Initialize common client auth plugins.
|
||||
"k8s.io/client-go/tools/clientcmd"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/kube-state-metrics/v2/internal/discovery"
|
||||
"k8s.io/kube-state-metrics/v2/internal/store"
|
||||
|
|
@ -59,6 +61,7 @@ import (
|
|||
const (
|
||||
metricsPath = "/metrics"
|
||||
healthzPath = "/healthz"
|
||||
livezPath = "/livez"
|
||||
)
|
||||
|
||||
// promLogger implements promhttp.Logger
|
||||
|
|
@ -321,7 +324,7 @@ func RunKubeStateMetrics(ctx context.Context, opts *options.Options) error {
|
|||
WebConfigFile: &tlsConfig,
|
||||
}
|
||||
|
||||
metricsMux := buildMetricsServer(m, durationVec)
|
||||
metricsMux := buildMetricsServer(m, durationVec, kubeClient)
|
||||
metricsServerListenAddress := net.JoinHostPort(opts.Host, strconv.Itoa(opts.Port))
|
||||
metricsServer := http.Server{
|
||||
Handler: metricsMux,
|
||||
|
|
@ -393,7 +396,7 @@ func buildTelemetryServer(registry prometheus.Gatherer) *http.ServeMux {
|
|||
return mux
|
||||
}
|
||||
|
||||
func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prometheus.ObserverVec) *http.ServeMux {
|
||||
func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prometheus.ObserverVec, client kubernetes.Interface) *http.ServeMux {
|
||||
mux := http.NewServeMux()
|
||||
|
||||
// TODO: This doesn't belong into serveMetrics
|
||||
|
|
@ -403,7 +406,23 @@ func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prome
|
|||
mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
|
||||
mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
|
||||
|
||||
// Add metricsPath
|
||||
mux.Handle(metricsPath, promhttp.InstrumentHandlerDuration(durationObserver, m))
|
||||
|
||||
// Add livezPath
|
||||
mux.Handle(livezPath, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
|
||||
// Query the Kube API to make sure we are not affected by a network outage.
|
||||
got := client.CoreV1().RESTClient().Get().AbsPath("/livez").Do(context.Background())
|
||||
if got.Error() != nil {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
w.Write([]byte(http.StatusText(http.StatusServiceUnavailable)))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(http.StatusText(http.StatusOK)))
|
||||
}))
|
||||
|
||||
// Add healthzPath
|
||||
mux.HandleFunc(healthzPath, func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
|
|
@ -424,6 +443,10 @@ func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prome
|
|||
Address: healthzPath,
|
||||
Text: "Healthz",
|
||||
},
|
||||
{
|
||||
Address: livezPath,
|
||||
Text: "Livez",
|
||||
},
|
||||
},
|
||||
}
|
||||
landingPage, err := web.NewLandingPage(landingConfig)
|
||||
|
|
|
|||
Loading…
Reference in New Issue