Merge pull request #1260 from kgolab/kg-vpa-liveness
Add simple health-check to all VPA components.
This commit is contained in:
commit
1e24f65015
|
|
@ -19,6 +19,7 @@ package main
|
|||
import (
|
||||
"flag"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
kube_flag "k8s.io/apiserver/pkg/util/flag"
|
||||
|
|
@ -50,7 +51,8 @@ func main() {
|
|||
kube_flag.InitFlags()
|
||||
glog.V(1).Infof("Vertical Pod Autoscaler %s Admission Controller", common.VerticalPodAutoscalerVersion)
|
||||
|
||||
metrics.Initialize(*address)
|
||||
healthCheck := metrics.NewHealthCheck(time.Minute, false)
|
||||
metrics.Initialize(*address, healthCheck)
|
||||
metrics_admission.Register()
|
||||
|
||||
certs := initCerts(*certsDir)
|
||||
|
|
@ -59,6 +61,7 @@ func main() {
|
|||
as := logic.NewAdmissionServer(logic.NewRecommendationProvider(vpaLister, vpa_api_util.NewCappingRecommendationProcessor()), logic.NewDefaultPodPreProcessor())
|
||||
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
as.Serve(w, r)
|
||||
healthCheck.UpdateLastActivity()
|
||||
})
|
||||
clientset := getClient()
|
||||
server := &http.Server{
|
||||
|
|
|
|||
|
|
@ -46,7 +46,8 @@ func main() {
|
|||
|
||||
config := createKubeConfig(float32(*kubeApiQps), int(*kubeApiBurst))
|
||||
|
||||
metrics.Initialize(*address)
|
||||
healthCheck := metrics.NewHealthCheck(*metricsFetcherInterval*5, true)
|
||||
metrics.Initialize(*address, healthCheck)
|
||||
metrics_recommender.Register()
|
||||
|
||||
useCheckpoints := *storage != "prometheus"
|
||||
|
|
@ -62,6 +63,7 @@ func main() {
|
|||
case <-time.After(*metricsFetcherInterval):
|
||||
{
|
||||
recommender.RunOnce()
|
||||
healthCheck.UpdateLastActivity()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,7 +48,8 @@ func main() {
|
|||
kube_flag.InitFlags()
|
||||
glog.V(1).Infof("Vertical Pod Autoscaler %s Updater", common.VerticalPodAutoscalerVersion)
|
||||
|
||||
metrics.Initialize(*address)
|
||||
healthCheck := metrics.NewHealthCheck(*updaterInterval*5, true)
|
||||
metrics.Initialize(*address, healthCheck)
|
||||
metrics_updater.Register()
|
||||
|
||||
kubeClient, vpaClient := createKubeClients()
|
||||
|
|
@ -58,6 +59,7 @@ func main() {
|
|||
case <-time.After(*updaterInterval):
|
||||
{
|
||||
updater.RunOnce()
|
||||
healthCheck.UpdateLastActivity()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// HealthCheck records the last activity time of the monitored component and
// exposes it as an http.Handler health-check endpoint.
// NOTE: This started as a simplified version of ClusterAutoscaler's HealthCheck.
//
// The zero value is usable: its timeout check is disabled, so it always
// reports healthy. A HealthCheck embeds a mutex and therefore must not be
// copied after first use; share it through a *HealthCheck.
type HealthCheck struct {
	// activityTimeout is the longest time that may pass since lastActivity
	// before the component is considered timed out.
	activityTimeout time.Duration
	// checkTimeout enables enforcement of activityTimeout. When false the
	// health-check never reports a failure.
	checkTimeout bool
	// lastActivity is the time of the most recent UpdateLastActivity call, or
	// the construction time when built with NewHealthCheck. Guarded by mutex.
	lastActivity time.Time
	// mutex guards lastActivity. It is held by value so that the zero
	// HealthCheck does not carry a nil mutex.
	mutex sync.Mutex
}

// NewHealthCheck builds a new HealthCheck with the given activity timeout.
// The last activity time is initialized to the current time. When checkTimeout
// is false the timeout is never enforced and the check always succeeds.
func NewHealthCheck(activityTimeout time.Duration, checkTimeout bool) *HealthCheck {
	return &HealthCheck{
		activityTimeout: activityTimeout,
		checkTimeout:    checkTimeout,
		lastActivity:    time.Now(),
	}
}

// checkLastActivity reports whether the last activity was too long ago,
// together with the time elapsed since that activity. The first result is
// true only when timeout checking is enabled and the elapsed time exceeds
// activityTimeout.
func (hc *HealthCheck) checkLastActivity() (bool, time.Duration) {
	// Hold the lock only long enough to copy the guarded timestamp.
	hc.mutex.Lock()
	lastActivity := hc.lastActivity
	hc.mutex.Unlock()

	ago := time.Since(lastActivity)
	return hc.checkTimeout && ago > hc.activityTimeout, ago
}

// ServeHTTP implements the http.Handler interface to provide a health-check
// endpoint. It responds with status 500 and an error message when the last
// activity has timed out, and with status 200 and the body "OK" otherwise.
func (hc *HealthCheck) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	timedOut, ago := hc.checkLastActivity()
	if timedOut {
		w.WriteHeader(http.StatusInternalServerError)
		// A failed write means the client went away; nothing useful can be
		// done about it here, so the error is deliberately ignored.
		_, _ = fmt.Fprintf(w, "Error: last activity more than %v ago", ago)
		return
	}

	w.WriteHeader(http.StatusOK)
	// See above: write errors to a health-check client are not actionable.
	_, _ = w.Write([]byte("OK"))
}

// UpdateLastActivity sets the last activity time to the current time.
func (hc *HealthCheck) UpdateLastActivity() {
	hc.mutex.Lock()
	defer hc.mutex.Unlock()

	hc.lastActivity = time.Now()
}
|
||||
|
|
@ -41,10 +41,13 @@ const (
|
|||
TopMetricsNamespace = "vpa_"
|
||||
)
|
||||
|
||||
// Initialize sets up Prometheus to expose metrics on the given address
|
||||
func Initialize(address string) {
|
||||
// Initialize sets up Prometheus to expose metrics & (optionally) health-check on the given address
|
||||
func Initialize(address string, healthCheck *HealthCheck) {
|
||||
go func() {
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
if healthCheck != nil {
|
||||
http.Handle("/health-check", healthCheck)
|
||||
}
|
||||
err := http.ListenAndServe(address, nil)
|
||||
glog.Fatalf("Failed to start metrics: %v", err)
|
||||
}()
|
||||
|
|
|
|||
Loading…
Reference in New Issue