Merge pull request #1260 from kgolab/kg-vpa-liveness

Add simple health-check to all VPA components.
This commit is contained in:
k8s-ci-robot 2018-09-19 09:28:13 -07:00 committed by GitHub
commit 1e24f65015
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 91 additions and 5 deletions

View File

@ -19,6 +19,7 @@ package main
import (
"flag"
"net/http"
"time"
"github.com/golang/glog"
kube_flag "k8s.io/apiserver/pkg/util/flag"
@ -50,7 +51,8 @@ func main() {
kube_flag.InitFlags()
glog.V(1).Infof("Vertical Pod Autoscaler %s Admission Controller", common.VerticalPodAutoscalerVersion)
metrics.Initialize(*address)
healthCheck := metrics.NewHealthCheck(time.Minute, false)
metrics.Initialize(*address, healthCheck)
metrics_admission.Register()
certs := initCerts(*certsDir)
@ -59,6 +61,7 @@ func main() {
as := logic.NewAdmissionServer(logic.NewRecommendationProvider(vpaLister, vpa_api_util.NewCappingRecommendationProcessor()), logic.NewDefaultPodPreProcessor())
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
as.Serve(w, r)
healthCheck.UpdateLastActivity()
})
clientset := getClient()
server := &http.Server{

View File

@ -46,7 +46,8 @@ func main() {
config := createKubeConfig(float32(*kubeApiQps), int(*kubeApiBurst))
metrics.Initialize(*address)
healthCheck := metrics.NewHealthCheck(*metricsFetcherInterval*5, true)
metrics.Initialize(*address, healthCheck)
metrics_recommender.Register()
useCheckpoints := *storage != "prometheus"
@ -62,6 +63,7 @@ func main() {
case <-time.After(*metricsFetcherInterval):
{
recommender.RunOnce()
healthCheck.UpdateLastActivity()
}
}
}

View File

@ -48,7 +48,8 @@ func main() {
kube_flag.InitFlags()
glog.V(1).Infof("Vertical Pod Autoscaler %s Updater", common.VerticalPodAutoscalerVersion)
metrics.Initialize(*address)
healthCheck := metrics.NewHealthCheck(*updaterInterval*5, true)
metrics.Initialize(*address, healthCheck)
metrics_updater.Register()
kubeClient, vpaClient := createKubeClients()
@ -58,6 +59,7 @@ func main() {
case <-time.After(*updaterInterval):
{
updater.RunOnce()
healthCheck.UpdateLastActivity()
}
}
}

View File

@ -0,0 +1,76 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"fmt"
"net/http"
"sync"
"time"
)
// HealthCheck contains information about last activity time of the monitored component
// and whether a too-old last activity should be reported as a failure.
// NOTE: This started as a simplified version of ClusterAutoscaler's HealthCheck.
//
// The zero value is usable (with the timeout check disabled). A HealthCheck
// holds a mutex by value, so it must not be copied after first use; pass it
// around as *HealthCheck.
type HealthCheck struct {
	// activityTimeout is the longest time since lastActivity that is not
	// considered a timeout.
	activityTimeout time.Duration

	// checkTimeout enables the timeout check; when false the check never
	// reports a timeout.
	checkTimeout bool

	// lastActivity is the most recently recorded activity time.
	lastActivity time.Time

	// mutex guards lastActivity. It is stored by value rather than by pointer
	// so that a zero-value HealthCheck does not carry a nil mutex.
	mutex sync.Mutex
}

// NewHealthCheck builds new HealthCheck object with given timeout.
// The last activity time is initialized to the current time.
// checkTimeout controls whether exceeding activityTimeout is reported as a
// timeout at all.
func NewHealthCheck(activityTimeout time.Duration, checkTimeout bool) *HealthCheck {
	return &HealthCheck{
		activityTimeout: activityTimeout,
		checkTimeout:    checkTimeout,
		lastActivity:    time.Now(),
	}
}
// checkLastActivity reports whether the last activity is older than the
// configured timeout, together with the time elapsed since that activity.
// The first result is always false when the timeout check is disabled.
func (hc *HealthCheck) checkLastActivity() (bool, time.Duration) {
	hc.mutex.Lock()
	defer hc.mutex.Unlock()

	current := time.Now()
	deadline := hc.lastActivity.Add(hc.activityTimeout)
	sinceLast := current.Sub(hc.lastActivity)

	// With checking disabled only the elapsed time is meaningful.
	if !hc.checkTimeout {
		return false, sinceLast
	}
	return current.After(deadline), sinceLast
}
// ServeHTTP implements http.Handler to expose the health-check over HTTP.
// It responds with status 500 and an error message when checkLastActivity
// reports a timeout, and with status 200 and the body "OK" otherwise.
func (hc *HealthCheck) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if stale, idle := hc.checkLastActivity(); stale {
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprintf(w, "Error: last activity more than %v ago", idle)
		return
	}
	w.WriteHeader(http.StatusOK)
	w.Write([]byte("OK"))
}
// UpdateLastActivity records the current time as the last activity time.
func (hc *HealthCheck) UpdateLastActivity() {
	hc.mutex.Lock()
	// The timestamp is taken while holding the lock.
	hc.lastActivity = time.Now()
	hc.mutex.Unlock()
}

View File

@ -41,10 +41,13 @@ const (
TopMetricsNamespace = "vpa_"
)
// Initialize sets up Prometheus to expose metrics on the given address
func Initialize(address string) {
// Initialize sets up Prometheus to expose metrics & (optionally) health-check on the given address
func Initialize(address string, healthCheck *HealthCheck) {
go func() {
http.Handle("/metrics", promhttp.Handler())
if healthCheck != nil {
http.Handle("/health-check", healthCheck)
}
err := http.ListenAndServe(address, nil)
glog.Fatalf("Failed to start metrics: %v", err)
}()