Readiness checks fail until caches are synced (#10166)

Fixes https://github.com/linkerd/linkerd2/issues/10036

The Linkerd control plane components written in Go serve liveness and readiness probe endpoints on their admin server. However, the admin server is not started until the Kubernetes informer caches are synced, which can take a long time on large clusters. This means that liveness checks can time out, causing the controller to be restarted.

We now start the admin server before attempting to sync caches so that we can respond to liveness checks immediately. Readiness probes fail until the caches are synced.

Signed-off-by: Alex Leong <alex@buoyant.io>
This commit is contained in:
Alex Leong 2023-01-25 11:43:09 -08:00 committed by GitHub
parent d33179843e
commit b0778bb2ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 92 additions and 60 deletions

View File

@ -37,6 +37,16 @@ func Main(args []string) {
flags.ConfigureAndParse(cmd, args)
ready := false
adminServer := admin.NewServer(*metricsAddr, *enablePprof, &ready)
go func() {
log.Infof("starting admin server on %s", *metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start destination admin server: %s", err)
}
}()
stop := make(chan os.Signal, 1)
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
@ -129,14 +139,7 @@ func Main(args []string) {
}
}()
adminServer := admin.NewServer(*metricsAddr, *enablePprof)
go func() {
log.Infof("starting admin server on %s", *metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start destination admin server: %s", err)
}
}()
ready = true
<-stop

View File

@ -52,6 +52,16 @@ func Main(args []string) {
flags.ConfigureAndParse(cmd, args)
ready := false
adminServer := admin.NewServer(*adminAddr, *enablePprof, &ready)
go func() {
log.Infof("starting admin server on %s", *adminAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start identity admin server: %s", err)
}
}()
identityTrustAnchorPEM, err := os.ReadFile(k8s.MountPathTrustRootsPEM)
if err != nil {
log.Fatalf("could not read identity trust anchors PEM: %s", err.Error())
@ -174,15 +184,6 @@ func Main(args []string) {
//
// Bind and serve
//
adminServer := admin.NewServer(*adminAddr, *enablePprof)
go func() {
log.Infof("starting admin server on %s", *adminAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start identity admin server: %s", err)
}
}()
lis, err := net.Listen("tcp", *addr)
if err != nil {
//nolint:gocritic
@ -202,6 +203,9 @@ func Main(args []string) {
log.Errorf("failed to start identity gRPC server: %s", err)
}
}()
ready = true
<-stop
log.Infof("shutting down gRPC server on %s", *addr)
srv.GracefulStop()

View File

@ -24,6 +24,16 @@ func Launch(
kubeconfig string,
enablePprof bool,
) {
ready := false
adminServer := admin.NewServer(metricsAddr, enablePprof, &ready)
go func() {
log.Infof("starting admin server on %s", metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start webhook admin server: %s", err)
}
}()
stop := make(chan os.Signal, 1)
defer close(stop)
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
@ -56,14 +66,7 @@ func Launch(
metadataAPI.Sync(nil)
adminServer := admin.NewServer(metricsAddr, enablePprof)
go func() {
log.Infof("starting admin server on %s", metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start webhook admin server: %s", err)
}
}()
ready = true
<-stop
log.Info("shutting down webhook server")

View File

@ -45,6 +45,16 @@ func Main(args []string) {
flags.ConfigureAndParse(cmd, args)
linkName := cmd.Arg(0)
ready := false
adminServer := admin.NewServer(*metricsAddr, *enablePprof, &ready)
go func() {
log.Infof("starting admin server on %s", *metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start service mirror admin server: %s", err)
}
}()
stop := make(chan os.Signal, 1)
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
@ -77,17 +87,10 @@ func Main(args []string) {
metrics := servicemirror.NewProbeMetricVecs()
adminServer := admin.NewServer(*metricsAddr, *enablePprof)
go func() {
log.Infof("starting admin server on %s", *metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start service mirror admin server: %s", err)
}
}()
controllerK8sAPI.Sync(nil)
ready = true
main:
for {
// Start link watch

View File

@ -13,13 +13,15 @@ import (
type handler struct {
promHandler http.Handler
enablePprof bool
ready *bool
}
// NewServer returns an initialized `http.Server`, configured to listen on an address.
func NewServer(addr string, enablePprof bool) *http.Server {
func NewServer(addr string, enablePprof bool, ready *bool) *http.Server {
h := &handler{
promHandler: promhttp.Handler(),
enablePprof: enablePprof,
ready: ready,
}
return &http.Server{
@ -63,5 +65,10 @@ func (h *handler) servePing(w http.ResponseWriter) {
}
func (h *handler) serveReady(w http.ResponseWriter) {
w.Write([]byte("ok\n"))
if *h.ready {
w.Write([]byte("ok\n"))
} else {
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte("not ready\n"))
}
}

View File

@ -34,6 +34,17 @@ func main() {
traceCollector := flags.AddTraceFlags(cmd)
flags.ConfigureAndParse(cmd, os.Args[1:])
ready := false
adminServer := admin.NewServer(*metricsAddr, *enablePprof, &ready)
go func() {
log.Infof("starting admin server on %s", *metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start metrics API admin server: %s", err)
}
}()
ctx := context.Background()
stop := make(chan os.Signal, 1)
@ -93,14 +104,7 @@ func main() {
}
}()
adminServer := admin.NewServer(*metricsAddr, *enablePprof)
go func() {
log.Infof("starting admin server on %s", *metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start metrics API admin server: %s", err)
}
}()
ready = true
<-stop

View File

@ -30,6 +30,17 @@ func Main(args []string) {
traceCollector := flags.AddTraceFlags(cmd)
flags.ConfigureAndParse(cmd, args)
ready := false
adminServer := admin.NewServer(*metricsAddr, *enablePprof, &ready)
go func() {
log.Infof("starting admin server on %s", *metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start tap admin server: %s", err)
}
}()
ctx := context.Background()
stop := make(chan os.Signal, 1)
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
@ -69,14 +80,7 @@ func Main(args []string) {
k8sAPI.Sync(nil)
go apiServer.Start(ctx)
adminServer := admin.NewServer(*metricsAddr, *enablePprof)
go func() {
log.Infof("starting admin server on %s", *metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start tap admin server: %s", err)
}
}()
ready = true
<-stop
log.Infof("shutting down APIServer on %s", *apiServerAddr)

View File

@ -45,6 +45,17 @@ func main() {
traceCollector := flags.AddTraceFlags(cmd)
flags.ConfigureAndParse(cmd, os.Args[1:])
ready := false
adminServer := admin.NewServer(*metricsAddr, *enablePprof, &ready)
go func() {
log.Infof("starting admin server on %s", *metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start web admin server: %s", err)
}
}()
ctx := context.Background()
_, _, err := net.SplitHostPort(*vizAPIAddr) // Verify vizAPIAddr is of the form host:port.
@ -106,14 +117,7 @@ func main() {
}
}()
adminServer := admin.NewServer(*metricsAddr, *enablePprof)
go func() {
log.Infof("starting admin server on %s", *metricsAddr)
if err := adminServer.ListenAndServe(); err != nil {
log.Errorf("failed to start web admin server: %s", err)
}
}()
ready = true
<-stop