mirror of https://github.com/fluxcd/flagger.git
				
				
				
			
		
			
				
	
	
		
			379 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			379 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Go
		
	
	
	
| /*
 | |
| Copyright 2020 The Flux authors
 | |
| 
 | |
| Licensed under the Apache License, Version 2.0 (the "License");
 | |
| you may not use this file except in compliance with the License.
 | |
| You may obtain a copy of the License at
 | |
| 
 | |
|     http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
| Unless required by applicable law or agreed to in writing, software
 | |
| distributed under the License is distributed on an "AS IS" BASIS,
 | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| See the License for the specific language governing permissions and
 | |
| limitations under the License.
 | |
| */
 | |
| 
 | |
| package controller
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| 
 | |
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 | |
| 
 | |
| 	flaggerv1 "github.com/fluxcd/flagger/pkg/apis/flagger/v1beta1"
 | |
| 	"github.com/fluxcd/flagger/pkg/metrics/observers"
 | |
| 	"github.com/fluxcd/flagger/pkg/metrics/providers"
 | |
| 	serving "knative.dev/serving/pkg/apis/serving/v1"
 | |
| )
 | |
| 
 | |
const (
	// MetricsProviderServiceSuffix is appended to the metrics provider name
	// when the canary target is a Kubernetes Service that is not a Knative
	// service (see runBuiltinMetricChecks).
	MetricsProviderServiceSuffix = ":service"
)
 | |
| 
 | |
| // to be called during canary initialization
 | |
| func (c *Controller) checkMetricProviderAvailability(canary *flaggerv1.Canary) error {
 | |
| 	for _, metric := range canary.GetAnalysis().Metrics {
 | |
| 		if metric.Name == "request-success-rate" || metric.Name == "request-duration" {
 | |
| 			observerFactory := c.observerFactory
 | |
| 			if canary.Spec.MetricsServer != "" {
 | |
| 				var err error
 | |
| 				observerFactory, err = observers.NewFactory(canary.Spec.MetricsServer)
 | |
| 				if err != nil {
 | |
| 					return fmt.Errorf("error building Prometheus client for %s %v", canary.Spec.MetricsServer, err)
 | |
| 				}
 | |
| 			}
 | |
| 			if ok, err := observerFactory.Client.IsOnline(); !ok || err != nil {
 | |
| 				return fmt.Errorf("prometheus not avaiable: %v", err)
 | |
| 			}
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		if metric.TemplateRef != nil {
 | |
| 			namespace := canary.Namespace
 | |
| 			if metric.TemplateRef.Namespace != canary.Namespace && metric.TemplateRef.Namespace != "" {
 | |
| 				namespace = metric.TemplateRef.Namespace
 | |
| 			}
 | |
| 
 | |
| 			template, err := c.flaggerInformers.MetricInformer.Lister().MetricTemplates(namespace).Get(metric.TemplateRef.Name)
 | |
| 			if err != nil {
 | |
| 				return fmt.Errorf("metric template %s.%s error: %v", metric.TemplateRef.Name, namespace, err)
 | |
| 			}
 | |
| 
 | |
| 			var credentials map[string][]byte
 | |
| 			if template.Spec.Provider.SecretRef != nil {
 | |
| 				secret, err := c.kubeClient.CoreV1().Secrets(namespace).Get(context.TODO(), template.Spec.Provider.SecretRef.Name, metav1.GetOptions{})
 | |
| 				if err != nil {
 | |
| 					return fmt.Errorf("metric template %s.%s secret %s error: %v",
 | |
| 						metric.TemplateRef.Name, namespace, template.Spec.Provider.SecretRef.Name, err)
 | |
| 				}
 | |
| 				credentials = secret.Data
 | |
| 			}
 | |
| 
 | |
| 			factory := providers.Factory{}
 | |
| 			provider, err := factory.Provider(metric.Interval, template.Spec.Provider, credentials, c.kubeConfig)
 | |
| 			if err != nil {
 | |
| 				return fmt.Errorf("metric template %s.%s provider %s error: %v",
 | |
| 					metric.TemplateRef.Name, namespace, template.Spec.Provider.Type, err)
 | |
| 			}
 | |
| 
 | |
| 			if ok, err := provider.IsOnline(); !ok || err != nil {
 | |
| 				return fmt.Errorf("%v in metric template %s.%s not avaiable: %v", template.Spec.Provider.Type,
 | |
| 					template.Name, template.Namespace, err)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	c.recordEventInfof(canary, "all the metrics providers are available!")
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (c *Controller) runBuiltinMetricChecks(canary *flaggerv1.Canary) bool {
 | |
| 	// override the global provider if one is specified in the canary spec
 | |
| 	var metricsProvider string
 | |
| 	// set the metrics provider to Crossover Prometheus when Crossover is the mesh provider
 | |
| 	// For example, `crossover` metrics provider should be used for `smi:crossover` mesh provider
 | |
| 	if strings.Contains(c.meshProvider, "crossover") {
 | |
| 		metricsProvider = "crossover"
 | |
| 	} else {
 | |
| 		metricsProvider = c.meshProvider
 | |
| 	}
 | |
| 
 | |
| 	if canary.Spec.Provider != "" {
 | |
| 		metricsProvider = canary.Spec.Provider
 | |
| 
 | |
| 		// set the metrics provider to Linkerd Prometheus when Linkerd is the default mesh provider
 | |
| 		if strings.Contains(c.meshProvider, "linkerd") {
 | |
| 			metricsProvider = "linkerd"
 | |
| 		}
 | |
| 	}
 | |
| 	// set the metrics provider to query Prometheus for the canary Kubernetes service if the canary target is Service
 | |
| 	if canary.Spec.TargetRef.Kind == "Service" && !canary.Spec.TargetRef.IsKnativeService() {
 | |
| 		metricsProvider = metricsProvider + MetricsProviderServiceSuffix
 | |
| 	}
 | |
| 
 | |
| 	var knativeService *serving.Service
 | |
| 	if canary.Spec.Provider == flaggerv1.KnativeProvider || c.meshProvider == flaggerv1.KnativeProvider {
 | |
| 		var err error
 | |
| 		knativeService, err = c.knativeClient.ServingV1().Services(canary.Namespace).Get(context.TODO(), canary.Spec.TargetRef.Name, metav1.GetOptions{})
 | |
| 		if err != nil {
 | |
| 			c.recordEventErrorf(canary, "Error fetching Knative service %s/%s %v", canary.Namespace, canary.Spec.TargetRef.Name, err)
 | |
| 			return false
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// create observer based on the mesh provider
 | |
| 	observerFactory := c.observerFactory
 | |
| 
 | |
| 	// override the global metrics server if one is specified in the canary spec
 | |
| 	if canary.Spec.MetricsServer != "" {
 | |
| 		var err error
 | |
| 		observerFactory, err = observers.NewFactory(canary.Spec.MetricsServer)
 | |
| 		if err != nil {
 | |
| 			c.recordEventErrorf(canary, "Error building Prometheus client for %s %v", canary.Spec.MetricsServer, err)
 | |
| 			return false
 | |
| 		}
 | |
| 	}
 | |
| 	observer := observerFactory.Observer(metricsProvider)
 | |
| 
 | |
| 	// run metrics checks
 | |
| 	for _, metric := range canary.GetAnalysis().Metrics {
 | |
| 		if metric.Interval == "" {
 | |
| 			metric.Interval = canary.GetMetricInterval()
 | |
| 		}
 | |
| 
 | |
| 		if metric.Name == "request-success-rate" {
 | |
| 			model := toMetricModel(canary, metric.Interval, metric.TemplateVariables)
 | |
| 			if knativeService != nil {
 | |
| 				model.Route = knativeService.Status.LatestCreatedRevisionName
 | |
| 			}
 | |
| 			val, err := observer.GetRequestSuccessRate(model)
 | |
| 			if err != nil {
 | |
| 				if errors.Is(err, providers.ErrNoValuesFound) {
 | |
| 					c.recordEventWarningf(canary,
 | |
| 						"Halt advancement no values found for %s metric %s probably %s.%s is not receiving traffic: %v",
 | |
| 						metricsProvider, metric.Name, canary.Spec.TargetRef.Name, canary.Namespace, err)
 | |
| 				} else {
 | |
| 					c.recordEventErrorf(canary, "Prometheus query failed: %v", err)
 | |
| 				}
 | |
| 				return false
 | |
| 			}
 | |
| 			c.recorder.SetAnalysis(canary, metric.Name, val)
 | |
| 			if metric.ThresholdRange != nil {
 | |
| 				tr := *metric.ThresholdRange
 | |
| 				if tr.Min != nil && val < *tr.Min {
 | |
| 					c.recordEventWarningf(canary, "Halt %s.%s advancement success rate %.2f%% < %v%%",
 | |
| 						canary.Name, canary.Namespace, val, *tr.Min)
 | |
| 					return false
 | |
| 				}
 | |
| 				if tr.Max != nil && val > *tr.Max {
 | |
| 					c.recordEventWarningf(canary, "Halt %s.%s advancement success rate %.2f%% > %v%%",
 | |
| 						canary.Name, canary.Namespace, val, *tr.Max)
 | |
| 					return false
 | |
| 				}
 | |
| 			} else if metric.Threshold > val {
 | |
| 				c.recordEventWarningf(canary, "Halt %s.%s advancement success rate %.2f%% < %v%%",
 | |
| 					canary.Name, canary.Namespace, val, metric.Threshold)
 | |
| 				return false
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if metric.Name == "request-duration" {
 | |
| 			model := toMetricModel(canary, metric.Interval, metric.TemplateVariables)
 | |
| 			if knativeService != nil {
 | |
| 				model.Route = knativeService.Status.LatestCreatedRevisionName
 | |
| 			}
 | |
| 			val, err := observer.GetRequestDuration(model)
 | |
| 			if err != nil {
 | |
| 				if errors.Is(err, providers.ErrNoValuesFound) {
 | |
| 					c.recordEventWarningf(canary, "Halt advancement no values found for %s metric %s probably %s.%s is not receiving traffic",
 | |
| 						metricsProvider, metric.Name, canary.Spec.TargetRef.Name, canary.Namespace)
 | |
| 				} else {
 | |
| 					c.recordEventErrorf(canary, "Prometheus query failed: %v", err)
 | |
| 				}
 | |
| 				return false
 | |
| 			}
 | |
| 			c.recorder.SetAnalysis(canary, metric.Name, val.Seconds())
 | |
| 			if metric.ThresholdRange != nil {
 | |
| 				tr := *metric.ThresholdRange
 | |
| 				if tr.Min != nil && val < time.Duration(*tr.Min)*time.Millisecond {
 | |
| 					c.recordEventWarningf(canary, "Halt %s.%s advancement request duration %v < %v",
 | |
| 						canary.Name, canary.Namespace, val, time.Duration(*tr.Min)*time.Millisecond)
 | |
| 					return false
 | |
| 				}
 | |
| 				if tr.Max != nil && val > time.Duration(*tr.Max)*time.Millisecond {
 | |
| 					c.recordEventWarningf(canary, "Halt %s.%s advancement request duration %v > %v",
 | |
| 						canary.Name, canary.Namespace, val, time.Duration(*tr.Max)*time.Millisecond)
 | |
| 					return false
 | |
| 				}
 | |
| 			} else if val > time.Duration(metric.Threshold)*time.Millisecond {
 | |
| 				c.recordEventWarningf(canary, "Halt %s.%s advancement request duration %v > %v",
 | |
| 					canary.Name, canary.Namespace, val, time.Duration(metric.Threshold)*time.Millisecond)
 | |
| 				return false
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		// in-line PromQL
 | |
| 		if metric.Query != "" {
 | |
| 			model := toMetricModel(canary, metric.Interval, metric.TemplateVariables)
 | |
| 			if knativeService != nil {
 | |
| 				model.Route = knativeService.Status.LatestCreatedRevisionName
 | |
| 			}
 | |
| 			query, err := observers.RenderQuery(metric.Query, model)
 | |
| 			val, err := observerFactory.Client.RunQuery(query)
 | |
| 			if err != nil {
 | |
| 				if errors.Is(err, providers.ErrNoValuesFound) {
 | |
| 					c.recordEventWarningf(canary, "Halt advancement no values found for metric: %s",
 | |
| 						metric.Name)
 | |
| 				} else {
 | |
| 					c.recordEventErrorf(canary, "Prometheus query failed for %s: %v", metric.Name, err)
 | |
| 				}
 | |
| 				return false
 | |
| 			}
 | |
| 			c.recorder.SetAnalysis(canary, metric.Name, val)
 | |
| 			if metric.ThresholdRange != nil {
 | |
| 				tr := *metric.ThresholdRange
 | |
| 				if tr.Min != nil && val < *tr.Min {
 | |
| 					c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f < %v",
 | |
| 						canary.Name, canary.Namespace, metric.Name, val, *tr.Min)
 | |
| 					return false
 | |
| 				}
 | |
| 				if tr.Max != nil && val > *tr.Max {
 | |
| 					c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f > %v",
 | |
| 						canary.Name, canary.Namespace, metric.Name, val, *tr.Max)
 | |
| 					return false
 | |
| 				}
 | |
| 			} else if val > metric.Threshold {
 | |
| 				c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f > %v",
 | |
| 					canary.Name, canary.Namespace, metric.Name, val, metric.Threshold)
 | |
| 				return false
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| func (c *Controller) runMetricChecks(canary *flaggerv1.Canary) bool {
 | |
| 	var knativeService *serving.Service
 | |
| 	if canary.Spec.Provider == flaggerv1.KnativeProvider || c.meshProvider == flaggerv1.KnativeProvider {
 | |
| 		var err error
 | |
| 		knativeService, err = c.knativeClient.ServingV1().Services(canary.Namespace).Get(context.TODO(), canary.Spec.TargetRef.Name, metav1.GetOptions{})
 | |
| 		if err != nil {
 | |
| 			c.recordEventErrorf(canary, "Error fetching Knative service %s/%s %v", canary.Namespace, canary.Spec.TargetRef.Name, err)
 | |
| 			return false
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	for _, metric := range canary.GetAnalysis().Metrics {
 | |
| 		if metric.TemplateRef != nil {
 | |
| 			namespace := canary.Namespace
 | |
| 			if metric.TemplateRef.Namespace != canary.Namespace && metric.TemplateRef.Namespace != "" {
 | |
| 				namespace = metric.TemplateRef.Namespace
 | |
| 			}
 | |
| 
 | |
| 			template, err := c.flaggerInformers.MetricInformer.Lister().MetricTemplates(namespace).Get(metric.TemplateRef.Name)
 | |
| 			if err != nil {
 | |
| 				c.recordEventErrorf(canary, "Metric template %s.%s error: %v", metric.TemplateRef.Name, namespace, err)
 | |
| 				return false
 | |
| 			}
 | |
| 
 | |
| 			var credentials map[string][]byte
 | |
| 			if template.Spec.Provider.SecretRef != nil {
 | |
| 				secret, err := c.kubeClient.CoreV1().Secrets(namespace).Get(context.TODO(), template.Spec.Provider.SecretRef.Name, metav1.GetOptions{})
 | |
| 				if err != nil {
 | |
| 					c.recordEventErrorf(canary, "Metric template %s.%s secret %s error: %v",
 | |
| 						metric.TemplateRef.Name, namespace, template.Spec.Provider.SecretRef.Name, err)
 | |
| 					return false
 | |
| 				}
 | |
| 				credentials = secret.Data
 | |
| 			}
 | |
| 
 | |
| 			factory := providers.Factory{}
 | |
| 			provider, err := factory.Provider(metric.Interval, template.Spec.Provider, credentials, c.kubeConfig)
 | |
| 			if err != nil {
 | |
| 				c.recordEventErrorf(canary, "Metric template %s.%s provider %s error: %v",
 | |
| 					metric.TemplateRef.Name, namespace, template.Spec.Provider.Type, err)
 | |
| 				return false
 | |
| 			}
 | |
| 
 | |
| 			model := toMetricModel(canary, metric.Interval, metric.TemplateVariables)
 | |
| 			if knativeService != nil {
 | |
| 				model.Route = knativeService.Status.LatestCreatedRevisionName
 | |
| 			}
 | |
| 			query, err := observers.RenderQuery(template.Spec.Query, model)
 | |
| 			c.logger.With("canary", fmt.Sprintf("%s.%s", canary.Name, namespace)).
 | |
| 				Debugf("Metric template %s.%s query: %s", metric.TemplateRef.Name, namespace, query)
 | |
| 			if err != nil {
 | |
| 				c.recordEventErrorf(canary, "Metric template %s.%s query render error: %v",
 | |
| 					metric.TemplateRef.Name, namespace, err)
 | |
| 				return false
 | |
| 			}
 | |
| 
 | |
| 			val, err := provider.RunQuery(query)
 | |
| 			if err != nil {
 | |
| 				if errors.Is(err, providers.ErrNoValuesFound) {
 | |
| 					c.recordEventWarningf(canary, "Halt advancement no values found for custom metric: %s: %v",
 | |
| 						metric.Name, err)
 | |
| 				} else {
 | |
| 					c.recordEventErrorf(canary, "Metric query failed for %s: %v", metric.Name, err)
 | |
| 				}
 | |
| 				return false
 | |
| 			}
 | |
| 
 | |
| 			c.recorder.SetAnalysis(canary, metric.Name, val)
 | |
| 
 | |
| 			if metric.ThresholdRange != nil {
 | |
| 				tr := *metric.ThresholdRange
 | |
| 				if tr.Min != nil && val < *tr.Min {
 | |
| 					c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f < %v",
 | |
| 						canary.Name, canary.Namespace, metric.Name, val, *tr.Min)
 | |
| 					return false
 | |
| 				}
 | |
| 				if tr.Max != nil && val > *tr.Max {
 | |
| 					c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f > %v",
 | |
| 						canary.Name, canary.Namespace, metric.Name, val, *tr.Max)
 | |
| 					return false
 | |
| 				}
 | |
| 			} else if val > metric.Threshold {
 | |
| 				c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f > %v",
 | |
| 					canary.Name, canary.Namespace, metric.Name, val, metric.Threshold)
 | |
| 				return false
 | |
| 			}
 | |
| 		} else if metric.Name != "request-success-rate" && metric.Name != "request-duration" && metric.Query == "" {
 | |
| 			c.recordEventErrorf(canary, "Metric query failed for no usable metrics template and query were configured")
 | |
| 			return false
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| func toMetricModel(r *flaggerv1.Canary, interval string, variables map[string]string) flaggerv1.MetricTemplateModel {
 | |
| 	service := r.Spec.TargetRef.Name
 | |
| 	if r.Spec.Service.Name != "" {
 | |
| 		service = r.Spec.Service.Name
 | |
| 	}
 | |
| 	ingress := r.Spec.TargetRef.Name
 | |
| 	if r.Spec.IngressRef != nil {
 | |
| 		ingress = r.Spec.IngressRef.Name
 | |
| 	}
 | |
| 	route := r.Spec.TargetRef.Name
 | |
| 	if r.Spec.RouteRef != nil {
 | |
| 		route = r.Spec.RouteRef.Name
 | |
| 	}
 | |
| 	return flaggerv1.MetricTemplateModel{
 | |
| 		Name:      r.Name,
 | |
| 		Namespace: r.Namespace,
 | |
| 		Target:    r.Spec.TargetRef.Name,
 | |
| 		Service:   service,
 | |
| 		Ingress:   ingress,
 | |
| 		Route:     route,
 | |
| 		Interval:  interval,
 | |
| 		Variables: variables,
 | |
| 	}
 | |
| }
 |