package main

import (
	"flag"
	"fmt"
	"math"
	"math/rand"
	"net/http"
	"os"
	"os/signal"
	"strconv"
	"strings"
	"time"

	prom "github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"github.com/runconduit/conduit/controller/k8s"
	log "github.com/sirupsen/logrus"
	"google.golang.org/grpc/codes"
	"k8s.io/api/apps/v1beta2"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"

	// Load all the auth plugins for the cloud providers.
	_ "k8s.io/client-go/plugin/pkg/client/auth"
)

/* A simple script for exposing simulated prometheus metrics */

type simulatedProxy struct {
	sleep       time.Duration
	deployments []string
	registerer  *prom.Registry
	inbound     *proxyMetricCollectors
	outbound    *proxyMetricCollectors
}

type proxyMetricCollectors struct {
	requestTotals           *prom.CounterVec
	responseTotals          *prom.CounterVec
	responseLatencyMs       *prom.HistogramVec
	tcpAcceptOpenTotal      prom.Counter
	tcpAcceptCloseTotal     *prom.CounterVec
	tcpConnectOpenTotal     prom.Counter
	tcpConnectCloseTotal    *prom.CounterVec
	tcpConnectionsOpen      prom.Gauge
	tcpConnectionDurationMs *prom.HistogramVec
	receivedBytes           *prom.CounterVec
	sentBytes               *prom.CounterVec
}

const (
	successRate = 0.9
)

var (
	grpcResponseCodes = []codes.Code{
		codes.OK,
		codes.PermissionDenied,
		codes.Unavailable,
	}

	httpResponseCodes = []int{
		http.StatusContinue,
		http.StatusSwitchingProtocols,
		http.StatusProcessing,
		http.StatusOK,
		http.StatusCreated,
		http.StatusAccepted,
		http.StatusNonAuthoritativeInfo,
		http.StatusNoContent,
		http.StatusResetContent,
		http.StatusPartialContent,
		http.StatusMultiStatus,
		http.StatusAlreadyReported,
		http.StatusIMUsed,
		http.StatusMultipleChoices,
		http.StatusMovedPermanently,
		http.StatusFound,
		http.StatusSeeOther,
		http.StatusNotModified,
		http.StatusUseProxy,
		http.StatusTemporaryRedirect,
		http.StatusPermanentRedirect,
		http.StatusBadRequest,
		http.StatusUnauthorized,
		http.StatusPaymentRequired,
		http.StatusForbidden,
		http.StatusNotFound,
		http.StatusMethodNotAllowed,
		http.StatusNotAcceptable,
		http.StatusProxyAuthRequired,
		http.StatusRequestTimeout,
		http.StatusConflict,
		http.StatusGone,
		http.StatusLengthRequired,
		http.StatusPreconditionFailed,
		http.StatusRequestEntityTooLarge,
		http.StatusRequestURITooLong,
		http.StatusUnsupportedMediaType,
		http.StatusRequestedRangeNotSatisfiable,
		http.StatusExpectationFailed,
		http.StatusTeapot,
		http.StatusUnprocessableEntity,
		http.StatusLocked,
		http.StatusFailedDependency,
		http.StatusUpgradeRequired,
		http.StatusPreconditionRequired,
		http.StatusTooManyRequests,
		http.StatusRequestHeaderFieldsTooLarge,
		http.StatusUnavailableForLegalReasons,
		http.StatusInternalServerError,
		http.StatusNotImplemented,
		http.StatusBadGateway,
		http.StatusServiceUnavailable,
		http.StatusGatewayTimeout,
		http.StatusHTTPVersionNotSupported,
		http.StatusVariantAlsoNegotiates,
		http.StatusInsufficientStorage,
		http.StatusLoopDetected,
		http.StatusNotExtended,
		http.StatusNetworkAuthenticationRequired,
	}
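	// Whenever a simulated response falls outside successRate, one of the
	// codes above is sampled uniformly at random (see randomGrpcResponseCode
	// and randomHttpResponseCode below).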

	// latencyBucketBounds holds the maximum value (inclusive, in tenths of a
	// millisecond) that may be counted in a given histogram bucket.
	// These values are one order of magnitude greater than the controller's
	// Prometheus buckets, because the proxy reports latencies in tenths of a
	// millisecond rather than whole milliseconds.
	latencyBucketBounds = []float64{
		// prometheus.LinearBuckets(1, 1, 5),
		10, 20, 30, 40, 50,
		// prometheus.LinearBuckets(10, 10, 5),
		100, 200, 300, 400, 500,
		// prometheus.LinearBuckets(100, 100, 5),
		1000, 2000, 3000, 4000, 5000,
		// prometheus.LinearBuckets(1000, 1000, 5),
		10000, 20000, 30000, 40000, 50000,
		// prometheus.LinearBuckets(10000, 10000, 5),
		100000, 200000, 300000, 400000, 500000,
		// Prometheus implicitly creates a max bucket for everything that
		// falls outside of the highest-valued bucket, but we need to
		// create it explicitly.
		math.MaxUint32,
	}
)

// generateProxyTraffic randomly creates metrics under the guise of a single
// conduit proxy routing traffic. Metrics are generated for each
// proxyMetricCollector.
func (s *simulatedProxy) generateProxyTraffic() {
	for {
		for _, deployment := range s.deployments {

			//
			// inbound
			//
			inboundRandomCount := int(rand.Float64() * 10)

			// inbound requests
			s.inbound.requestTotals.With(prom.Labels{}).Add(float64(inboundRandomCount))

			// inbound responses
			inboundResponseLabels := randomResponseLabels()
			s.inbound.responseTotals.With(inboundResponseLabels).Add(float64(inboundRandomCount))
			for _, latency := range randomLatencies(inboundRandomCount) {
				s.inbound.responseLatencyMs.With(inboundResponseLabels).Observe(latency)
			}

			s.inbound.generateTCPStats(inboundRandomCount)

			//
			// outbound
			//
			outboundRandomCount := int(rand.Float64() * 10)

			// split the deployment name into ["namespace", "deployment"]
			dstList := strings.Split(deployment, "/")
			outboundLabels := prom.Labels{"dst_namespace": dstList[0], "dst_deployment": dstList[1]}

			// outbound requests
			s.outbound.requestTotals.With(outboundLabels).Add(float64(outboundRandomCount))

			// outbound responses
			outboundResponseLabels := outboundLabels
			for k, v := range randomResponseLabels() {
				outboundResponseLabels[k] = v
			}
			s.outbound.responseTotals.With(outboundResponseLabels).Add(float64(outboundRandomCount))
			for _, latency := range randomLatencies(outboundRandomCount) {
				s.outbound.responseLatencyMs.With(outboundResponseLabels).Observe(latency)
			}

			s.outbound.generateTCPStats(outboundRandomCount)
		}
		time.Sleep(s.sleep)
	}
}

func (p *proxyMetricCollectors) generateTCPStats(randomCount int) {
	logCtx := log.WithFields(log.Fields{"randomCount": randomCount})
	if randomCount <= 0 {
		logCtx.Debugln("generateTCPStats: randomCount <= 0; skipping")
		return
	}

	closeLabels := prom.Labels{"classification": "success"}
	failLabels := prom.Labels{"classification": "failure"}

	// jitter the accept/connect counts a little bit to simulate connection
	// pooling etc.
	acceptCount := jitter(randomCount, 0.1)
	p.tcpAcceptOpenTotal.Add(float64(acceptCount))

	// up to acceptCount accepted connections remain open...
	acceptOpenCount := rand.Intn(acceptCount)
	// ...and the rest have been closed.
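	// Note: rand.Intn(acceptCount) returns a value in [0, acceptCount), so at
	// least one accepted connection is always recorded as closed. jitter never
	// returns less than 1 for positive input, which also keeps rand.Intn from
	// panicking here.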
	acceptClosedCount := acceptCount - acceptOpenCount

	// simulate some failures
	acceptFailedCount := 0
	if acceptClosedCount >= 2 {
		acceptFailedCount = rand.Intn(acceptClosedCount / 2)
		acceptClosedCount -= acceptFailedCount
		p.tcpAcceptCloseTotal.With(failLabels).Add(float64(acceptFailedCount))
	}
	p.tcpAcceptCloseTotal.With(closeLabels).Add(float64(acceptClosedCount))

	connectCount := jitter(randomCount, 0.1)
	p.tcpConnectOpenTotal.Add(float64(connectCount))

	connectOpenCount := rand.Intn(connectCount)
	connectClosedCount := connectCount - connectOpenCount
	connectFailedCount := 0
	if connectClosedCount >= 2 {
		connectFailedCount = rand.Intn(connectClosedCount / 2)
		connectClosedCount -= connectFailedCount
		p.tcpConnectCloseTotal.With(failLabels).Add(float64(connectFailedCount))
	}
	p.tcpConnectCloseTotal.With(closeLabels).Add(float64(connectClosedCount))

	p.tcpConnectionsOpen.Set(float64(acceptOpenCount + connectOpenCount))

	// connect durations + bytes sent/received
	totalClosed := acceptClosedCount + connectClosedCount
	for _, latency := range randomLatencies(totalClosed) {
		p.tcpConnectionDurationMs.With(closeLabels).Observe(latency)
		// XXX: are these reasonable values for sent/received bytes?
		p.sentBytes.With(closeLabels).Add(float64(rand.Intn(50000) + 1024))
		p.receivedBytes.With(closeLabels).Add(float64(rand.Intn(50000) + 1024))
	}

	// durations for simulated failures
	totalFailed := acceptFailedCount + connectFailedCount
	for _, latency := range randomLatencies(totalFailed) {
		p.tcpConnectionDurationMs.With(failLabels).Observe(latency)
		// XXX: are these reasonable values for sent/received bytes?
		p.sentBytes.With(failLabels).Add(float64(rand.Intn(50000)))
		p.receivedBytes.With(failLabels).Add(float64(rand.Intn(50000)))
	}
}

func jitter(toJitter int, frac float64) int {
	logCtx := log.WithFields(log.Fields{
		"toJitter": toJitter,
		"frac":     frac,
	})
	if toJitter <= 0 {
		logCtx.Debugln("jitter(): toJitter <= 0; returning 0")
		return 0
	}

	sign := rand.Intn(2)
	if sign == 0 {
		sign = -1
	}
	amount := int(float64(toJitter)*frac) + 1
	jitter := rand.Intn(amount) * sign
	jittered := toJitter + jitter
	if jittered <= 0 {
		logCtx.WithFields(log.Fields{
			"amount":   amount,
			"jitter":   jitter,
			"jittered": jittered,
		}).Debugln("jitter(): jittered <= 0; returning 1")
		return 1
	}
	return jittered
}

func randomResponseLabels() prom.Labels {
	labelMap := prom.Labels{"classification": "success"}

	grpcCode := randomGrpcResponseCode()
	labelMap["grpc_status_code"] = fmt.Sprintf("%d", grpcCode)

	httpCode := randomHttpResponseCode()
	labelMap["status_code"] = fmt.Sprintf("%d", httpCode)

	if grpcCode != uint32(codes.OK) || httpCode != http.StatusOK {
		labelMap["classification"] = "failure"
	}
	return labelMap
}

func randomGrpcResponseCode() uint32 {
	code := codes.OK
	if rand.Float32() > successRate {
		code = grpcResponseCodes[rand.Intn(len(grpcResponseCodes))]
	}
	return uint32(code)
}

func randomHttpResponseCode() uint32 {
	code := http.StatusOK
	if rand.Float32() > successRate {
		code = httpResponseCodes[rand.Intn(len(httpResponseCodes))]
	}
	return uint32(code)
}

func randomLatencies(count int) []float64 {
	latencies := make([]float64, count)
	for i := 0; i < count; i++ {
		// Select a latency from a bucket.
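		// Sampling the bucket bounds directly means every observation lands
		// exactly on a histogram boundary (bounds are inclusive), which is
		// fine for simulated data.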
		latencies[i] = latencyBucketBounds[rand.Int31n(int32(len(latencyBucketBounds)))]
	}
	return latencies
}

func randomDeployments(deployments []string, count int) []string {
	randomDeployments := []string{}
	length := int32(len(deployments))
	for i := 0; i < count; i++ {
		randomDeployments = append(randomDeployments, deployments[rand.Int31n(length)])
	}
	return randomDeployments
}

func newSimulatedProxy(
	pod *v1.Pod,
	deploy *v1beta2.Deployment,
	deployments []string,
	sleep *time.Duration,
	maxDst int,
) *simulatedProxy {
	dstDeployments := randomDeployments(deployments, maxDst)

	constTCPLabels := prom.Labels{
		// TCP metrics won't be labeled with an authority.
		"namespace":         pod.GetNamespace(),
		"deployment":        deploy.Name,
		"pod_template_hash": pod.GetLabels()["pod-template-hash"],
		"pod":               pod.GetName(),

		// TODO: support other k8s objects
		// "daemon_set",
		// "k8s_job",
		// "replication_controller",
		// "replica_set",
	}

	constLabels := prom.Labels{
		"authority":         "fakeauthority:123",
		"namespace":         pod.GetNamespace(),
		"deployment":        deploy.Name,
		"pod_template_hash": pod.GetLabels()["pod-template-hash"],
		"pod":               pod.GetName(),

		// TODO: support other k8s objects
		// "daemon_set",
		// "k8s_job",
		// "replication_controller",
		// "replica_set",
	}

	// make 50% of requests tls
	if rand.Int31n(2) > 0 {
		constLabels["tls"] = "true"
	}

	requestLabels := []string{
		"direction",

		// outbound only
		"dst_namespace",
		"dst_deployment",

		// TODO: support other k8s dst objects
		// "dst_daemon_set",
		// "dst_job",
		// "dst_replication_controller",
		// "dst_replica_set",
	}

	responseLabels := append(
		requestLabels,
		[]string{
			"classification",
			"grpc_status_code",
			"status_code",
		}...,
	)

	tcpLabels := []string{
		"direction",
	}

	tcpCloseLabels := append(
		tcpLabels,
		[]string{"classification"}...,
	)

	requestTotals := prom.NewCounterVec(
		prom.CounterOpts{
			Name:        "request_total",
			Help:        "A counter of the number of requests the proxy has received",
			ConstLabels: constLabels,
		},
		requestLabels)

	responseTotals := prom.NewCounterVec(
		prom.CounterOpts{
			Name:        "response_total",
			Help:        "A counter of the number of responses the proxy has received",
			ConstLabels: constLabels,
		},
		responseLabels)

	responseLatencyMs := prom.NewHistogramVec(
		prom.HistogramOpts{
			Name:        "response_latency_ms",
			Help:        "A histogram of the total latency of a response",
			ConstLabels: constLabels,
			Buckets:     latencyBucketBounds,
		},
		responseLabels)

	tcpAcceptOpenTotal := prom.NewCounterVec(
		prom.CounterOpts{
			Name:        "tcp_accept_open_total",
			Help:        "A counter of the total number of transport connections which have been accepted by the proxy.",
			ConstLabels: constTCPLabels,
		},
		tcpLabels)

	tcpAcceptCloseTotal := prom.NewCounterVec(
		prom.CounterOpts{
			Name:        "tcp_accept_close_total",
			Help:        "A counter of the total number of transport connections accepted by the proxy which have been closed.",
			ConstLabels: constTCPLabels,
		},
		tcpCloseLabels)

	tcpConnectOpenTotal := prom.NewCounterVec(
		prom.CounterOpts{
			Name:        "tcp_connect_open_total",
			Help:        "A counter of the total number of transport connections which have been opened by the proxy.",
			ConstLabels: constTCPLabels,
		},
		tcpLabels)

	tcpConnectCloseTotal := prom.NewCounterVec(
		prom.CounterOpts{
			Name:        "tcp_connect_close_total",
			Help:        "A counter of the total number of transport connections opened by the proxy which have been closed.",
			ConstLabels: constTCPLabels,
		},
		tcpCloseLabels)

	tcpConnectionsOpen := prom.NewGaugeVec(
		prom.GaugeOpts{
			Name:        "tcp_connections_open",
			Help:        "A gauge of the number of transport connections currently open.",
			ConstLabels: constTCPLabels,
		},
		tcpLabels)
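	// The duration and bytes metrics below carry the extra "classification"
	// label (tcpCloseLabels), so successfully and unsuccessfully closed
	// connections can be reported separately.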
	tcpConnectionDurationMs := prom.NewHistogramVec(
		prom.HistogramOpts{
			Name:        "tcp_connection_duration_ms",
			Help:        "A histogram of the duration of the lifetime of a connection, in milliseconds.",
			ConstLabels: constTCPLabels,
			Buckets:     latencyBucketBounds,
		},
		tcpCloseLabels)

	sentBytes := prom.NewCounterVec(
		prom.CounterOpts{
			Name:        "sent_bytes",
			Help:        "A counter of the total number of sent bytes.",
			ConstLabels: constTCPLabels,
		},
		tcpCloseLabels)

	receivedBytes := prom.NewCounterVec(
		prom.CounterOpts{
			Name:        "received_bytes",
			Help:        "A counter of the total number of received bytes.",
			ConstLabels: constTCPLabels,
		},
		tcpCloseLabels)

	inboundLabels := prom.Labels{
		"direction": "inbound",

		// dst_* labels are not valid for inbound, but all labels must always
		// be set in every increment call, so we set these to empty for all
		// inbound metrics.
		"dst_namespace":  "",
		"dst_deployment": "",
	}

	// TCP stats don't have dst labels
	inboundTCPLabels := prom.Labels{
		"direction": "inbound",
	}

	outboundLabels := prom.Labels{
		"direction": "outbound",
	}

	proxy := simulatedProxy{
		sleep:       *sleep,
		deployments: dstDeployments,
		registerer:  prom.NewRegistry(),
		inbound: &proxyMetricCollectors{
			requestTotals:           requestTotals.MustCurryWith(inboundLabels),
			responseTotals:          responseTotals.MustCurryWith(inboundLabels),
			responseLatencyMs:       responseLatencyMs.MustCurryWith(inboundLabels).(*prom.HistogramVec),
			tcpAcceptOpenTotal:      tcpAcceptOpenTotal.With(inboundTCPLabels),
			tcpAcceptCloseTotal:     tcpAcceptCloseTotal.MustCurryWith(inboundTCPLabels),
			tcpConnectOpenTotal:     tcpConnectOpenTotal.With(inboundTCPLabels),
			tcpConnectCloseTotal:    tcpConnectCloseTotal.MustCurryWith(inboundTCPLabels),
			tcpConnectionsOpen:      tcpConnectionsOpen.With(inboundTCPLabels),
			tcpConnectionDurationMs: tcpConnectionDurationMs.MustCurryWith(inboundTCPLabels).(*prom.HistogramVec),
			sentBytes:               sentBytes.MustCurryWith(inboundTCPLabels),
			receivedBytes:           receivedBytes.MustCurryWith(inboundTCPLabels),
		},
		outbound: &proxyMetricCollectors{
			requestTotals:           requestTotals.MustCurryWith(outboundLabels),
			responseTotals:          responseTotals.MustCurryWith(outboundLabels),
			responseLatencyMs:       responseLatencyMs.MustCurryWith(outboundLabels).(*prom.HistogramVec),
			tcpAcceptOpenTotal:      tcpAcceptOpenTotal.With(outboundLabels),
			tcpAcceptCloseTotal:     tcpAcceptCloseTotal.MustCurryWith(outboundLabels),
			tcpConnectOpenTotal:     tcpConnectOpenTotal.With(outboundLabels),
			tcpConnectCloseTotal:    tcpConnectCloseTotal.MustCurryWith(outboundLabels),
			tcpConnectionsOpen:      tcpConnectionsOpen.With(outboundLabels),
			tcpConnectionDurationMs: tcpConnectionDurationMs.MustCurryWith(outboundLabels).(*prom.HistogramVec),
			sentBytes:               sentBytes.MustCurryWith(outboundLabels),
			receivedBytes:           receivedBytes.MustCurryWith(outboundLabels),
		},
	}

	proxy.registerer.MustRegister(
		requestTotals,
		responseTotals,
		responseLatencyMs,
		tcpAcceptOpenTotal,
		tcpAcceptCloseTotal,
		tcpConnectOpenTotal,
		tcpConnectCloseTotal,
		tcpConnectionsOpen,
		tcpConnectionDurationMs,
		sentBytes,
		receivedBytes,
	)

	return &proxy
}

func getDeploymentByPod(k8sAPI *k8s.API, maxPods int) map[*v1.Pod]*v1beta2.Deployment {
	deployList, err := k8sAPI.Deploy().Lister().List(labels.Everything())
	if err != nil {
		log.Fatal(err.Error())
	}

	allPods := map[*v1.Pod]*v1beta2.Deployment{}
	for _, deploy := range deployList {
		pods, err := k8sAPI.GetPodsFor(deploy, false)
		if err != nil {
			log.Fatalf("GetPodsFor failed with %s", err)
			return map[*v1.Pod]*v1beta2.Deployment{}
		}

		for _, pod := range pods {
			if pod.Status.PodIP != "" && !strings.HasPrefix(pod.GetNamespace(), "kube-") {
				allPods[pod] = deploy
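				// Stop once we have enough pods to back one simulated proxy
				// per metrics port; maxPods == 0 means no limit.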
				if maxPods != 0 && len(allPods) == maxPods {
					return allPods
				}
			}
		}
	}

	return allPods
}

func main() {
	rand.Seed(time.Now().UnixNano())

	sleep := flag.Duration("sleep", time.Second, "time to sleep between requests")
	metricsPorts := flag.String("metric-ports", "10000-10002", "range (inclusive) of network ports to serve prometheus metrics")
	kubeConfigPath := flag.String("kubeconfig", "", "path to kube config - required")
	flag.Parse()

	if len(flag.Args()) > 0 {
		log.Fatal("Unable to parse command line arguments")
		return
	}

	ports := strings.Split(*metricsPorts, "-")
	if len(ports) != 2 {
		log.Fatalf("Invalid metric-ports flag, must be of the form '[start]-[end]': %s", *metricsPorts)
	}
	startPort, err := strconv.Atoi(ports[0])
	if err != nil {
		log.Fatalf("Invalid start port, must be an integer: %s", ports[0])
	}
	endPort, err := strconv.Atoi(ports[1])
	if err != nil {
		log.Fatalf("Invalid end port, must be an integer: %s", ports[1])
	}

	clientSet, err := k8s.NewClientSet(*kubeConfigPath)
	if err != nil {
		log.Fatal(err.Error())
	}
	k8sAPI := k8s.NewAPI(
		clientSet,
		k8s.Deploy,
		k8s.Pod,
	)
	k8sAPI.Sync(nil)

	proxyCount := endPort - startPort + 1
	simulatedPods := getDeploymentByPod(k8sAPI, proxyCount)
	podsFound := len(simulatedPods)
	if podsFound == 0 {
		// Without at least one real pod there is nothing to clone into fake
		// pods below, and the fill loop would spin forever.
		log.Fatalf("Found no pods to simulate %d proxies", proxyCount)
	}
	if podsFound < proxyCount {
		log.Warnf("Found only %d pods to simulate %d proxies, creating %d fake pods.",
			podsFound, proxyCount, proxyCount-podsFound)
		needed := proxyCount - podsFound
		for needed > 0 {
			for pod, deploy := range simulatedPods {
				fakePod := pod.DeepCopy()
				fakePod.SetName(fmt.Sprintf("%s-fake", pod.GetName()))
				simulatedPods[fakePod] = deploy
				needed--
				if needed == 0 {
					break
				}
			}
		}
	}

	stopCh := make(chan os.Signal)
	signal.Notify(stopCh, os.Interrupt, os.Kill)

	deployments := []string{}
	for _, deploy := range simulatedPods {
		deployments = append(deployments, fmt.Sprintf("%s/%s", deploy.Namespace, deploy.Name))
	}

	// simulate network topology of N * sqrt(N) request paths
	maxDst := int(math.Sqrt(float64(len(simulatedPods)))) + 1

	port := startPort
	for pod, deploy := range simulatedPods {
		proxy := newSimulatedProxy(pod, deploy, deployments, sleep, maxDst)

		addr := fmt.Sprintf("0.0.0.0:%d", port)
		server := &http.Server{
			Addr:    addr,
			Handler: promhttp.HandlerFor(proxy.registerer, promhttp.HandlerOpts{}),
		}
		log.Infof("serving scrapable metrics on %s", addr)
		go server.ListenAndServe()
		go proxy.generateProxyTraffic()
		port++
	}

	<-stopCh
}
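// Example invocation (illustrative; assumes this file builds as a standalone
// main package, and only the flags defined above exist):
//
//	go run . -kubeconfig ~/.kube/config -metric-ports 10000-10002 -sleep 5s
//
// Each port in the range serves one simulated proxy's registry. The metrics
// handler is installed as the server's root handler, so any path on the port
// (including the conventional /metrics) returns the metrics.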