From 9e490549635ce9f9a7b0751aa77f6115a285dced Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 24 Jan 2018 10:57:23 -0800 Subject: [PATCH] Classify non-gRPC status codes for HTTP telemetry (#200) Currently, all "success"/"failure" classifications in the telemetry API are made based on the `grpc-status` trailer. If the trailer is not present, then a request is assumed to have failed. As we start proxying non-gRPC traffic, the controller needs to also be aware of HTTP status codes, so that non-gRPC requests are not assumed to always fail. I've modified the telemetry API server to classify requests based on their HTTP status codes when the `grpc-status` trailer is not present. I've also modified the `simulate-proxy` script to generate fake HTTP/2 traffic without the `grpc-status` trailer. Closes #196 Signed-off-by: Eliza Weisman --- controller/script/simulate-proxy/main.go | 183 +++++++++++++++++------ controller/telemetry/server.go | 9 ++ 2 files changed, 144 insertions(+), 48 deletions(-) diff --git a/controller/script/simulate-proxy/main.go b/controller/script/simulate-proxy/main.go index 8b9e7a4da..f524fcb3f 100644 --- a/controller/script/simulate-proxy/main.go +++ b/controller/script/simulate-proxy/main.go @@ -24,12 +24,74 @@ import ( /* A simple script for posting simulated telemetry data to the proxy api */ var ( - responseCodes = []codes.Code{ + grpcResponseCodes = []codes.Code{ codes.OK, codes.PermissionDenied, codes.Unavailable, } + httpResponseCodes = []int{ + http.StatusContinue, + http.StatusSwitchingProtocols, + http.StatusProcessing, + http.StatusOK, + http.StatusCreated, + http.StatusAccepted, + http.StatusNonAuthoritativeInfo, + http.StatusNoContent, + http.StatusResetContent, + http.StatusPartialContent, + http.StatusMultiStatus, + http.StatusAlreadyReported, + http.StatusIMUsed, + http.StatusMultipleChoices, + http.StatusMovedPermanently, + http.StatusFound, + http.StatusSeeOther, + http.StatusNotModified, + http.StatusUseProxy, + 
http.StatusTemporaryRedirect, + http.StatusPermanentRedirect, + http.StatusBadRequest, + http.StatusUnauthorized, + http.StatusPaymentRequired, + http.StatusForbidden, + http.StatusNotFound, + http.StatusMethodNotAllowed, + http.StatusNotAcceptable, + http.StatusProxyAuthRequired, + http.StatusRequestTimeout, + http.StatusConflict, + http.StatusGone, + http.StatusLengthRequired, + http.StatusPreconditionFailed, + http.StatusRequestEntityTooLarge, + http.StatusRequestURITooLong, + http.StatusUnsupportedMediaType, + http.StatusRequestedRangeNotSatisfiable, + http.StatusExpectationFailed, + http.StatusTeapot, + http.StatusUnprocessableEntity, + http.StatusLocked, + http.StatusFailedDependency, + http.StatusUpgradeRequired, + http.StatusPreconditionRequired, + http.StatusTooManyRequests, + http.StatusRequestHeaderFieldsTooLarge, + http.StatusUnavailableForLegalReasons, + http.StatusInternalServerError, + http.StatusNotImplemented, + http.StatusBadGateway, + http.StatusServiceUnavailable, + http.StatusGatewayTimeout, + http.StatusHTTPVersionNotSupported, + http.StatusVariantAlsoNegotiates, + http.StatusInsufficientStorage, + http.StatusLoopDetected, + http.StatusNotExtended, + http.StatusNetworkAuthenticationRequired, + } + streamSummary = &pb.StreamSummary{ BytesSent: 12345, DurationMs: 10, @@ -60,12 +122,12 @@ func randomLatencies(count uint32) (latencies []*pb.Latency) { return } -func randomEos(count uint32) (eos []*pb.EosScope) { - responseCodes := make(map[uint32]uint32) +func randomGrpcEos(count uint32) (eos []*pb.EosScope) { + grpcResponseCodes := make(map[uint32]uint32) for i := uint32(0); i < count; i++ { - responseCodes[randomResponseCode()] += 1 + grpcResponseCodes[randomGrpcResponseCode()] += 1 } - for code, streamCount := range responseCodes { + for code, streamCount := range grpcResponseCodes { eos = append(eos, &pb.EosScope{ Ctx: &pb.EosCtx{End: &pb.EosCtx_GrpcStatusCode{GrpcStatusCode: code}}, Streams: streamSummaries(streamCount), @@ -74,8 +136,22 @@ 
func randomEos(count uint32) (eos []*pb.EosScope) { return } -func randomResponseCode() uint32 { - return uint32(responseCodes[rand.Intn(len(responseCodes))]) +func randomH2Eos(count uint32) (eos []*pb.EosScope) { + for i := uint32(0); i < count; i++ { + eos = append(eos, &pb.EosScope{ + Ctx: &pb.EosCtx{End: &pb.EosCtx_Other{Other: true}}, + Streams: streamSummaries(i), + }) + } + return +} + +func randomGrpcResponseCode() uint32 { + return uint32(grpcResponseCodes[rand.Intn(len(grpcResponseCodes))]) +} + +func randomHttpResponseCode() uint32 { + return uint32(httpResponseCodes[rand.Intn(len(httpResponseCodes))]) } func streamSummaries(count uint32) (summaries []*pb.StreamSummary) { @@ -170,53 +246,13 @@ func main() { sourceIp := randomPod(allPods, nil) targetIp := randomPod(allPods, sourceIp) - // HTTP req := &pb.ReportRequest{ Process: &pb.Process{ ScheduledInstance: "hello-1mfa0", ScheduledNamespace: "people", }, - ClientTransports: []*pb.ClientTransport{}, - ServerTransports: []*pb.ServerTransport{}, - Proxy: pb.ReportRequest_INBOUND, - Requests: []*pb.RequestScope{ - &pb.RequestScope{ - Ctx: &pb.RequestCtx{ - SourceIp: sourceIp, - TargetAddr: &common.TcpAddress{ - Ip: targetIp, - Port: randomPort(), - }, - Authority: "world.greeting:7778", - Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}}, - Path: "/World/Greeting", - }, - Count: count, - Responses: []*pb.ResponseScope{ - &pb.ResponseScope{ - Ctx: &pb.ResponseCtx{ - HttpStatusCode: http.StatusOK, - }, - ResponseLatencies: randomLatencies(count), - Ends: randomEos(count), - }, - }, - }, - }, - } - - _, err = client.Report(context.Background(), req) - if err != nil { - log.Fatal(err.Error()) - } - - // TCP - req = &pb.ReportRequest{ - Process: &pb.Process{ - ScheduledInstance: "hello-tcp-1mfa0", - ScheduledNamespace: "people-tcp", - }, ClientTransports: []*pb.ClientTransport{ + // TCP &pb.ClientTransport{ TargetAddr: &common.TcpAddress{ Ip: targetIp, @@ -233,6 
+269,7 @@ func main() { }, }, ServerTransports: []*pb.ServerTransport{ + // TCP &pb.ServerTransport{ SourceIp: sourceIp, Connects: count, @@ -246,6 +283,56 @@ func main() { }, }, Proxy: pb.ReportRequest_INBOUND, + Requests: []*pb.RequestScope{ + + // gRPC + &pb.RequestScope{ + Ctx: &pb.RequestCtx{ + SourceIp: sourceIp, + TargetAddr: &common.TcpAddress{ + Ip: targetIp, + Port: randomPort(), + }, + Authority: "world.greeting:7778", + Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}}, + Path: "/World/GreetingGrpc", + }, + Count: count, + Responses: []*pb.ResponseScope{ + &pb.ResponseScope{ + Ctx: &pb.ResponseCtx{ + HttpStatusCode: http.StatusOK, + }, + ResponseLatencies: randomLatencies(count), + Ends: randomGrpcEos(count), + }, + }, + }, + + // HTTP/2 + &pb.RequestScope{ + Ctx: &pb.RequestCtx{ + SourceIp: sourceIp, + TargetAddr: &common.TcpAddress{ + Ip: targetIp, + Port: randomPort(), + }, + Authority: "world.greeting:7778", + Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}}, + Path: "/World/GreetingH2", + }, + Count: count, + Responses: []*pb.ResponseScope{ + &pb.ResponseScope{ + Ctx: &pb.ResponseCtx{ + HttpStatusCode: randomHttpResponseCode(), + }, + ResponseLatencies: randomLatencies(count), + Ends: randomH2Eos(count), + }, + }, + }, + }, } _, err = client.Report(context.Background(), req) diff --git a/controller/telemetry/server.go b/controller/telemetry/server.go index c75711ec4..11fa4e7ae 100644 --- a/controller/telemetry/server.go +++ b/controller/telemetry/server.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "net" + "net/http" "strconv" "sync" "time" @@ -397,9 +398,17 @@ func responseLabelsFor(responseScope *write.ResponseScope, eosScope *write.EosSc classification := "failure" switch x := eosScope.Ctx.End.(type) { case *write.EosCtx_GrpcStatusCode: + // The stream ended with a `grpc-status` trailer. + // Classify based on the gRPC status code. 
if x.GrpcStatusCode == uint32(codes.OK) { classification = "success" } + case *write.EosCtx_Other: + // The stream did not end with a `grpc-status` trailer (i.e., it was not + // a gRPC stream). Classify by HTTP status: any code below 500 is a success. + if responseScope.Ctx.HttpStatusCode < http.StatusInternalServerError { + classification = "success" + } } return prometheus.Labels{ "http_status_code": httpStatusCode,