Classify non-gRPC status codes for HTTP telemetry (#200)

Currently, all "success"/"failure" classifications in the telemetry API are made based on the `grpc-status` trailer. If the trailer is not present, then a request is assumed to have failed. As we start proxying non-gRPC traffic, the controller needs to also be aware of HTTP status codes, so that non-gRPC requests are not assumed to always fail.

I've modified the telemetry API server to classify requests by their HTTP status code when the `grpc-status` trailer is not present: a response whose status is below 500 is counted as a success, and a 5xx response as a failure.

I've also modified the `simulate-proxy` script to generate fake HTTP/2 traffic without the `grpc-status` trailer.

Closes #196

Signed-off-by: Eliza Weisman <eliza@buoyant.io>
This commit is contained in:
Eliza Weisman 2018-01-24 10:57:23 -08:00 committed by GitHub
parent 54aef56e25
commit 9e49054963
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 144 additions and 48 deletions

View File

@ -24,12 +24,74 @@ import (
/* A simple script for posting simulated telemetry data to the proxy api */
var (
responseCodes = []codes.Code{
grpcResponseCodes = []codes.Code{
codes.OK,
codes.PermissionDenied,
codes.Unavailable,
}
httpResponseCodes = []int{
http.StatusContinue,
http.StatusSwitchingProtocols,
http.StatusProcessing,
http.StatusOK,
http.StatusCreated,
http.StatusAccepted,
http.StatusNonAuthoritativeInfo,
http.StatusNoContent,
http.StatusResetContent,
http.StatusPartialContent,
http.StatusMultiStatus,
http.StatusAlreadyReported,
http.StatusIMUsed,
http.StatusMultipleChoices,
http.StatusMovedPermanently,
http.StatusFound,
http.StatusSeeOther,
http.StatusNotModified,
http.StatusUseProxy,
http.StatusTemporaryRedirect,
http.StatusPermanentRedirect,
http.StatusBadRequest,
http.StatusUnauthorized,
http.StatusPaymentRequired,
http.StatusForbidden,
http.StatusNotFound,
http.StatusMethodNotAllowed,
http.StatusNotAcceptable,
http.StatusProxyAuthRequired,
http.StatusRequestTimeout,
http.StatusConflict,
http.StatusGone,
http.StatusLengthRequired,
http.StatusPreconditionFailed,
http.StatusRequestEntityTooLarge,
http.StatusRequestURITooLong,
http.StatusUnsupportedMediaType,
http.StatusRequestedRangeNotSatisfiable,
http.StatusExpectationFailed,
http.StatusTeapot,
http.StatusUnprocessableEntity,
http.StatusLocked,
http.StatusFailedDependency,
http.StatusUpgradeRequired,
http.StatusPreconditionRequired,
http.StatusTooManyRequests,
http.StatusRequestHeaderFieldsTooLarge,
http.StatusUnavailableForLegalReasons,
http.StatusInternalServerError,
http.StatusNotImplemented,
http.StatusBadGateway,
http.StatusServiceUnavailable,
http.StatusGatewayTimeout,
http.StatusHTTPVersionNotSupported,
http.StatusVariantAlsoNegotiates,
http.StatusInsufficientStorage,
http.StatusLoopDetected,
http.StatusNotExtended,
http.StatusNetworkAuthenticationRequired,
}
streamSummary = &pb.StreamSummary{
BytesSent: 12345,
DurationMs: 10,
@ -60,12 +122,12 @@ func randomLatencies(count uint32) (latencies []*pb.Latency) {
return
}
func randomEos(count uint32) (eos []*pb.EosScope) {
responseCodes := make(map[uint32]uint32)
func randomGrpcEos(count uint32) (eos []*pb.EosScope) {
grpcResponseCodes := make(map[uint32]uint32)
for i := uint32(0); i < count; i++ {
responseCodes[randomResponseCode()] += 1
grpcResponseCodes[randomGrpcResponseCode()] += 1
}
for code, streamCount := range responseCodes {
for code, streamCount := range grpcResponseCodes {
eos = append(eos, &pb.EosScope{
Ctx: &pb.EosCtx{End: &pb.EosCtx_GrpcStatusCode{GrpcStatusCode: code}},
Streams: streamSummaries(streamCount),
@ -74,8 +136,22 @@ func randomEos(count uint32) (eos []*pb.EosScope) {
return
}
func randomResponseCode() uint32 {
return uint32(responseCodes[rand.Intn(len(responseCodes))])
// randomH2Eos builds the end-of-stream scopes for simulated HTTP/2 responses
// that finished without a `grpc-status` trailer.
//
// Every such stream carries the same end-of-stream context (EosCtx_Other), so
// all of them are reported in one EosScope sized by count. Emitting one scope
// per loop index with streamSummaries(i) would instead report
// 0+1+...+(count-1) streams (and an empty first scope), which disagrees with
// the request count the caller puts on the enclosing RequestScope.
//
// count is the number of streams to report. The result is nil when count is
// zero, so no empty scope is produced.
//
// NOTE(review): assumes streamSummaries(n) returns n summaries — confirm
// against its definition.
func randomH2Eos(count uint32) (eos []*pb.EosScope) {
	if count == 0 {
		return nil
	}
	return []*pb.EosScope{
		&pb.EosScope{
			Ctx:     &pb.EosCtx{End: &pb.EosCtx_Other{Other: true}},
			Streams: streamSummaries(count),
		},
	}
}
// randomGrpcResponseCode pseudo-randomly selects one entry from the
// package-level grpcResponseCodes slice and returns it converted to uint32.
func randomGrpcResponseCode() uint32 {
	idx := rand.Intn(len(grpcResponseCodes))
	picked := grpcResponseCodes[idx]
	return uint32(picked)
}
// randomHttpResponseCode pseudo-randomly selects one entry from the
// package-level httpResponseCodes slice and returns it converted to uint32.
func randomHttpResponseCode() uint32 {
	idx := rand.Intn(len(httpResponseCodes))
	status := httpResponseCodes[idx]
	return uint32(status)
}
func streamSummaries(count uint32) (summaries []*pb.StreamSummary) {
@ -170,53 +246,13 @@ func main() {
sourceIp := randomPod(allPods, nil)
targetIp := randomPod(allPods, sourceIp)
// HTTP
req := &pb.ReportRequest{
Process: &pb.Process{
ScheduledInstance: "hello-1mfa0",
ScheduledNamespace: "people",
},
ClientTransports: []*pb.ClientTransport{},
ServerTransports: []*pb.ServerTransport{},
Proxy: pb.ReportRequest_INBOUND,
Requests: []*pb.RequestScope{
&pb.RequestScope{
Ctx: &pb.RequestCtx{
SourceIp: sourceIp,
TargetAddr: &common.TcpAddress{
Ip: targetIp,
Port: randomPort(),
},
Authority: "world.greeting:7778",
Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}},
Path: "/World/Greeting",
},
Count: count,
Responses: []*pb.ResponseScope{
&pb.ResponseScope{
Ctx: &pb.ResponseCtx{
HttpStatusCode: http.StatusOK,
},
ResponseLatencies: randomLatencies(count),
Ends: randomEos(count),
},
},
},
},
}
_, err = client.Report(context.Background(), req)
if err != nil {
log.Fatal(err.Error())
}
// TCP
req = &pb.ReportRequest{
Process: &pb.Process{
ScheduledInstance: "hello-tcp-1mfa0",
ScheduledNamespace: "people-tcp",
},
ClientTransports: []*pb.ClientTransport{
// TCP
&pb.ClientTransport{
TargetAddr: &common.TcpAddress{
Ip: targetIp,
@ -233,6 +269,7 @@ func main() {
},
},
ServerTransports: []*pb.ServerTransport{
// TCP
&pb.ServerTransport{
SourceIp: sourceIp,
Connects: count,
@ -246,6 +283,56 @@ func main() {
},
},
Proxy: pb.ReportRequest_INBOUND,
Requests: []*pb.RequestScope{
// gRPC
&pb.RequestScope{
Ctx: &pb.RequestCtx{
SourceIp: sourceIp,
TargetAddr: &common.TcpAddress{
Ip: targetIp,
Port: randomPort(),
},
Authority: "world.greeting:7778",
Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}},
Path: "/World/GreetingGrpc",
},
Count: count,
Responses: []*pb.ResponseScope{
&pb.ResponseScope{
Ctx: &pb.ResponseCtx{
HttpStatusCode: http.StatusOK,
},
ResponseLatencies: randomLatencies(count),
Ends: randomGrpcEos(count),
},
},
},
// HTTP/2
&pb.RequestScope{
Ctx: &pb.RequestCtx{
SourceIp: sourceIp,
TargetAddr: &common.TcpAddress{
Ip: targetIp,
Port: randomPort(),
},
Authority: "world.greeting:7778",
Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}},
Path: "/World/GreetingH2",
},
Count: count,
Responses: []*pb.ResponseScope{
&pb.ResponseScope{
Ctx: &pb.ResponseCtx{
HttpStatusCode: randomHttpResponseCode(),
},
ResponseLatencies: randomLatencies(count),
Ends: randomH2Eos(count),
},
},
},
},
}
_, err = client.Report(context.Background(), req)

View File

@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"net"
"net/http"
"strconv"
"sync"
"time"
@ -397,9 +398,17 @@ func responseLabelsFor(responseScope *write.ResponseScope, eosScope *write.EosSc
classification := "failure"
switch x := eosScope.Ctx.End.(type) {
case *write.EosCtx_GrpcStatusCode:
// The stream ended with a `grpc-status` trailer.
// Classify based on the gRPC status code.
if x.GrpcStatusCode == uint32(codes.OK) {
classification = "success"
}
case *write.EosCtx_Other:
// The stream did not end with a `grpc-status` trailer (i.e., it was
// not a gRPC message). Classify based on the response's HTTP status.
if responseScope.Ctx.HttpStatusCode < http.StatusInternalServerError {
classification = "success"
}
}
return prometheus.Labels{
"http_status_code": httpStatusCode,