mirror of https://github.com/linkerd/linkerd2.git
Classify non-gRPC status codes for HTTP telemetry (#200)
Currently, all "success"/"failure" classifications in the telemetry API are made based on the `grpc-status` trailer. If the trailer is not present, then a request is assumed to have failed. As we start proxying non-gRPC traffic, the controller also needs to be aware of HTTP status codes, so that non-gRPC requests are not always assumed to fail.

I've modified the telemetry API server to classify requests based on their HTTP status codes when the `grpc-status` trailer is not present (responses with a status below 500 are counted as successes). I've also modified the `simulate-proxy` script to generate fake HTTP/2 traffic without the `grpc-status` trailer.

Closes #196

Signed-off-by: Eliza Weisman <eliza@buoyant.io>
This commit is contained in:
parent
54aef56e25
commit
9e49054963
|
@ -24,12 +24,74 @@ import (
|
|||
/* A simple script for posting simulated telemetry data to the proxy api */
|
||||
|
||||
var (
|
||||
responseCodes = []codes.Code{
|
||||
grpcResponseCodes = []codes.Code{
|
||||
codes.OK,
|
||||
codes.PermissionDenied,
|
||||
codes.Unavailable,
|
||||
}
|
||||
|
||||
httpResponseCodes = []int{
|
||||
http.StatusContinue,
|
||||
http.StatusSwitchingProtocols,
|
||||
http.StatusProcessing,
|
||||
http.StatusOK,
|
||||
http.StatusCreated,
|
||||
http.StatusAccepted,
|
||||
http.StatusNonAuthoritativeInfo,
|
||||
http.StatusNoContent,
|
||||
http.StatusResetContent,
|
||||
http.StatusPartialContent,
|
||||
http.StatusMultiStatus,
|
||||
http.StatusAlreadyReported,
|
||||
http.StatusIMUsed,
|
||||
http.StatusMultipleChoices,
|
||||
http.StatusMovedPermanently,
|
||||
http.StatusFound,
|
||||
http.StatusSeeOther,
|
||||
http.StatusNotModified,
|
||||
http.StatusUseProxy,
|
||||
http.StatusTemporaryRedirect,
|
||||
http.StatusPermanentRedirect,
|
||||
http.StatusBadRequest,
|
||||
http.StatusUnauthorized,
|
||||
http.StatusPaymentRequired,
|
||||
http.StatusForbidden,
|
||||
http.StatusNotFound,
|
||||
http.StatusMethodNotAllowed,
|
||||
http.StatusNotAcceptable,
|
||||
http.StatusProxyAuthRequired,
|
||||
http.StatusRequestTimeout,
|
||||
http.StatusConflict,
|
||||
http.StatusGone,
|
||||
http.StatusLengthRequired,
|
||||
http.StatusPreconditionFailed,
|
||||
http.StatusRequestEntityTooLarge,
|
||||
http.StatusRequestURITooLong,
|
||||
http.StatusUnsupportedMediaType,
|
||||
http.StatusRequestedRangeNotSatisfiable,
|
||||
http.StatusExpectationFailed,
|
||||
http.StatusTeapot,
|
||||
http.StatusUnprocessableEntity,
|
||||
http.StatusLocked,
|
||||
http.StatusFailedDependency,
|
||||
http.StatusUpgradeRequired,
|
||||
http.StatusPreconditionRequired,
|
||||
http.StatusTooManyRequests,
|
||||
http.StatusRequestHeaderFieldsTooLarge,
|
||||
http.StatusUnavailableForLegalReasons,
|
||||
http.StatusInternalServerError,
|
||||
http.StatusNotImplemented,
|
||||
http.StatusBadGateway,
|
||||
http.StatusServiceUnavailable,
|
||||
http.StatusGatewayTimeout,
|
||||
http.StatusHTTPVersionNotSupported,
|
||||
http.StatusVariantAlsoNegotiates,
|
||||
http.StatusInsufficientStorage,
|
||||
http.StatusLoopDetected,
|
||||
http.StatusNotExtended,
|
||||
http.StatusNetworkAuthenticationRequired,
|
||||
}
|
||||
|
||||
streamSummary = &pb.StreamSummary{
|
||||
BytesSent: 12345,
|
||||
DurationMs: 10,
|
||||
|
@ -60,12 +122,12 @@ func randomLatencies(count uint32) (latencies []*pb.Latency) {
|
|||
return
|
||||
}
|
||||
|
||||
func randomEos(count uint32) (eos []*pb.EosScope) {
|
||||
responseCodes := make(map[uint32]uint32)
|
||||
func randomGrpcEos(count uint32) (eos []*pb.EosScope) {
|
||||
grpcResponseCodes := make(map[uint32]uint32)
|
||||
for i := uint32(0); i < count; i++ {
|
||||
responseCodes[randomResponseCode()] += 1
|
||||
grpcResponseCodes[randomGrpcResponseCode()] += 1
|
||||
}
|
||||
for code, streamCount := range responseCodes {
|
||||
for code, streamCount := range grpcResponseCodes {
|
||||
eos = append(eos, &pb.EosScope{
|
||||
Ctx: &pb.EosCtx{End: &pb.EosCtx_GrpcStatusCode{GrpcStatusCode: code}},
|
||||
Streams: streamSummaries(streamCount),
|
||||
|
@ -74,8 +136,22 @@ func randomEos(count uint32) (eos []*pb.EosScope) {
|
|||
return
|
||||
}
|
||||
|
||||
func randomResponseCode() uint32 {
|
||||
return uint32(responseCodes[rand.Intn(len(responseCodes))])
|
||||
func randomH2Eos(count uint32) (eos []*pb.EosScope) {
|
||||
for i := uint32(0); i < count; i++ {
|
||||
eos = append(eos, &pb.EosScope{
|
||||
Ctx: &pb.EosCtx{End: &pb.EosCtx_Other{Other: true}},
|
||||
Streams: streamSummaries(i),
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func randomGrpcResponseCode() uint32 {
|
||||
return uint32(grpcResponseCodes[rand.Intn(len(grpcResponseCodes))])
|
||||
}
|
||||
|
||||
func randomHttpResponseCode() uint32 {
|
||||
return uint32(httpResponseCodes[rand.Intn(len(httpResponseCodes))])
|
||||
}
|
||||
|
||||
func streamSummaries(count uint32) (summaries []*pb.StreamSummary) {
|
||||
|
@ -170,53 +246,13 @@ func main() {
|
|||
sourceIp := randomPod(allPods, nil)
|
||||
targetIp := randomPod(allPods, sourceIp)
|
||||
|
||||
// HTTP
|
||||
req := &pb.ReportRequest{
|
||||
Process: &pb.Process{
|
||||
ScheduledInstance: "hello-1mfa0",
|
||||
ScheduledNamespace: "people",
|
||||
},
|
||||
ClientTransports: []*pb.ClientTransport{},
|
||||
ServerTransports: []*pb.ServerTransport{},
|
||||
Proxy: pb.ReportRequest_INBOUND,
|
||||
Requests: []*pb.RequestScope{
|
||||
&pb.RequestScope{
|
||||
Ctx: &pb.RequestCtx{
|
||||
SourceIp: sourceIp,
|
||||
TargetAddr: &common.TcpAddress{
|
||||
Ip: targetIp,
|
||||
Port: randomPort(),
|
||||
},
|
||||
Authority: "world.greeting:7778",
|
||||
Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}},
|
||||
Path: "/World/Greeting",
|
||||
},
|
||||
Count: count,
|
||||
Responses: []*pb.ResponseScope{
|
||||
&pb.ResponseScope{
|
||||
Ctx: &pb.ResponseCtx{
|
||||
HttpStatusCode: http.StatusOK,
|
||||
},
|
||||
ResponseLatencies: randomLatencies(count),
|
||||
Ends: randomEos(count),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
_, err = client.Report(context.Background(), req)
|
||||
if err != nil {
|
||||
log.Fatal(err.Error())
|
||||
}
|
||||
|
||||
// TCP
|
||||
req = &pb.ReportRequest{
|
||||
Process: &pb.Process{
|
||||
ScheduledInstance: "hello-tcp-1mfa0",
|
||||
ScheduledNamespace: "people-tcp",
|
||||
},
|
||||
ClientTransports: []*pb.ClientTransport{
|
||||
// TCP
|
||||
&pb.ClientTransport{
|
||||
TargetAddr: &common.TcpAddress{
|
||||
Ip: targetIp,
|
||||
|
@ -233,6 +269,7 @@ func main() {
|
|||
},
|
||||
},
|
||||
ServerTransports: []*pb.ServerTransport{
|
||||
// TCP
|
||||
&pb.ServerTransport{
|
||||
SourceIp: sourceIp,
|
||||
Connects: count,
|
||||
|
@ -246,6 +283,56 @@ func main() {
|
|||
},
|
||||
},
|
||||
Proxy: pb.ReportRequest_INBOUND,
|
||||
Requests: []*pb.RequestScope{
|
||||
|
||||
// gRPC
|
||||
&pb.RequestScope{
|
||||
Ctx: &pb.RequestCtx{
|
||||
SourceIp: sourceIp,
|
||||
TargetAddr: &common.TcpAddress{
|
||||
Ip: targetIp,
|
||||
Port: randomPort(),
|
||||
},
|
||||
Authority: "world.greeting:7778",
|
||||
Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}},
|
||||
Path: "/World/GreetingGrpc",
|
||||
},
|
||||
Count: count,
|
||||
Responses: []*pb.ResponseScope{
|
||||
&pb.ResponseScope{
|
||||
Ctx: &pb.ResponseCtx{
|
||||
HttpStatusCode: http.StatusOK,
|
||||
},
|
||||
ResponseLatencies: randomLatencies(count),
|
||||
Ends: randomGrpcEos(count),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
// HTTP/2
|
||||
&pb.RequestScope{
|
||||
Ctx: &pb.RequestCtx{
|
||||
SourceIp: sourceIp,
|
||||
TargetAddr: &common.TcpAddress{
|
||||
Ip: targetIp,
|
||||
Port: randomPort(),
|
||||
},
|
||||
Authority: "world.greeting:7778",
|
||||
Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}},
|
||||
Path: "/World/GreetingH2",
|
||||
},
|
||||
Count: count,
|
||||
Responses: []*pb.ResponseScope{
|
||||
&pb.ResponseScope{
|
||||
Ctx: &pb.ResponseCtx{
|
||||
HttpStatusCode: randomHttpResponseCode(),
|
||||
},
|
||||
ResponseLatencies: randomLatencies(count),
|
||||
Ends: randomH2Eos(count),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
_, err = client.Report(context.Background(), req)
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
@ -397,9 +398,17 @@ func responseLabelsFor(responseScope *write.ResponseScope, eosScope *write.EosSc
|
|||
classification := "failure"
|
||||
switch x := eosScope.Ctx.End.(type) {
|
||||
case *write.EosCtx_GrpcStatusCode:
|
||||
// The stream ended with a `grpc-status` trailer.
|
||||
// Classify based on the gRPC status code.
|
||||
if x.GrpcStatusCode == uint32(codes.OK) {
|
||||
classification = "success"
|
||||
}
|
||||
case *write.EosCtx_Other:
|
||||
// The stream did not end with a `grpc-status` trailer (i.e., it was
|
||||
// not a gRPC message). Classify based on the response's HTTP status.
|
||||
if responseScope.Ctx.HttpStatusCode < http.StatusInternalServerError {
|
||||
classification = "success"
|
||||
}
|
||||
}
|
||||
return prometheus.Labels{
|
||||
"http_status_code": httpStatusCode,
|
||||
|
|
Loading…
Reference in New Issue