mirror of https://github.com/linkerd/linkerd2.git
Classify non-gRPC status codes for HTTP telemetry (#200)
Currently, all "success"/"failure" classifications in the telemetry API are made based on the `grpc-status` trailer. If the trailer is not present, then a request is assumed to have failed.

As we start proxying non-gRPC traffic, the controller needs to also be aware of HTTP status codes, so that non-gRPC requests are not assumed to always fail.

I've modified the telemetry API server to classify requests based on their HTTP status codes when the `grpc-status` trailer is not present. I've also modified the `simulate-proxy` script to generate fake HTTP/2 traffic without the `grpc-status` trailer.

Closes #196

Signed-off-by: Eliza Weisman <eliza@buoyant.io>
This commit is contained in:
parent
54aef56e25
commit
9e49054963
|
@ -24,12 +24,74 @@ import (
|
||||||
/* A simple script for posting simulated telemetry data to the proxy api */
|
/* A simple script for posting simulated telemetry data to the proxy api */
|
||||||
|
|
||||||
var (
|
var (
|
||||||
responseCodes = []codes.Code{
|
grpcResponseCodes = []codes.Code{
|
||||||
codes.OK,
|
codes.OK,
|
||||||
codes.PermissionDenied,
|
codes.PermissionDenied,
|
||||||
codes.Unavailable,
|
codes.Unavailable,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
httpResponseCodes = []int{
|
||||||
|
http.StatusContinue,
|
||||||
|
http.StatusSwitchingProtocols,
|
||||||
|
http.StatusProcessing,
|
||||||
|
http.StatusOK,
|
||||||
|
http.StatusCreated,
|
||||||
|
http.StatusAccepted,
|
||||||
|
http.StatusNonAuthoritativeInfo,
|
||||||
|
http.StatusNoContent,
|
||||||
|
http.StatusResetContent,
|
||||||
|
http.StatusPartialContent,
|
||||||
|
http.StatusMultiStatus,
|
||||||
|
http.StatusAlreadyReported,
|
||||||
|
http.StatusIMUsed,
|
||||||
|
http.StatusMultipleChoices,
|
||||||
|
http.StatusMovedPermanently,
|
||||||
|
http.StatusFound,
|
||||||
|
http.StatusSeeOther,
|
||||||
|
http.StatusNotModified,
|
||||||
|
http.StatusUseProxy,
|
||||||
|
http.StatusTemporaryRedirect,
|
||||||
|
http.StatusPermanentRedirect,
|
||||||
|
http.StatusBadRequest,
|
||||||
|
http.StatusUnauthorized,
|
||||||
|
http.StatusPaymentRequired,
|
||||||
|
http.StatusForbidden,
|
||||||
|
http.StatusNotFound,
|
||||||
|
http.StatusMethodNotAllowed,
|
||||||
|
http.StatusNotAcceptable,
|
||||||
|
http.StatusProxyAuthRequired,
|
||||||
|
http.StatusRequestTimeout,
|
||||||
|
http.StatusConflict,
|
||||||
|
http.StatusGone,
|
||||||
|
http.StatusLengthRequired,
|
||||||
|
http.StatusPreconditionFailed,
|
||||||
|
http.StatusRequestEntityTooLarge,
|
||||||
|
http.StatusRequestURITooLong,
|
||||||
|
http.StatusUnsupportedMediaType,
|
||||||
|
http.StatusRequestedRangeNotSatisfiable,
|
||||||
|
http.StatusExpectationFailed,
|
||||||
|
http.StatusTeapot,
|
||||||
|
http.StatusUnprocessableEntity,
|
||||||
|
http.StatusLocked,
|
||||||
|
http.StatusFailedDependency,
|
||||||
|
http.StatusUpgradeRequired,
|
||||||
|
http.StatusPreconditionRequired,
|
||||||
|
http.StatusTooManyRequests,
|
||||||
|
http.StatusRequestHeaderFieldsTooLarge,
|
||||||
|
http.StatusUnavailableForLegalReasons,
|
||||||
|
http.StatusInternalServerError,
|
||||||
|
http.StatusNotImplemented,
|
||||||
|
http.StatusBadGateway,
|
||||||
|
http.StatusServiceUnavailable,
|
||||||
|
http.StatusGatewayTimeout,
|
||||||
|
http.StatusHTTPVersionNotSupported,
|
||||||
|
http.StatusVariantAlsoNegotiates,
|
||||||
|
http.StatusInsufficientStorage,
|
||||||
|
http.StatusLoopDetected,
|
||||||
|
http.StatusNotExtended,
|
||||||
|
http.StatusNetworkAuthenticationRequired,
|
||||||
|
}
|
||||||
|
|
||||||
streamSummary = &pb.StreamSummary{
|
streamSummary = &pb.StreamSummary{
|
||||||
BytesSent: 12345,
|
BytesSent: 12345,
|
||||||
DurationMs: 10,
|
DurationMs: 10,
|
||||||
|
@ -60,12 +122,12 @@ func randomLatencies(count uint32) (latencies []*pb.Latency) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func randomEos(count uint32) (eos []*pb.EosScope) {
|
func randomGrpcEos(count uint32) (eos []*pb.EosScope) {
|
||||||
responseCodes := make(map[uint32]uint32)
|
grpcResponseCodes := make(map[uint32]uint32)
|
||||||
for i := uint32(0); i < count; i++ {
|
for i := uint32(0); i < count; i++ {
|
||||||
responseCodes[randomResponseCode()] += 1
|
grpcResponseCodes[randomGrpcResponseCode()] += 1
|
||||||
}
|
}
|
||||||
for code, streamCount := range responseCodes {
|
for code, streamCount := range grpcResponseCodes {
|
||||||
eos = append(eos, &pb.EosScope{
|
eos = append(eos, &pb.EosScope{
|
||||||
Ctx: &pb.EosCtx{End: &pb.EosCtx_GrpcStatusCode{GrpcStatusCode: code}},
|
Ctx: &pb.EosCtx{End: &pb.EosCtx_GrpcStatusCode{GrpcStatusCode: code}},
|
||||||
Streams: streamSummaries(streamCount),
|
Streams: streamSummaries(streamCount),
|
||||||
|
@ -74,8 +136,22 @@ func randomEos(count uint32) (eos []*pb.EosScope) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func randomResponseCode() uint32 {
|
func randomH2Eos(count uint32) (eos []*pb.EosScope) {
|
||||||
return uint32(responseCodes[rand.Intn(len(responseCodes))])
|
for i := uint32(0); i < count; i++ {
|
||||||
|
eos = append(eos, &pb.EosScope{
|
||||||
|
Ctx: &pb.EosCtx{End: &pb.EosCtx_Other{Other: true}},
|
||||||
|
Streams: streamSummaries(i),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func randomGrpcResponseCode() uint32 {
|
||||||
|
return uint32(grpcResponseCodes[rand.Intn(len(grpcResponseCodes))])
|
||||||
|
}
|
||||||
|
|
||||||
|
func randomHttpResponseCode() uint32 {
|
||||||
|
return uint32(httpResponseCodes[rand.Intn(len(httpResponseCodes))])
|
||||||
}
|
}
|
||||||
|
|
||||||
func streamSummaries(count uint32) (summaries []*pb.StreamSummary) {
|
func streamSummaries(count uint32) (summaries []*pb.StreamSummary) {
|
||||||
|
@ -170,53 +246,13 @@ func main() {
|
||||||
sourceIp := randomPod(allPods, nil)
|
sourceIp := randomPod(allPods, nil)
|
||||||
targetIp := randomPod(allPods, sourceIp)
|
targetIp := randomPod(allPods, sourceIp)
|
||||||
|
|
||||||
// HTTP
|
|
||||||
req := &pb.ReportRequest{
|
req := &pb.ReportRequest{
|
||||||
Process: &pb.Process{
|
Process: &pb.Process{
|
||||||
ScheduledInstance: "hello-1mfa0",
|
ScheduledInstance: "hello-1mfa0",
|
||||||
ScheduledNamespace: "people",
|
ScheduledNamespace: "people",
|
||||||
},
|
},
|
||||||
ClientTransports: []*pb.ClientTransport{},
|
|
||||||
ServerTransports: []*pb.ServerTransport{},
|
|
||||||
Proxy: pb.ReportRequest_INBOUND,
|
|
||||||
Requests: []*pb.RequestScope{
|
|
||||||
&pb.RequestScope{
|
|
||||||
Ctx: &pb.RequestCtx{
|
|
||||||
SourceIp: sourceIp,
|
|
||||||
TargetAddr: &common.TcpAddress{
|
|
||||||
Ip: targetIp,
|
|
||||||
Port: randomPort(),
|
|
||||||
},
|
|
||||||
Authority: "world.greeting:7778",
|
|
||||||
Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}},
|
|
||||||
Path: "/World/Greeting",
|
|
||||||
},
|
|
||||||
Count: count,
|
|
||||||
Responses: []*pb.ResponseScope{
|
|
||||||
&pb.ResponseScope{
|
|
||||||
Ctx: &pb.ResponseCtx{
|
|
||||||
HttpStatusCode: http.StatusOK,
|
|
||||||
},
|
|
||||||
ResponseLatencies: randomLatencies(count),
|
|
||||||
Ends: randomEos(count),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = client.Report(context.Background(), req)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
// TCP
|
|
||||||
req = &pb.ReportRequest{
|
|
||||||
Process: &pb.Process{
|
|
||||||
ScheduledInstance: "hello-tcp-1mfa0",
|
|
||||||
ScheduledNamespace: "people-tcp",
|
|
||||||
},
|
|
||||||
ClientTransports: []*pb.ClientTransport{
|
ClientTransports: []*pb.ClientTransport{
|
||||||
|
// TCP
|
||||||
&pb.ClientTransport{
|
&pb.ClientTransport{
|
||||||
TargetAddr: &common.TcpAddress{
|
TargetAddr: &common.TcpAddress{
|
||||||
Ip: targetIp,
|
Ip: targetIp,
|
||||||
|
@ -233,6 +269,7 @@ func main() {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
ServerTransports: []*pb.ServerTransport{
|
ServerTransports: []*pb.ServerTransport{
|
||||||
|
// TCP
|
||||||
&pb.ServerTransport{
|
&pb.ServerTransport{
|
||||||
SourceIp: sourceIp,
|
SourceIp: sourceIp,
|
||||||
Connects: count,
|
Connects: count,
|
||||||
|
@ -246,6 +283,56 @@ func main() {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Proxy: pb.ReportRequest_INBOUND,
|
Proxy: pb.ReportRequest_INBOUND,
|
||||||
|
Requests: []*pb.RequestScope{
|
||||||
|
|
||||||
|
// gRPC
|
||||||
|
&pb.RequestScope{
|
||||||
|
Ctx: &pb.RequestCtx{
|
||||||
|
SourceIp: sourceIp,
|
||||||
|
TargetAddr: &common.TcpAddress{
|
||||||
|
Ip: targetIp,
|
||||||
|
Port: randomPort(),
|
||||||
|
},
|
||||||
|
Authority: "world.greeting:7778",
|
||||||
|
Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}},
|
||||||
|
Path: "/World/GreetingGrpc",
|
||||||
|
},
|
||||||
|
Count: count,
|
||||||
|
Responses: []*pb.ResponseScope{
|
||||||
|
&pb.ResponseScope{
|
||||||
|
Ctx: &pb.ResponseCtx{
|
||||||
|
HttpStatusCode: http.StatusOK,
|
||||||
|
},
|
||||||
|
ResponseLatencies: randomLatencies(count),
|
||||||
|
Ends: randomGrpcEos(count),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
|
// HTTP/2
|
||||||
|
&pb.RequestScope{
|
||||||
|
Ctx: &pb.RequestCtx{
|
||||||
|
SourceIp: sourceIp,
|
||||||
|
TargetAddr: &common.TcpAddress{
|
||||||
|
Ip: targetIp,
|
||||||
|
Port: randomPort(),
|
||||||
|
},
|
||||||
|
Authority: "world.greeting:7778",
|
||||||
|
Method: &common.HttpMethod{Type: &common.HttpMethod_Registered_{Registered: common.HttpMethod_GET}},
|
||||||
|
Path: "/World/GreetingH2",
|
||||||
|
},
|
||||||
|
Count: count,
|
||||||
|
Responses: []*pb.ResponseScope{
|
||||||
|
&pb.ResponseScope{
|
||||||
|
Ctx: &pb.ResponseCtx{
|
||||||
|
HttpStatusCode: randomHttpResponseCode(),
|
||||||
|
},
|
||||||
|
ResponseLatencies: randomLatencies(count),
|
||||||
|
Ends: randomH2Eos(count),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = client.Report(context.Background(), req)
|
_, err = client.Report(context.Background(), req)
|
||||||
|
|
|
@ -4,6 +4,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
@ -397,9 +398,17 @@ func responseLabelsFor(responseScope *write.ResponseScope, eosScope *write.EosSc
|
||||||
classification := "failure"
|
classification := "failure"
|
||||||
switch x := eosScope.Ctx.End.(type) {
|
switch x := eosScope.Ctx.End.(type) {
|
||||||
case *write.EosCtx_GrpcStatusCode:
|
case *write.EosCtx_GrpcStatusCode:
|
||||||
|
// The stream ended with a `grpc-status` trailer.
|
||||||
|
// Classify based on the gRPC status code.
|
||||||
if x.GrpcStatusCode == uint32(codes.OK) {
|
if x.GrpcStatusCode == uint32(codes.OK) {
|
||||||
classification = "success"
|
classification = "success"
|
||||||
}
|
}
|
||||||
|
case *write.EosCtx_Other:
|
||||||
|
// The stream did not end with a `grpc-status` trailer (i.e., it was
|
||||||
|
// not a gRPC message). Classify based on the response's HTTP status.
|
||||||
|
if responseScope.Ctx.HttpStatusCode < http.StatusInternalServerError {
|
||||||
|
classification = "success"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return prometheus.Labels{
|
return prometheus.Labels{
|
||||||
"http_status_code": httpStatusCode,
|
"http_status_code": httpStatusCode,
|
||||||
|
|
Loading…
Reference in New Issue