Add filtering by job in stat, tap, top; fix panic (#1904)

Filtering by Kubernetes job was not supported. Also filtering by any unknown
type caused a panic.

Add filtering support by Kubernetes job, with special case mapping `job` to
`k8s_job`, to not conflict with Prometheus' job label.

Fix panic when unknown type specified as a `--from` or `--to` flag.

Fix `job` label from `linkerd-proxy` overwriting Prometheus `job` label at
collection time. This caused all metrics collected by proxy sidecars in
Kubernetes jobs to be collected into an incorrect Prometheus job, rather than
the expected `linkerd-proxy` Prometheus job.

Fix `unsupported resource type` tap error message incorrectly printing the
target resource rather than the destination.

Set `--controller-log-level debug` in `install_test.go` for easier debugging.

Expose `slow-cooker`'s metrics via a k8s service in the tap integration test, to
validate proxy requests with a job as destination.

Fixes #1872
Part of #627

Signed-off-by: Andrew Seigner <siggy@buoyant.io>
This commit is contained in:
Andrew Seigner 2018-12-03 15:34:49 -08:00 committed by GitHub
parent 926395f616
commit 37a5455445
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 186 additions and 37 deletions

View File

@ -66,16 +66,17 @@ func newCmdStat() *cobra.Command {
* au/my-authority
* po/mypod1 rc/my-replication-controller
* po mypod1 mypod2
* deploy/ po/
* all
Valid resource types include:
Valid resource types include:
* deployments
* namespaces
* pods
* replicationcontrollers
* authorities (not supported in --from)
* services (only supported if a --from is also specified, or as a --to)
* jobs (only supported as a --from or --to)
* all (all resource types, not supported in --from or --to)
This command will hide resources that have completed, such as pods that are in the Succeeded or Failed phases.

View File

@ -59,12 +59,12 @@ func newCmdTap() *cobra.Command {
* ns/my-ns
Valid resource types include:
* deployments
* namespaces
* pods
* replicationcontrollers
* services (only supported as a "--to" resource)`,
* services (only supported as a --to resource)
* jobs (only supported as a --to resource)`,
Example: ` # tap the web deployment in the default namespace
linkerd tap deploy/web

View File

@ -638,6 +638,9 @@ data:
- source_labels: [__meta_kubernetes_pod_label_linkerd_io_proxy_job]
action: replace
target_label: k8s_job
# drop __meta_kubernetes_pod_label_linkerd_io_proxy_job
- action: labeldrop
regex: __meta_kubernetes_pod_label_linkerd_io_proxy_job
# __meta_kubernetes_pod_label_linkerd_io_proxy_deployment=foo =>
# deployment=foo
- action: labelmap

View File

@ -662,6 +662,9 @@ data:
- source_labels: [__meta_kubernetes_pod_label_linkerd_io_proxy_job]
action: replace
target_label: k8s_job
# drop __meta_kubernetes_pod_label_linkerd_io_proxy_job
- action: labeldrop
regex: __meta_kubernetes_pod_label_linkerd_io_proxy_job
# __meta_kubernetes_pod_label_linkerd_io_proxy_deployment=foo =>
# deployment=foo
- action: labelmap

View File

@ -662,6 +662,9 @@ data:
- source_labels: [__meta_kubernetes_pod_label_linkerd_io_proxy_job]
action: replace
target_label: k8s_job
# drop __meta_kubernetes_pod_label_linkerd_io_proxy_job
- action: labeldrop
regex: __meta_kubernetes_pod_label_linkerd_io_proxy_job
# __meta_kubernetes_pod_label_linkerd_io_proxy_deployment=foo =>
# deployment=foo
- action: labelmap

View File

@ -643,6 +643,9 @@ data:
- source_labels: [__meta_kubernetes_pod_label_linkerd_io_proxy_job]
action: replace
target_label: k8s_job
# drop __meta_kubernetes_pod_label_linkerd_io_proxy_job
- action: labeldrop
regex: __meta_kubernetes_pod_label_linkerd_io_proxy_job
# __meta_kubernetes_pod_label_linkerd_io_proxy_deployment=foo =>
# deployment=foo
- action: labelmap

View File

@ -647,6 +647,9 @@ data:
- source_labels: [__meta_kubernetes_pod_label_linkerd_io_proxy_job]
action: replace
target_label: k8s_job
# drop __meta_kubernetes_pod_label_linkerd_io_proxy_job
- action: labeldrop
regex: __meta_kubernetes_pod_label_linkerd_io_proxy_job
# __meta_kubernetes_pod_label_linkerd_io_proxy_deployment=foo =>
# deployment=foo
- action: labelmap

View File

@ -101,12 +101,12 @@ func newCmdTop() *cobra.Command {
* ns/my-ns
Valid resource types include:
* deployments
* namespaces
* pods
* replicationcontrollers
* services (only supported as a "--to" resource)`,
* services (only supported as a --to resource)
* jobs (only supported as a --to resource)`,
Example: ` # display traffic for the web deployment in the default namespace
linkerd top deploy/web

View File

@ -488,6 +488,9 @@ data:
- source_labels: [__meta_kubernetes_pod_label_linkerd_io_proxy_job]
action: replace
target_label: k8s_job
# drop __meta_kubernetes_pod_label_linkerd_io_proxy_job
- action: labeldrop
regex: __meta_kubernetes_pod_label_linkerd_io_proxy_job
# __meta_kubernetes_pod_label_linkerd_io_proxy_deployment=foo =>
# deployment=foo
- action: labelmap

View File

@ -125,7 +125,8 @@ func promDirectionLabels(direction string) model.LabelSet {
}
func promResourceType(resource *pb.Resource) model.LabelName {
return model.LabelName(resource.Type)
l5dLabel := k8s.KindToL5DLabel(resource.Type)
return model.LabelName(l5dLabel)
}
func (s *grpcServer) getPrometheusMetrics(ctx context.Context, volumeQueryTemplate, latencyQueryTemplate, labels, timeWindow, groupBy string) ([]promResult, error) {

View File

@ -29,18 +29,18 @@ var (
// target resource on an outbound 'to' query
// destination resource on an outbound 'from' query
ValidTargets = []string{
k8s.Authority,
k8s.Deployment,
k8s.Namespace,
k8s.Pod,
k8s.ReplicationController,
k8s.Authority,
}
// ValidDestinations specifies resource types allowed as a destination:
// ValidTapDestinations specifies resource types allowed as a tap destination:
// destination resource on an outbound 'to' query
// target resource on an outbound 'from' query
ValidDestinations = []string{
ValidTapDestinations = []string{
k8s.Deployment,
k8s.Job,
k8s.Namespace,
k8s.Pod,
k8s.ReplicationController,
@ -281,6 +281,10 @@ func validateFromResourceType(resourceType string) (string, error) {
// It's the same as BuildResources but only admits one arg and only returns one resource
func BuildResource(namespace, arg string) (pb.Resource, error) {
res, err := BuildResources(namespace, []string{arg})
if err != nil {
return pb.Resource{}, err
}
return res[0], err
}
@ -386,8 +390,8 @@ func BuildTapByResourceRequest(params TapRequestParams) (*pb.TapByResourceReques
if err != nil {
return nil, fmt.Errorf("destination resource invalid: %s", err)
}
if !contains(ValidDestinations, destination.Type) {
return nil, fmt.Errorf("unsupported resource type [%s]", target.Type)
if !contains(ValidTapDestinations, destination.Type) {
return nil, fmt.Errorf("unsupported resource type [%s]", destination.Type)
}
match := pb.TapByResourceRequest_Match{

View File

@ -16,8 +16,8 @@ import (
func TestGRPCError(t *testing.T) {
t.Run("Maps errors to gRPC errors", func(t *testing.T) {
expectations := map[error]error{
nil: nil,
errors.New("normal erro"): errors.New("rpc error: code = Unknown desc = normal erro"),
nil: nil,
errors.New("normal erro"): errors.New("rpc error: code = Unknown desc = normal erro"),
status.Error(codes.NotFound, "grpc not found"): errors.New("rpc error: code = NotFound desc = grpc not found"),
k8sError.NewNotFound(schema.GroupResource{Group: "foo", Resource: "bar"}, "http not found"): errors.New("rpc error: code = NotFound desc = bar.foo \"http not found\" not found"),
k8sError.NewServiceUnavailable("unavailable"): errors.New("rpc error: code = Unavailable desc = unavailable"),
@ -224,6 +224,95 @@ func TestBuildResource(t *testing.T) {
resource pb.Resource
}
t.Run("Returns expected errors on invalid input", func(t *testing.T) {
msg := "cannot find Kubernetes canonical name from friendly name [invalid]"
expectations := []resourceExp{
resourceExp{
namespace: "",
args: []string{"invalid"},
},
}
for _, exp := range expectations {
_, err := BuildResource(exp.namespace, exp.args[0])
if err == nil {
t.Fatalf("BuildResource called with invalid resources unexpectedly succeeded, should have returned %s", msg)
}
if err.Error() != msg {
t.Fatalf("BuildResource called with invalid resources should have returned: %s but got unexpected message: %s", msg, err)
}
}
})
t.Run("Correctly parses Kubernetes resources from the command line", func(t *testing.T) {
expectations := []resourceExp{
resourceExp{
namespace: "test-ns",
args: []string{"deployments"},
resource: pb.Resource{
Namespace: "test-ns",
Type: k8s.Deployment,
Name: "",
},
},
resourceExp{
namespace: "",
args: []string{"deploy/foo"},
resource: pb.Resource{
Namespace: "",
Type: k8s.Deployment,
Name: "foo",
},
},
resourceExp{
namespace: "foo-ns",
args: []string{"po"},
resource: pb.Resource{
Namespace: "foo-ns",
Type: k8s.Pod,
Name: "",
},
},
resourceExp{
namespace: "foo-ns",
args: []string{"ns"},
resource: pb.Resource{
Namespace: "",
Type: k8s.Namespace,
Name: "",
},
},
resourceExp{
namespace: "foo-ns",
args: []string{"ns/foo-ns2"},
resource: pb.Resource{
Namespace: "",
Type: k8s.Namespace,
Name: "foo-ns2",
},
},
}
for _, exp := range expectations {
res, err := BuildResource(exp.namespace, exp.args[0])
if err != nil {
t.Fatalf("Unexpected error from BuildResource(%+v) => %s", exp, err)
}
if !reflect.DeepEqual(exp.resource, res) {
t.Fatalf("Expected resource to be [%+v] but was [%+v]", exp.resource, res)
}
}
})
}
func TestBuildResources(t *testing.T) {
type resourceExp struct {
namespace string
args []string
resource pb.Resource
}
t.Run("Rejects duped resources", func(t *testing.T) {
msg := "cannot supply duplicate resources"
expectations := []resourceExp{
@ -240,10 +329,10 @@ func TestBuildResource(t *testing.T) {
for _, exp := range expectations {
_, err := BuildResources(exp.namespace, exp.args)
if err == nil {
t.Fatalf("BuildResource called with duped resources unexpectedly succeeded, should have returned %s", msg)
t.Fatalf("BuildResources called with duped resources unexpectedly succeeded, should have returned %s", msg)
}
if err.Error() != msg {
t.Fatalf("BuildResource called with duped resources should have returned: %s but got unexpected message: %s", msg, err)
t.Fatalf("BuildResources called with duped resources should have returned: %s but got unexpected message: %s", msg, err)
}
}
})
@ -268,10 +357,10 @@ func TestBuildResource(t *testing.T) {
for _, exp := range expectations {
_, err := BuildResources(exp.namespace, exp.args)
if err == nil {
t.Fatalf("BuildResource called with 'all' and another resource unexpectedly succeeded, should have returned %s", msg)
t.Fatalf("BuildResources called with 'all' and another resource unexpectedly succeeded, should have returned %s", msg)
}
if err.Error() != msg {
t.Fatalf("BuildResource called with 'all' and another resource should have returned: %s but got unexpected message: %s", msg, err)
t.Fatalf("BuildResources called with 'all' and another resource should have returned: %s but got unexpected message: %s", msg, err)
}
}
})

View File

@ -203,7 +203,8 @@ func makeByResourceMatch(match *public.TapByResourceRequest_Match) (*proxy.Obser
func destinationLabels(resource *public.Resource) map[string]string {
dstLabels := map[string]string{}
if resource.Name != "" {
dstLabels[resource.Type] = resource.Name
l5dLabel := pkgK8s.KindToL5DLabel(resource.Type)
dstLabels[l5dLabel] = resource.Name
}
if resource.Type != pkgK8s.Namespace && resource.Namespace != "" {
dstLabels["namespace"] = resource.Namespace

View File

@ -13,6 +13,7 @@ const (
Authority = "authority"
DaemonSet = "daemonset"
Deployment = "deployment"
Job = "job"
Namespace = "namespace"
Pod = "pod"
ReplicationController = "replicationcontroller"
@ -20,6 +21,9 @@ const (
Service = "service"
ServiceProfile = "serviceprofile"
StatefulSet = "statefulset"
// special case k8s job label, to not conflict with Prometheus' job label
l5dJob = "k8s_job"
)
// AllResources is a sorted list of all resources defined as constants above.
@ -27,6 +31,7 @@ var AllResources = []string{
Authority,
DaemonSet,
Deployment,
Job,
Namespace,
Pod,
ReplicationController,
@ -94,22 +99,28 @@ func GetConfig(fpath, kubeContext string) (*rest.Config, error) {
// This also works for non-k8s resources, e.g. authorities
func CanonicalResourceNameFromFriendlyName(friendlyName string) (string, error) {
switch friendlyName {
case "deploy", "deployment", "deployments":
return Deployment, nil
case "au", "authority", "authorities":
return Authority, nil
case "ds", "daemonset", "daemonsets":
return DaemonSet, nil
case "deploy", "deployment", "deployments":
return Deployment, nil
case "job", "jobs":
return Job, nil
case "ns", "namespace", "namespaces":
return Namespace, nil
case "po", "pod", "pods":
return Pod, nil
case "rc", "replicationcontroller", "replicationcontrollers":
return ReplicationController, nil
case "rs", "replicaset", "replicasets":
return ReplicaSet, nil
case "svc", "service", "services":
return Service, nil
case "sp", "serviceprofile", "serviceprofiles":
return ServiceProfile, nil
case "sts", "statefulset", "statefulsets":
return StatefulSet, nil
case "au", "authority", "authorities":
return Authority, nil
case "all":
return All, nil
}
@ -121,10 +132,14 @@ func CanonicalResourceNameFromFriendlyName(friendlyName string) (string, error)
// Essentially the reverse of CanonicalResourceNameFromFriendlyName
func ShortNameFromCanonicalResourceName(canonicalName string) string {
switch canonicalName {
case Deployment:
return "deploy"
case Authority:
return "au"
case DaemonSet:
return "ds"
case Deployment:
return "deploy"
case Job:
return "job"
case Namespace:
return "ns"
case Pod:
@ -135,11 +150,18 @@ func ShortNameFromCanonicalResourceName(canonicalName string) string {
return "rs"
case Service:
return "svc"
case ServiceProfile:
return "sp"
case StatefulSet:
return "sts"
case Authority:
return "au"
default:
return ""
}
}
func KindToL5DLabel(k8sKind string) string {
if k8sKind == Job {
return l5dJob
}
return k8sKind
}

View File

@ -175,11 +175,9 @@ func CreatedByAnnotationValue() string {
// GetPodLabels returns the set of prometheus owner labels for a given pod
func GetPodLabels(ownerKind, ownerName string, pod *coreV1.Pod) map[string]string {
labels := map[string]string{"pod": pod.Name}
if ownerKind == "job" {
labels["k8s_job"] = ownerName
} else {
labels[ownerKind] = ownerName
}
l5dLabel := KindToL5DLabel(ownerKind)
labels[l5dLabel] = ownerName
if controllerNS := pod.Labels[ControllerNSLabel]; controllerNS != "" {
labels["control_plane_ns"] = controllerNS

View File

@ -66,7 +66,7 @@ func TestCheckPreInstall(t *testing.T) {
}
func TestInstall(t *testing.T) {
cmd := []string{"install", "--linkerd-version", TestHelper.GetVersion()}
cmd := []string{"install", "--controller-log-level", "debug", "--linkerd-version", TestHelper.GetVersion()}
if TestHelper.TLS() {
cmd = append(cmd, []string{"--tls", "optional"}...)
linkerdDeployReplicas["ca"] = 1
@ -188,7 +188,7 @@ func TestInject(t *testing.T) {
t.Fatalf("kubectl apply command failed\n%s", out)
}
for _, deploy := range []string{"smoke-test-terminus","smoke-test-gateway"} {
for _, deploy := range []string{"smoke-test-terminus", "smoke-test-gateway"} {
err = TestHelper.CheckPods(prefixedNs, deploy, 1)
if err != nil {
t.Fatalf("Unexpected error: %v", err)

View File

@ -167,7 +167,8 @@ metadata:
spec:
template:
metadata:
name: slow-cooker
labels:
app: slow-cooker
spec:
containers:
- name: slow-cooker
@ -178,5 +179,19 @@ spec:
- "-c"
- |
sleep 15 # wait for pods to start
slow_cooker http://gateway-svc:8080
slow_cooker -metric-addr 0.0.0.0:9999 http://gateway-svc:8080
ports:
- containerPort: 9999
restartPolicy: OnFailure
---
apiVersion: v1
kind: Service
metadata:
name: slow-cooker
spec:
selector:
app: slow-cooker
ports:
- name: metrics
port: 9999
targetPort: 9999