Merge pull request #13 from brancz/pod-metrics

add more pod metrics
This commit is contained in:
Fabian Reinartz 2016-09-12 18:03:04 +02:00 committed by GitHub
commit 1703dbe0fa
4 changed files with 374 additions and 30 deletions

View File

@ -22,7 +22,9 @@ There are many more metrics we could report, but this first pass is focused on
those that could be used for actionable alerts. Please contribute PRs for
additional metrics!
### WARNING: THESE METRIC/TAG NAMES ARE UNSTABLE AND MAY CHANGE IN A FUTURE RELEASE.
> WARNING: THESE METRIC/TAG NAMES ARE UNSTABLE AND MAY CHANGE IN A FUTURE RELEASE.
### Node Metrics
| Metric name| Metric type | Labels/tags |
| ---------- | ----------- | ----------- |
@ -36,6 +38,11 @@ additional metrics!
| node_status_allocateable_cpu_cores | Gauge | `node`=<node-address>|
| node_status_allocateable_memory_bytes | Gauge | `node`=<node-address>|
| node_status_allocateable_pods | Gauge | `node`=<node-address>|
### Deployment Metrics
| Metric name| Metric type | Labels/tags |
| ---------- | ----------- | ----------- |
| deployment_replicas | Gauge | `deployment`=&lt;deployment-name&gt; <br> `namespace`=&lt;deployment-namespace&gt; |
| deployment_replicas_available | Gauge | `deployment`=&lt;deployment-name&gt; <br> `namespace`=&lt;deployment-namespace&gt; |
| deployment_replicas_unavailable | Gauge | `deployment`=&lt;deployment-name&gt; <br> `namespace`=&lt;deployment-namespace&gt; |
@ -43,7 +50,20 @@ additional metrics!
| deployment_replicas_observed_generation | Gauge | `deployment`=&lt;deployment-name&gt; <br> `namespace`=&lt;deployment-namespace&gt; |
| deployment_spec_replicas | Gauge | `deployment`=&lt;deployment-name&gt; <br> `namespace`=&lt;deployment-namespace&gt; |
| deployment_spec_paused | Gauge | `deployment`=&lt;deployment-name&gt; <br> `namespace`=&lt;deployment-namespace&gt; |
| pod_container_restarts | Counter | `container`=&lt;container-name&gt; <br> `namespace`=&lt;pod-namespace&gt; <br> `pod`=&lt;pod-name&gt; |
### Pod Metrics
| Metric name| Metric type | Labels/tags |
| ---------- | ----------- | ----------- |
| pod_info | Gauge | `pod`=&lt;pod-name&gt; <br> `namespace`=&lt;pod-namespace&gt; <br> `host_ip`=&lt;host-ip&gt; <br> `pod_ip`=&lt;pod-ip&gt; |
| pod_status_phase | Gauge | `pod`=&lt;pod-name&gt; <br> `namespace`=&lt;pod-namespace&gt; <br> `phase`=&lt;Pending\|Running\|Succeeded\|Failed\|Unknown&gt; |
| pod_status_ready | Gauge | `pod`=&lt;pod-name&gt; <br> `namespace`=&lt;pod-namespace&gt; <br> `condition`=&lt;true\|false\|unknown&gt; |
| pod_container_info | Gauge | `container`=&lt;container-name&gt; <br> `pod`=&lt;pod-name&gt; <br> `namespace`=&lt;pod-namespace&gt; <br> `image`=&lt;image-name&gt; <br> `image_id`=&lt;image-id&gt; <br> `container_id`=&lt;container-id&gt; |
| pod_container_status_waiting | Gauge | `container`=&lt;container-name&gt; <br> `pod`=&lt;pod-name&gt; <br> `namespace`=&lt;pod-namespace&gt; |
| pod_container_status_running | Gauge | `container`=&lt;container-name&gt; <br> `pod`=&lt;pod-name&gt; <br> `namespace`=&lt;pod-namespace&gt; |
| pod_container_status_terminated | Gauge | `container`=&lt;container-name&gt; <br> `pod`=&lt;pod-name&gt; <br> `namespace`=&lt;pod-namespace&gt; |
| pod_container_status_ready | Gauge | `container`=&lt;container-name&gt; <br> `pod`=&lt;pod-name&gt; <br> `namespace`=&lt;pod-namespace&gt; |
| pod_container_status_restarts | Counter | `container`=&lt;container-name&gt; <br> `namespace`=&lt;pod-namespace&gt; <br> `pod`=&lt;pod-name&gt; |
## kube-state-metrics vs. Heapster

16
node.go
View File

@ -140,9 +140,9 @@ func (nc *nodeCollector) collectNode(ch chan<- prometheus.Metric, n api.Node) {
for _, c := range n.Status.Conditions {
switch c.Type {
case api.NodeReady:
nodeConditionMetrics(ch, descNodeStatusReady, n.Name, c.Status)
addConditionMetrics(ch, descNodeStatusReady, c.Status, n.Name)
case api.NodeOutOfDisk:
nodeConditionMetrics(ch, descNodeStatusOutOfDisk, n.Name, c.Status)
addConditionMetrics(ch, descNodeStatusOutOfDisk, c.Status, n.Name)
}
}
@ -168,19 +168,21 @@ func (nc *nodeCollector) collectNode(ch chan<- prometheus.Metric, n api.Node) {
addResource(descNodeStatusAllocateablePods, n.Status.Allocatable, api.ResourcePods)
}
// nodeConditionMetrics generates one metric for each possible node condition status.
func nodeConditionMetrics(ch chan<- prometheus.Metric, desc *prometheus.Desc, name string, cs api.ConditionStatus) {
// addConditionMetrics generates one metric for each possible condition
// status (true, false, unknown). For this function to work properly, the
// last label in the metric description must be the condition.
func addConditionMetrics(ch chan<- prometheus.Metric, desc *prometheus.Desc, cs api.ConditionStatus, lv ...string) {
ch <- prometheus.MustNewConstMetric(
desc, prometheus.GaugeValue, boolFloat64(cs == api.ConditionTrue),
name, "true",
append(lv, "true")...,
)
ch <- prometheus.MustNewConstMetric(
desc, prometheus.GaugeValue, boolFloat64(cs == api.ConditionFalse),
name, "false",
append(lv, "false")...,
)
ch <- prometheus.MustNewConstMetric(
desc, prometheus.GaugeValue, boolFloat64(cs == api.ConditionUnknown),
name, "unknown",
append(lv, "unknown")...,
)
}

95
pod.go
View File

@ -24,8 +24,48 @@ import (
)
var (
descContainerRestarts = prometheus.NewDesc(
"pod_container_restarts",
descPodInfo = prometheus.NewDesc(
"pod_info",
"Information about pod.",
[]string{"namespace", "pod", "host_ip", "pod_ip"}, nil,
)
descPodStatusPhase = prometheus.NewDesc(
"pod_status_phase",
"The pods current phase.",
[]string{"namespace", "pod", "phase"}, nil,
)
descPodStatusReady = prometheus.NewDesc(
"pod_status_ready",
"Describes whether the pod is ready to serve requests.",
[]string{"namespace", "pod", "condition"}, nil,
)
descPodContainerInfo = prometheus.NewDesc(
"pod_container_info",
"Information about a container in a pod.",
[]string{"namespace", "pod", "container", "image", "image_id", "container_id"}, nil,
)
descPodContainerStatusWaiting = prometheus.NewDesc(
"pod_container_status_waiting",
"Describes whether the container is currently in waiting state.",
[]string{"namespace", "pod", "container"}, nil,
)
descPodContainerStatusRunning = prometheus.NewDesc(
"pod_container_status_running",
"Describes whether the container is currently in running state.",
[]string{"namespace", "pod", "container"}, nil,
)
descPodContainerStatusTerminated = prometheus.NewDesc(
"pod_container_status_terminated",
"Describes whether the container is currently in terminated state.",
[]string{"namespace", "pod", "container"}, nil,
)
descPodContainerStatusReady = prometheus.NewDesc(
"pod_container_status_ready",
"Describes whether the containers readiness check succeeded.",
[]string{"namespace", "pod", "container"}, nil,
)
descPodContainerStatusRestarts = prometheus.NewDesc(
"pod_container_status_restarts",
"The number of container restarts per container.",
[]string{"namespace", "pod", "container"}, nil,
)
@ -42,7 +82,15 @@ type podCollector struct {
// Describe implements the prometheus.Collector interface.
func (pc *podCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- descContainerRestarts
ch <- descPodInfo
ch <- descPodStatusPhase
ch <- descPodStatusReady
ch <- descPodContainerInfo
ch <- descPodContainerStatusWaiting
ch <- descPodContainerStatusRunning
ch <- descPodContainerStatusTerminated
ch <- descPodContainerStatusReady
ch <- descPodContainerStatusRestarts
}
// Collect implements the prometheus.Collector interface.
@ -53,19 +101,40 @@ func (pc *podCollector) Collect(ch chan<- prometheus.Metric) {
return
}
for _, p := range pods {
for _, m := range pc.collectPod(p) {
ch <- m
pc.collectPod(ch, p)
}
}
func (pc *podCollector) collectPod(ch chan<- prometheus.Metric, p *api.Pod) {
addConstMetric := func(desc *prometheus.Desc, t prometheus.ValueType, v float64, lv ...string) {
lv = append([]string{p.Namespace, p.Name}, lv...)
ch <- prometheus.MustNewConstMetric(desc, t, v, lv...)
}
addGauge := func(desc *prometheus.Desc, v float64, lv ...string) {
addConstMetric(desc, prometheus.GaugeValue, v, lv...)
}
addCounter := func(desc *prometheus.Desc, v float64, lv ...string) {
addConstMetric(desc, prometheus.CounterValue, v, lv...)
}
addGauge(descPodInfo, 1, p.Status.HostIP, p.Status.PodIP)
addGauge(descPodStatusPhase, 1, string(p.Status.Phase))
for _, c := range p.Status.Conditions {
switch c.Type {
case api.PodReady:
addConditionMetrics(ch, descPodStatusReady, c.Status, p.Namespace, p.Name)
}
}
}
func (pc *podCollector) collectPod(p *api.Pod) (res []prometheus.Metric) {
for _, cs := range p.Status.ContainerStatuses {
res = append(res, prometheus.MustNewConstMetric(
descContainerRestarts, prometheus.CounterValue, float64(cs.RestartCount),
p.Namespace, p.Name, cs.Name,
))
addGauge(descPodContainerInfo, 1,
cs.Name, cs.Image, cs.ImageID, cs.ContainerID,
)
addGauge(descPodContainerStatusWaiting, boolFloat64(cs.State.Waiting != nil), cs.Name)
addGauge(descPodContainerStatusRunning, boolFloat64(cs.State.Running != nil), cs.Name)
addGauge(descPodContainerStatusTerminated, boolFloat64(cs.State.Terminated != nil), cs.Name)
addGauge(descPodContainerStatusReady, boolFloat64(cs.Ready), cs.Name)
addCounter(descPodContainerStatusRestarts, float64(cs.RestartCount), cs.Name)
}
return
}

View File

@ -23,6 +23,8 @@ import (
"k8s.io/kubernetes/pkg/labels"
)
var ()
type mockPodStore struct {
f func() ([]*api.Pod, error)
}
@ -35,14 +37,117 @@ func TestPodCollector(t *testing.T) {
// Fixed metadata on type and help text. We prepend this to every expected
// output so we only have to modify a single place when doing adjustments.
const metadata = `
# HELP pod_container_restarts The number of container restarts per container.
# TYPE pod_container_restarts counter
# HELP pod_container_info Information about a container in a pod.
# TYPE pod_container_info gauge
# HELP pod_container_status_ready Describes whether the containers readiness check succeeded.
# TYPE pod_container_status_ready gauge
# HELP pod_container_status_restarts The number of container restarts per container.
# TYPE pod_container_status_restarts counter
# HELP pod_container_status_running Describes whether the container is currently in running state.
# TYPE pod_container_status_running gauge
# HELP pod_container_status_terminated Describes whether the container is currently in terminated state.
# TYPE pod_container_status_terminated gauge
# HELP pod_container_status_waiting Describes whether the container is currently in waiting state.
# TYPE pod_container_status_waiting gauge
# HELP pod_info Information about pod.
# TYPE pod_info gauge
# HELP pod_status_phase The pods current phase.
# TYPE pod_status_phase gauge
# HELP pod_status_ready Describes whether the pod is ready to serve requests.
# TYPE pod_status_ready gauge
`
cases := []struct {
pods []*api.Pod
want string
pods []*api.Pod
metrics []string
want string
}{
{
pods: []*api.Pod{
{
ObjectMeta: api.ObjectMeta{
Name: "pod1",
Namespace: "ns1",
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
api.ContainerStatus{
Name: "container1",
Image: "gcr.io/google_containers/hyperkube1",
ImageID: "docker://sha256:aaa",
ContainerID: "docker://ab123",
},
},
},
}, {
ObjectMeta: api.ObjectMeta{
Name: "pod2",
Namespace: "ns2",
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
api.ContainerStatus{
Name: "container2",
Image: "gcr.io/google_containers/hyperkube2",
ImageID: "docker://sha256:bbb",
ContainerID: "docker://cd456",
},
api.ContainerStatus{
Name: "container3",
Image: "gcr.io/google_containers/hyperkube3",
ImageID: "docker://sha256:ccc",
ContainerID: "docker://ef789",
},
},
},
},
},
want: metadata + `
pod_container_info{container="container1",container_id="docker://ab123",image="gcr.io/google_containers/hyperkube1",image_id="docker://sha256:aaa",namespace="ns1",pod="pod1"} 1
pod_container_info{container="container2",container_id="docker://cd456",image="gcr.io/google_containers/hyperkube2",image_id="docker://sha256:bbb",namespace="ns2",pod="pod2"} 1
pod_container_info{container="container3",container_id="docker://ef789",image="gcr.io/google_containers/hyperkube3",image_id="docker://sha256:ccc",namespace="ns2",pod="pod2"} 1
`,
metrics: []string{"pod_container_info"},
}, {
pods: []*api.Pod{
{
ObjectMeta: api.ObjectMeta{
Name: "pod1",
Namespace: "ns1",
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
api.ContainerStatus{
Name: "container1",
Ready: true,
},
},
},
}, {
ObjectMeta: api.ObjectMeta{
Name: "pod2",
Namespace: "ns2",
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
api.ContainerStatus{
Name: "container2",
Ready: true,
},
api.ContainerStatus{
Name: "container3",
Ready: false,
},
},
},
},
},
want: metadata + `
pod_container_status_ready{container="container1",namespace="ns1",pod="pod1"} 1
pod_container_status_ready{container="container2",namespace="ns2",pod="pod2"} 1
pod_container_status_ready{container="container3",namespace="ns2",pod="pod2"} 0
`,
metrics: []string{"pod_container_status_ready"},
}, {
pods: []*api.Pod{
{
ObjectMeta: api.ObjectMeta{
@ -77,10 +182,158 @@ func TestPodCollector(t *testing.T) {
},
},
want: metadata + `
pod_container_restarts{container="container1",namespace="ns1",pod="pod1"} 0
pod_container_restarts{container="container2",namespace="ns2",pod="pod2"} 0
pod_container_restarts{container="container3",namespace="ns2",pod="pod2"} 1
pod_container_status_restarts{container="container1",namespace="ns1",pod="pod1"} 0
pod_container_status_restarts{container="container2",namespace="ns2",pod="pod2"} 0
pod_container_status_restarts{container="container3",namespace="ns2",pod="pod2"} 1
`,
metrics: []string{"pod_container_status_restarts"},
}, {
pods: []*api.Pod{
{
ObjectMeta: api.ObjectMeta{
Name: "pod1",
Namespace: "ns1",
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
api.ContainerStatus{
Name: "container1",
State: api.ContainerState{
Running: &api.ContainerStateRunning{},
},
},
},
},
}, {
ObjectMeta: api.ObjectMeta{
Name: "pod2",
Namespace: "ns2",
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
api.ContainerStatus{
Name: "container2",
State: api.ContainerState{
Terminated: &api.ContainerStateTerminated{},
},
},
api.ContainerStatus{
Name: "container3",
State: api.ContainerState{
Waiting: &api.ContainerStateWaiting{},
},
},
},
},
},
},
want: metadata + `
pod_container_status_running{container="container1",namespace="ns1",pod="pod1"} 1
pod_container_status_running{container="container2",namespace="ns2",pod="pod2"} 0
pod_container_status_running{container="container3",namespace="ns2",pod="pod2"} 0
pod_container_status_terminated{container="container1",namespace="ns1",pod="pod1"} 0
pod_container_status_terminated{container="container2",namespace="ns2",pod="pod2"} 1
pod_container_status_terminated{container="container3",namespace="ns2",pod="pod2"} 0
pod_container_status_waiting{container="container1",namespace="ns1",pod="pod1"} 0
pod_container_status_waiting{container="container2",namespace="ns2",pod="pod2"} 0
pod_container_status_waiting{container="container3",namespace="ns2",pod="pod2"} 1
`,
metrics: []string{
"pod_container_status_running",
"pod_container_status_waiting",
"pod_container_status_terminated",
},
}, {
pods: []*api.Pod{
{
ObjectMeta: api.ObjectMeta{
Name: "pod1",
Namespace: "ns1",
},
Status: api.PodStatus{
HostIP: "1.1.1.1",
PodIP: "1.2.3.4",
},
}, {
ObjectMeta: api.ObjectMeta{
Name: "pod2",
Namespace: "ns2",
},
Status: api.PodStatus{
HostIP: "1.1.1.1",
PodIP: "2.3.4.5",
},
},
},
want: metadata + `
pod_info{host_ip="1.1.1.1",namespace="ns1",pod="pod1",pod_ip="1.2.3.4"} 1
pod_info{host_ip="1.1.1.1",namespace="ns2",pod="pod2",pod_ip="2.3.4.5"} 1
`,
metrics: []string{"pod_info"},
}, {
pods: []*api.Pod{
{
ObjectMeta: api.ObjectMeta{
Name: "pod1",
Namespace: "ns1",
},
Status: api.PodStatus{
Phase: "Running",
},
}, {
ObjectMeta: api.ObjectMeta{
Name: "pod2",
Namespace: "ns2",
},
Status: api.PodStatus{
Phase: "Pending",
},
},
},
want: metadata + `
pod_status_phase{namespace="ns1",phase="Running",pod="pod1"} 1
pod_status_phase{namespace="ns2",phase="Pending",pod="pod2"} 1
`,
metrics: []string{"pod_status_phase"},
}, {
pods: []*api.Pod{
{
ObjectMeta: api.ObjectMeta{
Name: "pod1",
Namespace: "ns1",
},
Status: api.PodStatus{
Conditions: []api.PodCondition{
api.PodCondition{
Type: api.PodReady,
Status: api.ConditionTrue,
},
},
},
}, {
ObjectMeta: api.ObjectMeta{
Name: "pod2",
Namespace: "ns2",
},
Status: api.PodStatus{
Conditions: []api.PodCondition{
api.PodCondition{
Type: api.PodReady,
Status: api.ConditionFalse,
},
},
},
},
},
want: metadata + `
pod_status_ready{condition="false",namespace="ns1",pod="pod1"} 0
pod_status_ready{condition="false",namespace="ns2",pod="pod2"} 1
pod_status_ready{condition="true",namespace="ns1",pod="pod1"} 1
pod_status_ready{condition="true",namespace="ns2",pod="pod2"} 0
pod_status_ready{condition="unknown",namespace="ns1",pod="pod1"} 0
pod_status_ready{condition="unknown",namespace="ns2",pod="pod2"} 0
`,
metrics: []string{"pod_status_ready"},
},
}
for _, c := range cases {
@ -89,7 +342,7 @@ func TestPodCollector(t *testing.T) {
f: func() ([]*api.Pod, error) { return c.pods, nil },
},
}
if err := gatherAndCompare(pc, c.want, nil); err != nil {
if err := gatherAndCompare(pc, c.want, c.metrics); err != nil {
t.Errorf("unexpected collecting result:\n%s", err)
}
}