Fix Prometheus CPU query

This commit is contained in:
Povilas Versockas 2020-04-01 06:42:46 +03:00
parent 8e619f67ad
commit 48089b2628
No known key found for this signature in database
GPG Key ID: 394F2BDC36A5C701
2 changed files with 49 additions and 21 deletions

View File

@ -86,8 +86,10 @@ func (p *prometheusHistoryProvider) getContainerIDFromLabels(labels map[string]s
return &model.ContainerID{
PodID: model.PodID{
Namespace: namespace,
PodName: podName},
ContainerName: containerName}, nil
PodName: podName,
},
ContainerName: containerName,
}, nil
}
func (p *prometheusHistoryProvider) getPodIDFromLabels(labels map[string]string) (*model.PodID, error) {
@ -131,7 +133,8 @@ func getContainerUsageSamplesFromSamples(samples []Sample, resource model.Resour
res = append(res, model.ContainerUsageSample{
MeasureStart: sample.Timestamp,
Usage: resourceAmountFromValue(sample.Value, resource),
Resource: resource})
Resource: resource,
})
}
return res
}
@ -190,7 +193,9 @@ func (p *prometheusHistoryProvider) GetClusterHistory() (map[model.PodID]*PodHis
podSelector := fmt.Sprintf("job=\"%s\", %s=~\".+\", %s!=\"POD\", %s!=\"\"",
p.config.CadvisorMetricsJobName, p.config.CtrPodNameLabel,
p.config.CtrNameLabel, p.config.CtrNameLabel)
err := p.readResourceHistory(res, fmt.Sprintf("rate(container_cpu_usage_seconds_total{%s}[%s])", podSelector, p.config.HistoryLength), model.ResourceCPU)
// This query uses Prometheus subquery notation to give us a five-minute CPU rate, evaluated every 1 minute by default, over the last config.HistoryLength days/hours/minutes. To change the evaluation step, you need to change the Prometheus global.evaluation_interval configuration parameter.
err := p.readResourceHistory(res, fmt.Sprintf("rate(container_cpu_usage_seconds_total{%s}[5m])[%s:]", podSelector, p.config.HistoryLength), model.ResourceCPU)
if err != nil {
return nil, fmt.Errorf("cannot get usage history: %v", err)
}

View File

@ -27,7 +27,7 @@ import (
)
const (
cpuQuery = "rate(container_cpu_usage_seconds_total{job=\"kubernetes-cadvisor\", pod_name=~\".+\", name!=\"POD\", name!=\"\"}[8d])"
cpuQuery = "rate(container_cpu_usage_seconds_total{job=\"kubernetes-cadvisor\", pod_name=~\".+\", name!=\"POD\", name!=\"\"}[5m])[8d:]"
memoryQuery = "container_memory_working_set_bytes{job=\"kubernetes-cadvisor\", pod_name=~\".+\", name!=\"POD\", name!=\"\"}[8d]"
labelsQuery = "up{job=\"kubernetes-pods\"}[8d]"
)
@ -64,7 +64,8 @@ func TestGetEmptyClusterHistory(t *testing.T) {
mockClient := mockPrometheusClient{}
historyProvider := prometheusHistoryProvider{
config: getDefaultPrometheusHistoryProviderConfigForTest(),
prometheusClient: &mockClient}
prometheusClient: &mockClient,
}
mockClient.On("GetTimeseries", mock.AnythingOfType("string")).Times(3).Return(
[]Timeseries{}, nil)
tss, err := historyProvider.GetClusterHistory()
@ -77,7 +78,8 @@ func TestPrometheusError(t *testing.T) {
mockClient := mockPrometheusClient{}
historyProvider := prometheusHistoryProvider{
config: getDefaultPrometheusHistoryProviderConfigForTest(),
prometheusClient: &mockClient}
prometheusClient: &mockClient,
}
mockClient.On("GetTimeseries", mock.AnythingOfType("string")).Times(3).Return(
nil, fmt.Errorf("bla"))
_, err := historyProvider.GetClusterHistory()
@ -88,15 +90,19 @@ func TestGetCPUSamples(t *testing.T) {
mockClient := mockPrometheusClient{}
historyProvider := prometheusHistoryProvider{
config: getDefaultPrometheusHistoryProviderConfigForTest(),
prometheusClient: &mockClient}
prometheusClient: &mockClient,
}
mockClient.On("GetTimeseries", cpuQuery).Return(
[]Timeseries{{
Labels: map[string]string{
"namespace": "default",
"pod_name": "pod",
"name": "container"},
"name": "container",
},
Samples: []Sample{{
Value: 5.5, Timestamp: time.Unix(1, 0)}}}}, nil)
Value: 5.5, Timestamp: time.Unix(1, 0),
}},
}}, nil)
mockClient.On("GetTimeseries", memoryQuery).Return([]Timeseries{}, nil)
mockClient.On("GetTimeseries", labelsQuery).Return([]Timeseries{}, nil)
podID := model.PodID{Namespace: "default", PodName: "pod"}
@ -105,7 +111,9 @@ func TestGetCPUSamples(t *testing.T) {
Samples: map[string][]model.ContainerUsageSample{"container": {{
MeasureStart: time.Unix(1, 0),
Usage: model.CPUAmountFromCores(5.5),
Resource: model.ResourceCPU}}}}
Resource: model.ResourceCPU,
}}},
}
histories, err := historyProvider.GetClusterHistory()
assert.Nil(t, err)
assert.Equal(t, histories, map[model.PodID]*PodHistory{podID: podHistory})
@ -115,16 +123,20 @@ func TestGetMemorySamples(t *testing.T) {
mockClient := mockPrometheusClient{}
historyProvider := prometheusHistoryProvider{
config: getDefaultPrometheusHistoryProviderConfigForTest(),
prometheusClient: &mockClient}
prometheusClient: &mockClient,
}
mockClient.On("GetTimeseries", cpuQuery).Return([]Timeseries{}, nil)
mockClient.On("GetTimeseries", memoryQuery).Return(
[]Timeseries{{
Labels: map[string]string{
"namespace": "default",
"pod_name": "pod",
"name": "container"},
"name": "container",
},
Samples: []Sample{{
Value: 12345, Timestamp: time.Unix(1, 0)}}}}, nil)
Value: 12345, Timestamp: time.Unix(1, 0),
}},
}}, nil)
mockClient.On("GetTimeseries", labelsQuery).Return([]Timeseries{}, nil)
podID := model.PodID{Namespace: "default", PodName: "pod"}
podHistory := &PodHistory{
@ -132,7 +144,9 @@ func TestGetMemorySamples(t *testing.T) {
Samples: map[string][]model.ContainerUsageSample{"container": {{
MeasureStart: time.Unix(1, 0),
Usage: model.MemoryAmountFromBytes(12345),
Resource: model.ResourceMemory}}}}
Resource: model.ResourceMemory,
}}},
}
histories, err := historyProvider.GetClusterHistory()
assert.Nil(t, err)
assert.Equal(t, histories, map[model.PodID]*PodHistory{podID: podHistory})
@ -142,7 +156,8 @@ func TestGetLabels(t *testing.T) {
mockClient := mockPrometheusClient{}
historyProvider := prometheusHistoryProvider{
config: getDefaultPrometheusHistoryProviderConfigForTest(),
prometheusClient: &mockClient}
prometheusClient: &mockClient,
}
mockClient.On("GetTimeseries", cpuQuery).Return([]Timeseries{}, nil)
mockClient.On("GetTimeseries", memoryQuery).Return([]Timeseries{}, nil)
mockClient.On("GetTimeseries", labelsQuery).Return([]Timeseries{
@ -150,21 +165,29 @@ func TestGetLabels(t *testing.T) {
Labels: map[string]string{
"kubernetes_namespace": "default",
"kubernetes_pod_name": "pod",
"pod_label_x": "y"},
"pod_label_x": "y",
},
Samples: []Sample{{
Value: 1, Timestamp: time.Unix(10, 0)}}},
Value: 1, Timestamp: time.Unix(10, 0),
}},
},
{
Labels: map[string]string{
"kubernetes_namespace": "default",
"kubernetes_pod_name": "pod",
"pod_label_x": "z"},
"pod_label_x": "z",
},
Samples: []Sample{{
Value: 1, Timestamp: time.Unix(20, 0)}}}}, nil)
Value: 1, Timestamp: time.Unix(20, 0),
}},
},
}, nil)
podID := model.PodID{Namespace: "default", PodName: "pod"}
podHistory := &PodHistory{
LastLabels: map[string]string{"x": "z"},
LastSeen: time.Unix(20, 0),
Samples: map[string][]model.ContainerUsageSample{}}
Samples: map[string][]model.ContainerUsageSample{},
}
histories, err := historyProvider.GetClusterHistory()
assert.Nil(t, err)
assert.Equal(t, histories, map[model.PodID]*PodHistory{podID: podHistory})