# docs/archived/v0.23-docs/install/collecting-metrics/prometheus.yaml
# ServiceAccount that the Prometheus pod runs as; granted read access to
# services/endpoints/pods in this namespace by the Role/RoleBinding below.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: metrics
---
# Note: For general cluster use, you may want to use a ClusterRole and
# ClusterRoleBinding to grant Prometheus the ability to list all services and
# pods in the cluster. For this use case, we only need to grant access to the
# same namespace, and can use a Role and RoleBinding.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: watch-services-and-pods
  namespace: metrics
rules:
  # Core ("") API group: read-only access needed by Prometheus' Kubernetes
  # service discovery (role: endpoints) in the scrape config below.
  - apiGroups:
      - ""
    resources:
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
---
# Binds the read-only Role to the prometheus ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: prom-watch-services-and-pods
  namespace: metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: watch-services-and-pods
subjects:
  - kind: ServiceAccount
    name: prometheus
    # namespace is required for ServiceAccount subjects; without it the
    # binding does not resolve to the ServiceAccount defined above.
    namespace: metrics
---
# Prometheus configuration plus an example recording-rule file, both mounted
# into the pod at /etc/prometheus/config (see the Deployment's volumeMounts).
apiVersion: v1
kind: ConfigMap
metadata:
  name: prom-config
  namespace: metrics
data:
  prometheus.yaml: |
    global:
      scrape_interval: 30s
      scrape_timeout: 10s
      evaluation_interval: 30s
    rule_files:
      # Matches the prometheus-rules-example.yaml key of this ConfigMap.
      - /etc/prometheus/config/prometheus-rules-*.yaml
    scrape_configs:
      - job_name: otel-collector
        honor_labels: true
        honor_timestamps: true
        metrics_path: /metrics
        # Note that we *don't want* to use relabel to collect labels here,
        # because these are the labels of the opentelemetry collector.
        relabel_configs:
          # Keep only endpoints of the otel-export service's prom-export port.
          - action: keep
            source_labels: [__meta_kubernetes_service_label_app]
            regex: otel-export
          - action: keep
            source_labels: [__meta_kubernetes_endpoint_port_name]
            regex: prom-export
        kubernetes_sd_configs:
          - role: endpoints
            namespaces:
              names:
                - metrics
  prometheus-rules-example.yaml: |
    groups:
      - name: example
        rules:
          - record: pod:http_requests:irate5m
            expr: label_replace(rate(knative_dev_internal_serving_revision_app_request_latencies_count[5m]), "service", "$1", "pod_name", "(.*)-deployment-.+-.+")
          - record: service:http_requests:irate5m
            expr: sum(pod:http_requests:irate5m) by (service)
          - record: pod:http_latency:buckets5m
            expr: sum(label_replace(rate(knative_dev_internal_serving_revision_app_request_latencies_bucket[5m]), "service", "$1", "pod_name", "(.*)-deployment-.+-.+")) by (pod_name,service,le)
          - record: service:http_latency:buckets5m
            expr: sum by (service,le)(pod:http_latency:buckets5m) / ignoring(le) group_left service:http_requests:irate5m
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: metrics
spec:
  selector:
    matchLabels:
      app: prometheus
  replicas: 1 # Each replica will hold all data in memory.
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      # Run as the ServiceAccount granted read access by the Role/RoleBinding
      # above; otherwise kubernetes_sd_configs runs as the default SA and the
      # endpoint discovery in prometheus.yaml is unauthorized.
      serviceAccountName: prometheus
      containers:
        - name: prometheus
          # NOTE(review): image is untagged (implicitly :latest); pin a
          # specific version for reproducible rollouts.
          image: quay.io/prometheus/prometheus
          args:
            - --config.file=/etc/prometheus/config/prometheus.yaml
            - --storage.tsdb.path=/prometheus
            - --storage.tsdb.retention.time=24h
            - --storage.tsdb.no-lockfile
            - --web.console.templates=/etc/prometheus/consoles
            - --web.console.libraries=/etc/prometheus/console_libraries
            - --web.enable-admin-api
            - --web.enable-lifecycle
            - --web.route-prefix=/
          resources:
            # This is a small sizing; adjust as needed for your environment.
            requests:
              memory: 200Mi
              cpu: 50m
          ports:
            - name: ui
              containerPort: 9090
          volumeMounts:
            # Mount path must be absolute and must match the --config.file
            # flag above (original read "etc/prometheus/config" — no slash).
            - name: config
              mountPath: /etc/prometheus/config
            - name: prometheus-emptydir
              mountPath: /prometheus
      volumes:
        - name: config
          configMap:
            name: prom-config
        # Ephemeral storage: data is lost on pod restart (24h retention set
        # above keeps the footprint small).
        - name: prometheus-emptydir
          emptyDir: {}
---
# ClusterIP Service exposing the Prometheus UI/API on port 9090.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: metrics
spec:
  selector:
    app: prometheus
  ports:
    - name: ui
      port: 9090
      targetPort: 9090