# ServiceAccount that the Prometheus Deployment below runs as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: metrics
---
# Note: For general cluster use, you may want to use a ClusterRole and
# ClusterRoleBinding to grant Prometheus the ability to list all services and
# pods in the cluster. For this use case, we only need to grant access to the
# same namespace, and can use a Role and RoleBinding.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: watch-services-and-pods
  namespace: metrics
rules:
  - apiGroups:
      - ""
    resources:
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: prom-watch-services-and-pods
  namespace: metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: watch-services-and-pods
subjects:
  - kind: ServiceAccount
    name: prometheus
    # ServiceAccount subjects must specify the namespace they live in.
    namespace: metrics
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: prom-config
  namespace: metrics
data:
  prometheus.yaml: |
    global:
      scrape_interval: 30s
      scrape_timeout: 10s
      evaluation_interval: 30s
    rule_files:
      # Matches the prometheus-rules-example.yaml key of this ConfigMap,
      # once mounted at /etc/prometheus/config.
      - /etc/prometheus/config/prometheus-rules-*.yaml
    scrape_configs:
      - job_name: otel-collector
        honor_labels: true
        honor_timestamps: true
        metrics_path: /metrics
        # Note that we *don't want* to use relabel to collect labels here,
        # because these are the labels of the OpenTelemetry Collector itself.
        # The two keep actions restrict scraping to the prom-export port of
        # endpoints backing the Service labeled app=otel-export.
        relabel_configs:
          - action: keep
            source_labels: [__meta_kubernetes_service_label_app]
            regex: otel-export
          - action: keep
            source_labels: [__meta_kubernetes_endpoint_port_name]
            regex: prom-export
        kubernetes_sd_configs:
          - role: endpoints
            namespaces:
              names:
                - metrics
  # Recording rules: recover the Knative service name from the pod name, then
  # aggregate per-service request rates and latency histograms.
  prometheus-rules-example.yaml: |
    groups:
      - name: example
        rules:
          - record: pod:http_requests:irate5m
            expr: label_replace(rate(knative_dev_internal_serving_revision_app_request_latencies_count[5m]), "service", "$1", "pod_name", "(.*)-deployment-.+-.+")
          - record: service:http_requests:irate5m
            expr: sum(pod:http_requests:irate5m) by (service)
          - record: pod:http_latency:buckets5m
            expr: sum(label_replace(rate(knative_dev_internal_serving_revision_app_request_latencies_bucket[5m]), "service", "$1", "pod_name", "(.*)-deployment-.+-.+")) by (pod_name, service, le)
          - record: service:http_latency:buckets5m
            expr: sum by (service, le) (pod:http_latency:buckets5m) / ignoring(le) group_left service:http_requests:irate5m
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: metrics
spec:
  selector:
    matchLabels:
      app: prometheus
  replicas: 1 # Each replica will hold all data in memory.
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      # Run as the ServiceAccount bound above so that kubernetes_sd_configs
      # is allowed to list and watch services, endpoints, and pods.
      serviceAccountName: prometheus
      containers:
        - name: prometheus
          # No tag is pinned, so this pulls the latest image; pin a specific
          # version for reproducible deployments.
          image: quay.io/prometheus/prometheus
          args:
            - --config.file=/etc/prometheus/config/prometheus.yaml
            - --storage.tsdb.path=/prometheus
            - --storage.tsdb.retention.time=24h
            - --storage.tsdb.no-lockfile
            - --web.console.templates=/etc/prometheus/consoles
            - --web.console.libraries=/etc/prometheus/console_libraries
            - --web.enable-admin-api
            - --web.enable-lifecycle
            - --web.route-prefix=/
          resources:
            # This is a small sizing; adjust as needed for your environment.
            requests:
              memory: 200Mi
              cpu: 50m
          ports:
            - name: ui
              containerPort: 9090
          volumeMounts:
            - name: config
              # Must be the absolute path referenced by --config.file above.
              mountPath: /etc/prometheus/config
            - name: prometheus-emptydir
              mountPath: /prometheus
      volumes:
        - name: config
          configMap:
            name: prom-config
        # Ephemeral storage: collected metrics are lost when the pod is
        # rescheduled.
        - name: prometheus-emptydir
          emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: metrics
spec:
  selector:
    app: prometheus
  ports:
    - name: ui
      port: 9090
      targetPort: 9090
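# A quick sanity check once the manifests are applied (a sketch: the file name
# below is hypothetical, everything else matches the resources above):
#
#   kubectl apply -f prometheus-metrics.yaml
#   kubectl -n metrics port-forward svc/prometheus 9090:9090
#   curl 'http://localhost:9090/api/v1/query?query=service:http_requests:irate5m'
#
# The last command queries one of the recorded series through the Prometheus
# HTTP API; an empty result usually means the otel-export endpoints are not
# being scraped yet.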