Adding monitoring and other setup files for Percona and removing event router dependency. (#2438)
* Added percona monitoring setup for Litmus

Signed-off-by: ishangupta-ds <ishan.gupta@mayadata.io>
This commit is contained in:
parent 0b5a480cfd
commit 32e35ad4fb

@@ -1,6 +1,6 @@
# Monitor Chaos on Sock-Shop

Chaos experiments on sock-shop app with grafana dashboard to monitor it.

## Step-0: Obtain the demo artefacts

@@ -11,7 +11,6 @@ Chaos experiments on sock-shop app with grafana dashboard to monitor it.
cd litmus/demo/sample-applications/sock-shop
```

## Step-1: Setup Sock-Shop Microservices Application

- Create sock-shop namespace on the cluster
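The command for this step falls outside the hunk shown here; presumably it is the standard namespace creation, sketched below as an assumption:

```
# Assumed step: create the namespace the sock-shop manifests deploy into
kubectl create ns sock-shop
```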

@@ -30,7 +29,7 @@ Chaos experiments on sock-shop app with grafana dashboard to monitor it.

## Step-2: Setup the LitmusChaos Infrastructure

- Install the litmus chaos operator and CRDs

```
kubectl apply -f https://litmuschaos.github.io/litmus/litmus-operator-v1.6.1.yaml

@@ -45,15 +44,13 @@ Chaos experiments on sock-shop app with grafana dashboard to monitor it.
- Install the chaos experiments in admin(litmus) namespace

```
kubectl apply -f https://hub.litmuschaos.io/api/chaos/1.6.1?file=charts/generic/experiments.yaml -n litmus
```

- Install the chaos experiment metrics exporter and chaos event exporter

```
-kubectl apply -f deploy/litmus-metrics/01-event-router-cm.yaml
-kubectl apply -f deploy/litmus-metrics/02-event-router.yaml
-kubectl apply -f deploy/litmus-metrics/03-chaos-exporter.yaml
+kubectl apply -f deploy/litmus-metrics/chaos-exporter.yaml
```

## Step-3: Setup the Monitoring Infrastructure

@@ -73,11 +70,11 @@ Chaos experiments on sock-shop app with grafana dashboard to monitor it.

- Access the grafana dashboard via the NodePort (or loadbalancer) service IP or via a port-forward operation on localhost

Note: To change the service type to LoadBalancer, perform a `kubectl edit svc prometheus -n monitoring` and replace
`type: NodePort` with `type: LoadBalancer`

```
kubectl get svc -n monitoring
```

Default username/password credentials: `admin/admin`
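Where neither a NodePort nor a LoadBalancer IP is reachable, a port-forward gets to the dashboard from localhost; a minimal sketch, assuming the grafana service in `monitoring` is named `grafana` and serves on port 3000:

```
# Forward local port 3000 to the grafana service, then browse http://localhost:3000
kubectl port-forward svc/grafana 3000:3000 -n monitoring
```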

@@ -92,10 +89,8 @@ Chaos experiments on sock-shop app with grafana dashboard to monitor it.

## Step-4: Execute the Chaos Experiments

- For the sake of illustration, let us execute a CPU hog experiment on the `catalogue` microservice & a Memory Hog experiment on
the `orders` microservice in a staggered manner

```
kubectl apply -f chaos/catalogue/catalogue-cpu-hog.yaml

@@ -106,20 +101,19 @@ Chaos experiments on sock-shop app with grafana dashboard to monitor it.
```
kubectl apply -f chaos/orders/orders-memory-hog.yaml
```

- Verify execution of chaos experiments

```
kubectl describe chaosengine catalogue-cpu-hog -n litmus
kubectl describe chaosengine orders-memory-hog -n litmus
```
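The verdict can also be read off the ChaosResult resource rather than the engine events; a sketch, assuming Litmus's usual `<engine-name>-<experiment-name>` result naming:

```
# List chaos results and inspect a verdict (result names below are assumed)
kubectl get chaosresults -n litmus
kubectl describe chaosresult catalogue-cpu-hog-pod-cpu-hog -n litmus
```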

## Step-5: Visualize Chaos Impact

- Observe the impact of chaos injection through increased Latency & reduced QPS (queries per second) on the microservices
  under test.

![Sock-Shop Dashboard]
![Sock-Shop Dashboard]

@@ -1,13 +0,0 @@
-apiVersion: v1
-data:
-  config.json: |-
-    {
-      "sink": "http",
-      "httpSinkUrl": "http://localhost:8080",
-      "httpSinkBufferSize": 1500,
-      "httpSinkDiscardMessages": true
-    }
-kind: ConfigMap
-metadata:
-  name: litmus-eventrouter-http-cm
-  namespace: litmus

@@ -1,50 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  labels:
-    app: litmus-eventrouter
-  name: litmus-eventrouter
-  namespace: litmus
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: litmus-eventrouter
-  template:
-    metadata:
-      labels:
-        app: litmus-eventrouter
-    spec:
-      containers:
-      - image: containership/eventrouter
-        imagePullPolicy: IfNotPresent
-        name: litmus-eventrouter
-        volumeMounts:
-        - mountPath: /etc/eventrouter
-          name: config-volume
-      serviceAccount: litmus
-      serviceAccountName: litmus
-      volumes:
-      - configMap:
-          defaultMode: 420
-          name: litmus-eventrouter-http-cm
-        name: config-volume
----
-apiVersion: v1
-kind: Service
-metadata:
-  labels:
-    app: litmus-eventrouter
-  name: litmus-eventrouter
-  namespace: litmus
-spec:
-  ports:
-  - nodePort: 31399
-    port: 8080
-    protocol: TCP
-    targetPort: 8080
-  selector:
-    app: litmus-eventrouter
-  sessionAffinity: None
-  type: NodePort

@@ -1,40 +0,0 @@
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  labels:
-    app: chaos-monitor
-  name: chaos-monitor
-  namespace: litmus
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: chaos-monitor
-  template:
-    metadata:
-      labels:
-        app: chaos-monitor
-    spec:
-      containers:
-      - image: litmuschaos/chaos-exporter:ci
-        imagePullPolicy: Always
-        name: chaos-exporter
-      serviceAccount: litmus
-      serviceAccountName: litmus
----
-apiVersion: v1
-kind: Service
-metadata:
-  labels:
-    app: chaos-monitor
-  name: chaos-monitor
-  namespace: litmus
-spec:
-  ports:
-  - port: 8080
-    protocol: TCP
-    targetPort: 8080
-  selector:
-    app: chaos-monitor
-  type: ClusterIP

@@ -0,0 +1,40 @@
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app: chaos-exporter
+  name: chaos-exporter
+  namespace: litmus
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chaos-exporter
+  template:
+    metadata:
+      labels:
+        app: chaos-exporter
+    spec:
+      containers:
+      - image: litmuschaos/chaos-exporter:1.13.0
+        imagePullPolicy: Always
+        name: chaos-exporter
+      serviceAccountName: litmus
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: chaos-exporter
+  name: chaos-exporter
+  namespace: litmus
+spec:
+  ports:
+  - port: 8080
+    name: tcp
+    protocol: TCP
+    targetPort: 8080
+  selector:
+    app: chaos-exporter
+  type: ClusterIP
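Once applied, the exporter can be spot-checked before wiring Prometheus to it; a sketch, assuming the service above is deployed as-is:

```
# Forward the exporter service locally and look for the litmuschaos_* series
kubectl port-forward svc/chaos-exporter 8080:8080 -n litmus &
curl -s http://localhost:8080/metrics | grep litmuschaos_
```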

@@ -5,10 +5,14 @@ data:
    scrape_interval: 15s
    rule_files:
    - "/etc/prometheus-rules/alert.rules"
+   alerting:
+     alertmanagers:
+     - static_configs:
+       - targets: ["alertmanager:9093"]
    scrape_configs:
-   - job_name: 'chaos-monitor'
+   - job_name: 'chaos-exporter'
      static_configs:
-     - targets: ['chaos-monitor.litmus.svc.cluster.local:8080']
+     - targets: ['chaos-exporter.litmus.svc.cluster.local:8080']
    - job_name: kubernetes-service-endpoints
      kubernetes_sd_configs:
      - role: endpoints
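Configs like this are easy to break on indentation; promtool can lint the rendered file before it reaches the cluster. A sketch, assuming a local copy saved as `prometheus.yml` with reachable `rule_files` paths:

```
# Validate the scrape config and any rule files it references (local copy assumed)
promtool check config prometheus.yml
```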

@@ -1,17 +1,19 @@
apiVersion: v1
-data:
-  alert.rules: |
-    # Alert for high error rate in the Sock Shop.
-
-    ALERT HighErrorRate
-      IF rate(request_duration_seconds_count{status_code="500"}[5m]) > 1
-      FOR 5m
-      LABELS { severity = "slack" }
-      ANNOTATIONS {
-        summary = "High HTTP 500 error rates",
-        description = "Rate of HTTP 500 errors per 5 minutes: {{ $value }}",
-      }
kind: ConfigMap
metadata:
  name: prometheus-alertrules
  namespace: monitoring
+data:
+  alert.rules: |
+    groups:
+    - name: PM2 Alert
+      rules:
+      # Alert for high error rate in the Sock Shop.
+      - alert: HighErrorRate
+        expr: rate(request_duration_seconds_count{status_code="500"}[5m]) > 1
+        for: 5m
+        labels:
+          severity: "slack"
+        annotations:
+          summary: "High HTTP 500 error rates"
+          description: "Rate of HTTP 500 errors per 5 minutes: {{ $value }}"
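The move from the Prometheus 1.x rule syntax to 2.x rule groups is exactly the kind of change worth linting; a sketch, assuming the rules block is extracted to a local `alert.rules` file:

```
# Lint the 2.x-format rule file (local extraction of the ConfigMap data assumed)
promtool check rules alert.rules
```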

@@ -1,7 +1,6 @@
apiVersion: apps/v1
kind: Deployment
metadata:
-  annotations:
  name: prometheus-deployment
  namespace: monitoring
spec:

@@ -16,31 +15,29 @@ spec:
        name: prometheus
    spec:
      containers:
-     - args:
-       - -storage.local.retention=360h
-       - -storage.local.memory-chunks=1048576
-       - -config.file=/etc/prometheus/prometheus.yml
-       - -alertmanager.url=http://alertmanager:9093
-       image: prom/prometheus:v1.5.2
+     - args:
+       - --storage.tsdb.retention.time=360h
+       - --config.file=/etc/prometheus/prometheus.yml
+       image: prom/prometheus:v2.25.0
        imagePullPolicy: IfNotPresent
        name: prometheus
        ports:
        - containerPort: 9090
          name: web
          protocol: TCP
        volumeMounts:
        - mountPath: /etc/prometheus
          name: config-volume
        - mountPath: /etc/prometheus-rules
          name: alertrules-volume
      serviceAccount: prometheus
      serviceAccountName: prometheus
      volumes:
      - configMap:
          defaultMode: 420
          name: prometheus-configmap
        name: config-volume
      - configMap:
          defaultMode: 420
          name: prometheus-alertrules
        name: alertrules-volume
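After the image bump to v2.25.0, it is worth confirming the rollout converges and the new flags are accepted; a sketch using the deployment name from this manifest:

```
kubectl rollout status deploy/prometheus-deployment -n monitoring
kubectl logs deploy/prometheus-deployment -n monitoring | tail
```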

@@ -205,9 +205,7 @@
      "options": {
        "orientation": "horizontal",
        "reduceOptions": {
-         "calcs": [
-           "max"
-         ],
+         "calcs": ["max"],
          "fields": "",
          "values": false
        },
@@ -217,7 +215,7 @@
      "pluginVersion": "7.0.6",
      "targets": [
        {
-         "expr": "sum(chaosengine_experiments_count{engine_namespace=\"litmus\",job=\"chaos-monitor\"})",
+         "expr": "sum(chaosengine_experiments_count{engine_namespace=\"litmus\",job=\"chaos-exporter\"})",
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "",
@@ -276,9 +274,7 @@
      "options": {
        "orientation": "horizontal",
        "reduceOptions": {
-         "calcs": [
-           "max"
-         ],
+         "calcs": ["max"],
          "fields": "",
          "values": false
        },
@@ -288,7 +284,7 @@
      "pluginVersion": "7.0.6",
      "targets": [
        {
-         "expr": "sum(chaosengine_passed_experiments{engine_namespace=\"litmus\",job=\"chaos-monitor\"})",
+         "expr": "sum(chaosengine_passed_experiments{engine_namespace=\"litmus\",job=\"chaos-exporter\"})",
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "",
@@ -347,9 +343,7 @@
      "options": {
        "orientation": "horizontal",
        "reduceOptions": {
-         "calcs": [
-           "max"
-         ],
+         "calcs": ["max"],
          "fields": "",
          "values": false
        },
@@ -359,7 +353,7 @@
      "pluginVersion": "7.0.6",
      "targets": [
        {
-         "expr": "sum(chaosengine_failed_experiments{engine_namespace=\"litmus\",job=\"chaos-monitor\"})",
+         "expr": "sum(chaosengine_failed_experiments{engine_namespace=\"litmus\",job=\"chaos-exporter\"})",
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "",
@@ -416,9 +410,7 @@
      "options": {
        "orientation": "horizontal",
        "reduceOptions": {
-         "calcs": [
-           "lastNotNull"
-         ],
+         "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
@@ -428,7 +420,7 @@
      "pluginVersion": "7.0.6",
      "targets": [
        {
-         "expr": "sum(chaosengine_waiting_experiments{engine_namespace=\"litmus\",job=\"chaos-monitor\"})",
+         "expr": "sum(chaosengine_waiting_experiments{engine_namespace=\"litmus\",job=\"chaos-exporter\"})",
          "interval": "",
          "intervalFactor": 3,
          "legendFormat": "",
@@ -2105,17 +2097,7 @@
        "2h",
        "1d"
      ],
-     "time_options": [
-       "5m",
-       "15m",
-       "1h",
-       "6h",
-       "12h",
-       "24h",
-       "2d",
-       "7d",
-       "30d"
-     ]
+     "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"]
    },
    "timezone": "browser",
    "title": "Sock-Shop Performance",

@@ -2,7 +2,7 @@

This directory contains chaos interleaved grafana dashboards along with the utilities needed to get started with monitoring chaos experiments and workflows.

-# Components
+## Components

- [Grafana Dashboards](https://github.com/litmuschaos/litmus/blob/master/monitoring/grafana-dashboards)

@@ -12,28 +12,12 @@ This directory contains chaos interleaved grafana dashboards along with the util

> Contains utilities required to setup monitoring infrastructure on a kubernetes cluster.

-# Demonstration
-
-## Monitor Chaos on Sock-Shop
-
-Run chaos experiments and workflows on sock-shop application with grafana dashboard to monitor it.
-
-### Step-1: Setup Sock-Shop Microservices Application
-
-- Apply the sock-shop microservices manifests
-
-```
-kubectl apply -f utils/sample-application-under-test/sock-shop/
-```
-
-- Wait until all services are up. Verify via `kubectl get pods -n sock-shop`
-
-### Step-2: Setup the LitmusChaos Infrastructure
+### Setup the LitmusChaos Infrastructure

- Install the litmus chaos operator and CRDs

```
-kubectl apply -f https://litmuschaos.github.io/litmus/litmus-operator-v1.9.0.yaml
+kubectl apply -f https://litmuschaos.github.io/litmus/litmus-operator-v1.13.0.yaml
```

- Install the litmus-admin serviceaccount for centralized/admin-mode of chaos execution

@@ -45,10 +29,10 @@ Run chaos experiments and workflows on sock-shop application with grafana dashbo
- Install the chaos experiments in admin(litmus) namespace

```
-kubectl apply -f https://hub.litmuschaos.io/api/chaos/1.9.0?file=charts/generic/experiments.yaml -n litmus
+kubectl apply -f https://hub.litmuschaos.io/api/chaos/1.13.0?file=charts/generic/experiments.yaml -n litmus
```
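Once the chart is applied, the installed experiment CRs can be listed; a quick check, assuming the admin-mode `litmus` namespace used above:

```
kubectl get chaosexperiments -n litmus
```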

-### Step-3: Setup the Monitoring Infrastructure
+### Setup the Monitoring Infrastructure

- Create monitoring namespace on the cluster

@@ -57,51 +41,47 @@ Run chaos experiments and workflows on sock-shop application with grafana dashbo
```

- Setup prometheus TSDB

> Model-1 (optional): Service monitor and prometheus operator model.

Create the operator to instantiate all CRDs

```
kubectl -n monitoring apply -f utils/prometheus/prometheus-operator/
```

Deploy monitoring components

```
kubectl -n monitoring apply -f utils/metrics-exporters-with-service-monitors/node-exporter/
kubectl -n monitoring apply -f utils/metrics-exporters-with-service-monitors/kube-state-metrics/
kubectl -n monitoring apply -f utils/alert-manager-with-service-monitor/
-kubectl -n sock-shop apply -f utils/sample-application-service-monitors/sock-shop/
kubectl -n litmus apply -f utils/metrics-exporters-with-service-monitors/litmus-metrics/chaos-exporter/
-kubectl -n litmus apply -f utils/metrics-exporters-with-service-monitors/litmus-metrics/litmus-event-router/
```

Deploy prometheus instance and all the service monitors for targets

```
kubectl -n monitoring apply -f utils/prometheus/prometheus-configuration/
```

Note: To change the service type to NodePort, perform a `kubectl edit svc prometheus-k8s -n monitoring` and replace `type: LoadBalancer` with `type: NodePort`
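A non-interactive alternative to `kubectl edit` for the same switch, sketched with `kubectl patch`:

```
kubectl patch svc prometheus-k8s -n monitoring -p '{"spec": {"type": "NodePort"}}'
```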

> Model-2 (optional): Prometheus scrape config model.

Deploy prometheus components

```
kubectl -n monitoring apply -f utils/prometheus/prometheus-scrape-configuration/
```

Deploy metrics exporters

```
kubectl -n monitoring apply -f utils/metrics-exporters/kube-state-metrics/
kubectl -n monitoring apply -f utils/metrics-exporters/node-exporter/
kubectl -n litmus apply -f utils/metrics-exporters/litmus-metrics/chaos-exporter/
-kubectl -n litmus apply -f utils/metrics-exporters/litmus-metrics/litmus-event-router/
```

- Apply the grafana manifests after deploying prometheus for all metrics.

@@ -112,10 +92,12 @@ Run chaos experiments and workflows on sock-shop application with grafana dashbo
- You may access the grafana dashboard via the LoadBalancer (or NodePort) service IP or via a port-forward operation on localhost

View the services running in the monitoring namespace

```
kubectl get svc -n monitoring
```

Now copy the EXTERNAL-IP of grafana and view it in the browser

Default username/password credentials: `admin/admin`
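The EXTERNAL-IP can also be pulled out non-interactively; a one-liner sketch, assuming the service is named `grafana`:

```
kubectl get svc grafana -n monitoring -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
```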

@@ -127,130 +109,4 @@ Run chaos experiments and workflows on sock-shop application with grafana dashbo

![Sock-Shop Dashboard](https://github.com/litmuschaos/litmus/blob/master/monitoring/screenshots/sock-shop/01-sock-shop-perf-dashboard.png?raw=true)

- Import the grafana dashboard "Sock-Shop Performance" provided [here](https://raw.githubusercontent.com/litmuschaos/litmus/master/monitoring/grafana-dashboards/sock-shop/Sock-Shop-Performance-Under-Chaos.json)

- Import the grafana dashboard "Node and Pod Chaos Demo" provided [here](https://raw.githubusercontent.com/litmuschaos/litmus/master/monitoring/grafana-dashboards/kubernetes/Node-and-pod-metrics-dashboard.json)

-### Step-4: Execute the Chaos Experiments
-
-- For the sake of illustration, let us execute node and pod level, CPU hog experiments on the `catalogue` microservice & Memory Hog experiments on the `orders` microservice in a staggered manner.
-
-```
-kubectl apply -f utils/sample-chaos-injectors/chaos-experiments/catalogue/catalogue-pod-cpu-hog.yaml
-```
-
-Wait for ~60s
-
-```
-kubectl apply -f utils/sample-chaos-injectors/chaos-experiments/orders/orders-pod-memory-hog.yaml
-```
-
-Wait for ~60s
-
-```
-kubectl apply -f utils/sample-chaos-injectors/chaos-experiments/catalogue/catalogue-node-cpu-hog.yaml
-```
-
-Wait for ~60s
-
-```
-kubectl apply -f utils/sample-chaos-injectors/chaos-experiments/orders/orders-node-memory-hog.yaml
-```
-
-- Verify execution of chaos experiments
-
-```
-kubectl describe chaosengine catalogue-pod-cpu-hog -n litmus
-kubectl describe chaosengine orders-pod-memory-hog -n litmus
-kubectl describe chaosengine catalogue-node-cpu-hog -n litmus
-kubectl describe chaosengine orders-node-memory-hog -n litmus
-```
-
-### Step-5: Visualize Chaos Impact
-
-- Observe the impact of chaos injection through increased Latency & reduced QPS (queries per second) on the microservices
-  under test.
-
-![Sock-Shop Dashboard](https://github.com/litmuschaos/litmus/blob/master/monitoring/screenshots/sock-shop/02-sock-shop-perf-dashboard.png?raw=true)
-
-![Node and Pod Chaos Dashboard](https://github.com/litmuschaos/litmus/blob/master/monitoring/screenshots/kubernetes/node-and-pod-metrics-dashboard.png?raw=true)
-
-### Step-6 (optional): Inject continuous chaos using Argo CD.
-
-- Install Chaos workflow infrastructure.
-
-- Create argo namespace
-
-```
-kubectl create ns argo
-```
-
-- Create the CRDs, workflow controller deployment with associated RBAC.
-
-```
-kubectl apply -f https://raw.githubusercontent.com/argoproj/argo/stable/manifests/install.yaml -n argo
-```
-
-- Install the argo CLI on the test harness machine (where the kubeconfig is available)
-
-```bash
-# Download the binary
-curl -sLO https://github.com/argoproj/argo/releases/download/v2.11.0/argo-linux-amd64.gz
-
-# Unzip
-gunzip argo-linux-amd64.gz
-
-# Make binary executable
-chmod +x argo-linux-amd64
-
-# Move binary to path
-mv ./argo-linux-amd64 /usr/local/bin/argo
-
-# Test installation
-argo version
-```
-
-- Create the Argo Access ServiceAccount
-
-```
-kubectl apply -f https://raw.githubusercontent.com/litmuschaos/chaos-workflows/master/Argo/argo-access.yaml -n litmus
-```
-
-- Run one or more of the litmuschaos experiments as Chaos workflows using argo CLI or kubectl.
-
-> Node CPU hog
-
-```bash
-argo cron create utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/catalogue/catalogue-node-cpu-hog-workflow.yaml -n litmus
-```
-
-> Node memory hog
-
-```bash
-argo cron create utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/orders/orders-node-memory-hog-workflow.yaml -n litmus
-```
-
-> Pod CPU hog
-
-```bash
-kubectl apply -f utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/catalogue/catalogue-pod-cpu-hog-workflow.yaml -n litmus
-```
-
-> Pod memory hog
-
-```bash
-kubectl apply -f utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/orders/orders-pod-memory-hog-workflow.yaml -n litmus
-```
-
-- Visualize the Chaos cron workflow through argo UI by obtaining Node port or Load Balancer IP.
-
-```
-kubectl patch svc argo-server -n argo -p '{"spec": {"type": "NodePort"}}'
-```
-
-OR
-
-```
-kubectl patch svc argo-server -n argo -p '{"spec": {"type": "LoadBalancer"}}'
-```
-
-![Chaos Cron Workflows](https://github.com/litmuschaos/litmus/blob/master/monitoring/screenshots/utils/chaos-cron-workflows.png?raw=true)
-
-![Argo Workflow Chaos](https://github.com/litmuschaos/litmus/blob/master/monitoring/screenshots/utils/argo-workflow-chaos.png?raw=true)

@@ -14,7 +14,7 @@
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
-     "version": "7.1.5"
+     "version": "7.4.3"
    },
    {
      "type": "panel",
@@ -40,6 +40,59 @@
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      },
+     {
+       "datasource": "${DS_DS_PROMETHEUS}",
+       "enable": true,
+       "expr": "litmuschaos_awaited_experiments{chaosresult_name=\"catalogue-pod-cpu-hog-pod-cpu-hog\", chaosresult_namespace=\"litmus\"}",
+       "hide": false,
+       "iconColor": "rgba(255, 96, 96, 1)",
+       "name": "catalogue-pod-cpu-hog",
+       "showIn": 0,
+       "step": "1s",
+       "tagKeys": "litmus, chaos",
+       "textFormat": "pod-cpu-hog",
+       "titleFormat": "catalogue-chaos"
+     },
+     {
+       "datasource": "${DS_DS_PROMETHEUS}",
+       "enable": true,
+       "expr": "litmuschaos_awaited_experiments{chaosresult_name=\"orders-pod-memory-hog-pod-memory-hog\", chaosresult_namespace=\"litmus\"}",
+       "hide": false,
+       "iconColor": "rgba(255, 96, 96, 1)",
+       "name": "orders-pod-memory-hog",
+       "showIn": 0,
+       "step": "1s",
+       "tagKeys": "litmus, chaos",
+       "textFormat": "pod-memory-hog",
+       "titleFormat": "orders-chaos"
+     },
+     {
+       "datasource": "${DS_DS_PROMETHEUS}",
+       "enable": true,
+       "expr": "litmuschaos_awaited_experiments{chaosresult_name=\"orders-node-memory-hog-node-memory-hog\",chaosresult_namespace=\"litmus\"}",
+       "hide": false,
+       "iconColor": "rgba(255, 96, 96, 1)",
+       "name": "orders-node-memory-hog",
+       "showIn": 0,
+       "step": "1s",
+       "tagKeys": "litmus, chaos",
+       "textFormat": "node-memory-hog",
+       "titleFormat": "orders-chaos",
+       "useValueForTime": false
+     },
+     {
+       "datasource": "${DS_DS_PROMETHEUS}",
+       "enable": true,
+       "expr": "litmuschaos_awaited_experiments{chaosresult_name=\"catalogue-node-cpu-hog-node-cpu-hog\",chaosresult_namespace=\"litmus\"}",
+       "hide": false,
+       "iconColor": "rgba(255, 96, 96, 1)",
+       "name": "catalogue-node-cpu-hog",
+       "showIn": 0,
+       "step": "1s",
+       "tagKeys": "litmus, chaos",
+       "textFormat": "node-cpu-hog",
+       "titleFormat": "catalogue-chaos"
+     }
    ]
  },
@@ -47,7 +100,7 @@
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
- "iteration": 1600497075563,
+ "iteration": 1614781246233,
  "links": [],
  "panels": [
    {
@@ -106,49 +159,15 @@
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
-     "pluginVersion": "7.1.5",
+     "pluginVersion": "7.4.3",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
-     "seriesOverrides": [
-       {
-         "$$hashKey": "object:26",
-         "alias": "catalogue-pod-cpu-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "$$hashKey": "object:27",
-         "alias": "orders-pod-memory-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "$$hashKey": "object:28",
-         "alias": "catalogue-node-cpu-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "$$hashKey": "object:29",
-         "alias": "orders-node-memory-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       }
-     ],
+     "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
@@ -158,30 +177,6 @@
          "interval": "",
          "legendFormat": "\"{{instance}}\"",
          "refId": "B"
        },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "catalogue-pod-cpu-hog",
-         "refId": "A"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "orders-pod-memory-hog",
-         "refId": "C"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "catalogue-node-cpu-hog",
-         "refId": "D"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "orders-node-memory-hog",
-         "refId": "E"
-       }
      ],
      "thresholds": [],
@@ -219,7 +214,7 @@
        "logBase": 2,
        "max": "1",
        "min": "0",
-       "show": true
+       "show": false
      }
    ],
    "yaxis": {
@@ -268,45 +263,15 @@
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null as zero",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
-     "pluginVersion": "7.1.5",
+     "pluginVersion": "7.4.3",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
-     "seriesOverrides": [
-       {
-         "alias": "catalogue-pod-cpu-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "alias": "orders-pod-memory-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "alias": "catalogue-node-cpu-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "alias": "orders-node-memory-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       }
-     ],
+     "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
@@ -316,30 +281,6 @@
          "interval": "",
          "legendFormat": "\"{{instance}}\"",
          "refId": "B"
        },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "catalogue-pod-cpu-hog",
-         "refId": "A"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "orders-pod-memory-hog",
-         "refId": "C"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "catalogue-node-cpu-hog",
-         "refId": "D"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "orders-node-memory-hog",
-         "refId": "E"
-       }
      ],
      "thresholds": [],
@@ -362,6 +303,7 @@
    },
    "yaxes": [
      {
+       "$$hashKey": "object:251",
        "format": "percentunit",
        "label": "Memory",
        "logBase": 1,
@@ -370,12 +312,13 @@
        "show": true
      },
      {
+       "$$hashKey": "object:252",
        "format": "short",
        "label": "CHAOS",
        "logBase": 2,
        "max": "1",
        "min": "0",
-       "show": true
+       "show": false
      }
    ],
    "yaxis": {
@@ -425,45 +368,15 @@
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
-     "pluginVersion": "7.1.5",
+     "pluginVersion": "7.4.3",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
-     "seriesOverrides": [
-       {
-         "alias": "catalogue-pod-cpu-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "alias": "orders-pod-memory-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "alias": "catalogue-node-cpu-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "alias": "orders-node-memory-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       }
-     ],
+     "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
@@ -473,30 +386,6 @@
          "interval": "",
          "legendFormat": "\"{{pod}}\"",
          "refId": "B"
        },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "catalogue-pod-cpu-hog",
-         "refId": "A"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "orders-pod-memory-hog",
-         "refId": "C"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "catalogue-node-cpu-hog",
-         "refId": "D"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "orders-node-memory-hog",
-         "refId": "E"
-       }
      ],
      "thresholds": [],
@@ -519,6 +408,7 @@
    },
    "yaxes": [
      {
+       "$$hashKey": "object:535",
        "format": "short",
        "label": "cores",
        "logBase": 1,
@@ -527,12 +417,13 @@
        "show": true
      },
      {
+       "$$hashKey": "object:536",
        "format": "short",
        "label": "CHAOS",
        "logBase": 2,
        "max": "1",
        "min": "0",
-       "show": true
+       "show": false
      }
    ],
    "yaxis": {
@@ -581,45 +472,15 @@
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
-     "pluginVersion": "7.1.5",
+     "pluginVersion": "7.4.3",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
-     "seriesOverrides": [
-       {
-         "alias": "catalogue-pod-cpu-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "alias": "orders-pod-memory-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "alias": "catalogue-node-cpu-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       },
-       {
-         "alias": "orders-node-memory-hog",
-         "color": "#C4162A",
-         "fill": 1,
-         "fillGradient": 5,
-         "steppedLine": true,
-         "yaxis": 2
-       }
-     ],
+     "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
@@ -629,30 +490,6 @@
          "interval": "",
          "legendFormat": "\"{{pod}}\"",
          "refId": "B"
        },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "catalogue-pod-cpu-hog",
-         "refId": "A"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "orders-pod-memory-hog",
-         "refId": "C"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "catalogue-node-cpu-hog",
-         "refId": "D"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "orders-node-memory-hog",
-         "refId": "E"
-       }
      ],
      "thresholds": [],
@@ -675,6 +512,7 @@
    },
    "yaxes": [
      {
+       "$$hashKey": "object:665",
        "format": "bytes",
        "label": "Memory",
        "logBase": 1,
@@ -683,12 +521,13 @@
        "show": true
      },
      {
+       "$$hashKey": "object:666",
        "format": "short",
        "label": "CHAOS",
        "logBase": 2,
        "max": "1",
        "min": "0",
-       "show": true
+       "show": false
      }
    ],
    "yaxis": {
@@ -698,7 +537,7 @@
    }
  ],
  "refresh": "5s",
- "schemaVersion": 26,
+ "schemaVersion": 27,
  "style": "dark",
  "tags": [],
  "templating": {
@@ -709,6 +548,8 @@
        "text": "default",
        "value": "default"
      },
+     "description": null,
+     "error": null,
      "hide": 0,
      "includeAll": false,
      "label": "",
@@ -727,13 +568,18 @@
      "current": {},
      "datasource": "$datasource",
      "definition": "label_values(kube_pod_info, cluster)",
+     "description": null,
+     "error": null,
      "hide": 2,
      "includeAll": false,
      "label": null,
      "multi": false,
      "name": "cluster",
      "options": [],
-     "query": "label_values(kube_pod_info, cluster)",
+     "query": {
+       "query": "label_values(kube_pod_info, cluster)",
+       "refId": "DS_PROMETHEUS-cluster-Variable-Query"
+     },
      "refresh": 1,
      "regex": "",
      "skipUrlSync": false,
@@ -767,5 +613,5 @@
  "timezone": "",
  "title": "Node and Pod Chaos Metrics",
  "uid": "nodepodmetrics",
- "version": 1
+ "version": 12
}

@@ -1,14 +1,11 @@
# Node and Pod K8s metrics dashboard

This dashboard visualizes Node and Pod level CPU and memory utilization metrics interleaved with chaos events.

## Prerequisites

- Node exporter and kube state metrics exporter with service monitor.

-- Litmus event router with service monitor.
-
- Prometheus operator and deployment configured for using the service monitors.

OR

@@ -17,7 +14,6 @@ OR

- Chaos engine name must match the labels used in PromQL for the grafana dashboard.

## Instructions

- Download the dashboard json file.

@@ -32,7 +28,6 @@ OR

- Tune the PromQL queries to match the labels with engine name and other parameters as per need.
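The label matching referred to here follows the ChaosResult naming visible in this dashboard's queries; a sketch of checking one such series against a live Prometheus, assuming an engine named `my-engine` running `pod-cpu-hog` and a port-forwarded Prometheus on localhost:9090:

```
curl -sG http://localhost:9090/api/v1/query \
  --data-urlencode 'query=litmuschaos_awaited_experiments{chaosresult_name="my-engine-pod-cpu-hog", chaosresult_namespace="litmus"}'
```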

## Screenshot

![image](https://github.com/litmuschaos/litmus/raw/master/monitoring/screenshots/experiments/node-pod-chaos-metrics.png)

File diff suppressed because it is too large

@@ -0,0 +1,39 @@
+# Percona SQL server and PMM server metrics dashboards
+
+These dashboards visualize Percona SQL server and PMM server metrics interleaved with chaos events.
+
+## Prerequisites
+
+- mysqld exporter with service monitor and Prometheus operator and deployment configured for using the service monitor.
+
+- mysqld exporter with Prometheus deployment with job configured for scraping metrics from the service endpoint.
+
+OR
+
+- Percona PMM installation.
+
+- Chaos engine name must match the labels used in PromQL for the grafana dashboard.
+
+## Instructions
+
+- Download the dashboard json file.
+
+- Import the json file into grafana.
+
+![image](https://github.com/litmuschaos/litmus/raw/master/monitoring/screenshots/utils/import-dashboard.png)
+
+- Change data source for the dashboard accordingly.
+
+![image](https://github.com/litmuschaos/litmus/raw/master/monitoring/screenshots/utils/datasource-config.png)
+
+- Tune the PromQL queries to match the labels with engine name and other parameters as per need.
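The same idea applies to the Percona panels: the queries interleave mysqld exporter series with the Litmus chaos series, so both need to resolve. A sketch against a port-forwarded Prometheus on localhost:9090, with `mysql_global_status_threads_connected` taken as a representative mysqld-exporter metric (an assumption, not a metric this commit pins down):

```
curl -sG http://localhost:9090/api/v1/query --data-urlencode 'query=mysql_global_status_threads_connected'
curl -sG http://localhost:9090/api/v1/query --data-urlencode 'query=litmuschaos_awaited_experiments{chaosresult_namespace="litmus"}'
```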

+## Screenshot
+
+![image](https://github.com/litmuschaos/litmus/raw/master/monitoring/screenshots/experiments/percona-node-cpu-hog.png)
+
+![image](https://github.com/litmuschaos/litmus/raw/master/monitoring/screenshots/experiments/percona-node-memory-hog.png)
+
+![image](https://github.com/litmuschaos/litmus/raw/master/monitoring/screenshots/experiments/pmm-pod-memory-hog.png)
+
+![image](https://github.com/litmuschaos/litmus/raw/master/monitoring/screenshots/experiments/pmm-node-memory-hog.png)

@@ -6,7 +6,7 @@ This dashboard visualizes Sock Shop application metrics metrics interleaved with

- Sock shop microservices application deployments with service monitors.

-- Litmus event router and chaos exporter with service monitors.
+- Litmus chaos exporter with service monitors.

- Prometheus operator and deployment configured for using the service monitors.

@@ -20,7 +20,7 @@
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
-     "version": "7.2.2"
+     "version": "7.4.3"
    },
    {
      "type": "panel",
@@ -40,7 +40,7 @@
    {
      "datasource": "${DS_DS_PROMETHEUS}",
      "enable": true,
-     "expr": "heptio_eventrouter_normal_total{reason=\"ChaosEngineInitialized\", involved_object_namespace=\"litmus\",involved_object_name=\"orders-pod-memory-hog\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\", involved_object_namespace=\"litmus\",involved_object_name=\"orders-pod-memory-hog\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
+     "expr": "litmuschaos_awaited_experiments{chaosresult_name=\"orders-pod-memory-hog-pod-memory-hog\", chaosresult_namespace=\"litmus\"}",
      "hide": false,
      "iconColor": "#F2CC0C",
      "limit": 100,
@@ -56,9 +56,9 @@
    {
      "datasource": "${DS_DS_PROMETHEUS}",
      "enable": true,
-     "expr": "heptio_eventrouter_normal_total{reason=\"ChaosEngineInitialized\",involved_object_name=\"catalogue-pod-cpu-hog\",involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\", involved_object_namespace=\"litmus\",involved_object_name=\"catalogue-pod-cpu-hog\",involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
+     "expr": "litmuschaos_awaited_experiments{chaosresult_name=\"catalogue-pod-cpu-hog-pod-cpu-hog\", chaosresult_namespace=\"litmus\"}",
      "hide": false,
-     "iconColor": "#E0B400",
+     "iconColor": "#E02F44",
      "limit": 100,
      "name": "catalogue-pod-cpu-hog",
      "showIn": 0,
@@ -71,9 +71,9 @@
    {
      "datasource": "${DS_DS_PROMETHEUS}",
      "enable": true,
-     "expr": "heptio_eventrouter_normal_total{reason=\"ChaosEngineInitialized\", involved_object_namespace=\"litmus\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\", involved_object_namespace=\"litmus\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
+     "expr": "litmuschaos_awaited_experiments{chaosresult_name=\"catalogue-node-cpu-hog-node-cpu-hog\",chaosresult_namespace=\"litmus\"}",
      "hide": false,
-     "iconColor": "#E0B400",
+     "iconColor": "#3274D9",
      "limit": 100,
      "name": "catalogue-node-cpu-hog",
      "showIn": 0,
@@ -86,9 +86,9 @@
    {
      "datasource": "${DS_DS_PROMETHEUS}",
      "enable": true,
-     "expr": "heptio_eventrouter_normal_total{reason=\"ChaosEngineInitialized\", involved_object_namespace=\"litmus\",involved_object_name=\"orders-node-memory-hog\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\", involved_object_namespace=\"litmus\",involved_object_name=\"orders-node-memory-hog\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
+     "expr": "litmuschaos_awaited_experiments{chaosresult_name=\"orders-node-memory-hog-node-memory-hog\",chaosresult_namespace=\"litmus\"}",
      "hide": false,
-     "iconColor": "#E0B400",
+     "iconColor": "#56A64B",
      "limit": 100,
      "name": "orders-node-memory-hog",
      "showIn": 0,
@@ -173,74 +173,20 @@
        "alertThreshold": true
      },
      "percentage": false,
-     "pluginVersion": "7.2.2",
+     "pluginVersion": "7.4.3",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
-     "seriesOverrides": [
-       {
-         "$$hashKey": "object:485",
-         "alias": "catalogue-pod-cpu-hog",
-         "color": "#C4162A",
-         "fillGradient": 5,
-         "stack": "A",
-         "steppedLine": true
-       },
-       {
-         "$$hashKey": "object:486",
-         "alias": "orders-pod-memory-hog",
-         "color": "#C4162A",
-         "fillGradient": 5,
-         "stack": "B",
-         "steppedLine": true
-       },
-       {
-         "$$hashKey": "object:676",
-         "alias": "catalogue-node-cpu-hog",
-         "color": "#C4162A",
-         "fillGradient": 5,
-         "stack": "C",
-         "steppedLine": true
-       },
-       {
-         "$$hashKey": "object:681",
-         "alias": "orders-node-memory-hog",
-         "color": "#C4162A",
-         "fillGradient": 5,
-         "stack": "D",
-         "steppedLine": true
-       }
-     ],
+     "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": true,
      "targets": [
        {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "instant": false,
+         "expr": "litmuschaos_awaited_experiments",
          "interval": "",
          "intervalFactor": 2,
-         "legendFormat": "catalogue-pod-cpu-hog",
+         "legendFormat": "{{chaosresult_name}}",
          "refId": "A"
        },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "intervalFactor": 2,
-         "legendFormat": "orders-pod-memory-hog",
-         "refId": "B"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "catalogue-node-cpu-hog",
-         "refId": "C"
-       },
-       {
-         "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
-         "interval": "",
-         "legendFormat": "orders-node-memory-hog",
-         "refId": "D"
-       }
      ],
      "thresholds": [],
@@ -278,7 +224,7 @@
        "logBase": 1,
        "max": null,
        "min": null,
-       "show": true
+       "show": false
      }
    ],
    "yaxis": {
@@ -293,7 +239,6 @@
      "defaults": {
        "custom": {},
        "mappings": [],
-       "nullValueMode": "connected",
        "thresholds": {
          "mode": "absolute",
          "steps": [
@@ -331,13 +276,14 @@
          "values": false
        },
        "showThresholdLabels": false,
-       "showThresholdMarkers": true
+       "showThresholdMarkers": true,
+       "text": {}
      },
-     "pluginVersion": "7.2.2",
+     "pluginVersion": "7.4.3",
      "targets": [
        {
-         "expr": "sum(chaosengine_experiments_count{engine_namespace=\"litmus\",job=\"chaos-monitor\"})",
-         "instant": true,
+         "expr": "sum(litmuschaos_passed_experiments + litmuschaos_failed_experiments)",
+         "instant": false,
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "",
@@ -357,7 +303,6 @@
      "defaults": {
        "custom": {},
        "mappings": [],
-       "nullValueMode": "connected",
        "thresholds": {
          "mode": "absolute",
          "steps": [
@@ -395,13 +340,14 @@
          "values": false
        },
        "showThresholdLabels": false,
-       "showThresholdMarkers": true
+       "showThresholdMarkers": true,
+       "text": {}
      },
-     "pluginVersion": "7.2.2",
+     "pluginVersion": "7.4.3",
      "targets": [
        {
-         "expr": "sum(chaosengine_passed_experiments{engine_namespace=\"litmus\",job=\"chaos-monitor\"})",
-         "instant": true,
+         "expr": "sum(litmuschaos_passed_experiments)",
+         "instant": false,
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "",
@@ -421,7 +367,6 @@
      "defaults": {
        "custom": {},
        "mappings": [],
-       "nullValueMode": "connected",
        "thresholds": {
          "mode": "absolute",
          "steps": [
@@ -459,13 +404,14 @@
          "values": false
        },
        "showThresholdLabels": false,
-       "showThresholdMarkers": true
+       "showThresholdMarkers": true,
+       "text": {}
      },
-     "pluginVersion": "7.2.2",
+     "pluginVersion": "7.4.3",
      "targets": [
        {
-         "expr": "sum(chaosengine_failed_experiments{engine_namespace=\"litmus\",job=\"chaos-monitor\"})",
-         "instant": true,
+         "expr": "sum(litmuschaos_failed_experiments)",
+         "instant": false,
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "",
@@ -483,7 +429,6 @@
      "defaults": {
        "custom": {},
        "mappings": [],
-       "nullValueMode": "connected",
        "thresholds": {
          "mode": "absolute",
          "steps": [
@@ -521,12 +466,13 @@
          "values": false
        },
        "showThresholdLabels": false,
-       "showThresholdMarkers": true
+       "showThresholdMarkers": true,
+       "text": {}
      },
-     "pluginVersion": "7.2.2",
+     "pluginVersion": "7.4.3",
      "targets": [
        {
-         "expr": "sum(chaosengine_waiting_experiments{engine_namespace=\"litmus\",job=\"chaos-monitor\"})",
+         "expr": "sum(litmuschaos_awaited_experiments)",
          "instant": true,
          "interval": "",
          "intervalFactor": 3,
@@ -592,7 +538,7 @@
        "alertThreshold": true
      },
      "percentage": false,
-     "pluginVersion": "7.2.2",
+     "pluginVersion": "7.4.3",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
@ -640,13 +586,13 @@
|
|||
"step": 2
|
||||
},
|
||||
{
|
||||
"expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
|
||||
"expr": "litmuschaos_experiment_chaos_injected_time{chaosresult_name=\"orders-pod-memory-hog-pod-memory-hog\",chaosresult_namespace=\"kubera\"} - on () (litmuschaos_experiment_end_time{chaosresult_name=\"orders-pod-memory-hog-pod-memory-hog\",chaosresult_namespace=\"kubera\"} OR on() vector(0))",
|
||||
"interval": "",
|
||||
"legendFormat": "orders-pod-memory-hog",
|
||||
"refId": "D"
|
||||
},
|
||||
{
|
||||
"expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
|
||||
"expr": "litmuschaos_experiment_chaos_injected_time{chaosresult_name=\"orders-node-memory-hog-node-memory-hog\",chaosresult_namespace=\"kubera\"} - on () (litmuschaos_experiment_end_time{chaosresult_name=\"orders-node-memory-hog-node-memory-hog\",chaosresult_namespace=\"kubera\"} OR on() vector(0))",
|
||||
"interval": "",
|
||||
"legendFormat": "orders-node-memory-hog",
|
||||
"refId": "F"
|
||||
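Note the shape of the replacement queries: `litmuschaos_experiment_chaos_injected_time` minus `litmuschaos_experiment_end_time` (with the `OR on() vector(0)` fallback) stays positive while an experiment is still running and drops once the end timestamp is recorded, which is what lets the panels mark the chaos interval. The old `heptio_eventrouter_normal_total` pair achieved the same effect with ChaosInject/ChaosEngineCompleted event counts. To inspect one of these series by hand (assuming the Prometheus port-forward from the earlier sketch):

```bash
# Query the chaos-injection timestamp series exposed by the chaos-exporter
curl -s 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=litmuschaos_experiment_chaos_injected_time{chaosresult_namespace="kubera"}'
```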
|
|
@ -735,7 +681,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -801,13 +747,13 @@
|
|||
"step": 2
|
||||
},
|
||||
{
|
||||
"expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-pod-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
|
||||
"expr": "litmuschaos_experiment_chaos_injected_time{chaosresult_name=\"orders-pod-memory-hog-pod-memory-hog\",chaosresult_namespace=\"kubera\"} - on () (litmuschaos_experiment_end_time{chaosresult_name=\"orders-pod-memory-hog-pod-memory-hog\",chaosresult_namespace=\"kubera\"} OR on() vector(0))",
|
||||
"interval": "",
|
||||
"legendFormat": "orders-pod-memory-hog",
|
||||
"refId": "E"
|
||||
},
|
||||
{
|
||||
"expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"orders-node-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
|
||||
"expr": "litmuschaos_experiment_chaos_injected_time{chaosresult_name=\"orders-node-memory-hog-node-memory-hog\",chaosresult_namespace=\"kubera\"} - on () (litmuschaos_experiment_end_time{chaosresult_name=\"orders-node-memory-hog-node-memory-hog\",chaosresult_namespace=\"kubera\"} OR on() vector(0))",
|
||||
"interval": "",
|
||||
"legendFormat": "orders-node-memory-hog",
|
||||
"refId": "G"
|
||||
|
|
@ -911,7 +857,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -961,13 +907,13 @@
|
|||
"step": 2
|
||||
},
|
||||
{
|
||||
"expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
|
||||
"expr": "litmuschaos_experiment_chaos_injected_time{chaosresult_name=\"catalogue-pod-cpu-hog-pod-cpu-hog\",chaosresult_namespace=\"kubera\"} - on () (litmuschaos_experiment_end_time{chaosresult_name=\"catalogue-pod-cpu-hog-pod-cpu-hog\",chaosresult_namespace=\"kubera\"} OR on() vector(0))",
|
||||
"interval": "",
|
||||
"legendFormat": "catalogue-pod-cpu-hog",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
|
||||
"expr": "litmuschaos_experiment_chaos_injected_time{chaosresult_name=\"catalogue-node-cpu-hog-node-cpu-hog\",chaosresult_namespace=\"kubera\"} - on () (litmuschaos_experiment_end_time{chaosresult_name=\"catalogue-node-cpu-hog-node-cpu-hog\",chaosresult_namespace=\"kubera\"} OR on() vector(0))",
|
||||
"interval": "",
|
||||
"legendFormat": "catalogue-node-cpu-hog",
|
||||
"refId": "E"
|
||||
|
|
@ -1056,7 +1002,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -1124,13 +1070,13 @@
|
|||
"step": 2
|
||||
},
|
||||
{
|
||||
"expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-pod-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
|
||||
"expr": "litmuschaos_experiment_chaos_injected_time{chaosresult_name=\"catalogue-pod-cpu-hog-pod-cpu-hog\",chaosresult_namespace=\"kubera\"} - on () (litmuschaos_experiment_end_time{chaosresult_name=\"catalogue-pod-cpu-hog-pod-cpu-hog\",chaosresult_namespace=\"kubera\"} OR on() vector(0))",
|
||||
"interval": "",
|
||||
"legendFormat": "catalogue-pod-cpu-hog",
|
||||
"refId": "D"
|
||||
},
|
||||
{
|
||||
"expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"ChaosEngineCompleted\",involved_object_name=\"catalogue-node-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))",
|
||||
"expr": "litmuschaos_experiment_chaos_injected_time{chaosresult_name=\"catalogue-node-cpu-hog-node-cpu-hog\",chaosresult_namespace=\"kubera\"} - on () (litmuschaos_experiment_end_time{chaosresult_name=\"catalogue-node-cpu-hog-node-cpu-hog\",chaosresult_namespace=\"kubera\"} OR on() vector(0))",
|
||||
"interval": "",
|
||||
"legendFormat": "catalogue-node-cpu-hog",
|
||||
"refId": "F"
|
||||
|
|
@ -1232,7 +1178,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -1339,7 +1285,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -1464,7 +1410,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -1572,7 +1518,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -1697,7 +1643,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -1800,7 +1746,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -1925,7 +1871,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -2028,7 +1974,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -2153,7 +2099,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -2256,7 +2202,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.2.2",
|
||||
"pluginVersion": "7.4.3",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
|
@ -2328,7 +2274,7 @@
|
|||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 26,
|
||||
"schemaVersion": 27,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
|
|
@ -2365,5 +2311,5 @@
|
|||
"timezone": "browser",
|
||||
"title": "Sock-Shop Performance",
|
||||
"uid": "sockshopperfornamce",
|
||||
"version": 1
|
||||
"version": 7
|
||||
}
|
||||
|
|
@ -7,11 +7,11 @@ need to set up prometheus and grafana to enable monitoring, it is possible to ju
|
|||
|
||||
# Components
|
||||
|
||||
- [Grafana Dashboards](https://github.com/litmuschaos/litmus/blob/master/monitoring/platforms/kublr/grafana-dashboards)
|
||||
- [Grafana Dashboards](https://github.com/litmuschaos/litmus/blob/master/monitoring/platforms/kublr/grafana-dashboards)
|
||||
|
||||
> Contains chaos interleaved grafana dashboards for various native k8s and application metrics. These dashboards are
|
||||
> modified slightly relative to the dashboards in the generic getting started guide to accommodate the additional labels
|
||||
> Kublr introduces in its centralized monitoring.
|
||||
> Contains chaos interleaved grafana dashboards for various native k8s and application metrics. These dashboards are
|
||||
> modified slightly relative to the dashboards in the generic getting started guide to accommodate the additional labels
|
||||
> Kublr introduces in its centralized monitoring.
|
||||
|
||||
# Demonstration
|
||||
|
||||
|
|
@ -21,62 +21,64 @@ Run chaos experiments and workflows on sock-shop application with grafana dashbo
|
|||
|
||||
### Step-1: Setup Sock-Shop Microservices Application
|
||||
|
||||
- Apply the sock-shop microservices manifests
|
||||
- Apply the sock-shop microservices manifests
|
||||
|
||||
```
|
||||
kubectl apply -f ../../utils/sample-application-under-test/sock-shop/
|
||||
```
|
||||
```
|
||||
kubectl apply -f ../../utils/sample-application-under-test/sock-shop/
|
||||
```
|
||||
|
||||
- Wait until all services are up. Verify via `kubectl get pods -n sock-shop`
|
||||
- Wait until all services are up. Verify via `kubectl get pods -n sock-shop`
|
||||
|
||||
### Step-2: Setup the LitmusChaos Infrastructure
|
||||
|
||||
- Install the litmus chaos operator and CRDs
|
||||
- Install the litmus chaos operator and CRDs
|
||||
|
||||
```
|
||||
kubectl apply -f https://litmuschaos.github.io/litmus/litmus-operator-v1.9.0.yaml
|
||||
```
|
||||
```
|
||||
kubectl apply -f https://litmuschaos.github.io/litmus/litmus-operator-v1.9.0.yaml
|
||||
```
|
||||
|
||||
- Install the litmus-admin serviceaccount for centralized/admin-mode of chaos execution
|
||||
- Install the litmus-admin serviceaccount for centralized/admin-mode of chaos execution
|
||||
|
||||
```
|
||||
kubectl apply -f https://litmuschaos.github.io/litmus/litmus-admin-rbac.yaml
|
||||
```
|
||||
```
|
||||
kubectl apply -f https://litmuschaos.github.io/litmus/litmus-admin-rbac.yaml
|
||||
```
|
||||
|
||||
- Install the chaos experiments in admin(litmus) namespace
|
||||
- Install the chaos experiments in admin(litmus) namespace
|
||||
|
||||
```
|
||||
kubectl apply -f https://hub.litmuschaos.io/api/chaos/1.9.0?file=charts/generic/experiments.yaml -n litmus
|
||||
```
|
||||
```
|
||||
kubectl apply -f https://hub.litmuschaos.io/api/chaos/1.9.0?file=charts/generic/experiments.yaml -n litmus
|
||||
```
|
||||
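Before moving on, it helps to confirm that the operator is running and the experiment CRs were installed; for example:

```bash
# Verify the chaos operator pod and the installed experiment CRs
kubectl get pods -n litmus
kubectl get chaosexperiments -n litmus
```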
|
||||
### Step-3: Configure Sock-shop application and Litmus for the Kublr centralized monitoring Infrastructure
|
||||
|
||||
- Deploy Litmus monitoring components
|
||||
- Deploy Litmus monitoring components
|
||||
|
||||
```
|
||||
kubectl -n litmus apply -f ../../utils/metrics-exporters/litmus-metrics/chaos-exporter/
|
||||
kubectl -n litmus apply -f ../../utils/metrics-exporters/litmus-metrics/litmus-event-router/
|
||||
```
|
||||
```
|
||||
kubectl -n litmus apply -f ../../utils/metrics-exporters/litmus-metrics/chaos-exporter/
|
||||
kubectl -n litmus apply -f ../../utils/metrics-exporters/litmus-metrics/litmus-event-router/
|
||||
```
|
||||
|
||||
- Enable Litmus metrics collection on the Litmus monitoring components
|
||||
- Enable Litmus metrics collection on the Litmus monitoring components
|
||||
|
||||
```
|
||||
```
|
||||
|
||||
kubectl annotate svc -n litmus --overwrite \
|
||||
chaos-monitor chaos-operator-metrics litmus-eventrouter \
|
||||
'prometheus.io/scrape=true'
|
||||
kubectl annotate svc -n litmus --overwrite \
|
||||
chaos-exporter chaos-operator-metrics litmus-eventrouter \
|
||||
'prometheus.io/scrape=true'
|
||||
|
||||
- Enable custom metrics collection on the Sock-shop application
|
||||
```
|
||||
|
||||
```
|
||||
kubectl annotate svc -n sock-shop --overwrite \
|
||||
carts catalogue front-end orders payment shipping user \
|
||||
'prometheus.io/scrape=true'
|
||||
```
|
||||
- Enable custom metrics collection on the Sock-shop application
|
||||
|
||||
- Import the grafana dashboards, choosing `prometheus` as the data source.
|
||||
```
|
||||
kubectl annotate svc -n sock-shop --overwrite \
|
||||
carts catalogue front-end orders payment shipping user \
|
||||
'prometheus.io/scrape=true'
|
||||
```
|
||||
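After annotating, it is worth confirming that the scrape annotation actually landed before expecting new targets in Prometheus; a minimal check:

```bash
# Verify the prometheus.io/scrape annotation on the monitored services
kubectl get svc chaos-exporter -n litmus -o yaml | grep 'prometheus.io/scrape'
kubectl get svc catalogue -n sock-shop -o yaml | grep 'prometheus.io/scrape'
```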
|
||||

|
||||
- Import the grafana dashboards, choosing `prometheus` as the data source.
|
||||
|
||||

|
||||
|
||||
- Import the grafana dashboard "Sock-Shop Performance" provided [here](https://raw.githubusercontent.com/litmuschaos/litmus/master/monitoring/platforms/kublr/grafana-dashboards/kubernetes/Sock-Shop-Performance-Under-Chaos.json)
|
||||
|
||||
|
|
@ -84,131 +86,134 @@ Run chaos experiments and workflows on sock-shop application with grafana dashbo
|
|||
|
||||
### Step-4: Execute the Chaos Experiments
|
||||
|
||||
- For the sake of illustration, let us execute node- and pod-level CPU hog experiments on the `catalogue` microservice & Memory Hog
|
||||
experiments on the `orders` microservice in a staggered manner.
|
||||
- For the sake of illustration, let us execute node- and pod-level CPU hog experiments on the `catalogue` microservice & Memory Hog
|
||||
experiments on the `orders` microservice in a staggered manner.
|
||||
|
||||
```
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-experiments/catalogue/catalogue-pod-cpu-hog.yaml
|
||||
```
|
||||
```
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-experiments/catalogue/catalogue-pod-cpu-hog.yaml
|
||||
```
|
||||
|
||||
Wait for ~60s
|
||||
Wait for ~60s
|
||||
|
||||
```
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-experiments/orders/orders-pod-memory-hog.yaml
|
||||
```
|
||||
```
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-experiments/orders/orders-pod-memory-hog.yaml
|
||||
```
|
||||
|
||||
Wait for ~60s
|
||||
Wait for ~60s
|
||||
|
||||
```
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-experiments/catalogue/catalogue-node-cpu-hog.yaml
|
||||
```
|
||||
```
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-experiments/catalogue/catalogue-node-cpu-hog.yaml
|
||||
```
|
||||
|
||||
Wait for ~60s
|
||||
Wait for ~60s
|
||||
|
||||
```
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-experiments/orders/orders-node-memory-hog.yaml
|
||||
```
|
||||
```
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-experiments/orders/orders-node-memory-hog.yaml
|
||||
```
|
||||
|
||||
- Verify execution of chaos experiments
|
||||
- Verify execution of chaos experiments
|
||||
|
||||
```
|
||||
kubectl describe chaosengine catalogue-pod-cpu-hog -n litmus
|
||||
kubectl describe chaosengine orders-pod-memory-hog -n litmus
|
||||
kubectl describe chaosengine catalogue-node-cpu-hog -n litmus
|
||||
kubectl describe chaosengine orders-node-memory-hog -n litmus
|
||||
```
|
||||
```
|
||||
kubectl describe chaosengine catalogue-pod-cpu-hog -n litmus
|
||||
kubectl describe chaosengine orders-pod-memory-hog -n litmus
|
||||
kubectl describe chaosengine catalogue-node-cpu-hog -n litmus
|
||||
kubectl describe chaosengine orders-node-memory-hog -n litmus
|
||||
```
|
||||
|
||||
### Step-5: Visualize Chaos Impact
|
||||
|
||||
- Observe the impact of chaos injection through increased latency & reduced QPS (queries per second) on the microservices
|
||||
under test.
|
||||
- Observe the impact of chaos injection through increased latency & reduced QPS (queries per second) on the microservices
|
||||
under test.
|
||||
|
||||

|
||||

|
||||
|
||||

|
||||

|
||||
|
||||
### Step-6 (optional): Inject continuous chaos using Argo CD
|
||||
|
||||
- Install Chaos workflow infrastructure.
|
||||
- Install Chaos workflow infrastructure.
|
||||
|
||||
- Create argo namespace
|
||||
|
||||
```
|
||||
kubectl create ns argo
|
||||
```
|
||||
|
||||
- Create the CRDs and the workflow controller deployment with associated RBAC.
|
||||
|
||||
```
|
||||
kubectl apply -f https://raw.githubusercontent.com/argoproj/argo/stable/manifests/install.yaml -n argo
|
||||
```
|
||||
|
||||
- Install the argo CLI on the test harness machine (where the kubeconfig is available)
|
||||
|
||||
```bash
|
||||
# Download the binary
|
||||
curl -sLO https://github.com/argoproj/argo/releases/download/v2.11.0/argo-linux-amd64.gz
|
||||
|
||||
# Unzip
|
||||
gunzip argo-linux-amd64.gz
|
||||
|
||||
# Make binary executable
|
||||
chmod +x argo-linux-amd64
|
||||
|
||||
# Move binary to path
|
||||
mv ./argo-linux-amd64 /usr/local/bin/argo
|
||||
|
||||
# Test installation
|
||||
argo version
|
||||
```
|
||||
|
||||
- Create the Argo Access ServiceAccount
|
||||
- Create argo namespace
|
||||
|
||||
```
|
||||
kubectl apply -f https://raw.githubusercontent.com/litmuschaos/chaos-workflows/master/Argo/argo-access.yaml -n litmus
|
||||
kubectl create ns argo
|
||||
```
|
||||
|
||||
- Run one or more of the LitmusChaos experiments as chaos workflows using the Argo CLI or kubectl, as shown below.
|
||||
- Create the CRDs and the workflow controller deployment with associated RBAC.
|
||||
|
||||
> Node CPU hog
|
||||
```bash
|
||||
argo cron create ../../utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/catalogue/catalogue-node-cpu-hog-workflow.yaml -n litmus
|
||||
```
|
||||
kubectl apply -f https://raw.githubusercontent.com/argoproj/argo/stable/manifests/install.yaml -n argo
|
||||
```
|
||||
|
||||
> Node memory hog
|
||||
```bash
|
||||
argo cron create ../../utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/orders/orders-node-memory-hog-workflow.yaml -n litmus
|
||||
```
|
||||
|
||||
> Pod CPU hog
|
||||
- Install the argo CLI on the test harness machine (where the kubeconfig is available)
|
||||
|
||||
```bash
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/catalogue/catalogue-pod-cpu-hog-workflow.yaml -n litmus
|
||||
# Download the binary
|
||||
curl -sLO https://github.com/argoproj/argo/releases/download/v2.11.0/argo-linux-amd64.gz
|
||||
|
||||
# Unzip
|
||||
gunzip argo-linux-amd64.gz
|
||||
|
||||
# Make binary executable
|
||||
chmod +x argo-linux-amd64
|
||||
|
||||
# Move binary to path
|
||||
mv ./argo-linux-amd64 /usr/local/bin/argo
|
||||
|
||||
# Test installation
|
||||
argo version
|
||||
```
|
||||
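Once the CLI is in place, the cron workflows created below can be listed and managed with it; for example:

```bash
# List the chaos cron workflows scheduled in the admin namespace
argo cron list -n litmus
```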
|
||||
> Pod memory hog
|
||||
```bash
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/orders/orders-pod-memory-hog-workflow.yaml -n litmus
|
||||
```
|
||||
- Create the Argo Access ServiceAccount
|
||||
|
||||
- Visualize the chaos cron workflows through the Argo UI via a port-forward, or by obtaining the NodePort or LoadBalancer IP.
|
||||
```
|
||||
kubectl apply -f https://raw.githubusercontent.com/litmuschaos/chaos-workflows/master/Argo/argo-access.yaml -n litmus
|
||||
```
|
||||
|
||||
```
|
||||
kubectl port-forward svc/argo-server -n argo 2746
|
||||
```
|
||||
- Run one or more of the LitmusChaos experiments as chaos workflows using the Argo CLI or kubectl, as shown below.
|
||||
|
||||
OR
|
||||
> Node CPU hog
|
||||
|
||||
```
|
||||
kubectl patch svc argo-server -n argo -p '{"spec": {"type": "NodePort"}}'
|
||||
```
|
||||
```bash
|
||||
argo cron create ../../utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/catalogue/catalogue-node-cpu-hog-workflow.yaml -n litmus
|
||||
```
|
||||
|
||||
OR
|
||||
> Node memory hog
|
||||
|
||||
```
|
||||
kubectl patch svc argo-server -n argo -p '{"spec": {"type": "LoadBalancer"}}'
|
||||
```
|
||||
```bash
|
||||
argo cron create ../../utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/orders/orders-node-memory-hog-workflow.yaml -n litmus
|
||||
```
|
||||
|
||||

|
||||
> Pod CPU hog
|
||||
|
||||

|
||||
```bash
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/catalogue/catalogue-pod-cpu-hog-workflow.yaml -n litmus
|
||||
```
|
||||
|
||||
> Pod memory hog
|
||||
|
||||
```bash
|
||||
kubectl apply -f ../../utils/sample-chaos-injectors/chaos-workflows-with-argo-CD/orders/orders-pod-memory-hog-workflow.yaml -n litmus
|
||||
```
|
||||
|
||||
- Visualize the chaos cron workflows through the Argo UI via a port-forward, or by obtaining the NodePort or LoadBalancer IP.
|
||||
|
||||
```
|
||||
kubectl port-forward svc/argo-server -n argo 2746
|
||||
```
|
||||
|
||||
OR
|
||||
|
||||
```
|
||||
kubectl patch svc argo-server -n argo -p '{"spec": {"type": "NodePort"}}'
|
||||
```
|
||||
|
||||
OR
|
||||
|
||||
```
|
||||
kubectl patch svc argo-server -n argo -p '{"spec": {"type": "LoadBalancer"}}'
|
||||
```
|
||||
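With the port-forward active, the Argo UI is typically served at http://localhost:2746 (https if TLS is enabled on argo-server). For the NodePort or LoadBalancer variants, use the address reported for the service:

```bash
# Look up the externally reachable address of the argo-server service
kubectl get svc argo-server -n argo
```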
|
||||

|
||||
|
||||

|
||||
|
|
|
|||
Binary files not shown. (Four dashboard screenshots added: ~1.0 MiB, ~1.1 MiB, 597 KiB, 581 KiB.)
|
|
@ -3,13 +3,13 @@ kind: ClusterRoleBinding
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
name: kube-state-metrics
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: kube-state-metrics
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
- kind: ServiceAccount
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
|
|
|
|||
|
|
@ -3,115 +3,116 @@ kind: ClusterRole
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
name: kube-state-metrics
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
- secrets
|
||||
- nodes
|
||||
- pods
|
||||
- services
|
||||
- resourcequotas
|
||||
- replicationcontrollers
|
||||
- limitranges
|
||||
- persistentvolumeclaims
|
||||
- persistentvolumes
|
||||
- namespaces
|
||||
- endpoints
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- extensions
|
||||
resources:
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- statefulsets
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- batch
|
||||
resources:
|
||||
- cronjobs
|
||||
- jobs
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- autoscaling
|
||||
resources:
|
||||
- horizontalpodautoscalers
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
- tokenreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- authorization.k8s.io
|
||||
resources:
|
||||
- subjectaccessreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- policy
|
||||
resources:
|
||||
- poddisruptionbudgets
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- certificates.k8s.io
|
||||
resources:
|
||||
- certificatesigningrequests
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- storage.k8s.io
|
||||
resources:
|
||||
- storageclasses
|
||||
- volumeattachments
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- admissionregistration.k8s.io
|
||||
resources:
|
||||
- mutatingwebhookconfigurations
|
||||
- validatingwebhookconfigurations
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- networkpolicies
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- coordination.k8s.io
|
||||
resources:
|
||||
- leases
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
- secrets
|
||||
- nodes
|
||||
- pods
|
||||
- services
|
||||
- resourcequotas
|
||||
- replicationcontrollers
|
||||
- limitranges
|
||||
- persistentvolumeclaims
|
||||
- persistentvolumes
|
||||
- namespaces
|
||||
- endpoints
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- extensions
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- statefulsets
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- batch
|
||||
resources:
|
||||
- cronjobs
|
||||
- jobs
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- autoscaling
|
||||
resources:
|
||||
- horizontalpodautoscalers
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
- tokenreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- authorization.k8s.io
|
||||
resources:
|
||||
- subjectaccessreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- policy
|
||||
resources:
|
||||
- poddisruptionbudgets
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- certificates.k8s.io
|
||||
resources:
|
||||
- certificatesigningrequests
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- storage.k8s.io
|
||||
resources:
|
||||
- storageclasses
|
||||
- volumeattachments
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- admissionregistration.k8s.io
|
||||
resources:
|
||||
- mutatingwebhookconfigurations
|
||||
- validatingwebhookconfigurations
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- networkpolicies
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- coordination.k8s.io
|
||||
resources:
|
||||
- leases
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ kind: Deployment
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
|
@ -15,42 +15,30 @@ spec:
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --host=127.0.0.1
|
||||
- --port=8081
|
||||
- --telemetry-host=127.0.0.1
|
||||
- --telemetry-port=8082
|
||||
image: quay.io/coreos/kube-state-metrics:v1.9.5
|
||||
name: kube-state-metrics
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=:8443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
|
||||
- --upstream=http://127.0.0.1:8081/
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.4.1
|
||||
name: kube-rbac-proxy-main
|
||||
ports:
|
||||
- containerPort: 8443
|
||||
name: https-main
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=:9443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
|
||||
- --upstream=http://127.0.0.1:8082/
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.4.1
|
||||
name: kube-rbac-proxy-self
|
||||
ports:
|
||||
- containerPort: 9443
|
||||
name: https-self
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
- image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.0.0-beta
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
name: kube-state-metrics
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http-metrics
|
||||
- containerPort: 8081
|
||||
name: telemetry
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 8081
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
serviceAccountName: kube-state-metrics
|
||||
serviceAccountName: kube-state-metrics
|
||||
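With the kube-rbac-proxy sidecars dropped, the v2 exporter serves plain HTTP on 8080 (metrics) and 8081 (telemetry), which can be verified directly; a minimal sketch:

```bash
# Fetch the exporter's metrics and telemetry endpoints over plain HTTP
kubectl port-forward deploy/kube-state-metrics -n monitoring 8080 8081 &
curl -s http://localhost:8080/metrics | head
curl -s http://localhost:8081/metrics | head
```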
|
|
|
|||
|
|
@ -3,6 +3,6 @@ kind: ServiceAccount
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
namespace: monitoring
|
||||
|
|
|
|||
|
|
@ -3,30 +3,30 @@ kind: ServiceMonitor
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
k8s-app: kube-state-metrics
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
interval: 30s
|
||||
port: https-main
|
||||
relabelings:
|
||||
- action: labeldrop
|
||||
regex: (pod|service|endpoint|namespace)
|
||||
scheme: https
|
||||
scrapeTimeout: 30s
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 30s
|
||||
port: https-self
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
interval: 30s
|
||||
port: http-metrics
|
||||
relabelings:
|
||||
- action: labeldrop
|
||||
regex: (pod|service|endpoint|namespace)
|
||||
scheme: http
|
||||
scrapeTimeout: 30s
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 30s
|
||||
port: telemetry
|
||||
scheme: http
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
jobLabel: app.kubernetes.io/name
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
|
|
|
|||
|
|
@ -3,17 +3,17 @@ kind: Service
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: https-main
|
||||
port: 8443
|
||||
targetPort: https-main
|
||||
- name: https-self
|
||||
port: 9443
|
||||
targetPort: https-self
|
||||
- name: http-metrics
|
||||
port: 8080
|
||||
targetPort: http-metrics
|
||||
- name: telemetry
|
||||
port: 8081
|
||||
targetPort: telemetry
|
||||
selector:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
|
|
|
|||
|
|
@ -3,39 +3,43 @@ apiVersion: apps/v1
|
|||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app: chaos-monitor
|
||||
name: chaos-monitor
|
||||
app: chaos-exporter
|
||||
name: chaos-exporter
|
||||
namespace: litmus
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chaos-monitor
|
||||
app: chaos-exporter
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: chaos-monitor
|
||||
app: chaos-exporter
|
||||
spec:
|
||||
containers:
|
||||
- image: litmuschaos/chaos-exporter:ci
|
||||
imagePullPolicy: Always
|
||||
name: chaos-exporter
|
||||
serviceAccount: litmus
|
||||
- image: litmuschaos/chaos-exporter:1.13.0
|
||||
imagePullPolicy: Always
|
||||
name: chaos-exporter
|
||||
# uncomment the following lines to restrict the exporter to chaos events and chaosresults in a selected namespace
|
||||
# env:
|
||||
# - name: WATCH_NAMESPACE
|
||||
# value: "litmus"
|
||||
# to use with litmus portal in cluster scope, change the service account name to 'litmus-cluster-scope'; for namespaced scope, change it to 'litmus-namespace-scope'
|
||||
serviceAccountName: litmus
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: chaos-monitor
|
||||
name: chaos-monitor
|
||||
app: chaos-exporter
|
||||
name: chaos-exporter
|
||||
namespace: litmus
|
||||
spec:
|
||||
ports:
|
||||
- port: 8080
|
||||
name: tcp
|
||||
protocol: TCP
|
||||
targetPort: 8080
|
||||
- port: 8080
|
||||
name: tcp
|
||||
protocol: TCP
|
||||
targetPort: 8080
|
||||
selector:
|
||||
app: chaos-monitor
|
||||
app: chaos-exporter
|
||||
type: ClusterIP
|
||||
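To confirm the renamed exporter is serving the new `litmuschaos_*` series, its service can be scraped by hand; a minimal sketch:

```bash
# Port-forward the exporter service and pull its metrics endpoint
kubectl port-forward svc/chaos-exporter -n litmus 8080:8080 &
curl -s http://localhost:8080/metrics | grep litmuschaos_
```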
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: chaos-monitor
|
||||
namespace: litmus
|
||||
name: chaos-exporter
|
||||
namespace: litmus
|
||||
|
|
|
|||
|
|
@ -9,10 +9,10 @@ spec:
|
|||
jobLabel: app
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chaos-monitor
|
||||
app: chaos-exporter
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- litmus
|
||||
- litmus
|
||||
endpoints:
|
||||
- port: tcp
|
||||
interval: 1s
|
||||
- port: tcp
|
||||
interval: 1s
|
||||
|
|
|
|||
|
|
@ -1,13 +0,0 @@
|
|||
apiVersion: v1
|
||||
data:
|
||||
config.json: |-
|
||||
{
|
||||
"sink": "http",
|
||||
"httpSinkUrl": "http://localhost:8080",
|
||||
"httpSinkBufferSize": 1500,
|
||||
"httpSinkDiscardMessages": true
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: litmus-eventrouter-http-cm
|
||||
namespace: litmus
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app: litmus-eventrouter
|
||||
name: litmus-eventrouter
|
||||
namespace: litmus
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: litmus-eventrouter
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: litmus-eventrouter
|
||||
spec:
|
||||
containers:
|
||||
- image: containership/eventrouter
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: litmus-eventrouter
|
||||
volumeMounts:
|
||||
- mountPath: /etc/eventrouter
|
||||
name: config-volume
|
||||
serviceAccount: litmus
|
||||
serviceAccountName: litmus
|
||||
volumes:
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: litmus-eventrouter-http-cm
|
||||
name: config-volume
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: litmus-eventrouter
|
||||
name: litmus-eventrouter
|
||||
namespace: litmus
|
||||
spec:
|
||||
ports:
|
||||
- nodePort: 31399
|
||||
name: web
|
||||
port: 8080
|
||||
protocol: TCP
|
||||
targetPort: 8080
|
||||
selector:
|
||||
app: litmus-eventrouter
|
||||
sessionAffinity: None
|
||||
type: NodePort
|
||||
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: litmus-eventrouter
|
||||
namespace: litmus
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: litmus-event-router
|
||||
labels:
|
||||
k8s-app: litmus-event-router
|
||||
namespace: litmus
|
||||
spec:
|
||||
jobLabel: app
|
||||
selector:
|
||||
matchLabels:
|
||||
app: litmus-eventrouter
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- litmus
|
||||
endpoints:
|
||||
- port: web
|
||||
interval: 1s
|
||||
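Since the eventrouter manifests are deleted outright rather than migrated, clusters that applied the old files are left with orphaned objects; a one-time cleanup, using the names from the removed manifests:

```bash
# Remove the retired eventrouter components
kubectl delete deployment litmus-eventrouter -n litmus
kubectl delete service litmus-eventrouter -n litmus
kubectl delete configmap litmus-eventrouter-http-cm -n litmus
kubectl delete serviceaccount litmus-eventrouter -n litmus
kubectl delete servicemonitor litmus-event-router -n litmus
```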
|
|
@ -0,0 +1,33 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: mysql-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: mysql-exporter
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: mysql-exporter
|
||||
name: mysql-exporter
|
||||
spec:
|
||||
containers:
|
||||
- image: prom/mysqld-exporter:latest
|
||||
imagePullPolicy: "Always"
|
||||
env:
|
||||
- name: DATA_SOURCE_NAME
|
||||
value: "root:root_password@(cluster1-haproxy.pxc.svc.cluster.local:3306)/"
|
||||
args:
|
||||
- "--collect.info_schema.processlist"
|
||||
- "--collect.info_schema.innodb_metrics"
|
||||
- "--collect.info_schema.tablestats"
|
||||
- "--collect.info_schema.userstats"
|
||||
- "--collect.engine_innodb_status"
|
||||
- "--collect.slave_hosts"
|
||||
name: mysql-exporter
|
||||
ports:
|
||||
- containerPort: 9104
|
||||
name: mysql-metrics
|
||||
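The `DATA_SOURCE_NAME` above carries the root password in plain text; outside a demo it is better sourced from a Secret. A minimal sketch, with an illustrative secret name that is not part of these manifests:

```bash
# Create a secret holding the exporter DSN (values copied from the manifest)
kubectl create secret generic mysql-exporter-dsn -n monitoring \
  --from-literal=DATA_SOURCE_NAME='root:root_password@(cluster1-haproxy.pxc.svc.cluster.local:3306)/'
```

The Deployment's env entry can then reference the key through `valueFrom.secretKeyRef` instead of a literal `value`.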
|
|
@ -0,0 +1,5 @@
|
|||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: mysql-exporter
|
||||
namespace: monitoring
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: mysql-exporter
|
||||
labels:
|
||||
k8s-app: mysql-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
jobLabel: k8s-app
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: mysql-exporter
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- monitoring
|
||||
endpoints:
|
||||
- port: mysql-metrics
|
||||
scheme: http
|
||||
interval: 30s
|
||||
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: mysql-exporter
|
||||
name: mysql-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
ports:
|
||||
- name: mysql-metrics
|
||||
port: 9104
|
||||
protocol: TCP
|
||||
targetPort: 9104
|
||||
selector:
|
||||
k8s-app: mysql-exporter
|
||||
|
|
@ -3,7 +3,7 @@ kind: DaemonSet
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/version: latest
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
|
@ -14,60 +14,60 @@ spec:
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/version: latest
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --web.listen-address=127.0.0.1:9100
|
||||
- --path.procfs=/host/proc
|
||||
- --path.sysfs=/host/sys
|
||||
- --path.rootfs=/host/root
|
||||
- --no-collector.wifi
|
||||
- --no-collector.hwmon
|
||||
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
|
||||
image: quay.io/prometheus/node-exporter:v0.18.1
|
||||
name: node-exporter
|
||||
resources:
|
||||
limits:
|
||||
cpu: 250m
|
||||
memory: 180Mi
|
||||
requests:
|
||||
cpu: 102m
|
||||
memory: 180Mi
|
||||
volumeMounts:
|
||||
- mountPath: /host/proc
|
||||
name: proc
|
||||
readOnly: false
|
||||
- mountPath: /host/sys
|
||||
name: sys
|
||||
readOnly: false
|
||||
- mountPath: /host/root
|
||||
mountPropagation: HostToContainer
|
||||
name: root
|
||||
readOnly: true
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=[$(IP)]:9100
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
|
||||
- --upstream=http://127.0.0.1:9100/
|
||||
env:
|
||||
- name: IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.podIP
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.4.1
|
||||
name: kube-rbac-proxy
|
||||
ports:
|
||||
- containerPort: 9100
|
||||
hostPort: 9100
|
||||
name: https
|
||||
resources:
|
||||
limits:
|
||||
cpu: 20m
|
||||
memory: 40Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 20Mi
|
||||
- args:
|
||||
- --web.listen-address=127.0.0.1:9100
|
||||
- --path.procfs=/host/proc
|
||||
- --path.sysfs=/host/sys
|
||||
- --path.rootfs=/host/root
|
||||
- --no-collector.wifi
|
||||
- --no-collector.hwmon
|
||||
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
|
||||
image: quay.io/prometheus/node-exporter:latest
|
||||
name: node-exporter
|
||||
resources:
|
||||
limits:
|
||||
cpu: 250m
|
||||
memory: 180Mi
|
||||
requests:
|
||||
cpu: 102m
|
||||
memory: 180Mi
|
||||
volumeMounts:
|
||||
- mountPath: /host/proc
|
||||
name: proc
|
||||
readOnly: false
|
||||
- mountPath: /host/sys
|
||||
name: sys
|
||||
readOnly: false
|
||||
- mountPath: /host/root
|
||||
mountPropagation: HostToContainer
|
||||
name: root
|
||||
readOnly: true
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=[$(IP)]:9100
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
|
||||
- --upstream=http://127.0.0.1:9100/
|
||||
env:
|
||||
- name: IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.podIP
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.5.0
|
||||
name: kube-rbac-proxy
|
||||
ports:
|
||||
- containerPort: 9100
|
||||
hostPort: 9100
|
||||
name: https
|
||||
resources:
|
||||
limits:
|
||||
cpu: 20m
|
||||
memory: 40Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 20Mi
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
nodeSelector:
|
||||
|
|
@ -77,14 +77,14 @@ spec:
|
|||
runAsUser: 65534
|
||||
serviceAccountName: node-exporter
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
- operator: Exists
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /proc
|
||||
name: proc
|
||||
- hostPath:
|
||||
path: /sys
|
||||
name: sys
|
||||
- hostPath:
|
||||
path: /
|
||||
name: root
|
||||
- hostPath:
|
||||
path: /proc
|
||||
name: proc
|
||||
- hostPath:
|
||||
path: /sys
|
||||
name: sys
|
||||
- hostPath:
|
||||
path: /
|
||||
name: root
|
||||
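A quick rollout check is usually enough to verify the updated daemonset, since the exporter binds hostPort 9100 on every node behind the kube-rbac-proxy:

```bash
# Confirm node-exporter is scheduled and ready on all nodes
kubectl get daemonset node-exporter -n monitoring
```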
|
|
|
|||
|
|
@ -3,26 +3,26 @@ kind: ServiceMonitor
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/version: latest
|
||||
k8s-app: node-exporter
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 15s
|
||||
port: https
|
||||
relabelings:
|
||||
- action: replace
|
||||
regex: (.*)
|
||||
replacement: $1
|
||||
sourceLabels:
|
||||
- __meta_kubernetes_pod_node_name
|
||||
targetLabel: instance
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 15s
|
||||
port: https
|
||||
relabelings:
|
||||
- action: replace
|
||||
regex: (.*)
|
||||
replacement: $1
|
||||
sourceLabels:
|
||||
- __meta_kubernetes_pod_node_name
|
||||
targetLabel: instance
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
jobLabel: app.kubernetes.io/name
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
|
|
|
|||
|
|
@ -3,14 +3,14 @@ kind: Service
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/version: latest
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: https
|
||||
port: 9100
|
||||
targetPort: https
|
||||
- name: https
|
||||
port: 9100
|
||||
targetPort: https
|
||||
selector:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@ kind: ClusterRoleBinding
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
name: kube-state-metrics
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: kube-state-metrics
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
- kind: ServiceAccount
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
|
|
|
|||
|
|
@ -3,115 +3,108 @@ kind: ClusterRole
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
name: kube-state-metrics
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
- secrets
|
||||
- nodes
|
||||
- pods
|
||||
- services
|
||||
- resourcequotas
|
||||
- replicationcontrollers
|
||||
- limitranges
|
||||
- persistentvolumeclaims
|
||||
- persistentvolumes
|
||||
- namespaces
|
||||
- endpoints
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- extensions
|
||||
resources:
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- statefulsets
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- batch
|
||||
resources:
|
||||
- cronjobs
|
||||
- jobs
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- autoscaling
|
||||
resources:
|
||||
- horizontalpodautoscalers
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
- tokenreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- authorization.k8s.io
|
||||
resources:
|
||||
- subjectaccessreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- policy
|
||||
resources:
|
||||
- poddisruptionbudgets
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- certificates.k8s.io
|
||||
resources:
|
||||
- certificatesigningrequests
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- storage.k8s.io
|
||||
resources:
|
||||
- storageclasses
|
||||
- volumeattachments
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- admissionregistration.k8s.io
|
||||
resources:
|
||||
- mutatingwebhookconfigurations
|
||||
- validatingwebhookconfigurations
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- networkpolicies
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- coordination.k8s.io
|
||||
resources:
|
||||
- leases
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
- secrets
|
||||
- nodes
|
||||
- pods
|
||||
- services
|
||||
- resourcequotas
|
||||
- replicationcontrollers
|
||||
- limitranges
|
||||
- persistentvolumeclaims
|
||||
- persistentvolumes
|
||||
- namespaces
|
||||
- endpoints
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- extensions
|
||||
resources:
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- statefulsets
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- batch
|
||||
resources:
|
||||
- cronjobs
|
||||
- jobs
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- autoscaling
|
||||
resources:
|
||||
- horizontalpodautoscalers
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
- tokenreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- authorization.k8s.io
|
||||
resources:
|
||||
- subjectaccessreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- policy
|
||||
resources:
|
||||
- poddisruptionbudgets
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- certificates.k8s.io
|
||||
resources:
|
||||
- certificatesigningrequests
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- storage.k8s.io
|
||||
resources:
|
||||
- storageclasses
|
||||
- volumeattachments
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- admissionregistration.k8s.io
|
||||
resources:
|
||||
- mutatingwebhookconfigurations
|
||||
- validatingwebhookconfigurations
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- networkpolicies
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ kind: Deployment
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
|
@ -15,42 +15,30 @@ spec:
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --host=127.0.0.1
|
||||
- --port=8081
|
||||
- --telemetry-host=127.0.0.1
|
||||
- --telemetry-port=8082
|
||||
image: quay.io/coreos/kube-state-metrics:v1.9.5
|
||||
name: kube-state-metrics
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=:8443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
|
||||
- --upstream=http://127.0.0.1:8081/
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.4.1
|
||||
name: kube-rbac-proxy-main
|
||||
ports:
|
||||
- containerPort: 8443
|
||||
name: https-main
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=:9443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
|
||||
- --upstream=http://127.0.0.1:8082/
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.4.1
|
||||
name: kube-rbac-proxy-self
|
||||
ports:
|
||||
- containerPort: 9443
|
||||
name: https-self
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
- image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.0.0-beta
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
name: kube-state-metrics
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http-metrics
|
||||
- containerPort: 8081
|
||||
name: telemetry
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 8081
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
serviceAccountName: kube-state-metrics
|
||||
serviceAccountName: kube-state-metrics
|
||||
|
|
|
|||
|
|
@ -3,6 +3,6 @@ kind: ServiceAccount
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
namespace: monitoring
|
||||
|
|
|
|||
|
|
@ -3,22 +3,22 @@ kind: Service
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.5
|
||||
app.kubernetes.io/version: latest
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/path: "/metrics"
|
||||
prometheus.io/port: "9091"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/scheme: "http"
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: https-main
|
||||
port: 8443
|
||||
targetPort: https-main
|
||||
- name: https-self
|
||||
port: 9443
|
||||
targetPort: https-self
|
||||
- name: http-metrics
|
||||
port: 8080
|
||||
targetPort: http-metrics
|
||||
- name: telemetry
|
||||
port: 8081
|
||||
targetPort: telemetry
|
||||
selector:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
|
|
|
|||
|
|
@ -3,39 +3,43 @@ apiVersion: apps/v1
|
|||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app: chaos-monitor
|
||||
name: chaos-monitor
|
||||
app: chaos-exporter
|
||||
name: chaos-exporter
|
||||
namespace: litmus
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chaos-monitor
|
||||
app: chaos-exporter
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: chaos-monitor
|
||||
app: chaos-exporter
|
||||
spec:
|
||||
containers:
|
||||
- image: litmuschaos/chaos-exporter:ci
|
||||
imagePullPolicy: Always
|
||||
name: chaos-exporter
|
||||
serviceAccount: litmus
|
||||
- image: litmuschaos/chaos-exporter:1.13.0
|
||||
imagePullPolicy: Always
|
||||
name: chaos-exporter
|
||||
# uncomment the following lines to restrict the exporter to chaos events and chaosresults in a selected namespace
|
||||
# env:
|
||||
# - name: WATCH_NAMESPACE
|
||||
# value: "litmus"
|
||||
# to use with litmus portal in cluster scope, change the service account name to 'litmus-cluster-scope'; for namespaced scope, change it to 'litmus-namespace-scope'
|
||||
serviceAccountName: litmus
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: chaos-monitor
|
||||
name: chaos-monitor
|
||||
app: chaos-exporter
|
||||
name: chaos-exporter
|
||||
namespace: litmus
|
||||
spec:
|
||||
ports:
|
||||
- port: 8080
|
||||
name: tcp
|
||||
protocol: TCP
|
||||
targetPort: 8080
|
||||
- port: 8080
|
||||
name: tcp
|
||||
protocol: TCP
|
||||
targetPort: 8080
|
||||
selector:
|
||||
app: chaos-monitor
|
||||
type: ClusterIP
|
||||
app: chaos-exporter
|
||||
type: ClusterIP
|
||||
|
|
|
|||
|
|
@ -1,13 +0,0 @@
apiVersion: v1
data:
  config.json: |-
    {
      "sink": "http",
      "httpSinkUrl": "http://localhost:8080",
      "httpSinkBufferSize": 1500,
      "httpSinkDiscardMessages": true
    }
kind: ConfigMap
metadata:
  name: litmus-eventrouter-http-cm
  namespace: litmus
@ -1,56 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: litmus-eventrouter
  name: litmus-eventrouter
  namespace: litmus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: litmus-eventrouter
  template:
    metadata:
      labels:
        app: litmus-eventrouter
    spec:
      containers:
      - image: containership/eventrouter
        imagePullPolicy: IfNotPresent
        name: litmus-eventrouter
        volumeMounts:
        - mountPath: /etc/eventrouter
          name: config-volume
      serviceAccount: litmus
      serviceAccountName: litmus
      volumes:
      - configMap:
          defaultMode: 420
          name: litmus-eventrouter-http-cm
        name: config-volume
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: litmus-eventrouter
  name: litmus-eventrouter
  namespace: litmus
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/path: "/metrics"
    prometheus.io/port: "8080"
    prometheus.io/scheme: "http"
spec:
  ports:
  - nodePort: 31399
    name: web
    port: 8080
    protocol: TCP
    targetPort: 8080
  selector:
    app: litmus-eventrouter
  sessionAffinity: None
  type: NodePort

@ -0,0 +1,33 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mysql-exporter
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: mysql-exporter
  template:
    metadata:
      labels:
        k8s-app: mysql-exporter
      name: mysql-exporter
    spec:
      containers:
      - image: prom/mysqld-exporter:latest
        imagePullPolicy: "Always"
        env:
        - name: DATA_SOURCE_NAME
          value: "root:root_password@(cluster1-haproxy.pxc.svc.cluster.local:3306)/"
        args:
        - "--collect.info_schema.processlist"
        - "--collect.info_schema.innodb_metrics"
        - "--collect.info_schema.tablestats"
        - "--collect.info_schema.userstats"
        - "--collect.engine_innodb_status"
        - "--collect.slave_hosts"
        name: mysql-exporter
        ports:
        - containerPort: 9104
          name: mysql-metrics

@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: mysql-exporter
  name: mysql-exporter
  namespace: monitoring
spec:
  ports:
  - name: mysql-metrics
    port: 9104
    protocol: TCP
    targetPort: 9104
  selector:
    k8s-app: mysql-exporter

@ -7,6 +7,6 @@ roleRef:
  kind: ClusterRole
  name: node-exporter
subjects:
- kind: ServiceAccount
  name: node-exporter
  namespace: monitoring

@ -1,10 +1,10 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  labels:
    app.kubernetes.io/name: node-exporter
    app.kubernetes.io/version: v0.18.1
    app.kubernetes.io/version: latest
  namespace: monitoring
spec:
  selector:

@ -14,63 +14,10 @@ spec:
    metadata:
      labels:
        app.kubernetes.io/name: node-exporter
        app.kubernetes.io/version: v0.18.1
        app.kubernetes.io/version: latest
        app: node-exporter
      name: node-exporter
    spec:
      containers:
      - args:
        - --web.listen-address=0.0.0.0:9100
        - --path.procfs=/host/proc
        - --path.sysfs=/host/sys
        - --path.rootfs=/host/root
        - --no-collector.wifi
        - --no-collector.hwmon
        - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
        image: quay.io/prometheus/node-exporter:v0.18.1
        name: node-exporter
        resources:
          limits:
            cpu: 250m
            memory: 180Mi
          requests:
            cpu: 102m
            memory: 180Mi
        volumeMounts:
        - mountPath: /host/proc
          name: proc
          readOnly: false
        - mountPath: /host/sys
          name: sys
          readOnly: false
        - mountPath: /host/root
          mountPropagation: HostToContainer
          name: root
          readOnly: true
      - args:
        - --logtostderr
        - --secure-listen-address=[$(IP)]:9101
        - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
        - --upstream=http://127.0.0.1:9100/
        env:
        - name: IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP
        image: quay.io/coreos/kube-rbac-proxy:v0.4.1
        name: kube-rbac-proxy
        ports:
        - containerPort: 9100
          hostPort: 9100
          name: http
        - containerPort: 9101
          hostPort: 9101
          name: https
        resources:
          limits:
            cpu: 20m
            memory: 40Mi
          requests:
            cpu: 10m
            memory: 20Mi
      hostNetwork: true
      hostPID: true
      nodeSelector:

@ -79,15 +26,48 @@ spec:
        runAsNonRoot: true
        runAsUser: 65534
      serviceAccountName: node-exporter
      containers:
      - image: quay.io/prometheus/node-exporter:latest
        args:
        - --web.listen-address=0.0.0.0:9100
        - --path.procfs=/host/proc
        - --path.sysfs=/host/sys
        - --path.rootfs=/host/root
        - --no-collector.wifi
        - --no-collector.hwmon
        - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
        name: node-exporter
        ports:
        - containerPort: 9100
          hostPort: 9100
          name: http
        resources:
          requests:
            memory: 30Mi
            cpu: 100m
          limits:
            memory: 50Mi
            cpu: 200m
        volumeMounts:
        - mountPath: /host/proc
          name: proc
          readOnly: false
        - mountPath: /host/sys
          name: sys
          readOnly: false
        - mountPath: /host/root
          mountPropagation: HostToContainer
          name: root
          readOnly: true
      tolerations:
      - operator: Exists
      volumes:
      - hostPath:
          path: /proc
        name: proc
      - hostPath:
          path: /sys
        name: sys
      - hostPath:
          path: /
        name: root

@ -2,23 +2,17 @@ apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/name: node-exporter
    app.kubernetes.io/version: v0.18.1
  name: node-exporter
  namespace: monitoring
    app: node-exporter
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/path: "/metrics"
    prometheus.io/port: "9091"
    prometheus.io/scheme: "http"
  name: node-exporter
  namespace: monitoring
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: http
  - name: metrics
    port: 9100
    targetPort: http
  - name: https
    port: 9101
    targetPort: https
  selector:
    app.kubernetes.io/name: node-exporter
    app: node-exporter

@ -7,12 +7,12 @@ metadata:
spec:
  alerting:
    alertmanagers:
    - name: alertmanager-main
      namespace: monitoring
      port: web
  externalLabels:
    cluster: docker-desktop
  image: quay.io/prometheus/prometheus:v2.19.2
  image: quay.io/prometheus/prometheus:v2.25.0
  nodeSelector:
    kubernetes.io/os: linux
  podMonitorNamespaceSelector: {}

@ -33,26 +33,26 @@ spec:
  serviceMonitorNamespaceSelector: {}
  serviceMonitorSelector:
    matchExpressions:
    - key: k8s-app
      operator: In
      values:
      - node-exporter
      - kube-state-metrics
      - apiserver
      - kubelet
      - carts
      - carts-db
      - shipping
      - rabbitmq
      - queue-master
      - catalogue-db
      - catalogue
      - front-end
      - orders-db
      - orders
      - payment
      - user-db
      - user
      - litmus-event-router
      - chaos-exporter
  version: v2.19.2
    - key: k8s-app
      operator: In
      values:
      - node-exporter
      - kube-state-metrics
      - apiserver
      - kubelet
      - carts
      - carts-db
      - shipping
      - rabbitmq
      - queue-master
      - catalogue-db
      - catalogue
      - front-end
      - orders-db
      - orders
      - payment
      - user-db
      - user
      - chaos-exporter
      - mysql-exporter
  version: v2.25.0

@ -20,37 +20,37 @@ spec:
        app.kubernetes.io/version: v0.40.0
    spec:
      containers:
      - args:
        - --kubelet-service=kube-system/kubelet
        - --logtostderr=true
        - --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
        - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.40.0
        image: quay.io/coreos/prometheus-operator:v0.40.0
        name: prometheus-operator
        ports:
        - containerPort: 8080
          name: http
        # resources:
        #   limits:
        #     cpu: 200m
        #     memory: 200Mi
        #   requests:
        #     cpu: 100m
        #     memory: 100Mi
        securityContext:
          allowPrivilegeEscalation: false
      - args:
        - --logtostderr
        - --secure-listen-address=:8443
        - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
        - --upstream=http://127.0.0.1:8080/
        image: quay.io/coreos/kube-rbac-proxy:v0.4.1
        name: kube-rbac-proxy
        ports:
        - containerPort: 8443
          name: https
        securityContext:
          runAsUser: 65534
      - args:
        - --kubelet-service=kube-system/kubelet
        - --logtostderr=true
        - --config-reloader-image=jimmidyson/configmap-reload:v0.5.0
        - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.42.1
        image: quay.io/coreos/prometheus-operator:v0.40.0
        name: prometheus-operator
        ports:
        - containerPort: 8080
          name: http
        # resources:
        #   limits:
        #     cpu: 200m
        #     memory: 200Mi
        #   requests:
        #     cpu: 100m
        #     memory: 100Mi
        securityContext:
          allowPrivilegeEscalation: false
      - args:
        - --logtostderr
        - --secure-listen-address=:8443
        - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
        - --upstream=http://127.0.0.1:8080/
        image: quay.io/coreos/kube-rbac-proxy:v0.5.0
        name: kube-rbac-proxy
        ports:
        - containerPort: 8443
          name: https
        securityContext:
          runAsUser: 65534
      nodeSelector:
        beta.kubernetes.io/os: linux
      securityContext:

@ -10,9 +10,9 @@ metadata:
spec:
  clusterIP: None
  ports:
  - name: https
    port: 8443
    targetPort: https
  selector:
    app.kubernetes.io/component: controller
    app.kubernetes.io/name: prometheus-operator

@ -13,21 +13,21 @@ metadata:
  labels:
    app: prometheus
rules:
- apiGroups: [""] # "" indicates the core API group
  resources:
  - nodes
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs:
  - get
  - list
  - watch
- nonResourceURLs:
  - /metrics
  verbs:
  - get
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding

@ -40,6 +40,6 @@ roleRef:
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: monitoring

@ -5,10 +5,47 @@ data:
      scrape_interval: 5s
    rule_files:
      - "/etc/prometheus-rules/alert.rules"
    alerting:
      alertmanagers:
      - static_configs:
        - targets: ["alertmanager:9093"]
    scrape_configs:
      - job_name: 'chaos-monitor'
      - job_name: 'chaos-exporter'
        static_configs:
          - targets: ['chaos-monitor.litmus.svc.cluster.local:8080']
          - targets: ['chaos-exporter.litmus.svc.cluster.local:8080']
      - job_name: 'mysql_server1'
        static_configs:
          - targets: ['mysql-exporter.monitoring.svc.cluster.local:9104']
            labels:
              alias: db1
      - job_name: 'kube-state-metrics'
        static_configs:
          - targets: ['kube-state-metrics.monitoring.svc.cluster.local:8080']
      - job_name: 'node-exporter'
        static_configs:
          - targets: ['node-exporter.monitoring.svc.cluster.local:9100']
        kubernetes_sd_configs:
        - role: endpoints
        relabel_configs:
        - source_labels: [__meta_kubernetes_endpoints_name]
          regex: 'node-exporter'
          action: keep
      - job_name: 'cAdvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
        - role: node
        relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)
        - target_label: __address__
          replacement: kubernetes.default.svc:443
        - source_labels: [__meta_kubernetes_node_name]
          regex: (.+)
          target_label: __metrics_path__
          replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
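        # the relabeling above scrapes each node's kubelet cAdvisor endpoint
        # through the API server proxy instead of contacting the kubelets directly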
      - job_name: kubernetes-service-endpoints
        kubernetes_sd_configs:
        - role: endpoints

@ -96,7 +133,7 @@ data:
        - source_labels:
          - __meta_kubernetes_namespace
          - __meta_kubernetes_pod_label_name
          action: keep
          action: drop
          regex: ^kube-system;weave-net$
        - source_labels:
          - __meta_kubernetes_pod_container_name

@ -119,4 +156,4 @@ data:
kind: ConfigMap
metadata:
  name: prometheus-configmap
  namespace: monitoring

File diff suppressed because it is too large

@ -15,31 +15,29 @@ spec:
        name: prometheus
    spec:
      containers:
      - args:
        - -storage.local.retention=360h
        - -storage.local.memory-chunks=1048576
        - -config.file=/etc/prometheus/prometheus.yml
        - -alertmanager.url=http://alertmanager:9093
        image: prom/prometheus:v1.5.2
        imagePullPolicy: IfNotPresent
        name: prometheus
        ports:
        - containerPort: 9090
          name: web
          protocol: TCP
        volumeMounts:
        - mountPath: /etc/prometheus
          name: config-volume
        - mountPath: /etc/prometheus-rules
          name: alertrules-volume
      - args:
        - --storage.tsdb.retention.time=360h
        - --config.file=/etc/prometheus/prometheus.yml
        image: prom/prometheus:v2.25.0
        imagePullPolicy: IfNotPresent
        name: prometheus
        ports:
        - containerPort: 9090
          name: web
          protocol: TCP
        volumeMounts:
        - mountPath: /etc/prometheus
          name: config-volume
        - mountPath: /etc/prometheus-rules
          name: alertrules-volume
      serviceAccount: prometheus
      serviceAccountName: prometheus
      volumes:
      - configMap:
          defaultMode: 420
          name: prometheus-configmap
        name: config-volume
      - configMap:
          defaultMode: 420
          name: prometheus-alertrules
        name: alertrules-volume

@ -2,18 +2,18 @@ apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/scrape: "true"
  labels:
    name: prometheus
  name: prometheus-k8s
  namespace: monitoring
spec:
  selector:
    app: prometheus
  type: NodePort
  type: LoadBalancer
  ports:
  - name: prometheus
    protocol: TCP
    port: 9090
    targetPort: 9090
    nodePort: 31090

@ -0,0 +1,175 @@
# Demonstration

## Monitor Chaos on Percona

Run chaos experiments on the percona application with a grafana dashboard to monitor it.

### Setup Percona Application

- Set up Percona.

```
kubectl apply -f ../../sample-application-under-test/percona/crd.yaml
```

```
kubectl create namespace pxc
```

```
kubectl -n pxc apply -f ../../sample-application-under-test/percona/rbac.yaml
```

```
kubectl -n pxc apply -f ../../sample-application-under-test/percona/operator.yaml
```

```
kubectl -n pxc apply -f ../../sample-application-under-test/percona/secrets.yaml
```

- (optional-PMM) Set up PMM for Percona

```
helm repo add percona https://percona-charts.storage.googleapis.com
```

```
helm repo update
```

```
helm install monitoring percona/pmm-server --set platform=kubernetes --version 2.7.0 --set "credentials.password=newpass"
```

- Wait until all services are up. Verify via `kubectl get pods -n pxc`

- (optional-PMM) Check the client logs and the monitoring service

```
kubectl logs -f cluster1-pxc-0 -c pmm-client
```

```
kubectl get service/monitoring-service -o wide
```

- Apply the CR with `pmm.enabled` set to `true` or `false`

```
kubectl -n pxc apply -f ../../sample-application-under-test/percona/cr.yaml
```

- Check connectivity to the newly created cluster

```
kubectl run -i --rm --tty percona-client --image=percona:8.0 --restart=Never -- bash -il
percona-client:/$ mysql -h cluster1-haproxy -uroot -proot_password
```

### Setup the Monitoring Infrastructure (if not using PMM)

- Create the monitoring namespace on the cluster

```
kubectl create ns monitoring
```

- Set up the Prometheus TSDB (using the mysqld exporter for Percona SQL metrics), following either of the two models below.

> Model-1 (optional): Service monitor and prometheus operator model.

Create the operator to instantiate all CRDs

```
kubectl -n monitoring apply -f ../../prometheus/prometheus-operator/
```

Deploy the monitoring components

```
kubectl -n litmus apply -f ../../metrics-exporters-with-service-monitors/litmus-metrics/chaos-exporter/
kubectl -n monitoring apply -f ../../metrics-exporters-with-service-monitors/mysqld-exporter/
```

Deploy the prometheus instance and all the service monitors for the targets

```
kubectl -n monitoring apply -f ../../prometheus/prometheus-configuration/
```

Note: To change the service type to NodePort, perform a `kubectl edit svc prometheus-k8s -n monitoring` and replace `type: LoadBalancer` with `type: NodePort`

> Model-2 (optional): Prometheus scrape config model.

Deploy the prometheus components

```
kubectl -n monitoring apply -f ../../prometheus/prometheus-scrape-configuration/
```

Deploy the metrics exporters

```
kubectl -n litmus apply -f ../../metrics-exporters/litmus-metrics/chaos-exporter/
kubectl -n monitoring apply -f ../../metrics-exporters/mysqld-exporter/
```
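
Verify that the exporters came up before wiring dashboards (a quick check, using the namespaces chosen above):

```
kubectl get pods -n litmus
kubectl get pods -n monitoring
```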

- Apply the grafana manifests after deploying prometheus for all metrics.

```
kubectl -n monitoring apply -f ../../grafana/
```

- You may access the grafana dashboard via the LoadBalancer (or NodePort) service IP or via a port-forward operation on localhost

View the services running in the monitoring namespace

```
kubectl get svc -n monitoring
```

Now copy the EXTERNAL-IP of grafana and view it in the browser
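
If no external IP gets assigned (e.g. on a local cluster), a port-forward works as well — a minimal sketch, assuming the grafana service is named `grafana` and listens on port 3000:

```
kubectl -n monitoring port-forward svc/grafana 3000:3000
```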

Default username/password credentials: `admin/admin`

### Configure the Monitoring Infrastructure

- Add the prometheus datasource from the monitoring namespace as DS_PROMETHEUS for Grafana via the Grafana settings menu (likewise for PMM, if used)

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/percona/datasource-config.png?raw=true)

- Import the grafana dashboards

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/percona/dashboard-import.png?raw=true)

- (optional) Import the grafana dashboard "MySQL Overview" provided [here](https://raw.githubusercontent.com/ishangupta-ds/litmus/percona/monitoring/grafana-dashboards/percona-sql/MySQL-Overview-Interleaved.json)

- (optional) Import the grafana dashboard "PXC Galera Node summary Performance" provided [here](https://raw.githubusercontent.com/ishangupta-ds/litmus/percona/monitoring/grafana-dashboards/percona-sql/PXC_Galera_Node_Summary_Interleaved.json)

- (optional) Use PMM for monitoring.

### Execute the Chaos Experiments

```
kubectl apply -f ../../sample-chaos-injectors/chaos-experiments/percona/percona-network-loss.yaml
```

- Verify execution of chaos experiments

```
kubectl describe chaosengine percona-network-chaos -n litmus
```
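
The experiment verdict can also be read off the chaosresult resource; a sketch, assuming the default `<engine-name>-<experiment-name>` naming convention:

```
kubectl get chaosresult percona-network-chaos-pod-network-loss -n litmus -o yaml
```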

### Visualize Chaos Impact

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/percona/granafa-dashboard-percona.png?raw=true)

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/percona/percona-chaos-1.png?raw=true)

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/percona/percona-chaos-2.png?raw=true)

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/percona/percona-chaos-3.png?raw=true)

@ -0,0 +1,438 @@
apiVersion: pxc.percona.com/v1-6-0
kind: PerconaXtraDBCluster
metadata:
  name: cluster1
  finalizers:
    - delete-pxc-pods-in-order
#    - delete-proxysql-pvc
#    - delete-pxc-pvc
#  annotations:
#    percona.com/issue-vault-token: "true"
spec:
  crVersion: 1.6.0
  secretsName: my-cluster-secrets
  vaultSecretName: keyring-secret-vault
  sslSecretName: my-cluster-ssl
  sslInternalSecretName: my-cluster-ssl-internal
  logCollectorSecretName: my-log-collector-secrets
  disableHookValidation: true
#  tls:
#    SANs:
#      - pxc-1.example.com
#      - pxc-2.example.com
#      - pxc-3.example.com
#    issuerConf:
#      name: special-selfsigned-issuer
#      kind: ClusterIssuer
#      group: cert-manager.io
  allowUnsafeConfigurations: false
#  pause: false
  updateStrategy: SmartUpdate
  upgradeOptions:
    versionServiceEndpoint: https://check.percona.com
    apply: recommended
    schedule: "0 4 * * *"
  pxc:
    size: 3
    image: percona/percona-xtradb-cluster:8.0.20-11.1
#    autoRecovery: false
#    schedulerName: mycustom-scheduler
#    readinessDelaySec: 15
#    livenessDelaySec: 600
#    forceUnsafeBootstrap: false
#    configuration: |
#      [mysqld]
#      wsrep_debug=ON
#      wsrep_provider_options="gcache.size=1G; gcache.recover=yes"
#      [sst]
#      xbstream-opts=--decompress
#      [xtrabackup]
#      compress=lz4
#      for PXC 5.7
#      [xtrabackup]
#      compress
#    imagePullSecrets:
#      - name: private-registry-credentials
#    priorityClassName: high-priority
#    annotations:
#      iam.amazonaws.com/role: role-arn
#    labels:
#      rack: rack-22
#    containerSecurityContext:
#      privileged: false
#    podSecurityContext:
#      runAsUser: 1001
#      runAsGroup: 1001
#      supplementalGroups: [1001]
#    serviceAccountName: percona-xtradb-cluster-operator-workload
#    imagePullPolicy: Always
    resources:
      requests:
        memory: 1G
        cpu: 600m
#        ephemeral-storage: 1Gi
#      limits:
#        memory: 1G
#        cpu: "1"
#        ephemeral-storage: 1Gi
#    nodeSelector:
#      disktype: ssd
    affinity:
      antiAffinityTopologyKey: "kubernetes.io/hostname"
#      advanced:
#        nodeAffinity:
#          requiredDuringSchedulingIgnoredDuringExecution:
#            nodeSelectorTerms:
#            - matchExpressions:
#              - key: kubernetes.io/e2e-az-name
#                operator: In
#                values:
#                - e2e-az1
#                - e2e-az2
#    tolerations:
#    - key: "node.alpha.kubernetes.io/unreachable"
#      operator: "Exists"
#      effect: "NoExecute"
#      tolerationSeconds: 6000
    podDisruptionBudget:
      maxUnavailable: 1
#      minAvailable: 0
    volumeSpec:
#      emptyDir: {}
#      hostPath:
#        path: /data
#        type: Directory
      persistentVolumeClaim:
#        storageClassName: standard
#        accessModes: [ "ReadWriteOnce" ]
        resources:
          requests:
            storage: 6Gi
    gracePeriod: 600
  haproxy:
    enabled: true
    size: 3
    image: percona/percona-xtradb-cluster-operator:1.6.0-haproxy
#    imagePullPolicy: Always
#    schedulerName: mycustom-scheduler
#    configuration: |
#      global
#        maxconn 2048
#        external-check
#        stats socket /var/run/haproxy.sock mode 600 expose-fd listeners level user
#
#      defaults
#        log global
#        mode tcp
#        retries 10
#        timeout client 28800s
#        timeout connect 100500
#        timeout server 28800s
#
#      frontend galera-in
#        bind *:3309 accept-proxy
#        bind *:3306 accept-proxy
#        mode tcp
#        option clitcpka
#        default_backend galera-nodes
#
#      frontend galera-replica-in
#        bind *:3307
#        mode tcp
#        option clitcpka
#        default_backend galera-replica-nodes
#    imagePullSecrets:
#      - name: private-registry-credentials
#    annotations:
#      iam.amazonaws.com/role: role-arn
#    labels:
#      rack: rack-22
#    serviceType: ClusterIP
#    externalTrafficPolicy: Cluster
#    replicasServiceType: ClusterIP
#    replicasExternalTrafficPolicy: Cluster
#    schedulerName: "default"
    resources:
      requests:
        memory: 1G
        cpu: 600m
#      limits:
#        memory: 1G
#        cpu: 700m
#    priorityClassName: high-priority
#    nodeSelector:
#      disktype: ssd
#    sidecarResources:
#      requests:
#        memory: 1G
#        cpu: 500m
#      limits:
#        memory: 2G
#        cpu: 600m
#    serviceAccountName: percona-xtradb-cluster-operator-workload
    affinity:
      antiAffinityTopologyKey: "kubernetes.io/hostname"
#      advanced:
#        nodeAffinity:
#          requiredDuringSchedulingIgnoredDuringExecution:
#            nodeSelectorTerms:
#            - matchExpressions:
#              - key: kubernetes.io/e2e-az-name
#                operator: In
#                values:
#                - e2e-az1
#                - e2e-az2
#    tolerations:
#    - key: "node.alpha.kubernetes.io/unreachable"
#      operator: "Exists"
#      effect: "NoExecute"
#      tolerationSeconds: 6000
    podDisruptionBudget:
      maxUnavailable: 1
#      minAvailable: 0
    gracePeriod: 30
#    loadBalancerSourceRanges:
#      - 10.0.0.0/8
#    serviceAnnotations:
#      service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
  proxysql:
    enabled: false
    size: 3
    image: percona/percona-xtradb-cluster-operator:1.6.0-proxysql
#    imagePullPolicy: Always
#    configuration: |
#      datadir="/var/lib/proxysql"
#
#      admin_variables =
#      {
#        admin_credentials="proxyadmin:admin_password"
#        mysql_ifaces="0.0.0.0:6032"
#        refresh_interval=2000
#
#        cluster_username="proxyadmin"
#        cluster_password="admin_password"
#        cluster_check_interval_ms=200
#        cluster_check_status_frequency=100
#        cluster_mysql_query_rules_save_to_disk=true
#        cluster_mysql_servers_save_to_disk=true
#        cluster_mysql_users_save_to_disk=true
#        cluster_proxysql_servers_save_to_disk=true
#        cluster_mysql_query_rules_diffs_before_sync=1
#        cluster_mysql_servers_diffs_before_sync=1
#        cluster_mysql_users_diffs_before_sync=1
#        cluster_proxysql_servers_diffs_before_sync=1
#      }
#
#      mysql_variables=
#      {
#        monitor_password="monitor"
#        monitor_galera_healthcheck_interval=1000
#        threads=2
#        max_connections=2048
#        default_query_delay=0
#        default_query_timeout=10000
#        poll_timeout=2000
#        interfaces="0.0.0.0:3306"
#        default_schema="information_schema"
#        stacksize=1048576
#        connect_timeout_server=10000
#        monitor_history=60000
#        monitor_connect_interval=20000
#        monitor_ping_interval=10000
#        ping_timeout_server=200
#        commands_stats=true
#        sessions_sort=true
#        have_ssl=true
#        ssl_p2s_ca="/etc/proxysql/ssl-internal/ca.crt"
#        ssl_p2s_cert="/etc/proxysql/ssl-internal/tls.crt"
#        ssl_p2s_key="/etc/proxysql/ssl-internal/tls.key"
#        ssl_p2s_cipher="ECDHE-RSA-AES128-GCM-SHA256"
#      }
#    schedulerName: mycustom-scheduler
#    imagePullSecrets:
#      - name: private-registry-credentials
#    annotations:
#      iam.amazonaws.com/role: role-arn
#    labels:
#      rack: rack-22
#    serviceType: ClusterIP
#    externalTrafficPolicy: Cluster
#    schedulerName: "default"
    resources:
      requests:
        memory: 1G
        cpu: 600m
#      limits:
#        memory: 1G
#        cpu: 700m
#    priorityClassName: high-priority
#    nodeSelector:
#      disktype: ssd
#    sidecarResources:
#      requests:
#        memory: 1G
#        cpu: 500m
#      limits:
#        memory: 2G
#        cpu: 600m
#    serviceAccountName: percona-xtradb-cluster-operator-workload
    affinity:
      antiAffinityTopologyKey: "kubernetes.io/hostname"
#      advanced:
#        nodeAffinity:
#          requiredDuringSchedulingIgnoredDuringExecution:
#            nodeSelectorTerms:
#            - matchExpressions:
#              - key: kubernetes.io/e2e-az-name
#                operator: In
#                values:
#                - e2e-az1
#                - e2e-az2
#    tolerations:
#    - key: "node.alpha.kubernetes.io/unreachable"
#      operator: "Exists"
#      effect: "NoExecute"
#      tolerationSeconds: 6000
    volumeSpec:
#      emptyDir: {}
#      hostPath:
#        path: /data
#        type: Directory
      persistentVolumeClaim:
#        storageClassName: standard
#        accessModes: [ "ReadWriteOnce" ]
        resources:
          requests:
            storage: 2Gi
    podDisruptionBudget:
      maxUnavailable: 1
#      minAvailable: 0
    gracePeriod: 30
#    loadBalancerSourceRanges:
#      - 10.0.0.0/8
#    serviceAnnotations:
#      service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
  logcollector:
    enabled: true
    image: percona/percona-xtradb-cluster-operator:1.6.0-logcollector
#    configuration: |
#      [OUTPUT]
#        Name  es
#        Match *
#        Host  192.168.2.3
#        Port  9200
#        Index my_index
#        Type  my_type
#    resources:
#      requests:
#        memory: 200M
#        cpu: 500m
  pmm:
    enabled: true # Switch to false for a non-PMM setup.
    image: percona/pmm-client:2.12.0
    serverHost: monitoring-service
    serverUser: admin
#    pxcParams: "--disable-tablestats-limit=2000"
#    proxysqlParams: "--custom-labels=CUSTOM-LABELS"
#    resources:
#      requests:
#        memory: 200M
#        cpu: 500m
  backup:
    image: percona/percona-xtradb-cluster-operator:1.6.0-pxc8.0-backup
#    serviceAccountName: percona-xtradb-cluster-operator
#    imagePullSecrets:
#      - name: private-registry-credentials
    pitr:
      enabled: false
      storageName: STORAGE-NAME-HERE
      timeBetweenUploads: 60
    storages:
      s3-us-west:
        type: s3
#        nodeSelector:
#          storage: tape
#          backupWorker: 'True'
#        resources:
#          requests:
#            memory: 1G
#            cpu: 600m
#        affinity:
#          nodeAffinity:
#            requiredDuringSchedulingIgnoredDuringExecution:
#              nodeSelectorTerms:
#              - matchExpressions:
#                - key: backupWorker
#                  operator: In
#                  values:
#                  - 'True'
#        tolerations:
#          - key: "backupWorker"
#            operator: "Equal"
#            value: "True"
#            effect: "NoSchedule"
#        annotations:
#          testName: scheduled-backup
#        labels:
#          backupWorker: 'True'
#        schedulerName: 'default-scheduler'
#        priorityClassName: 'high-priority'
#        containerSecurityContext:
#          privileged: true
#        podSecurityContext:
#          fsGroup: 1001
#          supplementalGroups: [1001, 1002, 1003]
        s3:
          bucket: S3-BACKUP-BUCKET-NAME-HERE
          credentialsSecret: my-cluster-name-backup-s3
          region: us-west-2
      fs-pvc:
        type: filesystem
#        nodeSelector:
#          storage: tape
#          backupWorker: 'True'
#        resources:
#          requests:
#            memory: 1G
#            cpu: 600m
#        affinity:
#          nodeAffinity:
#            requiredDuringSchedulingIgnoredDuringExecution:
#              nodeSelectorTerms:
#              - matchExpressions:
#                - key: backupWorker
#                  operator: In
#                  values:
#                  - 'True'
#        tolerations:
#          - key: "backupWorker"
#            operator: "Equal"
#            value: "True"
#            effect: "NoSchedule"
#        annotations:
#          testName: scheduled-backup
#        labels:
#          backupWorker: 'True'
#        schedulerName: 'default-scheduler'
#        priorityClassName: 'high-priority'
#        containerSecurityContext:
#          privileged: true
#        podSecurityContext:
#          fsGroup: 1001
#          supplementalGroups: [1001, 1002, 1003]
        volume:
          persistentVolumeClaim:
#            storageClassName: standard
            accessModes: ["ReadWriteOnce"]
            resources:
              requests:
                storage: 6Gi
    schedule:
      - name: "sat-night-backup"
        schedule: "0 0 * * 6"
        keep: 3
        storageName: s3-us-west
      - name: "daily-backup"
        schedule: "0 0 * * *"
        keep: 5
        storageName: fs-pvc

@ -0,0 +1,193 @@
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: perconaxtradbclusters.pxc.percona.com
spec:
  group: pxc.percona.com
  names:
    kind: PerconaXtraDBCluster
    listKind: PerconaXtraDBClusterList
    plural: perconaxtradbclusters
    singular: perconaxtradbcluster
    shortNames:
    - pxc
    - pxcs
  scope: Namespaced
  versions:
  - name: v1
    storage: false
    served: true
  - name: v1-1-0
    storage: false
    served: true
  - name: v1-2-0
    storage: false
    served: true
  - name: v1-3-0
    storage: false
    served: true
  - name: v1-4-0
    storage: false
    served: true
  - name: v1-5-0
    storage: false
    served: true
  - name: v1-6-0
    storage: false
    served: true
  - name: v1-7-0
    storage: true
    served: true
  - name: v1alpha1
    storage: false
    served: true
  additionalPrinterColumns:
  - name: Endpoint
    type: string
    JSONPath: .status.host
  - name: Status
    type: string
    JSONPath: .status.state
  - name: PXC
    type: string
    description: Ready pxc nodes
    JSONPath: .status.pxc.ready
  - name: proxysql
    type: string
    description: Ready proxysql nodes
    JSONPath: .status.proxysql.ready
  - name: haproxy
    type: string
    description: Ready haproxy nodes
    JSONPath: .status.haproxy.ready
  - name: Age
    type: date
    JSONPath: .metadata.creationTimestamp
  subresources:
    status: {}
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: perconaxtradbclusterbackups.pxc.percona.com
spec:
  group: pxc.percona.com
  names:
    kind: PerconaXtraDBClusterBackup
    listKind: PerconaXtraDBClusterBackupList
    plural: perconaxtradbclusterbackups
    singular: perconaxtradbclusterbackup
    shortNames:
    - pxc-backup
    - pxc-backups
  scope: Namespaced
  versions:
  - name: v1
    storage: true
    served: true
  additionalPrinterColumns:
  - name: Cluster
    type: string
    description: Cluster name
    JSONPath: .spec.pxcCluster
  - name: Storage
    type: string
    description: Storage name from pxc spec
    JSONPath: .status.storageName
  - name: Destination
    type: string
    description: Backup destination
    JSONPath: .status.destination
  - name: Status
    type: string
    description: Job status
    JSONPath: .status.state
  - name: Completed
    description: Completed time
    type: date
    JSONPath: .status.completed
  - name: Age
    type: date
    JSONPath: .metadata.creationTimestamp
  subresources:
    status: {}
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: perconaxtradbclusterrestores.pxc.percona.com
spec:
  group: pxc.percona.com
  names:
    kind: PerconaXtraDBClusterRestore
    listKind: PerconaXtraDBClusterRestoreList
    plural: perconaxtradbclusterrestores
    singular: perconaxtradbclusterrestore
    shortNames:
    - pxc-restore
    - pxc-restores
  scope: Namespaced
  versions:
  - name: v1
    storage: true
    served: true
  additionalPrinterColumns:
  - name: Cluster
    type: string
    description: Cluster name
    JSONPath: .spec.pxcCluster
  - name: Status
    type: string
    description: Job status
    JSONPath: .status.state
  - name: Completed
    description: Completed time
    type: date
    JSONPath: .status.completed
  - name: Age
    type: date
    JSONPath: .metadata.creationTimestamp
  subresources:
    status: {}
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: perconaxtradbbackups.pxc.percona.com
spec:
  group: pxc.percona.com
  names:
    kind: PerconaXtraDBBackup
    listKind: PerconaXtraDBBackupList
    plural: perconaxtradbbackups
    singular: perconaxtradbbackup
    shortNames: []
  scope: Namespaced
  versions:
  - name: v1alpha1
    storage: true
    served: true
  additionalPrinterColumns:
  - name: Cluster
    type: string
    description: Cluster name
    JSONPath: .spec.pxcCluster
  - name: Storage
    type: string
    description: Storage name from pxc spec
    JSONPath: .status.storageName
  - name: Destination
    type: string
    description: Backup destination
    JSONPath: .status.destination
  - name: Status
    type: string
    description: Job status
    JSONPath: .status.state
  - name: Completed
    description: Completed time
    type: date
    JSONPath: .status.completed
  - name: Age
    type: date
    JSONPath: .metadata.creationTimestamp

@ -0,0 +1,54 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: percona-xtradb-cluster-operator
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: operator
      app.kubernetes.io/instance: percona-xtradb-cluster-operator
      app.kubernetes.io/name: percona-xtradb-cluster-operator
      app.kubernetes.io/part-of: percona-xtradb-cluster-operator
  strategy:
    rollingUpdate:
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      labels:
        app.kubernetes.io/component: operator
        app.kubernetes.io/instance: percona-xtradb-cluster-operator
        app.kubernetes.io/name: percona-xtradb-cluster-operator
        app.kubernetes.io/part-of: percona-xtradb-cluster-operator
    spec:
      containers:
      - command:
        - percona-xtradb-cluster-operator
        env:
        - name: WATCH_NAMESPACE
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.namespace
        - name: POD_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.name
        - name: OPERATOR_NAME
          value: percona-xtradb-cluster-operator
        image: percona/percona-xtradb-cluster-operator:1.6.0
        imagePullPolicy: Always
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /metrics
            port: metrics
            scheme: HTTP
        name: percona-xtradb-cluster-operator
        ports:
        - containerPort: 8080
          name: metrics
          protocol: TCP
      serviceAccountName: percona-xtradb-cluster-operator

@ -0,0 +1,111 @@
kind: Role
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: percona-xtradb-cluster-operator
rules:
- apiGroups:
  - pxc.percona.com
  resources:
  - perconaxtradbclusters
  - perconaxtradbclusters/status
  - perconaxtradbclusterbackups
  - perconaxtradbclusterbackups/status
  - perconaxtradbclusterrestores
  - perconaxtradbclusterrestores/status
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
- apiGroups:
  - ""
  resources:
  - pods
  - pods/exec
  - pods/log
  - configmaps
  - services
  - persistentvolumeclaims
  - secrets
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
- apiGroups:
  - apps
  resources:
  - deployments
  - replicasets
  - statefulsets
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
- apiGroups:
  - batch
  resources:
  - jobs
  - cronjobs
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
- apiGroups:
  - policy
  resources:
  - poddisruptionbudgets
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
- apiGroups:
  - certmanager.k8s.io
  - cert-manager.io
  resources:
  - issuers
  - certificates
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
  - deletecollection
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: percona-xtradb-cluster-operator
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: service-account-percona-xtradb-cluster-operator
subjects:
- kind: ServiceAccount
  name: percona-xtradb-cluster-operator
roleRef:
  kind: Role
  name: percona-xtradb-cluster-operator
  apiGroup: rbac.authorization.k8s.io

@ -0,0 +1,14 @@
apiVersion: v1
kind: Secret
metadata:
  name: my-cluster-secrets
type: Opaque
stringData:
  root: root_password
  xtrabackup: backup_password
  monitor: monitory
  clustercheck: clustercheckpassword
  proxyadmin: admin_password
  pmmserver: newpass
  operator: operatoradmin
  serverUser: admin

@ -0,0 +1,156 @@
# Demonstration

## Monitor Chaos on Sock-Shop

Run chaos experiments and workflows on the sock-shop application with a grafana dashboard to monitor it.

### Setup Sock-Shop Microservices Application

- Apply the sock-shop microservices manifests

```
kubectl apply -f .
```

- Wait until all services are up. Verify via `kubectl get pods -n sock-shop`

### Setup the Monitoring Components

- Create service monitors for all the application services if using the prometheus operator with service monitors.

```
kubectl -n sock-shop apply -f ../../sample-application-service-monitors/sock-shop/
```
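
The created monitors can be listed to confirm they were picked up (a quick check; `servicemonitors` is the CRD installed by the prometheus operator):

```
kubectl get servicemonitors -n sock-shop
```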

### Import the grafana dashboard

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/sock-shop/dashboard-import.png?raw=true)

- Import the grafana dashboard "Sock-Shop Performance" provided [here](https://raw.githubusercontent.com/litmuschaos/litmus/master/monitoring/grafana-dashboards/sock-shop/Sock-Shop-Performance-Under-Chaos.json)

### Execute the Chaos Experiments

- For the sake of illustration, let us execute node- and pod-level CPU hog experiments on the `catalogue` microservice & memory hog experiments on the `orders` microservice in a staggered manner.

```
kubectl apply -f ../../sample-chaos-injectors/chaos-experiments/catalogue/catalogue-pod-cpu-hog.yaml
```

Wait for ~60s

```
kubectl apply -f ../../sample-chaos-injectors/chaos-experiments/orders/orders-pod-memory-hog.yaml
```

Wait for ~60s

```
kubectl apply -f ../../sample-chaos-injectors/chaos-experiments/catalogue/catalogue-node-cpu-hog.yaml
```

Wait for ~60s

```
kubectl apply -f ../../sample-chaos-injectors/chaos-experiments/orders/orders-node-memory-hog.yaml
```

- Verify execution of chaos experiments

```
kubectl describe chaosengine catalogue-pod-cpu-hog -n litmus
kubectl describe chaosengine orders-pod-memory-hog -n litmus
kubectl describe chaosengine catalogue-node-cpu-hog -n litmus
kubectl describe chaosengine orders-node-memory-hog -n litmus
```
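
The per-experiment verdicts can also be summarized from the chaosresult resources (a sketch, assuming the experiments run in the litmus namespace):

```
kubectl get chaosresults -n litmus
```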

### Visualize Chaos Impact

- Observe the impact of chaos injection through increased latency & reduced QPS (queries per second) on the microservices under test.

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/sock-shop/sock-shop1.png?raw=true)

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/sock-shop/sock-shop2.png?raw=true)

### Inject continuous chaos using Argo CD.

- Install the Chaos workflow infrastructure.

- Create the argo namespace

```
kubectl create ns argo
```

- Create the CRDs and the workflow controller deployment with the associated RBAC.

```
kubectl apply -f https://raw.githubusercontent.com/argoproj/argo/stable/manifests/install.yaml -n argo
```

- Install the argo CLI on the test harness machine (where the kubeconfig is available)

```bash
# Download the binary
curl -sLO https://github.com/argoproj/argo/releases/download/v2.11.0/argo-linux-amd64.gz

# Unzip
gunzip argo-linux-amd64.gz

# Make binary executable
chmod +x argo-linux-amd64

# Move binary to path
mv ./argo-linux-amd64 /usr/local/bin/argo

# Test installation
argo version
```

- Create the Argo Access ServiceAccount

```
kubectl apply -f https://raw.githubusercontent.com/litmuschaos/chaos-workflows/master/Argo/argo-access.yaml -n litmus
```

- Run one or more of the litmuschaos experiments as Chaos workflows using the argo CLI or kubectl.

> Node CPU hog

```bash
argo cron create ../../sample-chaos-injectors/chaos-workflows-with-argo-CD/catalogue/catalogue-node-cpu-hog-workflow.yaml -n litmus
```

> Node memory hog

```bash
argo cron create ../../sample-chaos-injectors/chaos-workflows-with-argo-CD/orders/orders-node-memory-hog-workflow.yaml -n litmus
```

> Pod CPU hog

```bash
kubectl apply -f ../../sample-chaos-injectors/chaos-workflows-with-argo-CD/catalogue/catalogue-pod-cpu-hog-workflow.yaml -n litmus
```

> Pod memory hog

```bash
kubectl apply -f ../../sample-chaos-injectors/chaos-workflows-with-argo-CD/orders/orders-pod-memory-hog-workflow.yaml -n litmus
```
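
The registered cron workflows can be listed to confirm they were created (a sketch using the argo CLI installed above):

```bash
argo cron list -n litmus
```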

- Visualize the Chaos cron workflow through the Argo UI by obtaining the NodePort or LoadBalancer IP.

```
kubectl patch svc argo-server -n argo -p '{"spec": {"type": "NodePort"}}'
```

OR

```
kubectl patch svc argo-server -n argo -p '{"spec": {"type": "LoadBalancer"}}'
```
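
Then read the assigned port or external IP off the service (the argo-server UI listens on port 2746 by default):

```
kubectl get svc argo-server -n argo
```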

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/sock-shop/sock-shop-dashboard.png?raw=true)

![image](https://github.com/ishangupta-ds/litmus/blob/percona/monitoring/screenshots/sock-shop/argo-cron-wf.png?raw=true)

@ -0,0 +1,39 @@
apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
  name: percona-network-chaos
  namespace: litmus
spec:
  jobCleanUpPolicy: "retain"
  annotationCheck: "false"
  engineState: "active"
  monitoring: true
  appinfo:
    appns: "pxc"
    applabel: "app.kubernetes.io/name=percona-xtradb-cluster"
    appkind: "statefulset"
  chaosServiceAccount: litmus-admin
  experiments:
    - name: pod-network-loss
      spec:
        components:
          env:
            # Network interface inside target container
            - name: NETWORK_INTERFACE
              value: "eth0"
            - name: NETWORK_PACKET_LOSS_PERCENTAGE
              value: "100"
            - name: TOTAL_CHAOS_DURATION
              value: "9" # in seconds
            - name: TARGET_PODS
              value: "cluster1-pxc-1"
            - name: DESTINATION_HOSTS
              value: "cluster1-pxc-0,cluster1-pxc-2"
            # provide the name of container runtime
            # for litmus LIB, it supports docker, containerd, crio
            # for pumba LIB, it supports docker only
            - name: CONTAINER_RUNTIME
              value: "docker"
            # provide the socket file path
            - name: SOCKET_PATH
              value: "/var/run/docker.sock"