diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 40798af9e..af9992408 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,6 +19,7 @@ jobs: event-tracker: ${{ steps.filter.outputs.event-tracker }} upgrade-agent-cp: ${{ steps.filter.outputs.upgrade-agent-cp }} subscriber: ${{ steps.filter.outputs.subscriber }} + litmus-demo: ${{ steps.filter.outputs.litmus-demo }} dex-server: ${{ steps.filter.outputs.dex-server }} steps: # For pull requests it's not necessary to checkout the code @@ -38,6 +39,8 @@ jobs: - 'litmus-portal/upgrade-agents/control-plane/**' subscriber: - 'litmus-portal/cluster-agents/subscriber/**' + litmus-demo: + - 'demo/1.x/**' dex-server: - 'litmus-portal/dex-server/**' @@ -209,6 +212,19 @@ jobs: cd litmus-portal/frontend docker build . -f Dockerfile --build-arg TARGETARCH=amd64 + docker-build-litmus-demo: + runs-on: ubuntu-latest + needs: + - changes + if: ${{ needs.changes.outputs.litmus-demo == 'true' }} + steps: + - name: Checkout code + uses: actions/checkout@v2 + - name: Build litmus-demo docker image + shell: bash + run: | + cd demo/1.x + docker build . 
-f Dockerfile docker-build-dex-server: runs-on: ubuntu-latest needs: diff --git a/demo/1.x/Dockerfile b/demo/1.x/Dockerfile new file mode 100644 index 000000000..d7af6745b --- /dev/null +++ b/demo/1.x/Dockerfile @@ -0,0 +1,44 @@ +# select base image from https://hub.docker.com/_/docker +FROM docker:stable + +# Add Maintainer Info +LABEL maintainer="LitmusDev" + +# Set common env variable +ARG HOME_DIR="/litmus_demo" +ENV HOME_DIR=${HOME_DIR} + + +WORKDIR ${HOME_DIR} + +RUN apk add --update --no-cache build-base python3-dev python3 jpeg-dev zlib-dev libffi-dev libressl-dev bash git gettext curl \ + && curl -O https://bootstrap.pypa.io/get-pip.py \ + && python3 get-pip.py \ + && pip install cryptography==3.3.1 \ + && pip install --upgrade six docker-compose + +# install kubectl +COPY --from=lachlanevenson/k8s-kubectl:latest /usr/local/bin/kubectl /usr/local/bin/kubectl + +# install helm +COPY --from=alpine/helm:latest /usr/bin/helm /usr/bin/helm + +# install kind +RUN curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.9.0/kind-linux-amd64 \ +&& chmod +x ./kind && mv ./kind /usr/local/bin/kind + +# install gcloud and put its bin directory on PATH +RUN curl -sSL https://sdk.cloud.google.com | bash +ENV PATH="${PATH}:/root/google-cloud-sdk/bin" + +# install eksctl +COPY --from=weaveworks/eksctl:latest /usr/local/bin/eksctl /usr/local/bin/eksctl + +# install python dependencies (requirements.txt is copied on its own, before the rest of the sources) +COPY requirements.txt /tmp/requirements.txt +RUN pip3 install -r /tmp/requirements.txt + +COPY . ./ + +ENTRYPOINT ["python3", "manage.py"] +CMD ["-h"] diff --git a/demo/1.x/Makefile b/demo/1.x/Makefile new file mode 100644 index 000000000..7830bc1f3 --- /dev/null +++ b/demo/1.x/Makefile @@ -0,0 +1,54 @@ +# Makefile for building Litmus Demo +# Reference Guide - https://www.gnu.org/software/make/manual/make.html + +# +# Internal variables or constants. +# NOTE - These will be executed when any make target is invoked. +# +IS_DOCKER_INSTALLED = $(shell which docker >> /dev/null 2>&1; echo $$?) 
+ +.DEFAULT_GOAL := help +VERSION ?= latest + +ORG_NAME ?= litmuschaos +BASE_IMAGE_NAME ?= litmus-demo +CONTAINER_INSTANCE ?= default + +# Add the following 'help' target to your Makefile +# And add help text after each target name starting with '\#\#' +.PHONY: help deps _build_check_docker build exec stop +help: ##Show this help. + @fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##//' + +## deps: Check if docker is installed or not +deps: _build_check_docker + +_build_check_docker: + @echo "------------------" + @echo "--> Check the Docker deps" + @echo "------------------" + @if [ $(IS_DOCKER_INSTALLED) -ne 0 ]; \ + then echo "" \ + && echo "ERROR:\tdocker is not installed. Please install it before build." \ + && echo "" \ + && exit 1; \ + fi; + +## build: Builds litmus-demo docker image +build: deps + @echo "------------------" + @echo "--> Building Litmus-Demo" + @docker build -t $(ORG_NAME)/$(BASE_IMAGE_NAME):$(VERSION) . + @echo "------------------" + +## exec: Login to container and execute litmus demo commands +exec: + @echo "------------------" + @echo "--> Login to Litmus Demo container" + @docker run --rm --name $(BASE_IMAGE_NAME)-$(CONTAINER_INSTANCE) \ + -v /var/run/docker.sock:/var/run/docker.sock --net="host" \ + -it --entrypoint bash $(ORG_NAME)/$(BASE_IMAGE_NAME):$(VERSION) + +## stop: stops docker container +stop: + @docker stop $(BASE_IMAGE_NAME)-$(CONTAINER_INSTANCE) diff --git a/demo/1.x/README.md b/demo/1.x/README.md new file mode 100644 index 000000000..39486bfc4 --- /dev/null +++ b/demo/1.x/README.md @@ -0,0 +1,242 @@ +# Litmus Kubernetes Demo Environment + +The purpose of this repository is to familiarize oneself with running litmus chaos experiments in a realistic app environment running multiple services on different Kubernetes clusters. 
+ +It makes it easy to spin up a fully deployed [GKE](https://cloud.google.com/kubernetes-engine/) or [EKS](https://aws.amazon.com/eks/) cluster with a microservice application, or a [KinD](https://kind.sigs.k8s.io/docs/user/quick-start/) (Kubernetes-in-Docker) cluster, which is lightweight and easy to use for running the application and performing chaos. The demo uses +[Sock Shop](https://github.com/microservices-demo/microservices-demo), and +[Litmus Chaos Engine](https://litmuschaos.github.io/litmus/) to create chaos scenarios. + +After cloning this repository, starting the litmus demo container, and using the `start` command to create the fully deployed cluster, you will be able to run Litmus Chaos experiments in the cluster using the `test` command. You can find all the experiment configurations under the `/litmus` directory of this repository and the script to deploy and run them in `manage.py`. + +It currently works with KinD, GKE and EKS: you can either use a KinD cluster by following the steps below, or you need a Google Cloud account to run it on GKE or an AWS account to run it on EKS. Support for Azure is planned for the future. + +## Requirements + +1. Docker 18.09 or greater + +## Setup Docker Container +You can set up and run the demo from a containerized environment by following the steps below. + + +```bash +git clone https://github.com/litmuschaos/litmus.git +cd litmus/demo/1.x/ +``` + +_Build Docker Image_ + +```bash +docker build -t litmuschaos/litmus-demo . +``` +OR +```bash +make build +``` + +Run the docker container interactively; you can then run any of the commands mentioned [here](#usage) with python3. + +```bash +docker run -v /var/run/docker.sock:/var/run/docker.sock --net="host" -it --entrypoint bash litmuschaos/litmus-demo +$ python3 manage.py -h +``` +OR +```bash +make exec +``` + +You can run commands inside the container {-h, start, test, list, stop} ... 
+```bash +$ ./runcmd -h +``` + +You can also run the `manage.py` demo script in a non containerized environment for which you have to install the dependencies.You can refer [Get Started with LitmusChaos in Minutes](https://bit.ly/3kZv3KA) blog for setting up non containerized litmus demo environment. + +## Startup + +To start the GKE cluster and deploy all the required components: + +**_for kind cluster_** +```bash +./manage.py start --platform kind +``` + +**_for GKE cluster_** +```bash +./manage.py start --platform GKE --project {GC_PROJECT} --key {ZE_KEY} +``` +**_for EKS cluster_** +```bash +./manage.py start --platform EKS --name {EKS_CLUSTER_NAME} +``` + +**Flag values for start** + + + + + + + + + + + + + + + + + + + + + + + + + + +
Flag Description Default
--platform or -pt Set the platform on which to start the demo environment. Available platforms are kind, GKE and EKS. Support for other platforms will also be added. Default value is kind
--name or -n Required when --platform is GKE. It sets GKE cluster name Default value is litmus-k8s-demo
--zone or -z Required when --platform is GKE. It sets GCloud Zone to spin GKE cluster up in Default value is us-central1-a
--project or -p Required when --platform is GKE. It sets GCloud Project to spin GKE cluster up in No Default value
+ +## Test + +To run all the Litmus ChaosEngine experiments: + +```bash +./manage.py test +``` +You can optionally add the `--wait=` argument to change the wait time between experiments in minutes. By default, +it is 1 min. + +To run a specific experiment (found under the ./litmus directory): + +```bash +./manage.py test --test=pod-delete +``` + +**Flag values for test** + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Flag Description Default
--test or -t Name of test to run based on yaml file name under /litmus folder. Default value is * (all)
--wait or -w Number of minutes to wait between experiments. Default value is 1 (in min)
--type or -ty Select the type of chaos to be performed. It can have the values pod for pod-level chaos, node for infra/node-level chaos, and all to perform all chaos. Default value is all
--platform or -pt Set the platform to perform chaos. Available platforms are kind and GKE. Default value is kind
--report or -r Set report flag to yes for generating pdf report of the experiment result summary Default value is no
+ +## Usage + +To see full command-line options use the `-h` flag: + +```bash +./manage.py -h +``` + +This will output the following: + +```bash +usage: manage.py [-h] {start,test,list,stop} ... + +Spin up Litmus Demo Environment on Kubernetes. + +positional arguments: + {start,test,list,stop} + start Start a Cluster with the demo environment deployed. + test Run Litmus ChaosEngine Experiments inside litmus demo + environment. + list List all available Litmus ChaosEngine Experiments + available to run. + stop Shutdown the Cluster with the demo environment + deployed. +``` + + +### Notes + +- To view the application deployment picked and the success/failure of reconcile operations (i.e., creation of chaos-runner pod or lack thereof), check the chaos operator logs. Ex: + +```bash +kubectl logs -f chaos-operator-ce-6899bbdb9-jz6jv -n litmus +``` + +- To view the parameters with which the experiment job is created, the status of the experiment, the success of the chaosengine patch operation, and cleanup of the experiment pod, check the logs of the chaos-runner pod. Ex: + +```bash +kubectl logs sock-chaos-runner -n sock-shop +``` + +- To view the logs of the chaos experiment itself, use the value `retain` in `.spec.jobCleanupPolicy` of the chaosengine CR + +```bash +kubectl logs container-kill-1oo8wv-85lsl -n sock-shop +``` + +(The detailed troubleshooting FAQ is available here: https://litmuschaos.github.io/litmus/experiments/faq/content/) + +- To re-run the chaosexperiment, clean up and re-create the chaosengine CR + +```bash +kubectl delete chaosengine sock-chaos -n sock-shop +kubectl apply -f litmus/chaosengine.yaml +``` + +## Generate PDF of the experiment result summary + +We can also generate a PDF report of the experiment result summary using the `--report` flag as follows: + +```bash +./manage.py test --report=yes +``` +It will generate a PDF report named `chaos-report.pdf` in the current location containing the chaos result summary. 
+ +## List + +Lists all the available Litmus Chaos Experiments in this repo under the `./litmus` directory for a particular platform: + +```bash +./manage.py list --platform {PLATFORM_NAME} +``` + + +## Shutdown + +To shut down and destroy the cluster when you're finished: + +**_for kind cluster_** +```bash +./manage.py stop --platform kind +``` + +**_for GKE cluster_** +```bash +./manage.py stop --platform GKE --project {GC_PROJECT} +``` + +**_for EKS cluster_** +```bash +./manage.py stop --platform EKS --name {EKS_CLUSTER_NAME} --awsregion {EKS_REGION_NAME} +``` diff --git a/demo/1.x/deploy/litmus-metrics/01-event-router-cm.yaml b/demo/1.x/deploy/litmus-metrics/01-event-router-cm.yaml new file mode 100644 index 000000000..84c791f7d --- /dev/null +++ b/demo/1.x/deploy/litmus-metrics/01-event-router-cm.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +data: + config.json: |- + { + "sink": "http", + "httpSinkUrl": "http://localhost:8080", + "httpSinkBufferSize": 1500, + "httpSinkDiscardMessages": true + } +kind: ConfigMap +metadata: + name: litmus-eventrouter-http-cm + namespace: litmus diff --git a/demo/1.x/deploy/litmus-metrics/02-event-router.yaml b/demo/1.x/deploy/litmus-metrics/02-event-router.yaml new file mode 100644 index 000000000..7ad17a78a --- /dev/null +++ b/demo/1.x/deploy/litmus-metrics/02-event-router.yaml @@ -0,0 +1,50 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: litmus-eventrouter + name: litmus-eventrouter + namespace: litmus +spec: + replicas: 1 + selector: + matchLabels: + app: litmus-eventrouter + template: + metadata: + labels: + app: litmus-eventrouter + spec: + containers: + - image: containership/eventrouter + imagePullPolicy: IfNotPresent + name: litmus-eventrouter + volumeMounts: + - mountPath: /etc/eventrouter + name: config-volume + serviceAccount: litmus + serviceAccountName: litmus + volumes: + - configMap: + defaultMode: 420 + name: litmus-eventrouter-http-cm + name: config-volume +--- +apiVersion: v1 +kind: Service +metadata: + 
labels: + app: litmus-eventrouter + name: litmus-eventrouter + namespace: litmus +spec: + ports: + - nodePort: 31399 + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: litmus-eventrouter + sessionAffinity: None + type: NodePort + diff --git a/demo/1.x/deploy/litmus-metrics/03-chaos-exporter.yaml b/demo/1.x/deploy/litmus-metrics/03-chaos-exporter.yaml new file mode 100644 index 000000000..ab9695e7d --- /dev/null +++ b/demo/1.x/deploy/litmus-metrics/03-chaos-exporter.yaml @@ -0,0 +1,40 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: chaos-monitor + name: chaos-monitor + namespace: litmus +spec: + replicas: 1 + selector: + matchLabels: + app: chaos-monitor + template: + metadata: + labels: + app: chaos-monitor + spec: + containers: + - image: litmuschaos/chaos-exporter:ci + imagePullPolicy: Always + name: chaos-exporter + serviceAccount: litmus + serviceAccountName: litmus +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: chaos-monitor + name: chaos-monitor + namespace: litmus +spec: + ports: + - port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: chaos-monitor + type: ClusterIP diff --git a/demo/1.x/deploy/litmus-rbac.yaml b/demo/1.x/deploy/litmus-rbac.yaml new file mode 100644 index 000000000..003324d53 --- /dev/null +++ b/demo/1.x/deploy/litmus-rbac.yaml @@ -0,0 +1,33 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: sock-shop-chaos-engine + namespace: sock-shop +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: sock-shop-chaos-engine + namespace: sock-shop +rules: +- apiGroups: ["","apps","batch","extensions","litmuschaos.io","openebs.io","storage.k8s.io"] + resources: 
["chaosengines","chaosexperiments","chaosresults","cstorpools","cstorvolumereplicas","configmaps","secrets","pods","pods/exec","pods/log","pods/eviction","jobs","replicasets","deployments","daemonsets","statefulsets","persistentvolumeclaims","persistentvolumes","storageclasses","services","events"] + verbs: ["create","delete","get","list","patch","update"] +- apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list","patch"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: sock-shop-chaos-engine + namespace: sock-shop +roleRef: + kind: ClusterRole + name: sock-shop-chaos-engine + apiGroup: rbac.authorization.k8s.io +subjects: +- kind: ServiceAccount + name: sock-shop-chaos-engine + namespace: sock-shop diff --git a/demo/1.x/deploy/monitoring/01-monitoring-ns.yaml b/demo/1.x/deploy/monitoring/01-monitoring-ns.yaml new file mode 100644 index 000000000..d32523606 --- /dev/null +++ b/demo/1.x/deploy/monitoring/01-monitoring-ns.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring diff --git a/demo/1.x/deploy/monitoring/02-prometheus-rbac.yaml b/demo/1.x/deploy/monitoring/02-prometheus-rbac.yaml new file mode 100644 index 000000000..22cc141aa --- /dev/null +++ b/demo/1.x/deploy/monitoring/02-prometheus-rbac.yaml @@ -0,0 +1,47 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus + namespace: monitoring + labels: + app: prometheus +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus + labels: + app: prometheus +rules: +- apiGroups: [""] # "" indicates the core API group + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus + labels: + app: prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: 
ClusterRole + name: prometheus +subjects: +- kind: ServiceAccount + name: prometheus + namespace: monitoring + diff --git a/demo/1.x/deploy/monitoring/03-prometheus-configmap.yaml b/demo/1.x/deploy/monitoring/03-prometheus-configmap.yaml new file mode 100644 index 000000000..48cdf4c15 --- /dev/null +++ b/demo/1.x/deploy/monitoring/03-prometheus-configmap.yaml @@ -0,0 +1,122 @@ +apiVersion: v1 +data: + prometheus.yml: | + global: + scrape_interval: 15s + rule_files: + - "/etc/prometheus-rules/alert.rules" + scrape_configs: + - job_name: 'chaos-monitor' + static_configs: + - targets: ['chaos-monitor.litmus.svc.cluster.local:8080'] + - job_name: kubernetes-service-endpoints + kubernetes_sd_configs: + - role: endpoints + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - source_labels: + - __meta_kubernetes_service_label_component + regex: apiserver + action: replace + target_label: __scheme__ + replacement: https + - source_labels: + - __meta_kubernetes_service_label_kubernetes_io_cluster_service + action: drop + regex: 'true' + - source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scrape + action: drop + regex: 'false' + - source_labels: + - __meta_kubernetes_pod_container_port_name + action: drop + regex: .*-noscrape + - source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + action: replace + target_label: __scheme__ + regex: ^(https?)$ + replacement: $1 + - source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_path + action: replace + target_label: __metrics_path__ + regex: ^(.+)$ + replacement: $1 + - source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_port + action: replace + target_label: __address__ + regex: '^(.+)(?::\d+);(\d+)$' + replacement: '$1:$2' + - action: labelmap + regex: ^__meta_kubernetes_service_label_(.+)$ + replacement: $1 + - 
source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_service_name + separator: / + target_label: job + - job_name: kubernetes-pods + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape + action: keep + regex: 'true' + - source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_pod_label_name + separator: / + target_label: job + - source_labels: + - __meta_kubernetes_pod_node_name + target_label: node + - job_name: kubernetes-nodes + kubernetes_sd_configs: + - role: node + tls_config: + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - target_label: __scheme__ + replacement: https + - source_labels: + - __meta_kubernetes_node_label_kubernetes_io_hostname + target_label: instance + - job_name: weave + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_pod_label_name + action: keep + regex: ^kube-system;weave-net$ + - source_labels: + - __meta_kubernetes_pod_container_name + - __address__ + action: replace + target_label: __address__ + regex: '^weave;(.+?)(?::\d+)?$' + replacement: '$1:6782' + - source_labels: + - __meta_kubernetes_pod_container_name + - __address__ + action: replace + target_label: __address__ + regex: '^weave-npc;(.+?)(?::\d+)?$' + replacement: '$1:6781' + - source_labels: + - __meta_kubernetes_pod_container_name + action: replace + target_label: job +kind: ConfigMap +metadata: + name: prometheus-configmap + namespace: monitoring diff --git a/demo/1.x/deploy/monitoring/04-prometheus-alert-rules.yaml b/demo/1.x/deploy/monitoring/04-prometheus-alert-rules.yaml new file mode 100644 index 000000000..0e509b919 --- /dev/null +++ b/demo/1.x/deploy/monitoring/04-prometheus-alert-rules.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +data: + alert.rules: | + # Alert for high error rate in the Sock Shop. 
+ + ALERT HighErrorRate + IF rate(request_duration_seconds_count{status_code="500"}[5m]) > 1 + FOR 5m + LABELS { severity = "slack" } + ANNOTATIONS { + summary = "High HTTP 500 error rates", + description = "Rate of HTTP 500 errors per 5 minutes: {{ $value }}", + } +kind: ConfigMap +metadata: + name: prometheus-alertrules + namespace: monitoring diff --git a/demo/1.x/deploy/monitoring/05-prometheus-deployment.yaml b/demo/1.x/deploy/monitoring/05-prometheus-deployment.yaml new file mode 100644 index 000000000..6aac74fdb --- /dev/null +++ b/demo/1.x/deploy/monitoring/05-prometheus-deployment.yaml @@ -0,0 +1,46 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + name: prometheus-deployment + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus + template: + metadata: + labels: + app: prometheus + name: prometheus + spec: + containers: + - args: + - -storage.local.retention=360h + - -storage.local.memory-chunks=1048576 + - -config.file=/etc/prometheus/prometheus.yml + - -alertmanager.url=http://alertmanager:9093 + image: prom/prometheus:v1.5.2 + imagePullPolicy: IfNotPresent + name: prometheus + ports: + - containerPort: 9090 + name: web + protocol: TCP + volumeMounts: + - mountPath: /etc/prometheus + name: config-volume + - mountPath: /etc/prometheus-rules + name: alertrules-volume + serviceAccount: prometheus + serviceAccountName: prometheus + volumes: + - configMap: + defaultMode: 420 + name: prometheus-configmap + name: config-volume + - configMap: + defaultMode: 420 + name: prometheus-alertrules + name: alertrules-volume diff --git a/demo/1.x/deploy/monitoring/06-prometheus-svc.yaml b/demo/1.x/deploy/monitoring/06-prometheus-svc.yaml new file mode 100644 index 000000000..873372504 --- /dev/null +++ b/demo/1.x/deploy/monitoring/06-prometheus-svc.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/scrape: 'true' + labels: + name: prometheus + name: prometheus + namespace: 
monitoring +spec: + selector: + app: prometheus + type: NodePort + ports: + - name: prometheus + protocol: TCP + port: 9090 + targetPort: 9090 + nodePort: 31090 + diff --git a/demo/1.x/deploy/monitoring/07-grafana-deployment.yaml b/demo/1.x/deploy/monitoring/07-grafana-deployment.yaml new file mode 100644 index 000000000..3da65271a --- /dev/null +++ b/demo/1.x/deploy/monitoring/07-grafana-deployment.yaml @@ -0,0 +1,30 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + name: grafana + spec: + containers: + - image: grafana/grafana:latest + imagePullPolicy: Always + name: grafana + ports: + - containerPort: 3000 + name: grafana + protocol: TCP + volumeMounts: + - mountPath: /var/lib/grafana + name: grafana-storage + volumes: + - emptyDir: {} + name: grafana-storage diff --git a/demo/1.x/deploy/monitoring/08-grafana-svc.yaml b/demo/1.x/deploy/monitoring/08-grafana-svc.yaml new file mode 100644 index 000000000..33279fb1f --- /dev/null +++ b/demo/1.x/deploy/monitoring/08-grafana-svc.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + name: grafana + namespace: monitoring +spec: + ports: + - nodePort: 31687 + port: 3000 + protocol: TCP + targetPort: 3000 + selector: + app: grafana + sessionAffinity: None + type: NodePort diff --git a/demo/1.x/deploy/monitoring/09-grafana-import-dashboard.yaml b/demo/1.x/deploy/monitoring/09-grafana-import-dashboard.yaml new file mode 100644 index 000000000..c1bc3422c --- /dev/null +++ b/demo/1.x/deploy/monitoring/09-grafana-import-dashboard.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +data: + prometheus-datasource.json: | + { + "name": "prometheus", + "type": "prometheus", + "url": "http://prometheus:9090", + "access": "proxy", + "basicAuth": false + } +kind: ConfigMap +metadata: + name: grafana-import-dashboards + namespace: monitoring diff --git 
a/demo/1.x/deploy/monitoring/10-grafana-dashboard.json b/demo/1.x/deploy/monitoring/10-grafana-dashboard.json new file mode 100644 index 000000000..8d30a30c6 --- /dev/null +++ b/demo/1.x/deploy/monitoring/10-grafana-dashboard.json @@ -0,0 +1,2124 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "DS_PROMETHEUS", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 1, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 20, + "panels": [], + "repeat": null, + "title": "Chaos Metrics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 10, + "fillGradient": 5, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "catalogue-cpu-hog", + "color": "#96D98D", + "fillGradient": 5, + "stack": "A", + "steppedLine": true + }, + { + "alias": "orders-memory-hog", + "color": "#E0B400", + "fillGradient": 5, + "stack": "B", + "steppedLine": true + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-cpu-hog\", involved_object_namespace=\"litmus\", 
involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"PostChaosCheck\",involved_object_name=\"catalogue-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "catalogue-cpu-hog", + "refId": "A", + "step": 2 + }, + { + "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"PostChaosCheck\",involved_object_name=\"orders-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "orders-memory-hog", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Chaos Experiments", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "CHAOS = TRUE", + "logBase": 2, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "connected", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "max" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "7.0.6", + "targets": [ + { + "expr": "sum(chaosengine_experiments_count{engine_namespace=\"litmus\",job=\"chaos-monitor\"})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Experiments Run", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "connected", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 15, + "y": 1 + }, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "max" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "7.0.6", + "targets": [ + { + "expr": "sum(chaosengine_passed_experiments{engine_namespace=\"litmus\",job=\"chaos-monitor\"})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Passed Experiments", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + 
], + "nullValueMode": "connected", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 18, + "y": 1 + }, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "max" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "7.0.6", + "targets": [ + { + "expr": "sum(chaosengine_failed_experiments{engine_namespace=\"litmus\",job=\"chaos-monitor\"})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "title": "Failed Experiments", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "connected", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "7.0.6", + "targets": [ + { + "expr": "sum(chaosengine_waiting_experiments{engine_namespace=\"litmus\",job=\"chaos-monitor\"})", + "interval": "", + "intervalFactor": 3, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Queued Experiments", + "type": "gauge" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 
24, + "x": 0, + "y": 10 + }, + "id": 21, + "panels": [], + "repeat": null, + "title": "Orders Metrics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 11 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "2xx", + "lines": true + }, + { + "alias": "Chaos Period", + "color": "#F2495C", + "fillGradient": 7, + "steppedLine": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(request_duration_seconds_count{name=\"orders\",status_code=~\"2..\",route!=\"metrics\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(rate(request_duration_seconds_count{name=\"orders\",status_code=~\"4.+|5.+\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "4xx/5xx", + "refId": "B", + "step": 2 + }, + { + "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"PostChaosCheck\",involved_object_name=\"orders-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))", + "interval": "", + "legendFormat": "Chaos Period", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + 
"title": "Orders QPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "QPS (1 min)", + "logBase": 1, + "min": 0, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": "CHAOS", + "logBase": 2, + "max": "1", + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 11 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "99th quantile", + "lines": true + }, + { + "alias": "50th quantile", + "lines": true + }, + { + "alias": "Mean", + "lines": true + }, + { + "alias": "Chaos Period", + "color": "#E02F44", + "fillGradient": 7, + "steppedLine": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(request_duration_seconds_bucket{name=\"orders\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "99th quantile", + "refId": "A", + "step": 2 + }, + { + "expr": "histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket{name=\"orders\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "50th quantile", + "refId": "B", + "step": 2 + }, + { + "expr": 
"sum(rate(request_duration_seconds_sum{name=\"orders\"}[1m])) / sum(rate(request_duration_seconds_count{name=\"orders\"}[1m]))", + "intervalFactor": 2, + "legendFormat": "Mean", + "refId": "C", + "step": 2 + }, + { + "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"orders-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"PostChaosCheck\",involved_object_name=\"orders-memory-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))", + "interval": "", + "legendFormat": "Chaos Period", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Orders latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": "CHAOS", + "logBase": 2, + "max": "1", + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 22, + "panels": [], + "repeat": null, + "title": "Catalogue Metrics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "hiddenSeries": false, + "id": 1, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + 
"links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "2xx", + "lines": true + }, + { + "alias": "Chaos Period", + "color": "#F2495C", + "fillGradient": 7, + "lines": true, + "steppedLine": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(request_duration_seconds_count{name=\"catalogue\",status_code=~\"2..\",route!=\"metrics\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(rate(request_duration_seconds_count{name=\"catalogue\",status_code=~\"4.+|5.+\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "4xx/5xx", + "refId": "B", + "step": 2 + }, + { + "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"PostChaosCheck\",involved_object_name=\"catalogue-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} OR on() vector(0))", + "interval": "", + "legendFormat": "Chaos Period", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Catalogue QPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "QPS (1 min)", + "logBase": 1, + "min": 0, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": "CHAOS", + "logBase": 2, + "max": "1", + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 
10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "99th quantile", + "lines": true + }, + { + "alias": "50th quantile", + "lines": true + }, + { + "alias": "Mean", + "lines": true + }, + { + "alias": "Chaos Period", + "color": "#E02F44", + "fillGradient": 7, + "lines": true, + "steppedLine": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(request_duration_seconds_bucket{name=\"catalogue\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "99th quantile", + "refId": "A", + "step": 2 + }, + { + "expr": "histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket{name=\"catalogue\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "50th quantile", + "refId": "B", + "step": 2 + }, + { + "expr": "sum(rate(request_duration_seconds_sum{name=\"catalogue\"}[1m])) / sum(rate(request_duration_seconds_count{name=\"catalogue\"}[1m]))", + "intervalFactor": 2, + "legendFormat": "Mean", + "refId": "C", + "step": 2 + }, + { + "expr": "heptio_eventrouter_normal_total{reason=\"ChaosInject\",involved_object_name=\"catalogue-cpu-hog\", involved_object_namespace=\"litmus\", involved_object_kind=\"ChaosEngine\"} - on () (heptio_eventrouter_normal_total{reason=\"PostChaosCheck\",involved_object_name=\"catalogue-cpu-hog\", involved_object_namespace=\"litmus\", 
involved_object_kind=\"ChaosEngine\"} OR on() vector(0))", + "interval": "", + "legendFormat": "Chaos Period", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Catalogue latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": "CHAOS", + "logBase": 2, + "max": "1", + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 23, + "panels": [], + "repeat": null, + "title": "Payment Metrics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(request_duration_seconds_count{name=\"payment\",status_code=~\"2..\",route!=\"metrics\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(rate(request_duration_seconds_count{name=\"payment\",status_code=~\"4.+|5.+\"}[1m])) * 100", +
"intervalFactor": 2, + "legendFormat": "4xx/5xx", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Payment QPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "QPS (1 min)", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(request_duration_seconds_bucket{name=\"payment\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "99th quantile", + "refId": "A", + "step": 2 + }, + { + "expr": "histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket{name=\"payment\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "50th quantile", + "refId": "B", + "step": 2 + }, + { + "expr": "sum(rate(request_duration_seconds_sum{name=\"payment\"}[1m])) / sum(rate(request_duration_seconds_count{name=\"payment\"}[1m]))", + "intervalFactor": 2, + "legendFormat": 
"Mean", + "refId": "C", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Payment latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 24, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(request_duration_seconds_count{name=\"shipping\",status_code=~\"2..\",route!=\"metrics\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(rate(request_duration_seconds_count{name=\"shipping\",status_code=~\"4.+|5.+\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "4xx/5xx", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Shipping QPS", + "tooltip": { + 
"shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "QPS (1 min)", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 14 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(request_duration_seconds_bucket{name=\"shipping\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "99th quantile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket{name=\"shipping\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "50th quantile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(request_duration_seconds_sum{name=\"shipping\"}[1m])) / sum(rate(request_duration_seconds_count{name=\"shipping\"}[1m]))", + "intervalFactor": 2, + "legendFormat": "Mean", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Shipping latency", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Shipping Metrics", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 25, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(request_duration_seconds_count{name=\"user\",status_code=~\"2..\",route!=\"metrics\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(rate(request_duration_seconds_count{name=\"user\",status_code=~\"4.+|5.+\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "4xx/5xx", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "User QPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + 
"show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "QPS (1 min)", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 6 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(request_duration_seconds_bucket{name=\"user\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "99th quantile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket{name=\"user\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "50th quantile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(request_duration_seconds_sum{name=\"user\"}[1m])) / sum(rate(request_duration_seconds_count{name=\"user\"}[1m]))", + "intervalFactor": 2, + "legendFormat": "Mean", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "User latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"format": "s", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "User Metrics", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 26, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(request_duration_seconds_count{name=\"front-end\",status_code=~\"2..\",route!=\"metrics\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(rate(request_duration_seconds_count{name=\"front-end\",status_code=~\"4.+|5.+\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "4xx/5xx", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Frontend QPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "QPS (1 min)", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": 
"short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(request_duration_seconds_bucket{name=\"front-end\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "99th quantile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket{name=\"front-end\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "50th quantile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(request_duration_seconds_sum{name=\"front-end\"}[1m])) / sum(rate(request_duration_seconds_count{name=\"front-end\"}[1m]))", + "intervalFactor": 2, + "legendFormat": "Mean", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Frontend latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + 
"yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Frontend Metrics", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 27, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(request_duration_seconds_count{name=\"carts\",status_code=~\"2..\",route!=\"metrics\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(rate(request_duration_seconds_count{name=\"carts\",status_code=~\"4.+|5.+\"}[1m])) * 100", + "intervalFactor": 2, + "legendFormat": "4xx/5xx", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cart QPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "QPS (1 min)", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + 
"bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 8 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(request_duration_seconds_bucket{name=\"carts\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "99th quantile", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket{name=\"carts\"}[1m])) by (name, le))", + "intervalFactor": 2, + "legendFormat": "50th quantile", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(request_duration_seconds_sum{name=\"carts\"}[1m])) / sum(rate(request_duration_seconds_count{name=\"carts\"}[1m]))", + "intervalFactor": 2, + "legendFormat": "Mean", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cart latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Cart Metrics", + "type": "row" + } + ], + "refresh": "5s", + 
"schemaVersion": 25, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Sock-Shop Performance", + "uid": "paRJzVnGk", + "version": 6 +} diff --git a/demo/1.x/deploy/random-log-counter.yaml b/demo/1.x/deploy/random-log-counter.yaml new file mode 100644 index 000000000..4be771372 --- /dev/null +++ b/demo/1.x/deploy/random-log-counter.yaml @@ -0,0 +1,28 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: random-counter +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: random-counter + labels: + name: random-counter + namespace: random-counter +spec: + replicas: 1 + selector: + matchLabels: + name: random-counter + template: + metadata: + labels: + name: random-counter + spec: + containers: + - name: random-log-counter + image: zebrium/random-log-counter:latest + imagePullPolicy: Always +--- \ No newline at end of file diff --git a/demo/1.x/deploy/sock-shop.yaml b/demo/1.x/deploy/sock-shop.yaml new file mode 100644 index 000000000..f1fb5364b --- /dev/null +++ b/demo/1.x/deploy/sock-shop.yaml @@ -0,0 +1,800 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: sock-shop +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: carts-db + labels: + name: carts-db + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: carts-db + app: sock-shop + template: + metadata: + labels: + name: carts-db + app: sock-shop + spec: + containers: + - name: carts-db + image: mongo + ports: + - name: mongo + containerPort: 27017 + resources: + requests: + ephemeral-storage: "1Gi" + limits: + ephemeral-storage: "2Gi" + securityContext: + capabilities: 
+ drop: + - all + add: + - CHOWN + - SETGID + - SETUID + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /tmp + name: tmp-volume + volumes: + - name: tmp-volume + emptyDir: + medium: Memory + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: carts-db + labels: + name: carts-db + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 27017 + targetPort: 27017 + selector: + name: carts-db +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: carts + labels: + name: carts + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: carts + app: sock-shop + template: + metadata: + labels: + name: carts + app: sock-shop + spec: + containers: + - name: carts + image: weaveworksdemos/carts:0.4.8 + ports: + - containerPort: 80 + env: + - name: ZIPKIN + value: zipkin.jaeger.svc.cluster.local + - name: JAVA_OPTS + value: -Xms64m -Xmx128m -XX:PermSize=32m -XX:MaxPermSize=64m -XX:+UseG1GC -Djava.security.egd=file:/dev/urandom + securityContext: + runAsNonRoot: true + runAsUser: 10001 + capabilities: + drop: + - all + add: + - NET_BIND_SERVICE + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /tmp + name: tmp-volume + volumes: + - name: tmp-volume + emptyDir: + medium: Memory + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: carts + labels: + name: carts + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 80 + targetPort: 80 + selector: + name: carts +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: catalogue-db + labels: + name: catalogue-db + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: catalogue-db + app: sock-shop + template: + metadata: + labels: + name: catalogue-db + app: 
sock-shop + spec: + containers: + - name: catalogue-db + image: weaveworksdemos/catalogue-db:0.3.0 + env: + - name: MYSQL_ROOT_PASSWORD + value: fake_password + - name: MYSQL_DATABASE + value: socksdb + ports: + - name: mysql + containerPort: 3306 + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: catalogue-db + labels: + name: catalogue-db + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 3306 + targetPort: 3306 + selector: + name: catalogue-db +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: catalogue + labels: + name: catalogue + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: catalogue + app: sock-shop + template: + metadata: + labels: + name: catalogue + app: sock-shop + spec: + containers: + - name: catalogue + image: weaveworksdemos/catalogue:0.3.5 + ports: + - containerPort: 80 + securityContext: + runAsNonRoot: true + runAsUser: 10001 + capabilities: + drop: + - all + add: + - NET_BIND_SERVICE + readOnlyRootFilesystem: true + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: catalogue + labels: + name: catalogue + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 80 + targetPort: 80 + selector: + name: catalogue +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: front-end + labels: + name: front-end + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: front-end + app: sock-shop + template: + metadata: + labels: + name: front-end + app: sock-shop + spec: + containers: + - name: front-end + image: weaveworksdemos/front-end:0.3.12 + resources: + requests: + cpu: 100m + memory: 100Mi + ports: + - containerPort: 8079 + securityContext: + runAsNonRoot: true + runAsUser: 
10001 + capabilities: + drop: + - all + readOnlyRootFilesystem: true + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: front-end + labels: + name: front-end + namespace: sock-shop +spec: + type: NodePort + ports: + - port: 80 + targetPort: 8079 + nodePort: 30001 + selector: + name: front-end +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: orders-db + labels: + name: orders-db + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: orders-db + app: sock-shop + template: + metadata: + labels: + name: orders-db + app: sock-shop + spec: + containers: + - name: orders-db + image: mongo + ports: + - name: mongo + containerPort: 27017 + securityContext: + capabilities: + drop: + - all + add: + - CHOWN + - SETGID + - SETUID + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /tmp + name: tmp-volume + volumes: + - name: tmp-volume + emptyDir: + medium: Memory + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: orders-db + labels: + name: orders-db + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 27017 + targetPort: 27017 + selector: + name: orders-db +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: orders + labels: + name: orders + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: orders + app: sock-shop + template: + metadata: + labels: + name: orders + app: sock-shop + spec: + containers: + - name: orders + image: weaveworksdemos/orders:0.4.7 + env: + - name: ZIPKIN + value: zipkin.jaeger.svc.cluster.local + - name: JAVA_OPTS + value: -Xms64m -Xmx128m -XX:PermSize=32m -XX:MaxPermSize=64m -XX:+UseG1GC -Djava.security.egd=file:/dev/urandom + ports: + - containerPort: 80 + securityContext: + runAsNonRoot: true + 
runAsUser: 10001 + capabilities: + drop: + - all + add: + - NET_BIND_SERVICE + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /tmp + name: tmp-volume + volumes: + - name: tmp-volume + emptyDir: + medium: Memory + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: orders + labels: + name: orders + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 80 + targetPort: 80 + selector: + name: orders +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: payment + labels: + name: payment + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: payment + app: sock-shop + template: + metadata: + labels: + name: payment + app: sock-shop + spec: + containers: + - name: payment + image: weaveworksdemos/payment:0.4.3 + ports: + - containerPort: 80 + securityContext: + runAsNonRoot: true + runAsUser: 10001 + capabilities: + drop: + - all + add: + - NET_BIND_SERVICE + readOnlyRootFilesystem: true + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: payment + labels: + name: payment + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 80 + targetPort: 80 + selector: + name: payment +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: queue-master + labels: + name: queue-master + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: queue-master + app: sock-shop + template: + metadata: + labels: + name: queue-master + app: sock-shop + spec: + containers: + - name: queue-master + image: weaveworksdemos/queue-master:0.3.1 + ports: + - containerPort: 80 + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: queue-master + labels: + name: queue-master + 
annotations: + prometheus.io/path: "/prometheus" + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 80 + targetPort: 80 + selector: + name: queue-master +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rabbitmq + labels: + name: rabbitmq + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: rabbitmq + app: sock-shop + template: + metadata: + labels: + name: rabbitmq + app: sock-shop + spec: + containers: + - name: rabbitmq + image: rabbitmq:3.6.8 + ports: + - containerPort: 5672 + securityContext: + capabilities: + drop: + - all + add: + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + readOnlyRootFilesystem: true + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: rabbitmq + labels: + name: rabbitmq + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 5672 + targetPort: 5672 + selector: + name: rabbitmq +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: shipping + labels: + name: shipping + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: shipping + app: sock-shop + template: + metadata: + labels: + name: shipping + app: sock-shop + spec: + containers: + - name: shipping + image: weaveworksdemos/shipping:0.4.8 + env: + - name: ZIPKIN + value: zipkin.jaeger.svc.cluster.local + - name: JAVA_OPTS + value: -Xms64m -Xmx128m -XX:PermSize=32m -XX:MaxPermSize=64m -XX:+UseG1GC -Djava.security.egd=file:/dev/urandom + ports: + - containerPort: 80 + securityContext: + runAsNonRoot: true + runAsUser: 10001 + capabilities: + drop: + - all + add: + - NET_BIND_SERVICE + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /tmp + name: tmp-volume + volumes: + - name: tmp-volume + emptyDir: + medium: Memory + nodeSelector: + 
beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: shipping + labels: + name: shipping + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 80 + targetPort: 80 + selector: + name: shipping +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: user-db + labels: + name: user-db + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: user-db + app: sock-shop + template: + metadata: + labels: + name: user-db + app: sock-shop + spec: + containers: + - name: user-db + image: weaveworksdemos/user-db:0.4.0 + ports: + - name: mongo + containerPort: 27017 + securityContext: + capabilities: + drop: + - all + add: + - CHOWN + - SETGID + - SETUID + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /tmp + name: tmp-volume + volumes: + - name: tmp-volume + emptyDir: + medium: Memory + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: user-db + labels: + name: user-db + namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 27017 + targetPort: 27017 + selector: + name: user-db +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: user + labels: + name: user + app: sock-shop + annotations: + litmuschaos.io/chaos: "true" + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: user + app: sock-shop + template: + metadata: + labels: + name: user + app: sock-shop + spec: + containers: + - name: user + image: weaveworksdemos/user:0.4.7 + ports: + - containerPort: 80 + env: + - name: MONGO_HOST + value: user-db:27017 + securityContext: + runAsNonRoot: true + runAsUser: 10001 + capabilities: + drop: + - all + add: + - NET_BIND_SERVICE + readOnlyRootFilesystem: true + nodeSelector: + beta.kubernetes.io/os: linux +--- +apiVersion: v1 +kind: Service +metadata: + name: user + labels: + name: user 
+ namespace: sock-shop +spec: + ports: + # the port that this service should serve on + - port: 80 + targetPort: 80 + selector: + name: user +--- +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: basic-ingress + namespace: sock-shop +spec: + backend: + serviceName: front-end + servicePort: 80 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: user-load + labels: + name: user-load + namespace: sock-shop +spec: + replicas: 1 + selector: + matchLabels: + name: user-load + template: + metadata: + labels: + name: user-load + spec: + containers: + - name: user-load + image: weaveworksdemos/load-test + args: ["-h", "front-end:80", "-r", "9999999"] diff --git a/demo/1.x/images/litmus.png b/demo/1.x/images/litmus.png new file mode 100644 index 000000000..24b1e47a6 Binary files /dev/null and b/demo/1.x/images/litmus.png differ diff --git a/demo/1.x/kind-setup/kind-config.yaml b/demo/1.x/kind-setup/kind-config.yaml new file mode 100644 index 000000000..40ac8b4b9 --- /dev/null +++ b/demo/1.x/kind-setup/kind-config.yaml @@ -0,0 +1,6 @@ +apiVersion: kind.x-k8s.io/v1alpha4 +kind: Cluster +nodes: +- role: control-plane +- role: worker +- role: worker \ No newline at end of file diff --git a/demo/1.x/litmus/container-kill.yaml b/demo/1.x/litmus/container-kill.yaml new file mode 100644 index 000000000..ef7143cb4 --- /dev/null +++ b/demo/1.x/litmus/container-kill.yaml @@ -0,0 +1,26 @@ +apiVersion: litmuschaos.io/v1alpha1 +kind: ChaosEngine +metadata: + name: sock-chaos + namespace: sock-shop +spec: + engineState: 'active' + appinfo: + appns: sock-shop + # FYI, To see app label, apply kubectl get pods --show-labels + # unique-label of the application under test (AUT) + applabel: "name=carts-db" + appkind: deployment + chaosServiceAccount: sock-shop-chaos-engine + experiments: + - name: container-kill + spec: + components: + env: + - name: TARGET_CONTAINER + value: carts-db + - name: CHAOS_INTERVAL + value: '10' + - name: TOTAL_CHAOS_DURATION + value: '50' + 
diff --git a/demo/1.x/litmus/disk-fill.yaml b/demo/1.x/litmus/disk-fill.yaml
new file mode 100644
index 000000000..9c8449b92
--- /dev/null
+++ b/demo/1.x/litmus/disk-fill.yaml
@@ -0,0 +1,33 @@
+# ChaosEngine for the `disk-fill` experiment; appinfo points at the sock-shop carts-db deployment.
+apiVersion: litmuschaos.io/v1alpha1
+kind: ChaosEngine
+metadata:
+  name: sock-chaos
+  namespace: sock-shop
+spec:
+  engineState: 'active'
+  appinfo:
+    appns: sock-shop
+    # FYI, To see app label, apply kubectl get pods --show-labels
+    # unique-label of the application under test (AUT)
+    applabel: "name=carts-db"
+    appkind: deployment
+  chaosServiceAccount: sock-shop-chaos-engine
+  experiments:
+    - name: disk-fill
+      spec:
+        components:
+          env:
+            # specify the fill percentage according to the disk pressure required
+            # (keep a single FILL_PERCENTAGE entry: repeating the name with an
+            # empty value makes the effective setting ambiguous)
+            - name: FILL_PERCENTAGE
+              value: "80"
+
+            - name: TARGET_CONTAINER
+              value: "carts-db"
+
+            # container directory on the node; this value follows the containerd
+            # CRI layout - adjust it if the cluster uses a different runtime
+            - name: CONTAINER_PATH
+              value: '/var/lib/containerd/io.containerd.grpc.v1.cri/containers/'
diff --git a/demo/1.x/litmus/node-cpu-hog.yaml b/demo/1.x/litmus/node-cpu-hog.yaml
new file mode 100644
index 000000000..2c4e453bf
--- /dev/null
+++ b/demo/1.x/litmus/node-cpu-hog.yaml
@@ -0,0 +1,30 @@
+# ChaosEngine for the `node-cpu-hog` experiment; appinfo points at the sock-shop carts-db deployment.
+apiVersion: litmuschaos.io/v1alpha1
+kind: ChaosEngine
+metadata:
+  name: sock-chaos
+  namespace: sock-shop
+spec:
+  engineState: 'active'
+  appinfo:
+    appns: sock-shop
+    # FYI, To see app label, apply kubectl get pods --show-labels
+    # unique-label of the application under test (AUT)
+    applabel: "name=carts-db"
+    appkind: deployment
+  chaosServiceAccount: sock-shop-chaos-engine
+  experiments:
+    - name: node-cpu-hog
+      spec:
+        components:
+          env:
+            # set chaos duration (in sec) as desired
+            - name: TOTAL_CHAOS_DURATION
+              value: '60'
+
+            - name: NODE_CPU_CORE
+              value: '2'
+
+            # no explicit node list is given here (empty value)
+            - name: TARGET_NODES
+              value: ''
diff --git a/demo/1.x/litmus/node-memory-hog.yaml b/demo/1.x/litmus/node-memory-hog.yaml
new file mode 100644
index 000000000..ee3464102
--- /dev/null
+++ b/demo/1.x/litmus/node-memory-hog.yaml
@@ -0,0 +1,26 @@
+apiVersion: litmuschaos.io/v1alpha1 +kind: ChaosEngine +metadata: + name: sock-chaos + namespace: sock-shop +spec: + engineState: 'active' + appinfo: + appns: sock-shop + # FYI, To see app label, apply kubectl get pods --show-labels + # unique-label of the application under test (AUT) + applabel: "name=carts-db" + appkind: deployment + chaosServiceAccount: sock-shop-chaos-engine + experiments: + - name: node-memory-hog + spec: + components: + env: + # set chaos duration (in sec) as desired + - name: TOTAL_CHAOS_DURATION + value: '120' + + - name: MEMORY_CONSUMPTION_PERCENTAGE + value: '50' + \ No newline at end of file diff --git a/demo/1.x/litmus/pod-cpu-hog.yaml b/demo/1.x/litmus/pod-cpu-hog.yaml new file mode 100644 index 000000000..39edda2c5 --- /dev/null +++ b/demo/1.x/litmus/pod-cpu-hog.yaml @@ -0,0 +1,41 @@ +apiVersion: litmuschaos.io/v1alpha1 +kind: ChaosEngine +metadata: + name: sock-chaos + namespace: sock-shop +spec: + engineState: 'active' + appinfo: + appns: sock-shop + # FYI, To see app label, apply kubectl get pods --show-labels + # unique-label of the application under test (AUT) + applabel: "name=carts-db" + appkind: deployment + chaosServiceAccount: sock-shop-chaos-engine + experiments: + - name: pod-cpu-hog + spec: + components: + env: + # Provide name of target container + # where chaos has to be injected + - name: TARGET_CONTAINER + value: 'carts-db' + + - name: TOTAL_CHAOS_DURATION + value: '60' # in seconds + + - name: CPU_CORES + value: '1' + + ## Percentage of total pods to target + - name: PODS_AFFECTED_PERC + value: '' + + - name: CONTAINER_RUNTIME + value: 'containerd' + + # provide the socket file path + # applicable only for containerd and crio runtime + - name: SOCKET_PATH + value: '/run/containerd/containerd.sock' \ No newline at end of file diff --git a/demo/1.x/litmus/pod-delete.yaml b/demo/1.x/litmus/pod-delete.yaml new file mode 100644 index 000000000..3d066e52e --- /dev/null +++ b/demo/1.x/litmus/pod-delete.yaml @@ -0,0 +1,30 
@@ +apiVersion: litmuschaos.io/v1alpha1 +kind: ChaosEngine +metadata: + name: sock-chaos + namespace: sock-shop +spec: + engineState: 'active' + appinfo: + appns: sock-shop + # FYI, To see app label, apply kubectl get pods --show-labels + # unique-label of the application under test (AUT) + applabel: "name=carts-db" + appkind: deployment + chaosServiceAccount: sock-shop-chaos-engine + experiments: + - name: pod-delete + spec: + components: + env: + # set chaos duration (in sec) as desired + - name: TOTAL_CHAOS_DURATION + value: '30' + + # set chaos interval (in sec) as desired + - name: CHAOS_INTERVAL + value: '5' + + # pod failures without '--force' & default terminationGracePeriodSeconds + - name: FORCE + value: "false" diff --git a/demo/1.x/litmus/pod-memory-hog.yaml b/demo/1.x/litmus/pod-memory-hog.yaml new file mode 100644 index 000000000..b70e13673 --- /dev/null +++ b/demo/1.x/litmus/pod-memory-hog.yaml @@ -0,0 +1,42 @@ +apiVersion: litmuschaos.io/v1alpha1 +kind: ChaosEngine +metadata: + name: sock-chaos + namespace: sock-shop +spec: + engineState: 'active' + appinfo: + appns: sock-shop + # FYI, To see app label, apply kubectl get pods --show-labels + # unique-label of the application under test (AUT) + applabel: "name=carts-db" + appkind: deployment + chaosServiceAccount: sock-shop-chaos-engine + experiments: + - name: pod-memory-hog + spec: + components: + env: + # Provide name of target container + # where chaos has to be injected + - name: TARGET_CONTAINER + value: 'carts-db' + + - name: TOTAL_CHAOS_DURATION + value: '60' # in seconds + + # Enter the amount of memory in megabytes to be consumed by the application pod + - name: MEMORY_CONSUMPTION + value: '500' + + ## percentage of total pods to target + - name: PODS_AFFECTED_PERC + value: '' + + - name: CONTAINER_RUNTIME + value: 'containerd' + + # provide the socket file path + # applicable only for containerd and crio runtime + - name: SOCKET_PATH + value: '/run/containerd/containerd.sock' \ No newline 
at end of file diff --git a/demo/1.x/litmus/pod-network-corruption.yaml b/demo/1.x/litmus/pod-network-corruption.yaml new file mode 100644 index 000000000..e5a446039 --- /dev/null +++ b/demo/1.x/litmus/pod-network-corruption.yaml @@ -0,0 +1,36 @@ +apiVersion: litmuschaos.io/v1alpha1 +kind: ChaosEngine +metadata: + name: sock-chaos + namespace: sock-shop +spec: + engineState: 'active' + appinfo: + appns: sock-shop + # FYI, To see app label, apply kubectl get pods --show-labels + # unique-label of the application under test (AUT) + applabel: "name=carts-db" + appkind: deployment + chaosServiceAccount: sock-shop-chaos-engine + experiments: + - name: pod-network-corruption + spec: + components: + env: + - name: TARGET_CONTAINER + value: "carts-db" + + - name: NETWORK_INTERFACE + value: eth0 + + - name: NETWORK_PACKET_CORRUPTION_PERCENTAGE + value: "100" + + - name: TOTAL_CHAOS_DURATION + value: "60" + + - name: CONTAINER_RUNTIME + value: 'containerd' + + - name: SOCKET_PATH + value: '/run/containerd/containerd.sock' \ No newline at end of file diff --git a/demo/1.x/litmus/pod-network-latency.yaml b/demo/1.x/litmus/pod-network-latency.yaml new file mode 100644 index 000000000..dc19b65b8 --- /dev/null +++ b/demo/1.x/litmus/pod-network-latency.yaml @@ -0,0 +1,38 @@ +apiVersion: litmuschaos.io/v1alpha1 +kind: ChaosEngine +metadata: + name: sock-chaos + namespace: sock-shop +spec: + engineState: 'active' + appinfo: + appns: sock-shop + # FYI, To see app label, apply kubectl get pods --show-labels + # unique-label of the application under test (AUT) + applabel: "name=carts-db" + appkind: deployment + chaosServiceAccount: sock-shop-chaos-engine + experiments: + - name: pod-network-latency + spec: + components: + env: + #Container name where chaos has to be injected + - name: TARGET_CONTAINER + value: 'carts-db' + + #Network interface inside target container + - name: NETWORK_INTERFACE + value: 'eth0' + + - name: NETWORK_LATENCY + value: '60000' + + - name: 
TOTAL_CHAOS_DURATION + value: '60' # in seconds + + - name: CONTAINER_RUNTIME + value: 'containerd' + + - name: SOCKET_PATH + value: '/run/containerd/containerd.sock' \ No newline at end of file diff --git a/demo/1.x/litmus/pod-network-loss.yaml b/demo/1.x/litmus/pod-network-loss.yaml new file mode 100644 index 000000000..91f359eb0 --- /dev/null +++ b/demo/1.x/litmus/pod-network-loss.yaml @@ -0,0 +1,38 @@ +apiVersion: litmuschaos.io/v1alpha1 +kind: ChaosEngine +metadata: + name: sock-chaos + namespace: sock-shop +spec: + engineState: 'active' + appinfo: + appns: sock-shop + # FYI, To see app label, apply kubectl get pods --show-labels + # unique-label of the application under test (AUT) + applabel: "name=carts-db" + appkind: deployment + chaosServiceAccount: sock-shop-chaos-engine + experiments: + - name: pod-network-loss + spec: + components: + env: + #Container name where chaos has to be injected + - name: TARGET_CONTAINER + value: 'carts-db' + + #Network interface inside target container + - name: NETWORK_INTERFACE + value: 'eth0' + + - name: NETWORK_PACKET_LOSS_PERCENTAGE + value: '100' + + - name: TOTAL_CHAOS_DURATION + value: '60' # in seconds + + - name: CONTAINER_RUNTIME + value: 'containerd' + + - name: SOCKET_PATH + value: '/run/containerd/containerd.sock' \ No newline at end of file diff --git a/demo/1.x/manage.py b/demo/1.x/manage.py new file mode 100755 index 000000000..4f85d7d1f --- /dev/null +++ b/demo/1.x/manage.py @@ -0,0 +1,563 @@ +#!/usr/bin/env python3 + +import argparse +import os +import json +import sys +import time +from datetime import datetime +import subprocess +import yaml +from reportlab.lib import colors +from reportlab.lib.pagesizes import LETTER +from reportlab.platypus import SimpleDocTemplate, Paragraph, Table, TableStyle, Image +from reportlab.lib.styles import getSampleStyleSheet + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + 
BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +def print_color(text: str, color:bcolors = bcolors.BOLD): + """ + Utility method to print colored text to stdout. + + :param text: The text to print + :param color: The bcolors to print text in (defaults to bold) + :return: + """ + print(f"{color}{text}{bcolors.ENDC}") + +def run_shell(cmd: str): + """ + Runs a shell command and prints command to stdout before + running so user can see what was run + + :param cmd: The shell command to run + :return: + """ + print_color(f"** RUNNING: {cmd}") + os.system(cmd) + +# Subcommand options +def start(args): + if (f"{args.platform}" == "GKE"): + """ + Start a GKE Cluster with the demo environment deployed. + """ + print_color(f"Starting GKE cluster in project {args.project} with name {args.name} in zone {args.zone}", bcolors.OKBLUE) + + # Ensure GCloud SDK is up to date + run_shell("gcloud components update") + + # Set GCloud project + run_shell(f"gcloud config set project \"{args.project}\"") + + # Spinup cluster + run_shell(f"gcloud container clusters create {args.name} --zone {args.zone} --cluster-version 1.14.10-gke.17 --machine-type n1-standard-2 --no-enable-autoupgrade") + + # Get kubectl credentials + run_shell(f"gcloud container clusters get-credentials {args.name} --zone {args.zone}") + + print_color("\nGKE Cluster Running with following nodes:\n") + run_shell(f"kubectl get nodes") + + # Deploy all demo apps + run_shell("kubectl create -f ./deploy/sock-shop.yaml") + run_shell("kubectl create -f ./deploy/random-log-counter.yaml") + + # Deploy Litmus ChaosOperator to run Experiments that create incidents + run_shell("kubectl apply -f https://litmuschaos.github.io/litmus/litmus-operator-v2.1.0.yaml") + + # Install Litmus Experiments - TEMP Workaround to set experiment versions until Chaos Hub supports in URL + run_shell("curl -sL https://github.com/litmuschaos/chaos-charts/archive/2.1.0.tar.gz -o litmus.tar.gz") + run_shell("tar -zxvf litmus.tar.gz") + run_shell("rm 
litmus.tar.gz") + run_shell("find chaos-charts-2.1.0 -name experiments.yaml | grep generic | xargs kubectl apply -n sock-shop -f") + #run_shell("kubectl create -f https://hub.litmuschaos.io/api/chaos?file=charts/generic/experiments.yaml -n sock-shop") + #run_shell("kubectl create -f https://hub.litmuschaos.io/api/chaos?file=charts/kafka/experiments.yaml -n kafka") + + # Create the chaos serviceaccount with permissions needed to run the generic K8s experiments + run_shell("kubectl create -f ./deploy/litmus-rbac.yaml") + + # Get ingress IP address + run_shell("sleep 60") # Wait 1 min for ingress to finish setting up + print_color("\nIngress Details:\n", bcolors.UNDERLINE) + run_shell("kubectl get ingress basic-ingress --namespace=sock-shop") + + try: + ingress_ip = \ + json.loads(os.popen('kubectl get ingress basic-ingress --namespace=sock-shop -o json').read())["status"][ + "loadBalancer"]["ingress"][0]["ip"] + print_color(f"\nYou can access the web application in a few minutes at: http://{ingress_ip}\n\n") + except: + print_color("Ingress still being setup. 
Use the following command to get the IP later:", bcolors.WARNING) + print_color("\tkubectl get ingress basic-ingress --namespace=sock-shop", bcolors.WARNING) + + print_color("***************************************************************************************************", bcolors.WARNING) + print_color(f"* {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} Finished creating cluster.", bcolors.WARNING) + print_color("* Please wait at least 15 minutes for environment to become fully initalised.") + print_color("* The ingress to access the web application from your browser can take at least 5 minutes to create.", bcolors.WARNING) + print_color("*", bcolors.WARNING) + print_color("*", bcolors.WARNING) + print_color("* IMPORTANT: To reliably detect Chaos experiment incidents you must reduce the Refractory Period for your account to 10 minutes.", bcolors.WARNING) + print_color("*", bcolors.WARNING) + print_color("***************************************************************************************************\n\n", bcolors.WARNING) + + elif (f"{args.platform}" == "kind"): + """ + Start a KinD cluster with the demo environment + """ + print_color(f"Setup kind cluster", bcolors.OKBLUE) + + # install kind if not found + if not os.path.isfile('/usr/local/bin/kind'): + # Installing kind cluster + run_shell(f"curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.11.1/kind-$(uname)-amd64") + run_shell(f"chmod +x ./kind") + run_shell(f"mv ./kind /usr/local/bin/kind") + + # Verify the kind installation + run_shell(f"kind version") + + print_color(f"Starting single node {args.platform} cluster with default name kind", bcolors.OKBLUE) + + # Start a single node kind cluster + run_shell(f"kind create cluster --config kind-setup/kind-config.yaml --wait=5m") + run_shell(f"kubectl cluster-info --context kind-kind") + + # Getting the nodes of the cluster + print_color("\KinD Cluster Running with following nodes:\n") + run_shell(f"kubectl get nodes\n") + + # Deploy all demo apps + 
run_shell("kubectl create -f ./deploy/sock-shop.yaml") + run_shell("kubectl create -f ./deploy/random-log-counter.yaml") + + # Deploy Litmus ChaosOperator to run Experiments that create incidents + run_shell("kubectl apply -f https://litmuschaos.github.io/litmus/litmus-operator-v2.1.0.yaml") + + # Install Litmus Experiments - TEMP Workaround to set experiment versions until Chaos Hub supports in URL + run_shell("curl -sL https://github.com/litmuschaos/chaos-charts/archive/2.1.0.tar.gz -o litmus.tar.gz") + run_shell("tar -zxvf litmus.tar.gz") + run_shell("rm litmus.tar.gz") + run_shell("find chaos-charts-2.1.0 -name experiments.yaml | grep generic | xargs kubectl apply -n sock-shop -f") + + # Create the chaos serviceaccount with permissions needed to run the generic K8s experiments + run_shell("kubectl create -f ./deploy/litmus-rbac.yaml") + + # Get ingress IP address + run_shell("sleep 100") # Wait 1 min for ingress to finish setting up and operator pods to come in Running state + print_color("* Please wait for few minutes for environment to become fully initalised.") + + if (f"{args.platform}" == "EKS"): + """ + Start a EKS Cluster with the demo environment deployed. 
+ """ + print_color(f"Starting EKS cluster with name {args.name} in region {args.awsregion}", bcolors.OKBLUE) + + # Spinup cluster + run_shell(f"eksctl create cluster --region {args.awsregion} --node-type {args.awsnodetype} --nodes {args.awsnodes} " + f"--nodes-min {args.awsnodesmin} --nodes-max {args.awsnodesmax} --name {args.name}") + + # Get kubectl credentials + run_shell(f"eksctl utils write-kubeconfig --cluster={args.name}") + + print_color("\nEKS Cluster Running with following nodes:\n") + run_shell(f"kubectl get nodes") + + # Deploy AWS ALB Ingress Controller + run_shell(f"eksctl utils associate-iam-oidc-provider --region {args.awsregion} --cluster {args.name} --approve") + # Check if the ALBIngressControllerPolicy exists + account_id = subprocess.getoutput("aws sts get-caller-identity --output json --query 'Account'").replace('"', '') + policy_arn = 'arn:aws:iam::' + account_id + ':policy/ALBIngressControllerIAMPolicy' + policy_check_arg = 'aws iam get-policy --policy-arn ' + policy_arn + policy_check = subprocess.getoutput(policy_check_arg) + if 'NoSuchEntity' in policy_check: + run_shell(f"curl -o /tmp/iam-policy.json https://raw.githubusercontent.com/kubernetes-sigs/aws-alb-ingress-controller/v1.1.8/docs/examples/iam-policy.json") + run_shell(f"aws iam create-policy --policy-name ALBIngressControllerIAMPolicy --policy-document file:///tmp/iam-policy.json") + run_shell(f"kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/aws-alb-ingress-controller/v1.1.8/docs/examples/rbac-role.yaml") + run_shell(f"eksctl create iamserviceaccount --region {args.awsregion} --name alb-ingress-controller " + f"--namespace kube-system --cluster {args.name} --attach-policy-arn {policy_arn} --override-existing-serviceaccounts --approve") + run_shell(f"kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/aws-alb-ingress-controller/v1.1.8/docs/examples/alb-ingress-controller.yaml") + run_shell('kubectl patch deployment alb-ingress-controller -n 
kube-system -p \'{"spec": {"template": {"spec": {"containers": [{"name": "alb-ingress-controller", "args": ["--ingress-class=alb", "--cluster-name=%s"]}]}}}}\'' % args.name) + + # Deploy all demo apps + run_shell("kubectl create -f ./deploy/sock-shop.yaml") + run_shell("kubectl annotate ingress basic-ingress -n sock-shop alb.ingress.kubernetes.io/scheme='internet-facing' kubernetes.io/ingress.class='alb'") + run_shell("kubectl create -f ./deploy/random-log-counter.yaml") + + # Deploy Litmus ChaosOperator to run Experiments that create incidents + run_shell("kubectl apply -f https://litmuschaos.github.io/litmus/litmus-operator-v2.1.0.yaml") + + # Install Litmus Experiments - TEMP Workaround to set experiment versions until Chaos Hub supports in URL + run_shell("curl -sL https://github.com/litmuschaos/chaos-charts/archive/2.1.0.tar.gz -o litmus.tar.gz") + run_shell("tar -zxvf litmus.tar.gz") + run_shell("rm litmus.tar.gz") + run_shell("find chaos-charts-2.1.0 -name experiments.yaml | grep generic | xargs kubectl apply -n sock-shop -f") + #run_shell("kubectl create -f https://hub.litmuschaos.io/api/chaos?file=charts/generic/experiments.yaml -n sock-shop") + #run_shell("kubectl create -f https://hub.litmuschaos.io/api/chaos?file=charts/kafka/experiments.yaml -n kafka") + + # Create the chaos serviceaccount with permissions needed to run the generic K8s experiments + run_shell("kubectl create -f ./deploy/litmus-rbac.yaml") + + # Get ingress IP address + run_shell("sleep 60") # Wait 1 min for ingress to finish setting up + print_color("\nIngress Details:\n", bcolors.UNDERLINE) + run_shell("kubectl get ingress basic-ingress --namespace=sock-shop") + + try: + ingress_ip = \ + json.loads(os.popen('kubectl get ingress basic-ingress --namespace=sock-shop -o json').read())["status"][ + "loadBalancer"]["ingress"][0]["ip"] + print_color(f"\nYou can access the web application in a few minutes at: http://{ingress_ip}\n\n") + except: + print_color("Ingress still being setup. 
def stop(args):
    """
    Shut down the cluster that hosts the demo environment.

    Dispatches on ``args.platform`` ("GKE", "kind" or "EKS") and runs the
    matching delete command through ``run_shell``.

    :param args: Parsed CLI arguments. Reads ``platform`` and, depending on the
                 platform, ``project``/``name``/``zone`` (GKE) or
                 ``name``/``awsregion`` (EKS).
    """
    platform = f"{args.platform}"

    if platform == "GKE":
        print_color(f"Stopping GKE cluster in project {args.project} with name {args.name} in zone {args.zone}", bcolors.OKBLUE)

        # Set GCloud project
        run_shell(f"gcloud config set project \"{args.project}\"")

        # Stop cluster
        run_shell(f"gcloud container clusters delete {args.name} --zone {args.zone}")

    elif platform == "kind":
        print_color("Stopping kind cluster", bcolors.OKBLUE)
        run_shell("kind delete cluster")

    elif platform == "EKS":
        print_color(f"Stopping EKS cluster with name {args.name} in {args.awsregion}", bcolors.OKBLUE)

        # Stop cluster
        run_shell(f"eksctl delete cluster --name={args.name} --region={args.awsregion}")

    else:
        # Previously an unknown platform fell through silently, which looked like success.
        print_color(f"ERROR: Unsupported platform '{platform}'. Available platforms are kind, GKE and EKS.", bcolors.FAIL)
        sys.exit(2)


class ExperimentResult(object):
    """
    Holds the outcome of a single experiment run.
    """

    def __init__(self, name: str, status: str, startTime: datetime):
        # name:      experiment name, e.g. "container-kill"
        # status:    verdict string read from the chaosresult resource
        # startTime: moment the experiment was started
        self.name = name
        self.status = status
        self.startTime = startTime


def run_experiment(experiment: str):
    """
    Run a specific experiment and wait for it to complete.

    :param experiment: The name of the experiment as defined in the YAML, i.e. container-kill
    :return: ExperimentResult object with results of experiment
    """
    print_color("***************************************************************************************************", bcolors.OKBLUE)
    print_color(f"* {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} Experiment: {experiment}", bcolors.OKBLUE)
    print_color("***************************************************************************************************", bcolors.OKBLUE)

    experiment_file = experiment + ".yaml"

    # Read the ChaosEngine manifest to learn which engine name and namespace to poll.
    with open(f"./litmus/{experiment_file}") as f:
        # safe_load is enough for a plain manifest and never constructs arbitrary objects.
        spec = yaml.safe_load(f)
    result_name = spec['metadata']['name']
    namespace = spec['metadata']['namespace']

    print_color(f"Running Litmus ChaosEngine Experiment {experiment_file} in namespace {namespace}")
    print_color(f"Deploying {experiment_file}...")
    # Remove any engine left over from a previous run before creating a new one.
    run_shell(f"kubectl delete chaosengine {result_name} -n {namespace}")
    run_shell(f"kubectl create -f ./litmus/{experiment_file} -n {namespace}")

    # Check status of experiment execution
    startTime = datetime.now()
    print_color(f"{startTime.strftime('%Y-%m-%d %H:%M:%S')} Running experiment...")
    expStatusCmd = "kubectl get chaosengine " + result_name + " -o jsonpath='{.status.experiments[0].status}' -n " + namespace
    run_shell(expStatusCmd)
    logs_cmd = f"kubectl logs --since=10s -l name={experiment} -n {namespace}"
    print(f"\n{bcolors.OKGREEN}//** Experiment Logs ({logs_cmd}) **//\n\n")
    try:
        # Poll every 10 seconds, streaming the most recent logs while waiting.
        while subprocess.check_output(expStatusCmd, shell=True).decode('unicode-escape') != "Completed":
            os.system(logs_cmd)
            time.sleep(10)

        print(f"\n\n//** End of Experiment Logs **//{bcolors.ENDC}\n")

        # View experiment results
        run_shell(f"kubectl describe chaosresult {result_name}-{experiment} -n {namespace}")

    except KeyboardInterrupt:
        print_color("User has cancelled script execution.", bcolors.FAIL)
        sys.exit(2)
    except subprocess.CalledProcessError as err:
        # A failing kubectl call is not a user cancellation; report it as what it is.
        print_color(f"ERROR: Failed to query experiment status: {err}", bcolors.FAIL)
        sys.exit(2)

    # Store Experiment Result
    status = subprocess.check_output("kubectl get chaosresult " + result_name + "-" + experiment + " -n " + namespace + " -o jsonpath='{.status.experimentStatus.verdict}'", shell=True).decode('unicode-escape')
    return ExperimentResult(experiment, status, startTime)
Experiment Logs ({logs_cmd}) **//\n\n") + try: + while subprocess.check_output(expStatusCmd, shell=True).decode('unicode-escape') != "Completed": + os.system(logs_cmd) + os.system("sleep 10") + + print(f"\n\n//** End of Experiment Logs **//{bcolors.ENDC}\n") + + # View experiment results + run_shell(f"kubectl describe chaosresult {result_name}-{experiment} -n {namespace}") + + except: + print_color("User has cancelled script execution.", bcolors.FAIL) + sys.exit(2) + + # Store Experiment Result + status = subprocess.check_output("kubectl get chaosresult " + result_name + "-" + experiment + " -n " + namespace + " -o jsonpath='{.status.experimentStatus.verdict}'", shell=True).decode('unicode-escape') + return ExperimentResult(experiment, status, startTime) + +def test(args): + """ + Run Litmus ChaosEngine Experiments inside the demo environment. + Each experiment is defined under its own yaml file under the /litmus directory. You can run + a specific experiment by specifying a test name that matches one of the yaml file names in the directory + but by default all '*' experiments will be run with 20 minute wait period between each experiment + to ensure that it doesn't cluster the incidents together into one incident + """ + startTimeStamp = time.monotonic() + #for GKE platform + if (f"{args.platform}" == "GKE" and (f"{args.type}" == "all")): + experiments = sorted(os.listdir('./litmus')) + + elif (f"{args.platform}" == "GKE" and (f"{args.type}" == "pod")): + experiments = ["container-kill.yaml","disk-fill","pod-cpu-hog","pod-delete","pod-memory-hog","pod-network-corruption","pod-network-latency","pod-network-loss"] + + elif (f"{args.platform}" == "GKE" and (f"{args.type}" == "node")): + kind_supported = ["node-cpu-hog","node-memory-hog"] + + #for kind platform + if ((f"{args.platform}" == "kind") and (f"{args.type}" == "all")): + kind_supported = ["pod-delete","container-kill","node-cpu-hog","node-memory-hog"] + experiments=[s + ".yaml" for s in kind_supported] + + 
elif ((f"{args.platform}" == "kind") and (f"{args.type}" == "pod")): + experiments = ["node-cpu-hog.yaml","node-memory-hog.yaml"] + + elif ((f"{args.platform}" == "kind") and (f"{args.type}" == "node")): + experiments = ["node-cpu-hog.yaml","node-memory-hog.yaml"] + + # for EKS platform + if (f"{args.platform}" == "EKS" and (f"{args.type}" == "all")): + experiments = sorted(os.listdir('./litmus')) + + elif (f"{args.platform}" == "EKS" and (f"{args.type}" == "pod")): + experiments = ["container-kill.yaml", "disk-fill", "pod-cpu-hog", "pod-delete", "pod-memory-hog", + "pod-network-corruption", "pod-network-latency", "pod-network-loss"] + + elif (f"{args.platform}" == "EKS" and (f"{args.type}" == "node")): + kind_supported = ["node-cpu-hog", "node-memory-hog"] + + experiment_results = [] + + if args.test == '*': + # Run all experiments in /litmus directory with wait time between them + print_color(f"Running all Litmus ChaosEngine Experiments with {args.wait} mins wait time between each one...") + lstindex = len(experiments) + for experiment_file in experiments: + result = run_experiment(experiment_file.replace('.yaml', '')) + experiment_results.append(result) + print_color(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} Waiting {args.wait} mins before running next experiment...", bcolors.WARNING) + lstindex -= 1 + if lstindex != 0: + time.sleep(args.wait * 60) + else: + # Check experiment exists + experiment_file = args.test + ".yaml" + if experiment_file in experiments: + result = run_experiment(args.test) + experiment_results.append(result) + else: + print_color(f"ERROR: {experiment_file} not found in ./litmus directory. 
Please check the name and try again.", bcolors.FAIL) + sys.exit(2) + + # Print out experiment result summary + print_color("***************************************************************************************************", bcolors.OKBLUE) + print_color("* Experiments Result Summary", bcolors.OKBLUE) + print_color("***************************************************************************************************\n", bcolors.OKBLUE) + headers = ["#", "Start Time", "Experiment", "Status"] + row_format = "{:>25}" * (len(headers) + 1) + print_color(row_format.format("", *headers), bcolors.OKBLUE) + i = 1 + for result in experiment_results: + if result.status == "Pass": + print_color(row_format.format("", str(i), result.startTime.strftime('%Y-%m-%d %H:%M:%S'), result.name," "+ result.status + " 'carts-db' Service is up and Running after chaos"), bcolors.OKBLUE) + i += 1 + else: + print_color(row_format.format("", str(i), result.startTime.strftime('%Y-%m-%d %H:%M:%S'), result.name, result.status), bcolors.OKBLUE) + i += 1 + print("\n") + currentTimeStamp= time.monotonic() + diffTimeStamp = currentTimeStamp - startTimeStamp + ty_res = time.gmtime(diffTimeStamp) + totalTime = time.strftime("%H:%M:%S",ty_res) + + if (f"{args.report}" == "yes"): + + print_color("Creating PDF Report", bcolors.OKBLUE) + + fileName = 'chaos-report.pdf' + pdf = SimpleDocTemplate(fileName,pagesize=LETTER) + styles = getSampleStyleSheet() + data = [None] + data[0] = ["S.No.", "Start Time", "Experiment", "Status"] + + i =1 + expPassed = 0 + expFailed = 0 + for result in experiment_results: + if result.status == "Pass": + data.append([str(i), result.startTime.strftime('%Y-%m-%d %H:%M:%S'), result.name," "+ result.status + " 'carts-db' Service is up and Running after chaos"]) + i += 1 + expPassed +=1 + else: + data.append([str(i), result.startTime.strftime('%Y-%m-%d %H:%M:%S'), result.name,result.status]) + i += 1 + expFailed +=1 + + table = Table(data) + + picture = Image("images/litmus.png") + 
picture.drawWidth = 100 + picture.drawHeight = 100 + picTable = Table([[picture]],100,100) + + elems = [] + + # Adding logo + elems.append(picTable) + # Adding title + text = "LitmusChaos Report

Experiments Result Summary" + para = Paragraph(text, styles['Title']) + elems.append(para) + + ## Adding result table + elems.append(table) + + style = TableStyle([ + ('BACKGROUND',(0,0),(3,0), colors.green), + ('TEXTCOLOR',(0,0),(-1,0), colors.whitesmoke), + ('ALIGN',(0,0),(-1,-1), 'CENTER'), + ('FONTNAME',(0,0),(-1,0), 'Courier'), + ('FONTSIZE',(0,0),(-1,0), 14), + ('BOTTOMPADDING',(0,0),(-1,0), 12), + ('BACKGROUND',(0,1),(-1,-1), colors.beige), + ]) + + ts = TableStyle([ + ('BOX',(0,0),(-1,-1),1,colors.black) + ]) + + ## Adding table style + table.setStyle(style) + table.setStyle(ts) + + para1 = Paragraph("The total number of passed experiments: %s " % str(expPassed), styles['Heading3']) + elems.append(para1) + para2 = Paragraph("The total number of failed experiments: %s " % str(expFailed), styles['Heading3']) + elems.append(para2) + para3 = Paragraph("The total experiment execution time: %s (HH:MM:SS)" % str(totalTime), styles['Heading3']) + elems.append(para3) + + pdf.build(elems) + print_color("PDF Report Created Successfully ", bcolors.OKBLUE) + + +def list(args): + """ + List all available Litmus Chaos Experiments available in this repository + """ + experiments = sorted(os.listdir('./litmus')) + print_color("Available Litmus Chaos Experiments:\n\n") + if (f"{args.platform}" == "GKE"): + i = 1 + for experiment_file in experiments: + print_color(f"\t{i}. {experiment_file.replace('.yaml', '')}") + i += 1 + + if (f"{args.platform}" == "kind"): + kind_supported = ["pod-delete","container-kill","node-cpu-hog","node-memory-hog"] + i = 0 + for i in range(0, len(kind_supported)): + print_color(f"\t{i+1}. {kind_supported[i]}") + i += 1 + + if (f"{args.platform}" == "EKS"): + i = 1 + for experiment_file in experiments: + print_color(f"\t{i}. 
if __name__ == "__main__":

    # Command line entry point: one sub-command per action (start, test, list, stop).
    # Each sub-parser stores its handler in ``func`` so dispatch is a single call.
    parser = argparse.ArgumentParser(description='Spin up a Demo Environment on Kubernetes.')
    subparsers = parser.add_subparsers()

    # Start command
    parser_start = subparsers.add_parser("start", help="Start a Cluster with the demo environment deployed.")
    parser_start.add_argument("-p", "--project", type=str,
                              help="Set GCloud Project to spin GKE cluster up in")
    parser_start.add_argument("-z", "--zone", type=str, default="us-central1-a",
                              help="Set GCloud Zone to spin GKE cluster up in")
    parser_start.add_argument("-n", "--name", type=str, default="litmus-k8s-demo",
                              help="Set GKE/EKS cluster name")
    parser_start.add_argument("-pt", "--platform", type=str, default="kind",
                              help="Set the platform to start with demo environment. Available platforms are kind, GKE and EKS. Default value is kind")
    parser_start.add_argument("-awsr", "--awsregion", type=str, default="us-east-1",
                              help="Set the AWS Region to spin EKS cluster up in")
    parser_start.add_argument("-awsnt", "--awsnodetype", type=str, default="t3a.medium",
                              help="Set the EC2 instance type")
    parser_start.add_argument("-awsn", "--awsnodes", type=int, default=3,
                              help="Set the number of nodes in the EKS cluster")
    parser_start.add_argument("-awsnmin", "--awsnodesmin", type=int, default=1,
                              help="Set the minimum number of nodes in the auto-scaling group for the EKS cluster")
    parser_start.add_argument("-awsnmax", "--awsnodesmax", type=int, default=4,
                              help="Set the maximum number of nodes in the auto-scaling group for the EKS cluster")
    parser_start.set_defaults(func=start)

    # Test command
    parser_test = subparsers.add_parser("test", help="Run Litmus ChaosEngine Experiments inside litmus demo environment.")
    parser_test.add_argument("-t", "--test", type=str, default="*",
                             help="Name of test to run based on yaml file name under /litmus folder. '*' runs all of them with wait time between each experiment.")
    parser_test.add_argument("-w", "--wait", type=int, default=1,
                             help="Number of minutes to wait between experiments. Defaults to 1 mins to avoid the clustering incidents together.")
    parser_test.add_argument("-pt", "--platform", type=str, default="kind",
                             help="Set the platform to perform chaos. Available platforms are kind, GKE and EKS. Default value is kind")
    parser_test.add_argument("-ty", "--type", type=str, default="all",
                             help="Select the type of chaos to be performed, it can have values pod for pod level chaos,node for infra/node level chaos and all to perform all chaos")
    parser_test.add_argument("-r", "--report", type=str, default="no",
                             help="Select yes to generate the pdf report of the chaos result of different experiment execution")
    parser_test.set_defaults(func=test)

    # List Tests Command
    parser_list = subparsers.add_parser("list", help="List all available Litmus ChaosEngine Experiments available to run.")
    parser_list.add_argument("-pt", "--platform", type=str, default="kind",
                             help="Set the platform to list the chaos experiment available. Available platforms are kind, GKE and EKS. Default value is kind")
    parser_list.set_defaults(func=list)

    # Stop command
    parser_stop = subparsers.add_parser("stop", help="Shutdown the Cluster with the demo environment deployed.")
    parser_stop.add_argument("-p", "--project", type=str,
                             help="Set GCloud Project to spin GKE cluster down in")
    parser_stop.add_argument("-z", "--zone", type=str, default="us-central1-a",
                             help="Set GCloud Zone to spin GKE cluster down in")
    parser_stop.add_argument("-n", "--name", type=str, default="litmus-k8s-demo",
                             help="Set GKE/EKS cluster name")
    parser_stop.add_argument("-awsr", "--awsregion", type=str, default="us-east-1",
                             help="Set AWS Region to spin EKS cluster down in")
    parser_stop.add_argument("-pt", "--platform", type=str, default="kind",
                             help="Set platform which was used to deploy the demo environment. Default value is kind")
    parser_stop.set_defaults(func=stop)

    args = parser.parse_args()
    if not hasattr(args, "func"):
        # Sub-commands are optional for argparse by default, so invoking the script with
        # no command leaves ``func`` unset; show the usage instead of an AttributeError.
        parser.print_help(sys.stderr)
        sys.exit(2)
    args.func(args)