(feat): Chaos Experiment: Added disk loss chaos experiment logic and job for AWS (#909)
* Added logic, k8s job and gcloud utils for disk loss Signed-off-by: Raj <mail.rajdas@gmail.com>
This commit is contained in:
parent
17950a4e3c
commit
8ddfc62cb8
|
|
@ -8,8 +8,9 @@ RUN apt-get clean && \
|
|||
apt-get install -y --no-install-recommends python-minimal python-pip netcat iproute2 jq sshpass \
|
||||
curl openssh-client python-setuptools && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install --upgrade pip
|
||||
# Installing ansible, the dependencies for the k8s module, and boto/boto3
# (needed by the AWS ec2_* modules) in a single layer
RUN pip install ansible==2.7.3 openshift jmespath boto boto3
|
||||
|
||||
RUN touch /mnt/parameters.yml /mnt/cloud_config.yml
|
||||
|
||||
|
|
@ -21,8 +22,7 @@ RUN gcloud --version
|
|||
#Installing Kubectl
|
||||
ENV KUBE_LATEST_VERSION="v1.12.0"
|
||||
RUN curl -L https://storage.googleapis.com/kubernetes-release/release/${KUBE_LATEST_VERSION}/bin/linux/amd64/kubectl -o /usr/local/bin/kubectl && \
|
||||
chmod +x /usr/local/bin/kubectl && \
|
||||
curl -o /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.10.3/2018-07-26/bin/linux/amd64/aws-iam-authenticator && \chmod +x /usr/local/bin/aws-iam-authenticator
|
||||
chmod +x /usr/local/bin/kubectl
|
||||
|
||||
#Adding hosts entries and making ansible folders
|
||||
RUN mkdir /etc/ansible/ /ansible && \
|
||||
|
|
|
|||
|
|
@ -0,0 +1,21 @@
|
|||
# Detach the volume identified by disk_name from its instance.
- name: Detaching the disk
  ec2_vol:
    region: "{{ zone_name }}"
    id: "{{ disk_name }}"
    # The literal string "None" (not a YAML null) asks ec2_vol to detach
    instance: "None"
|
||||
|
||||
# Keep the disk detached for the configured chaos duration
- name: "chaos injection for {{ c_duration }}s"
  wait_for:
    timeout: '{{ c_duration }}'
|
||||
|
||||
# Re-run the AWS disk status check once the chaos window is over
- name: Verify that the disk is connected to node (post)
  include_tasks: '/utils/cloud/aws/status_disk.yml'
|
||||
|
||||
# Re-attach the volume to the node, but only when the status check
# reported that it is not in use
- name: If disk is not attached, it will attach manually
  ec2_vol:
    region: "{{ zone_name }}"
    id: "{{ disk_name }}"
    instance: "{{ node_name }}"
    device_name: "{{ device_name }}"
  when: inuse == false
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
# Detach the disk from the node. disk_name holds a disk name, so it is passed
# with --disk; the task must carry exactly one `shell` key.
- name: Detaching the disk
  shell: gcloud compute instances detach-disk {{ node_name }} --disk {{ disk_name }} --zone {{ zone_name }}
|
||||
|
||||
- name: chaos injection for {{ c_duration }}s
|
||||
wait_for:
|
||||
|
|
|
|||
|
|
@ -76,35 +76,42 @@
|
|||
<td> Cloud Platform name </td>
|
||||
<td> Mandatory </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> CLOUD_NAMESPACE </td>
|
||||
<td> Chaos namespace in which all infra chaos resources will be created </td>
|
||||
<td> Mandatory </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> PROJECT_ID </td>
|
||||
<td> GCP project ID </td>
|
||||
<td> Mandatory </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> NODE_NAME </td>
|
||||
<td> Node name of the cluster </td>
|
||||
<td> Mandatory </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> DISK_NAME </td>
|
||||
<td> Disk Name of the node, it must be an external disk. </td>
|
||||
<td> Mandatory </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> DEVICE_NAME </td>
|
||||
<td> Device name under which the disk should be attached (AWS only). </td>
|
||||
<td> Mandatory </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> ZONE_NAME </td>
|
||||
<td> Zone Name of the node </td>
|
||||
<td> Mandatory </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> CHAOSENGINE </td>
|
||||
<td> ChaosEngine CR name associated with the experiment instance </td>
|
||||
<td> Mandatory </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> CHAOS_SERVICE_ACCOUNT </td>
|
||||
<td> Service account used by Litmus </td>
|
||||
<td> Mandatory </td>
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# All code here is not indented because j2 is space sensitive
|
||||
|
||||
# Pick the platform-specific disk loss utility; no c_util is emitted when
# cloud_platform is unset or is neither GCP nor AWS
{% if cloud_platform is defined and cloud_platform == 'GCP' %}
c_util: /chaoslib/litmus/platform/gke/disk_loss.yml
{% elif cloud_platform is defined and cloud_platform == 'AWS' %}
c_util: /chaoslib/litmus/platform/aws/disk_loss.yml
{% endif %}
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
cloud_platform: "{{ lookup('env','CLOUD_PLATFORM') }}"
|
||||
c_ns: "{{ lookup('env','CHAOS_NAMESPACE') }}"
|
||||
disk_name: "{{ lookup('env','DISK_NAME') }}"
|
||||
device_name: "{{ lookup('env', 'DEVICE_NAME') }}"
|
||||
node_name: "{{ lookup('env','NODE_NAME') }}"
|
||||
project_id: "{{ lookup('env','PROJECT_ID') }}"
|
||||
zone_name: "{{ lookup('env','ZONE_NAME') }}"
|
||||
|
|
@ -47,14 +48,24 @@
|
|||
# Configure gcloud credentials; only relevant on GCP
- name: Gcloud authentication
  when: cloud_platform == 'GCP'
  include_tasks: '/utils/cloud/gcp/gcloud_configure.yml'
|
||||
|
||||
# Configure AWS credentials; only relevant on AWS
- name: AWS authentication
  when: cloud_platform == 'AWS'
  include_tasks: '/utils/cloud/aws/aws_configure.yml'
|
||||
|
||||
## PRE-CHAOS DISK LIVENESS CHECK
|
||||
# GCP variant of the pre-chaos disk status check
- name: Verify that the disk is connected to node (pre)
  when: cloud_platform == 'GCP'
  include_tasks: '/utils/cloud/gcp/status_disk.yml'
|
||||
|
||||
# AWS variant of the pre-chaos disk status check
- name: Verify that the disk is connected to node (pre)
  when: cloud_platform == 'AWS'
  include_tasks: '/utils/cloud/aws/status_disk.yml'
|
||||
|
||||
# Report that the disk is attached to the node. The task carries exactly one
# `msg` key: a plain message, not an `echo` command (debug does not run a shell).
- debug:
    msg: "disk attached"
  when: "inuse == true"
|
||||
|
||||
## INJECTING CHAOS
|
||||
|
|
@ -72,10 +83,15 @@
|
|||
## POST-CHAOS DISK LIVENESS CHECK
|
||||
# GCP variant of the post-chaos disk status check
- name: Verify that the disk is connected to node (post)
  when: cloud_platform == 'GCP'
  include_tasks: '/utils/cloud/gcp/status_disk.yml'
|
||||
|
||||
# AWS variant of the post-chaos disk status check
- name: Verify that the disk is connected to node (post)
  when: cloud_platform == 'AWS'
  include_tasks: '/utils/cloud/aws/status_disk.yml'
|
||||
|
||||
# Report that the disk is attached to the node. The task carries exactly one
# `msg` key: a plain message, not an `echo` command (debug does not run a shell).
- debug:
    msg: "disk attached"
  when: "inuse == true"
|
||||
|
||||
- set_fact:
|
||||
|
|
|
|||
|
|
@ -57,6 +57,10 @@ spec:
|
|||
# Enter the disk name
|
||||
- name: DISK_NAME
|
||||
value: ''
|
||||
|
||||
# Enter the device name
|
||||
- name: DEVICE_NAME
|
||||
value: ''
|
||||
|
||||
# Enter the zone name
|
||||
- name: ZONE_NAME
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# All code here is not indented because j2 is space sensitive
|
||||
# Initially, "inuse" is set to false
|
||||
{% set disk = namespace(inuse=false) %}
|
||||
# For GCP
|
||||
{% if cloud_platform is defined and cloud_platform == 'GCP' %}
|
||||
{% set expect_user = 'https://www.googleapis.com/compute/v1/projects/' + project_id + '/zones/' + zone_name + '/instances/' + node_name %}
|
||||
# loop through all the disk users and checks if current_user is equal to expect_user
|
||||
{% for current_user in disk_users.stdout_lines %}
|
||||
|
|
@ -9,8 +11,23 @@
|
|||
{% set disk.inuse = true %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
# For AWS
{% elif cloud_platform is defined and cloud_platform == 'AWS' %}
{% set expect_user = node_name %}
# loop through all the returned volumes and check whether one is attached to expect_user
{% for current_user in disk_users.volumes %}
{% if current_user.attachment_set.instance_id == expect_user and current_user.attachment_set.status == "attached" %}
# If the condition is true, then set "inuse" to true
{% set disk.inuse = true %}
{% endif %}
{% endfor %}
{% endif %}

# Emitted outside the platform branches so that "inuse" is always defined,
# whichever platform was checked: inuse: true/false
{% if disk.inuse == true %}
inuse: true
{% else %}
inuse: false
{% endif %}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,14 @@
|
|||
# Make sure the directory that holds the AWS configuration exists
- name: Creates directory for aws configuration
  file:
    state: directory
    path: '/root/.aws'
|
||||
|
||||
# Create (or update the timestamp of) the credentials file
- name: Creating credential file in aws directory
  file:
    state: touch
    path: '/root/.aws/credentials'
|
||||
|
||||
# Populate the AWS credentials file from the mounted cloud config.
# The file holds secrets, so restrict it to its owner.
- name: Copying aws credentials from cloud_config
  copy:
    src: /mnt/cloud_config.yml
    dest: /root/.aws/credentials
    mode: "0600"
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
# ec2_vol_facts is deprecated; once the ansible runner is upgraded from
# python2 to python3, this task can switch to ec2_vol_info.
# Looks up the volume by id and registers the result as disk_users.
- name: Getting disk users
  ec2_vol_facts:
    # Query the same region that the ec2_vol attach/detach tasks are given
    region: "{{ zone_name }}"
    filters:
      volume-id: "{{ disk_name }}"
  register: disk_users
|
||||
|
||||
# Render the status template into a vars file
- name: Disk status check
  template:
    dest: 'disk_status_check.yml'
    src: 'disk_status_check.j2'
|
||||
|
||||
# Load the rendered vars file
- include_vars: disk_status_check.yml
|
||||
Loading…
Reference in New Issue