(feat): Chaos Experiment: Added disk loss chaos experiment logic and job for AWS (#909)

* Added logic, k8s job and gcloud utils for disk loss

Signed-off-by: Raj <mail.rajdas@gmail.com>
Raj Babu Das 2019-11-08 15:16:17 +05:30 committed by Chandan Kumar
parent 17950a4e3c
commit 8ddfc62cb8
10 changed files with 109 additions and 13 deletions

View File

@@ -8,8 +8,9 @@ RUN apt-get clean && \
apt-get install -y --no-install-recommends python-minimal python-pip netcat iproute2 jq sshpass \
curl openssh-client python-setuptools && rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip
#Installing ansible and dependencies for k8s module
RUN pip install ansible==2.7.3 openshift jmespath
RUN pip install ansible==2.7.3 openshift jmespath boto boto3
RUN touch /mnt/parameters.yml /mnt/cloud_config.yml
@@ -21,8 +22,7 @@ RUN gcloud --version
#Installing Kubectl
ENV KUBE_LATEST_VERSION="v1.12.0"
RUN curl -L https://storage.googleapis.com/kubernetes-release/release/${KUBE_LATEST_VERSION}/bin/linux/amd64/kubectl -o /usr/local/bin/kubectl && \
chmod +x /usr/local/bin/kubectl && \
curl -o /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.10.3/2018-07-26/bin/linux/amd64/aws-iam-authenticator && \
chmod +x /usr/local/bin/aws-iam-authenticator
chmod +x /usr/local/bin/kubectl
#Adding hosts entries and making ansible folders
RUN mkdir /etc/ansible/ /ansible && \

View File

@@ -0,0 +1,21 @@
- name: Detaching the disk
ec2_vol:
id: "{{ disk_name }}"
# setting "instance" to None makes ec2_vol detach the volume
instance: None
region: "{{ zone_name }}"
- name: chaos injection for {{ c_duration }}s
wait_for:
timeout: "{{ c_duration }}"
- name: Verify that the disk is connected to node (post)
include_tasks: "/utils/cloud/aws/status_disk.yml"
- block:
- name: Attach the disk back to the node if it is still detached
ec2_vol:
instance: "{{ node_name }}"
id: "{{ disk_name }}"
device_name: "{{ device_name }}"
region: "{{ zone_name }}"
when: "inuse == false"

View File

@@ -1,5 +1,5 @@
- name: Detaching the disk
shell: gcloud compute instances detach-disk {{ node_name }} --device-name {{ disk_name }} --zone {{ zone_name }}
shell: gcloud compute instances detach-disk {{ node_name }} --disk {{ disk_name }} --zone {{ zone_name }}
- name: chaos injection for {{ c_duration }}s
wait_for:

View File

@@ -76,35 +76,42 @@
<td> Cloud Platform name </td>
<td> Mandatory </td>
</tr>
<tr>
<td> CLOUD_NAMESPACE </td>
<td> Chaos namespace in which all the infra-chaos resources are created </td>
<td> Mandatory </td>
</tr>
<tr>
<td> PROJECT_ID </td>
<td> GCP project ID </td>
<td> Mandatory </td>
</tr>
<tr>
<td> NODE_NAME </td>
<td> Node name of the cluster </td>
<td> Mandatory </td>
</tr>
<tr>
<td> DISK_NAME </td>
<td> Name of the disk attached to the node (on AWS this is the EBS volume ID); it must be an external disk. </td>
<td> Mandatory </td>
</tr>
<tr>
<td> DEVICE_NAME </td>
<td> Device name under which the disk is attached to the node (used only for AWS). </td>
<td> Mandatory </td>
</tr>
<tr>
<td> ZONE_NAME </td>
<td> Zone Name of the node </td>
<td> Mandatory </td>
</tr>
<tr>
<td> CHAOSENGINE </td>
<td> ChaosEngine CR name associated with the experiment instance </td>
<td> Mandatory </td>
</tr>
<tr>
<td> CHAOS_SERVICE_ACCOUNT </td>
<td> Service account used by litmus </td>
<td> Mandatory </td>

View File

@@ -1,6 +1,8 @@
# All code here is not indented because j2 is space sensitive
# checks if cloud_platform is set or not
{% if cloud_platform is defined and cloud_platform == 'GCP' or cloud_platform == 'AWS' %}
{% if cloud_platform is defined and cloud_platform == 'GCP' %}
c_util: /chaoslib/litmus/platform/gke/disk_loss.yml
{% elif cloud_platform is defined and cloud_platform == 'AWS' %}
c_util: /chaoslib/litmus/platform/aws/disk_loss.yml
{% endif %}
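
For example, when CLOUD_PLATFORM is set to AWS, the rendered util file reduces to the single AWS line:

    c_util: /chaoslib/litmus/platform/aws/disk_loss.yml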

View File

@@ -10,6 +10,7 @@
cloud_platform: "{{ lookup('env','CLOUD_PLATFORM') }}"
c_ns: "{{ lookup('env','CHAOS_NAMESPACE') }}"
disk_name: "{{ lookup('env','DISK_NAME') }}"
device_name: "{{ lookup('env', 'DEVICE_NAME') }}"
node_name: "{{ lookup('env','NODE_NAME') }}"
project_id: "{{ lookup('env','PROJECT_ID') }}"
zone_name: "{{ lookup('env','ZONE_NAME') }}"
@@ -47,14 +48,24 @@
- name: Gcloud authentication
include_tasks: "/utils/cloud/gcp/gcloud_configure.yml"
when: "cloud_platform == 'GCP'"
# AWS authentication
- name: AWS authentication
include_tasks: "/utils/cloud/aws/aws_configure.yml"
when: "cloud_platform == 'AWS'"
## PRE-CHAOS DISK LIVENESS CHECK
- name: Verify that the disk is connected to node (pre)
include_tasks: "/utils/cloud/gcp/status_disk.yml"
when: "cloud_platform == 'GCP'"
- name: Verify that the disk is connected to node (pre)
include_tasks: "/utils/cloud/aws/status_disk.yml"
when: "cloud_platform == 'AWS'"
# Checking disk is attached to node
- debug:
msg: echo "disk attached"
msg: "disk attached"
when: "inuse == true"
## INJECTING CHAOS
@@ -72,10 +83,15 @@
## POST-CHAOS DISK LIVENESS CHECK
- name: Verify that the disk is connected to node (post)
include_tasks: "/utils/cloud/gcp/status_disk.yml"
when: "cloud_platform == 'GCP'"
- name: Verify that the disk is connected to node (post)
include_tasks: "/utils/cloud/aws/status_disk.yml"
when: "cloud_platform == 'AWS'"
# Checking disk is attached to node
- debug:
msg: echo "disk attached"
msg: "disk attached"
when: "inuse == true"
- set_fact:

View File

@@ -57,6 +57,10 @@ spec:
# Enter the disk name
- name: DISK_NAME
value: ''
# Enter the device name
- name: DEVICE_NAME
value: ''
# Enter the zone name
- name: ZONE_NAME
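
For illustration, an AWS-targeted run of this job might fill these fields roughly as follows (all values hypothetical):

    # Enter the cloud platform
    - name: CLOUD_PLATFORM
      value: 'AWS'
    # Enter the disk name
    - name: DISK_NAME
      value: 'vol-0abcd1234ef567890'
    # Enter the device name
    - name: DEVICE_NAME
      value: '/dev/xvdf'
    # Enter the zone name
    - name: ZONE_NAME
      value: 'us-east-1'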

View File

@@ -1,6 +1,8 @@
# All code here is not indented because j2 is space sensitive
# Initially, "inuse" is set to false
{% set disk = namespace(inuse=false) %}
# For GCP
{% if cloud_platform is defined and cloud_platform == 'GCP' %}
{% set expect_user = 'https://www.googleapis.com/compute/v1/projects/' + project_id + '/zones/' + zone_name + '/instances/' + node_name %}
# loop through all the disk users and check if current_user is equal to expect_user
{% for current_user in disk_users.stdout_lines %}
@@ -9,8 +11,23 @@
{% set disk.inuse = true %}
{% endif %}
{% endfor %}
# For AWS
{% elif cloud_platform is defined and cloud_platform == 'AWS' %}
{% set expect_user = node_name %}
# loop through all the disk users and check if current_user is equal to expect_user
{% for current_user in disk_users.volumes %}
{% if current_user.attachment_set.instance_id == expect_user and current_user.attachment_set.status == "attached" %}
# If the condition is true, then set "inuse" to true
{% set disk.inuse = true %}
{% endif %}
{% endfor %}
# This will append inuse: true/false
{% if disk.inuse == true %}
inuse: true
{% else %}
inuse: false
{% endif %}
{% endif %}
{% endif %}
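
Ignoring the comment lines that Jinja2 copies through verbatim, the rendered disk_status_check.yml reduces to a single flag, e.g. for an attached disk:

    inuse: true

The playbook then loads this file with include_vars, which is what the `when: "inuse == true"` checks above act on.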

View File

@@ -0,0 +1,14 @@
- name: Creates directory for aws configuration
file:
path: /root/.aws
state: directory
- name: Creating credential file in aws directory
file:
path: /root/.aws/credentials
state: touch
- name: Copying aws credentials from cloud_config
copy:
src: /mnt/cloud_config.yml
dest: /root/.aws/credentials
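
Because the file is copied verbatim to /root/.aws/credentials, this flow assumes /mnt/cloud_config.yml already holds the standard AWS shared-credentials format that boto/boto3 read, rather than YAML; roughly (placeholder values):

    [default]
    aws_access_key_id = AKIAIOSFODNN7EXAMPLE
    aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY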

View File

@@ -0,0 +1,15 @@
# ec2_vol_facts is deprecated; once python2 is upgraded to python3 in the ansible runner,
# we can change ec2_vol_facts to ec2_vol_info.
- name: Getting disk users
ec2_vol_facts:
filters:
volume-id: "{{ disk_name }}"
register: disk_users
- name: Disk status check
template:
src: disk_status_check.j2
dest: disk_status_check.yml
- include_vars:
file: disk_status_check.yml
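
For reference, disk_status_check.j2 only reads disk_users.volumes[*].attachment_set; the fact registered by ec2_vol_facts is shaped roughly like this for an attached volume (values hypothetical):

    disk_users:
      volumes:
        - id: vol-0abcd1234ef567890
          attachment_set:
            instance_id: i-0123456789abcdef0
            device: /dev/xvdf
            status: attached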