[AWS SageMaker] Integration tests automation (#3768)

* Add initial scripts

* Add working pytest script

* Add environment variable files

* Remove old cluster script

* Update pipeline credentials to OIDC

* Remove debugging mark

* Update example EKS cluster name

* Remove quiet from Docker build

* Manually pass env

* Update env list vars as string

* Update use array directly

* Update variable array to export

* Update to using read for splitting

* Move to helper script

* Update export from CodeBuild

* Add wait for minio

* Update kubectl wait timeout

* Update minor changes for PR

* Update integration test buildspec to quiet build

* Add region to delete EKS

* Add wait for pods

* Updated README

* Add fixed interval wait

* Fix CodeBuild step order

* Add file lock for experiment ID

* Fix missing pytest parameter

* Update run create only once

* Add filelock to conda env

* Update experiment name ensuring creation each time

* Add try/catch with create experiment

* Remove caching from KFP deployment

* Remove disable KFP caching

* Move .gitignore changes to inside component

* Add blank line to default .gitignore

Author: Nicholas Thomson, 2020-05-20 14:18:19 -07:00 (committed via GitHub)
Parent: 4a961ce268
Commit: f2a860b84c
18 changed files with 404 additions and 50 deletions

components/aws/sagemaker/.gitignore (new file)

@@ -0,0 +1,2 @@
# Any environment variable files
**/*/.env

@@ -1,14 +1,24 @@
version: 0.2
env:
  variables:
    CONTAINER_VARIABLES: "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI EKS_PRIVATE_SUBNETS EKS_PUBLIC_SUBNETS PYTEST_ADDOPTS S3_DATA_BUCKET EKS_EXISTING_CLUSTER SAGEMAKER_EXECUTION_ROLE_ARN REGION"
phases:
  build:
    commands:
      - cd components/aws
      - docker build . -f ./sagemaker/tests/integration_tests/Dockerfile -t amazon/integration-test-image --quiet
      - cd sagemaker/codebuild/scripts && export CONTAINER_VARIABLE_ARGUMENTS="$(./construct_environment_array.sh)"
      # Run the container and copy the results to /tmp
      # Passes all host environment variables through to the container
      - docker run --name integration-test-container $(env | cut -f1 -d= | sed 's/^/-e /') amazon/integration-test-image
      - docker cp integration-test-container:/app/tests/integration_tests/integration_tests.log /tmp/results.xml
      # Passes all listed host environment variables through to the container
      - docker run --name integration-test-container $(echo $CONTAINER_VARIABLE_ARGUMENTS) amazon/integration-test-image
  post_build:
    commands:
      - docker cp integration-test-container:/tests/integration_tests/integration_tests.log /tmp/results.xml
      - docker rm -f integration-test-container
reports:

@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# This script breaks up a string of environment variable names into a list of
# parameters that `docker run` accepts. It needs to be a separate script for
# CodeBuild because these commands do not run in dash - the default shell on
# the CodeBuild standard images.
IFS=' ' read -a variable_array <<< $CONTAINER_VARIABLES
printf -v CONTAINER_VARIABLE_ARGUMENTS -- "--env %s " "${variable_array[@]}"
echo $CONTAINER_VARIABLE_ARGUMENTS
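
For example (a minimal sketch; the two variable names below are illustrative, not the full list the CI buildspec passes):

```
$ CONTAINER_VARIABLES="REGION S3_DATA_BUCKET" ./construct_environment_array.sh
--env REGION --env S3_DATA_BUCKET
```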

@@ -8,6 +8,9 @@ phases:
      # Run the container and copy the results to /tmp
      # Passes all host environment variables through to the container
      - docker run --name unit-test-container $(env | cut -f1 -d= | sed 's/^/-e /') amazon/unit-test-image
  post_build:
    commands:
      - docker cp unit-test-container:/app/tests/unit_tests/unit_tests.log /tmp/results.xml
      - docker rm -f unit-test-container

@@ -0,0 +1,12 @@
# If you would like to override the credentials for the container
# AWS_ACCESS_KEY_ID=
# AWS_SECRET_ACCESS_KEY=
# AWS_SESSION_TOKEN=
REGION=us-east-1
SAGEMAKER_EXECUTION_ROLE_ARN=arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole-Example
S3_DATA_BUCKET=my-data-bucket
# If you want to use an existing EKS cluster rather than creating a new one:
# EKS_EXISTING_CLUSTER=my-eks-cluster

@@ -0,0 +1,43 @@
FROM continuumio/miniconda:4.7.12
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    wget \
    git \
    jq

# Install eksctl
RUN curl --location "https://github.com/weaveworks/eksctl/releases/download/0.19.0/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp \
    && mv /tmp/eksctl /usr/local/bin

# Install aws-iam-authenticator
RUN curl -S -o /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.8/2020-04-16/bin/linux/amd64/aws-iam-authenticator \
    && chmod +x /usr/local/bin/aws-iam-authenticator

# Install Kubectl
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.18.0/bin/linux/amd64/kubectl \
    && chmod +x ./kubectl \
    && mv ./kubectl /usr/local/bin/kubectl

# Install Argo CLI
RUN curl -sSL -o /usr/local/bin/argo https://github.com/argoproj/argo/releases/download/v2.8.0/argo-linux-amd64 \
    && chmod +x /usr/local/bin/argo
# Copy conda environment early to avoid cache busting
COPY ./sagemaker/tests/integration_tests/environment.yml environment.yml
# Create conda environment for running tests and set as start-up environment
RUN conda env create -f environment.yml
RUN echo "source activate kfp_test_env" > ~/.bashrc
ENV PATH "/opt/conda/envs/kfp_test_env/bin":$PATH
# Environment variables to be used by tests
ENV REGION="us-west-2"
ENV SAGEMAKER_EXECUTION_ROLE_ARN="arn:aws:iam::1234567890:role/sagemaker-role"
ENV S3_DATA_BUCKET="kfp-test-data"
ENV MINIO_LOCAL_PORT=9000
ENV KFP_NAMESPACE="kubeflow"
COPY ./sagemaker/ .
ENTRYPOINT [ "/bin/bash", "./tests/integration_tests/scripts/run_integration_tests" ]
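
The ENV values above are only local defaults for the test parameters; they can be overridden when the container is started. A minimal sketch (the bucket and role ARN are placeholders, and the image tag matches the one built by the integration-test buildspec):

```
docker run \
  -e REGION=us-east-1 \
  -e S3_DATA_BUCKET=my-data-bucket \
  -e SAGEMAKER_EXECUTION_ROLE_ARN=arn:aws:iam::123456789012:role/sagemaker-role \
  amazon/integration-test-image
```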

@@ -1,42 +1,21 @@
## Requirements
1. [Conda](https://docs.conda.io/en/latest/miniconda.html)
1. [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/)
1. Argo CLI: [Mac](https://github.com/argoproj/homebrew-tap), [Linux](https://eksworkshop.com/advanced/410_batch/install/)
1. K8s cluster with Kubeflow pipelines > 0.4.0 installed
1. [IAM Role](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) with a SageMakerFullAccess and S3FullAccess
1. IAM User credentials with SageMakerFullAccess permissions
1. [Docker](https://www.docker.com/)
1. [IAM Role](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) with the SageMakerFullAccess and AmazonS3FullAccess policies
1. IAM User credentials with SageMakerFullAccess, AWSCloudFormationFullAccess, IAMFullAccess, AmazonEC2FullAccess, AmazonS3FullAccess permissions
## Creating S3 buckets with datasets
Change the bucket name and run the python script `[s3_sample_data_creator.py](https://github.com/kubeflow/pipelines/tree/master/samples/contrib/aws-samples/mnist-kmeans-sagemaker#the-sample-dataset)` to create S3 buckets with mnist dataset in the region where you want to run the tests
In the following Python script, change the bucket name and run [`s3_sample_data_creator.py`](https://github.com/kubeflow/pipelines/tree/master/samples/contrib/aws-samples/mnist-kmeans-sagemaker#the-sample-dataset) to create an S3 bucket with the sample mnist dataset in the region where you want to run the tests.
## Steps to run integration tests
1. Configure AWS credentials with access to EKS cluster
1. Fetch kubeconfig to `~/.kube/config` or set `KUBECONFIG` environment variable to point to kubeconfig of the cluster
1. Create a [secret](https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/) named `aws-secret` in kubeflow namespace with credentials of IAM User for SageMakerFullAccess
```yaml
apiVersion: v1
kind: Secret
metadata:
  name: aws-secret
  namespace: kubeflow
type: Opaque
data:
  AWS_ACCESS_KEY_ID: YOUR_BASE64_ACCESS_KEY
  AWS_SECRET_ACCESS_KEY: YOUR_BASE64_SECRET_ACCESS
```
> Note: To get base64 string, run `echo -n $AWS_ACCESS_KEY_ID | base64`
1. Create conda environment using environment.yml for running tests. Run `conda env create -f environment.yml`
1. Activate the conda environment `conda activate kfp_test_env`
1. Run port-forward to minio service in background. Example: `kubectl port-forward svc/minio-service 9000:9000 -n kubeflow &`
1. Provide the following arguments to pytest:
   1. `region`: AWS region where test will run. Default - us-west-2
   1. `role-arn`: SageMaker execution IAM role ARN
   1. `s3-data-bucket`: Regional S3 bucket in which test data is hosted
   1. `minio-service-port`: Localhost port to which minio service is mapped to. Default - 9000
   1. `kfp-namespace`: Cluster namespace where kubeflow pipelines is installed. Default - Kubeflow
1. cd into this directory and run
   ```
   pytest --region <> --role-arn <> --s3-data-bucket <> --minio-service-port <> --kfp-namespace <>
   ```
1. Copy the `.env.example` file to `.env` and, in the following steps, modify the fields of this new file:
   1. Configure the AWS credentials fields with those of your IAM User.
   1. Update the `SAGEMAKER_EXECUTION_ROLE_ARN` with that of the role created earlier.
   1. Update the `S3_DATA_BUCKET` parameter with the name of the bucket created earlier.
   1. (Optional) If you have already created an EKS cluster for testing, replace the `EKS_EXISTING_CLUSTER` field with its name.
1. Build the image by doing the following:
   1. Navigate to the `components/aws` directory.
   1. Run `docker build . -f sagemaker/tests/integration_tests/Dockerfile -t amazon/integration_test`
1. Run the image, injecting your environment variable file (a sketch for copying the test results out of the container follows this list):
   1. Navigate to the `components/aws` directory.
   1. Run `docker run --env-file sagemaker/tests/integration_tests/.env amazon/integration_test`
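
The CI buildspec copies the JUnit log out of the finished container with `docker cp`; the same pattern works for a local run. A minimal sketch, assuming the container is named explicitly and the log path matches the buildspec:

```
docker run --name integration-test-container \
  --env-file sagemaker/tests/integration_tests/.env amazon/integration_test
docker cp integration-test-container:/tests/integration_tests/integration_tests.log ./results.xml
docker rm -f integration-test-container
```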

@@ -5,6 +5,7 @@ import os
import utils
from datetime import datetime
from filelock import FileLock
def pytest_addoption(parser):
@@ -86,12 +87,29 @@ def kfp_client():
    kfp_installed_namespace = utils.get_kfp_namespace()
    return kfp.Client(namespace=kfp_installed_namespace)


@pytest.fixture(scope="session")
def experiment_id(kfp_client):
    exp_name = datetime.now().strftime("%Y-%m-%d")
def get_experiment_id(kfp_client):
    exp_name = datetime.now().strftime("%Y-%m-%d-%H-%M")
    try:
        experiment = kfp_client.get_experiment(experiment_name=exp_name)
    except ValueError:
        experiment = kfp_client.create_experiment(name=exp_name)
    return experiment.id


@pytest.fixture(scope="session")
def experiment_id(kfp_client, tmp_path_factory, worker_id):
    if not worker_id:
        return get_experiment_id(kfp_client)

    # Locking pattern taken from
    # https://github.com/pytest-dev/pytest-xdist#making-session-scoped-fixtures-execute-only-once
    # Get the temp directory shared by all workers
    root_tmp_dir = tmp_path_factory.getbasetemp().parent
    fn = root_tmp_dir / "experiment_id"
    with FileLock(str(fn) + ".lock"):
        if fn.is_file():
            data = fn.read_text()
        else:
            data = get_experiment_id(kfp_client)
            fn.write_text(data)
    return data
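
The file lock matters because the suite runs under pytest-xdist with one worker per CPU, and each worker gets its own session scope; without the shared file, every worker would create (or race to create) its own experiment. A sketch of the parallel invocation as issued by the run script, with placeholder argument values:

```
cd tests/integration_tests && python -m pytest \
  --region us-west-2 \
  --role-arn arn:aws:iam::123456789012:role/sagemaker-role \
  --s3-data-bucket kfp-test-data \
  --minio-service-port 9000 \
  --kfp-namespace kubeflow \
  --junitxml ./integration_tests.log -n $(nproc)
```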

@@ -12,6 +12,7 @@ dependencies:
  - pyyaml=5.3.*
  - flake8=3.7.*
  - flake8-black=0.1.*
  - filelock=3.0.*
  - pip:
      - kubernetes==11.0.*
      - kfp==0.5.*
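
For a local setup outside the Docker image, the same environment file can be used directly, as in the earlier README instructions (a sketch):

```
conda env create -f environment.yml
conda activate kfp_test_env
```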

@@ -15,6 +15,7 @@ Arguments:
  variant_name_1: variant-1
  instance_type_1: ml.m4.xlarge
  initial_instance_count_1: 1
  initial_variant_weight_1: 1.0
  network_isolation: "True"
  role: ((ROLE_ARN))

@@ -34,7 +34,7 @@ def create_endpoint_pipeline(
        model_artifact_url=model_artifact_url,
        network_isolation=network_isolation,
        role=role,
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))
    )

    sagemaker_deploy_op(
        region=region,
@@ -46,7 +46,7 @@ def create_endpoint_pipeline(
        instance_type_1=instance_type_1,
        initial_instance_count_1=initial_instance_count_1,
        initial_variant_weight_1=initial_variant_weight_1,
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))
    )
if __name__ == "__main__":

@@ -26,7 +26,7 @@ def create_model_pipeline(
        model_artifact_url=model_artifact_url,
        network_isolation=network_isolation,
        role=role,
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))
    )
if __name__ == "__main__":

@@ -56,7 +56,7 @@ def hpo_pipeline(
        network_isolation=network_isolation,
        max_wait_time=max_wait_time,
        role=role,
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))
    )
if __name__ == "__main__":

@@ -46,7 +46,7 @@ def training_pipeline(
        max_wait_time=max_wait_time,
        checkpoint_config=checkpoint_config,
        role=role,
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))
    )
if __name__ == "__main__":

@@ -40,7 +40,7 @@ def batch_transform_pipeline(
        model_artifact_url=model_artifact_url,
        network_isolation=network_isolation,
        role=role,
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))
    )

    sagemaker_batch_transform_op(
        region=region,
@@ -57,7 +57,7 @@ def batch_transform_pipeline(
        split_type=split_type,
        compression_type=compression_type,
        output_location=output_location,
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))
    )
if __name__ == "__main__":

@@ -0,0 +1,68 @@
#!/usr/bin/env bash
# Helper script to generate the IAM Role needed to provide role-based authentication to a KFP service account.
#
# Run as:
# $ ./generate_iam_role ${cluster_arn/cluster_name} ${role_name} ${cluster_region} [optional: ${service_namespace} ${service_account}]
#
CLUSTER_ARN="${1}"
ROLE_NAME="${2}"
CLUSTER_REGION="${3:-us-east-1}"
SERVICE_NAMESPACE="${4:-kubeflow}"
SERVICE_ACCOUNT="${5:-pipeline-runner}"
aws_account=$(aws sts get-caller-identity --query Account --output text)
trustfile="trust.json"
cwd=$(dirname $(realpath $0))
# if using an existing cluster, use the cluster arn to get the region and cluster name
# example, cluster_arn=arn:aws:eks:us-east-1:12345678910:cluster/test
cluster_name=$(echo ${CLUSTER_ARN} | cut -d'/' -f2)
# A function to get the OIDC_ID associated with an EKS cluster
function get_oidc_id {
    # TODO: Ideally this should be based on version compatibility instead of command failure
    eksctl utils associate-iam-oidc-provider --cluster ${cluster_name} --region ${CLUSTER_REGION} --approve
    if [[ $? -ge 1 ]]; then
        eksctl utils associate-iam-oidc-provider --name ${cluster_name} --region ${CLUSTER_REGION} --approve
    fi
    local oidc=$(aws eks describe-cluster --name ${cluster_name} --region ${CLUSTER_REGION} --query cluster.identity.oidc.issuer --output text)
    oidc_id=$(echo ${oidc} | rev | cut -d'/' -f1 | rev)
}

# A function that generates an IAM role for the given account, cluster, namespace, region
# Parameter:
# $1: Name of the trust file to generate.
function create_namespaced_iam_role {
    local trustfile="${1}"
    # Check if role already exists
    aws iam get-role --role-name ${ROLE_NAME}
    if [[ $? -eq 0 ]]; then
        echo "A role for this cluster and namespace already exists in this account, assuming sagemaker access and proceeding."
    else
        echo "IAM Role does not exist, creating a new Role for the cluster"
        aws iam create-role --role-name ${ROLE_NAME} --assume-role-policy-document file://${trustfile} --output=text --query "Role.Arn"
        aws iam attach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
    fi
}

# Remove the generated trust file
# Parameter:
# $1: Name of the trust file to delete.
function delete_generated_file {
    rm "${1}"
}
echo "Get the OIDC ID for the cluster"
get_oidc_id
echo "Delete the trust json file if it already exists"
delete_generated_file "${trustfile}"
echo "Generate a trust json"
"$cwd"/generate_trust_policy ${CLUSTER_REGION} ${aws_account} ${oidc_id} ${SERVICE_NAMESPACE} ${SERVICE_ACCOUNT} > "${trustfile}"
echo "Create the IAM Role using these values"
create_namespaced_iam_role "${trustfile}"
echo "Cleanup for the next run"
delete_generated_file "${trustfile}"
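
Once the role exists it still has to be bound to the KFP service account; the deployment script below does this by annotating the account with the role ARN. Roughly the following, mirroring `install_generated_role` in the run script (namespace, account ID, and role name here are placeholders):

```
kubectl patch serviceaccount -n kubeflow pipeline-runner --patch \
  '{"metadata": {"annotations": {"eks.amazonaws.com/role-arn": "arn:aws:iam::123456789012:role/my-kubeflow-role"}}}'
```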

@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Helper script to generate the trust policy needed to assign role-based authentication to a KFP service account.
#
# Run as:
# $ ./generate_trust_policy ${EKS_CLUSTER_REGION} ${AWS_ACCOUNT_ID} ${OIDC_ID} ${SERVICE_NAMESPACE} ${SERVICE_ACCOUNT} > trust.json
#
# For example:
# $ ./generate_trust_policy us-west-2 123456789012 D48675832CA65BD10A532F597OIDCID > trust.json
# This will create a file `trust.json` containing a role policy that enables the KFP service runner in an EKS cluster to assume AWS roles.
#
# The SERVICE_NAMESPACE parameter is for when you want to run Kubeflow in a custom namespace other than "kubeflow".
# The SERVICE_ACCOUNT parameter is for when you want to give permissions to a service account other than the default "pipeline-runner".
cluster_region="$1"
account_number="$2"
oidc_id="$3"
service_namespace="${4}"
service_account="${5}"
printf '{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Federated": "arn:aws:iam::'"${account_number}"':oidc-provider/oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"'"
            },
            "Action": "sts:AssumeRoleWithWebIdentity",
            "Condition": {
                "StringEquals": {
                    "oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"':aud": "sts.amazonaws.com",
                    "oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"':sub": "system:serviceaccount:'"${service_namespace}"':'"${service_account}"'"
                }
            }
        }
    ]
}
'

@@ -0,0 +1,168 @@
#!/usr/bin/env bash
set -u
set -o pipefail
usage(){
echo "Usage: $0 -n <deployment name> [-r <region>]"
exit 1
}
cwd=$(dirname $(realpath $0))
### Input parameters
DEPLOY_NAME="sagemaker-kfp-"$(date '+%Y-%m-%d-%H-%M-%S')"" # The name given to the entire deployment (tagging all resources)
REGION=${REGION:-"$(aws configure get region)"} # Deployment region
### Configuration parameters
EKS_EXISTING_CLUSTER=${EKS_EXISTING_CLUSTER:-""} # Use an existing EKS cluster
EKS_CLUSTER_VERSION=${EKS_CLUSTER_VERSION:-"1.15"} # EKS cluster K8s version
EKS_NODE_COUNT=${EKS_NODE_COUNT:-"1"} # The initial node count of the EKS cluster
EKS_PUBLIC_SUBNETS=${EKS_PUBLIC_SUBNETS:-""}
EKS_PRIVATE_SUBNETS=${EKS_PRIVATE_SUBNETS:-""}
### Testing parameters
MINIO_LOCAL_PORT=${MINIO_LOCAL_PORT:-9000}
KFP_NAMESPACE=${KFP_NAMESPACE:-"kubeflow"}
KFP_SERVICE_ACCOUNT=${KFP_SERVICE_ACCOUNT:-"pipeline-runner"}
PYTEST_MARKER=${PYTEST_MARKER:-""}
S3_DATA_BUCKET=${S3_DATA_BUCKET:-""}
SAGEMAKER_EXECUTION_ROLE_ARN=${SAGEMAKER_EXECUTION_ROLE_ARN:-""}
while getopts ":n:r:s:" opt; do
    case $opt in
        n)
            DEPLOY_NAME="$OPTARG"
            ;;
        s)
            S3_DATA_BUCKET="$OPTARG"
            ;;
        r)
            REGION="$OPTARG"
            ;;
        \?)
            echo "Invalid option: -$OPTARG" >&2
            exit 1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
    esac
done
# Ensure a deployment name was specified
if [ "$DEPLOY_NAME" == "" ]; then
echo "Missing deployment name"
usage
exit 1
fi
if [ "$S3_DATA_BUCKET" == "" ]; then
echo "Missing S3 data bucket name"
usage
exit 1
fi
function cleanup() {
set +e
cleanup_kfp
delete_generated_role
if [[ -z "${EKS_EXISTING_CLUSTER}" ]]; then
delete_eks
fi
}
# Set the trap to clean up resources in the case of an error
trap cleanup EXIT
set -e
function launch_eks() {
EKS_CLUSTER_NAME="${DEPLOY_NAME}-eks-cluster"
echo "[Creating EKS] Launching EKS cluster $EKS_CLUSTER_NAME"
eksctl_args=( --managed --nodes "${EKS_NODE_COUNT}" --node-type=c5.xlarge --timeout=30m --region "${REGION}" --auto-kubeconfig --version "${EKS_CLUSTER_VERSION}" )
[ ! -z "${EKS_PUBLIC_SUBNETS}" ] && eksctl_args+=( --vpc-public-subnets="${EKS_PUBLIC_SUBNETS}" )
[ ! -z "${EKS_PRIVATE_SUBNETS}" ] && eksctl_args+=( --vpc-private-subnets="${EKS_PRIVATE_SUBNETS}" )
eksctl create cluster "${EKS_CLUSTER_NAME}" "${eksctl_args[@]}"
aws eks update-kubeconfig --name "$EKS_CLUSTER_NAME" --region "$REGION"
echo "[Creating EKS] $EKS_CLUSTER_NAME launched"
}
function delete_eks() {
eksctl delete cluster --name "${EKS_CLUSTER_NAME}" --region "${REGION}"
}
function install_kfp() {
echo "[Installing KFP] Applying KFP manifests"
PIPELINE_VERSION=0.5.1
kubectl apply -k github.com/kubeflow/pipelines/manifests/kustomize/cluster-scoped-resources?ref=$PIPELINE_VERSION
kubectl wait --for condition=established --timeout=60s crd/applications.app.k8s.io
kubectl apply -k github.com/kubeflow/pipelines/manifests/kustomize/env/dev?ref=$PIPELINE_VERSION
echo "[Installing KFP] Port-forwarding Minio"
kubectl wait --for=condition=ready -n "${KFP_NAMESPACE}" pod -l app=minio --timeout=5m
kubectl port-forward -n kubeflow svc/minio-service $MINIO_LOCAL_PORT:9000 &
MINIO_PID=$!
echo "[Installing KFP] Minio port-forwarded to ${MINIO_LOCAL_PORT}"
echo "[Installing KFP] Waiting for pods to stand up"
kubectl wait --for=condition=ready -n "${KFP_NAMESPACE}" pod -l app=ml-pipeline --timeout=5m
# TODO: Replace with calculated waits
# For the moment we don't know which pods will be slower, so we are just relying on a fixed interval
sleep 3m
echo "[Installing KFP] Pipeline pods are ready"
}
function generate_iam_role_name() {
OIDC_ROLE_NAME="$(echo "${DEPLOY_NAME}-kubeflow-role" | cut -c1-64)"
OIDC_ROLE_ARN="arn:aws:iam::$(aws sts get-caller-identity --query=Account --output=text):role/${OIDC_ROLE_NAME}"
}
function install_generated_role() {
kubectl patch serviceaccount -n ${KFP_NAMESPACE} ${KFP_SERVICE_ACCOUNT} --patch '{"metadata": {"annotations": {"eks.amazonaws.com/role-arn": "'"${OIDC_ROLE_ARN}"'"}}}'
}
function delete_generated_role() {
# Delete the role associated with the cluster that's being deleted
aws iam detach-role-policy --role-name "${OIDC_ROLE_NAME}" --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
aws iam delete-role --role-name "${OIDC_ROLE_NAME}"
}
function cleanup_kfp() {
# Clean up Minio
if [ ! -z "${MINIO_PID}" ]; then
kill -9 $MINIO_PID || true
fi
}
if [[ -z "${EKS_EXISTING_CLUSTER}" ]]; then
launch_eks
else
aws eks update-kubeconfig --name "${EKS_EXISTING_CLUSTER}" --region "$REGION"
EKS_CLUSTER_NAME="${EKS_EXISTING_CLUSTER}"
DEPLOY_NAME="${EKS_EXISTING_CLUSTER}"
fi
generate_iam_role_name
"$cwd"/generate_iam_role ${EKS_CLUSTER_NAME} ${OIDC_ROLE_NAME} ${REGION} ${KFP_NAMESPACE} ${KFP_SERVICE_ACCOUNT}
install_kfp
install_generated_role
pytest_args=( --region "${REGION}" --role-arn "${SAGEMAKER_EXECUTION_ROLE_ARN}" --s3-data-bucket "${S3_DATA_BUCKET}" --minio-service-port "${MINIO_LOCAL_PORT}" --kfp-namespace "${KFP_NAMESPACE}" )
[ ! -z "${PYTEST_MARKER}" ] && pytest_args+=( -m "${PYTEST_MARKER}" )
cd tests/integration_tests && python -m pytest "${pytest_args[@]}" --junitxml ./integration_tests.log -n $(nproc)
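
For reference, a sketch of invoking this deployment script directly (flag values are placeholders; AWS credentials and the SageMaker execution role ARN are expected in the environment, as they are in the CodeBuild pipeline):

```
SAGEMAKER_EXECUTION_ROLE_ARN=arn:aws:iam::123456789012:role/sagemaker-role \
  ./run_integration_tests -n my-kfp-test -r us-west-2 -s my-data-bucket
```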