AWS SageMaker : Use IAM Roles for Service Account (#3719)

* don't use aws-secret and update readme for sample pipelines

* Addressed comments on PR and few more readme changes

* small changes to readme

* nit change

* Address comments
Kartik Kalamadi 2020-05-21 10:36:14 -07:00 committed by GitHub
parent 291f5b3d7a
commit d18ad7a563
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 250 additions and 239 deletions

View File

@ -44,9 +44,7 @@ def create_parser():
parser.add_argument('--input_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.', default='')
parser.add_argument('--output_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.', default='')
parser.add_argument('--join_source', choices=['None', 'Input', ''], type=str, required=False, help='Specifies the source of the data to join with the transformed data.', default='None')
parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=True, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge')
parser.add_argument('--instance_type', type=str, required=False, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge')
parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in the transform job.')
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})

View File

@ -31,7 +31,7 @@ Argument | Description | Optional (in pipeline definition
:--- | :---------- | :---------- | :---------- | :----------| :---------- | :----------|
model_name_[1, 3] | The name of the model that you want to host. This is the name that you specified when creating the model | No | No | String | | |
variant_name_[1, 3] | The name of the production variant | Yes | Yes | String | | variant_name_[1, 3] |
instance_type_[1, 3] | The ML compute instance type | Yes | Yes | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge | ml.m4.xlarge |
instance_type_[1, 3] | The ML compute instance type | Yes | Yes | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge [and many more](https://aws.amazon.com/sagemaker/pricing/instance-types/)| ml.m4.xlarge |
initial_instance_count_[1, 3] | Number of instances to launch initially | Yes | Yes | Integer | ≥ 1 | 1 |
initial_variant_weight_[1, 3] | Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. The traffic to a production variant is determined by the ratio of the VariantWeight to the sum of all VariantWeight values across all ProductionVariants. | Yes | Yes | Float | Minimum value of 0 | |
accelerator_type_[1, 3] | The size of the Elastic Inference (EI) instance to use for the production variant | Yes | Yes | String| ml.eia1.medium, ml.eia1.large, ml.eia1.xlarge | |

View File

@ -23,30 +23,23 @@ def create_parser():
parser.add_argument('--variant_name_1', type=str, required=False, help='The name of the production variant.', default='variant-name-1')
parser.add_argument('--model_name_1', type=str, required=True, help='The model name used for endpoint deployment.')
parser.add_argument('--initial_instance_count_1', type=int, required=False, help='Number of instances to launch initially.', default=1)
parser.add_argument('--instance_type_1', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--instance_type_1', type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--initial_variant_weight_1', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
parser.add_argument('--accelerator_type_1', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
parser.add_argument('--variant_name_2', type=str, required=False, help='The name of the production variant.', default='variant-name-2')
parser.add_argument('--model_name_2', type=str, required=False, help='The model name used for endpoint deployment.', default='')
parser.add_argument('--initial_instance_count_2', type=int, required=False, help='Number of instances to launch initially.', default=1)
parser.add_argument('--instance_type_2', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--instance_type_2', type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--initial_variant_weight_2', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
parser.add_argument('--accelerator_type_2', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
parser.add_argument('--variant_name_3', type=str, required=False, help='The name of the production variant.', default='variant-name-3')
parser.add_argument('--model_name_3', type=str, required=False, help='The model name used for endpoint deployment.', default='')
parser.add_argument('--initial_instance_count_3', type=int, required=False, help='Number of instances to launch initially.', default=1)
parser.add_argument('--instance_type_3', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--instance_type_3', type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--initial_variant_weight_3', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0)
parser.add_argument('--accelerator_type_3', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='')
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
parser.add_argument('--endpoint_config_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})
parser.add_argument('--endpoint_name', type=str, required=False, help='The name of the endpoint.', default='')
parser.add_argument('--endpoint_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={})

View File

@ -28,7 +28,7 @@ categorical_parameters | The array of CategoricalParameterRange objects that spe
channels | A list of dicts specifying the input channels (at least one); refer to [documentation](https://github.com/awsdocs/amazon-sagemaker-developer-guide/blob/master/doc_source/API_Channel.md) for parameters | No | No | List of Dicts | | |
output_location | The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job | No | No | String | | |
output_encryption_key | The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts | Yes | Yes | String | | |
instance_type | The ML compute instance type | Yes | No | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge | ml.m4.xlarge |
instance_type | The ML compute instance type | Yes | No | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge [and many more](https://aws.amazon.com/sagemaker/pricing/instance-types/)| ml.m4.xlarge |
instance_count | The number of ML compute instances to use in each training job | Yes | Yes | Int | ≥ 1 | 1 |
volume_size | The size of the ML storage volume that you want to provision in GB | Yes | Yes | Int | ≥ 1 | 30 |
max_num_jobs | The maximum number of training jobs that a hyperparameter tuning job can launch | No | No | Int | [1, 500] | |

View File

@ -19,7 +19,7 @@ from common import _utils
def create_parser():
parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job')
_utils.add_default_client_arguments(parser)
parser.add_argument('--job_name', type=str, required=False, help='The name of the tuning job. Must be unique within the same AWS account and AWS region.')
parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
parser.add_argument('--image', type=str, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='')
@ -37,9 +37,7 @@ def create_parser():
parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--instance_type', type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in each training job.', default=1)
parser.add_argument('--volume_size', type=int, required=False, help='The size of the ML storage volume that you want to provision.', default=30)
parser.add_argument('--max_num_jobs', type=int, required=True, help='The maximum number of training jobs that a hyperparameter tuning job can launch.')

View File

@ -20,8 +20,8 @@ algorithm_name | The name of the algorithm resource to use for the hyperparamete
metric_definitions | The dictionary of name-regex pairs specify the metrics that the algorithm emits | Yes | Dict | | {} |
training_input_mode | The input mode that the algorithm supports | No | String | File, Pipe | File |
hyperparameters | Hyperparameters for the selected algorithm | No | Dict | [Depends on Algo](https://docs.aws.amazon.com/sagemaker/latest/dg/k-means-api-config.html)| |
channels | A list of dicts specifying the input channels (at least one); refer to [documentation](https://github.com/awsdocs/amazon-sagemaker-developer-guide/blob/master/doc_source/API_Channel.md) for parameters | No | No | List of Dicts | | |
instance_type | The ML compute instance type | Yes | No | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge | ml.m4.xlarge |
channels | A list of dicts specifying the input channels (at least one); refer to [documentation](https://github.com/awsdocs/amazon-sagemaker-developer-guide/blob/master/doc_source/API_Channel.md) for parameters | No | List of Dicts | | |
instance_type | The ML compute instance type | Yes | String | ml.m4.xlarge, ml.m4.2xlarge, ml.m4.4xlarge, ml.m4.10xlarge, ml.m4.16xlarge, ml.m5.large, ml.m5.xlarge, ml.m5.2xlarge, ml.m5.4xlarge, ml.m5.12xlarge, ml.m5.24xlarge, ml.c4.xlarge, ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, ml.p3.8xlarge, ml.p3.16xlarge, ml.c5.xlarge, ml.c5.2xlarge, ml.c5.4xlarge, ml.c5.9xlarge, ml.c5.18xlarge [and many more](https://aws.amazon.com/sagemaker/pricing/instance-types/) | ml.m4.xlarge |
instance_count | The number of ML compute instances to use in each training job | Yes | Int | ≥ 1 | 1 |
volume_size | The size of the ML storage volume that you want to provision in GB | Yes | Int | ≥ 1 | 30 |
resource_encryption_key | The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s) | Yes | String | | |
@ -42,7 +42,7 @@ tags | Key-value pairs to categorize AWS resources | Yes | Dict | | {} |
Stores the Model in the s3 bucket you specified
# Example code
Simple example pipeline with only Train component : [simple_train_pipeline](https://github.com/kubeflow/pipelines/tree/documents/samples/contrib/aws-samples/simple_train_pipeline)
Simple example pipeline with only Train component : [simple_train_pipeline](https://github.com/kubeflow/pipelines/tree/master/samples/contrib/aws-samples/simple_train_pipeline)
# Resources
* [Using Amazon built-in algorithms](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html)

View File

@ -28,9 +28,7 @@ def create_parser():
parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str, help='The input mode that the algorithm supports. File or Pipe.', default='File')
parser.add_argument('--hyperparameters', type=_utils.yaml_or_json_str, help='Dictionary of hyperparameters for the algorithm.', default={})
parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
parser.add_argument('--instance_type', required=True, choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge',
'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--instance_type', required=False, type=str, help='The ML compute instance type.', default='ml.m4.xlarge')
parser.add_argument('--instance_count', required=True, type=int, help='The number of ML compute instances to use in each training job.', default=1)
parser.add_argument('--volume_size', type=int, required=True, help='The size of the ML storage volume that you want to provision.', default=30)
parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')

View File

@ -0,0 +1,197 @@
# Sample AWS SageMaker Kubeflow Pipelines
This folder contains example pipelines that use the [AWS SageMaker Components for KFP](https://github.com/kubeflow/pipelines/tree/master/components/aws/sagemaker). The following sections explain the setup needed to run these pipelines. Once you are done with the setup, [simple_train_pipeline](https://github.com/kubeflow/pipelines/tree/master/samples/contrib/aws-samples/simple_train_pipeline) is a good place to start if you have never used these components before.
## Prerequisites
1. You need a cluster with Kubeflow installed on it. [Install Kubeflow on AWS cluster](https://www.kubeflow.org/docs/aws/deploy/install-kubeflow/)
2. Install the following on your local machine or an EC2 instance (these are recommended tools; not all of them are strictly required):
1. [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html). If you are using an IAM user, configure your [Access Key ID, Secret Access Key](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) and preferred AWS Region by running:
`aws configure`
2. [aws-iam-authenticator](https://docs.aws.amazon.com/eks/latest/userguide/install-aws-iam-authenticator.html) version 0.1.31 and above
3. [eksctl](https://github.com/weaveworks/eksctl) version 0.15 or above
4. [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/#install-kubectl) version within one minor version of your cluster's Kubernetes version
5. [KFP SDK](https://www.kubeflow.org/docs/pipelines/sdk/install-sdk/#install-the-kubeflow-pipelines-sdk) (installs the `dsl-compile` and `kfp` CLIs)
## IAM Permissions
To use the AWS KFP components, the KFP component pods need access to AWS SageMaker.
There are two ways to give them that access.
(Option 1 requires an EKS cluster.)
**Option 1** (Recommended) [IAM Roles for Service Accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html).
1. Enable OIDC support on the EKS cluster
```
eksctl utils associate-iam-oidc-provider --cluster <cluster_name> \
--region <cluster_region> --approve
```
2. Get the OIDC issuer URL and note it down. It has the form `oidc.eks.<region>.amazonaws.com/id/<OIDC_ID>`.
```
aws eks describe-cluster --name <cluster_name> --query "cluster.identity.oidc.issuer" --output text
```
3. Create a file named `trust.json` with the following content.
Replace `<OIDC_URL>` with your OIDC issuer URL **(don't include `https://`)** and `<AWS_ACCOUNT_NUMBER>` with your AWS account number.
```
# Replace these two with proper values
OIDC_URL="<OIDC_URL>"
AWS_ACC_NUM="<AWS_ACCOUNT_NUMBER>"
# Run this to create trust.json file
cat <<EOF > trust.json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Federated": "arn:aws:iam::$AWS_ACC_NUM:oidc-provider/$OIDC_URL"
},
"Action": "sts:AssumeRoleWithWebIdentity",
"Condition": {
"StringEquals": {
"$OIDC_URL:aud": "sts.amazonaws.com",
"$OIDC_URL:sub": "system:serviceaccount:kubeflow:pipeline-runner"
}
}
}
]
}
EOF
```
4. Create an IAM role using trust.json. Make a note of the ARN returned in the output.
```
aws iam create-role --role-name kfp-example-pod-role --assume-role-policy-document file://trust.json
aws iam attach-role-policy --role-name kfp-example-pod-role --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
aws iam get-role --role-name kfp-example-pod-role --output text --query 'Role.Arn'
```
5. Edit your pipeline-runner service account.
```
kubectl edit -n kubeflow serviceaccount pipeline-runner
```
Add `eks.amazonaws.com/role-arn: <role_arn>` under `annotations`, then save the file (or script this step; see the sketch after the example). Example: **(add only the `eks.amazonaws.com/role-arn` annotation line)**
```
apiVersion: v1
kind: ServiceAccount
metadata:
annotations:
eks.amazonaws.com/role-arn: <role_arn>
creationTimestamp: "2020-04-16T05:48:06Z"
labels:
app: pipeline-runner
app.kubernetes.io/component: pipelines-runner
app.kubernetes.io/instance: pipelines-runner-0.2.0
app.kubernetes.io/managed-by: kfctl
app.kubernetes.io/name: pipelines-runner
app.kubernetes.io/part-of: kubeflow
app.kubernetes.io/version: 0.2.0
name: pipeline-runner
namespace: kubeflow
resourceVersion: "11787"
selfLink: /api/v1/namespaces/kubeflow/serviceaccounts/pipeline-runner
uid: d86234bd-7fa5-11ea-a8f2-02934be6dc88
secrets:
- name: pipeline-runner-token-dkjrk
```
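If you would rather script this annotation than edit the ServiceAccount by hand, here is a minimal sketch using the official Kubernetes Python client (assumes the `kubernetes` package is installed, your kubeconfig points at the cluster, and `<role_arn>` is the ARN from step 4):
```python
# Sketch: add the IAM role annotation to the pipeline-runner service account.
from kubernetes import client, config

config.load_kube_config()  # uses your current kubectl context
v1 = client.CoreV1Api()
v1.patch_namespaced_service_account(
    name="pipeline-runner",
    namespace="kubeflow",
    body={"metadata": {"annotations": {"eks.amazonaws.com/role-arn": "<role_arn>"}}},
)
```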
**Option 2** Store IAM credentials as an `aws-secret` in the Kubernetes cluster, then use them in the components.
1. You need credentials for an IAM user with AmazonSageMakerFullAccess. Apply them to the Kubernetes cluster as a secret.
Replace `AWS_ACCESS_KEY_IN_BASE64` and `AWS_SECRET_ACCESS_IN_BASE64`.
> Note: To get the base64 string, run `echo -n $AWS_ACCESS_KEY_ID | base64`
```
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Secret
metadata:
name: aws-secret
namespace: kubeflow
type: Opaque
data:
AWS_ACCESS_KEY_ID: <AWS_ACCESS_KEY_IN_BASE64>
AWS_SECRET_ACCESS_KEY: <AWS_SECRET_ACCESS_IN_BASE64>
EOF
```
2. Use the stored `aws-secret` in your pipeline code by adding `.apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))` to each component in the pipeline.
See the [Kubeflow documentation](https://www.kubeflow.org/docs/aws/pipeline/)
and this [example](https://github.com/kubeflow/pipelines/blob/master/samples/contrib/aws-samples/simple_train_pipeline/training-pipeline.py#L76) (uncomment that line in the example).
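As a minimal sketch of the pattern (the step below is a placeholder `ContainerOp`, not one of the actual SageMaker components; apply the same `.apply(...)` call to each SageMaker op in your pipeline):
```python
# Sketch: attach the aws-secret credentials to a pipeline step.
import kfp
from kfp import dsl
from kfp.aws import use_aws_secret

@dsl.pipeline(name="aws-secret-example", description="Minimal use_aws_secret example")
def example_pipeline():
    dsl.ContainerOp(
        name="check-identity",
        image="amazon/aws-cli",  # placeholder image
        command=["aws", "sts", "get-caller-identity"],
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))

if __name__ == "__main__":
    kfp.compiler.Compiler().compile(example_pipeline, __file__ + ".zip")
```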
## Inputs to the pipeline
### Sample MNIST dataset
Use the following Python script to copy `train_data`, `test_data`, and `valid_data.csv` to your bucket.
[Create a bucket](https://docs.aws.amazon.com/AmazonS3/latest/gsg/CreatingABucket.html) in the `us-east-1` region if you don't have one already.
For the purposes of this demonstration, all resources will be created in the us-east-1 region.
Create a new file named `s3_sample_data_creator.py` with the following content:
```python
import pickle, gzip, numpy, urllib.request, json
from urllib.parse import urlparse
###################################################################
# This is the only thing that you need to change to run this code
# Give the name of your S3 bucket
bucket = '<bucket-name>'
# If you are going to use the default values of the pipeline,
# give a bucket name that is in the us-east-1 region
###################################################################
# Load the dataset
urllib.request.urlretrieve("http://deeplearning.net/data/mnist/mnist.pkl.gz", "mnist.pkl.gz")
with gzip.open('mnist.pkl.gz', 'rb') as f:
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
# Upload dataset to S3
from sagemaker.amazon.common import write_numpy_to_dense_tensor
import io
import boto3
train_data_key = 'mnist_kmeans_example/train_data'
test_data_key = 'mnist_kmeans_example/test_data'
train_data_location = 's3://{}/{}'.format(bucket, train_data_key)
test_data_location = 's3://{}/{}'.format(bucket, test_data_key)
print('training data will be uploaded to: {}'.format(train_data_location))
print('test data will be uploaded to: {}'.format(test_data_location))
# Convert the training data into the format required by the SageMaker KMeans algorithm
buf = io.BytesIO()
write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])
buf.seek(0)
boto3.resource('s3').Bucket(bucket).Object(train_data_key).upload_fileobj(buf)
# Convert the test data into the format required by the SageMaker KMeans algorithm
write_numpy_to_dense_tensor(buf, test_set[0], test_set[1])
buf.seek(0)
boto3.resource('s3').Bucket(bucket).Object(test_data_key).upload_fileobj(buf)
# Save the validation data as CSV and upload it to S3
numpy.savetxt('valid-data.csv', valid_set[0], delimiter=',', fmt='%g')
s3_client = boto3.client('s3')
input_key = "{}/valid_data.csv".format("mnist_kmeans_example/input")
s3_client.upload_file('valid-data.csv', bucket, input_key)
```
Run the script: `python s3_sample_data_creator.py`
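To verify the upload, you can list the objects under the example prefix (a quick sketch; replace `<bucket-name>` with your bucket):
```python
# Sketch: confirm the sample MNIST objects exist in your bucket.
import boto3

s3 = boto3.client("s3")
response = s3.list_objects_v2(Bucket="<bucket-name>", Prefix="mnist_kmeans_example/")
for obj in response.get("Contents", []):
    print(obj["Key"], obj["Size"])
```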
### Role Input
This role is used by the SageMaker jobs created by KFP to access S3 buckets and other AWS resources.
Run the following commands to create the SageMaker execution role.
Note down the Role ARN; you need to pass it as an input to the pipelines.
```
TRUST="{ \"Version\": \"2012-10-17\", \"Statement\": [ { \"Effect\": \"Allow\", \"Principal\": { \"Service\": \"sagemaker.amazonaws.com\" }, \"Action\": \"sts:AssumeRole\" } ] }"
aws iam create-role --role-name kfp-example-sagemaker-execution-role --assume-role-policy-document "$TRUST"
aws iam attach-role-policy --role-name kfp-example-sagemaker-execution-role --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
aws iam attach-role-policy --role-name kfp-example-sagemaker-execution-role --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess
aws iam get-role --role-name kfp-example-sagemaker-execution-role --output text --query 'Role.Arn'
# note down the Role ARN.
```
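Besides typing the ARN into the KFP UI, you can also pass it from code when starting a run. A sketch with the KFP SDK, assuming your pipeline exposes a `role_arn` parameter and you have already compiled it (the file name and endpoint are placeholders):
```python
# Sketch: start a run of a compiled sample pipeline, passing the SageMaker execution role ARN.
import kfp

client = kfp.Client()  # or kfp.Client(host="http://<your-kfp-endpoint>")
client.create_run_from_pipeline_package(
    pipeline_file="mnist-classification-pipeline.tar.gz",
    arguments={"role_arn": "arn:aws:iam::<account-id>:role/kfp-example-sagemaker-execution-role"},
    run_name="sagemaker-sample-run",
)
```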

View File

@ -4,6 +4,10 @@ This sample is based on [this example](https://github.com/awslabs/amazon-sagemak
The sample goes through the workflow of creating a private workteam, creating data labeling jobs for that team, and running a training job using the new labeled data.
## Prerequisites
Make sure you have the setup explained in this [README.md](https://github.com/kubeflow/pipelines/blob/master/samples/contrib/aws-samples/README.md)
(This pipeline does not use the MNIST dataset. Follow the instructions below to get the sample dataset.)
## Prep the dataset, label categories, and UI template
@ -34,26 +38,6 @@ client_ID = App client
> Note: Once you start a pipeline run, the Ground Truth labeling jobs will appear at the "Labeling portal sign-in URL" link.
## SageMaker permission
In order to run this pipeline, we need to prepare an IAM Role to run Sagemaker jobs. You need this `role_arn` to run a pipeline. Check [here](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) for details.
This pipeline also use aws-secret to get access to Sagemaker services, please also make sure you have a `aws-secret` in the kubeflow namespace.
```yaml
apiVersion: v1
kind: Secret
metadata:
name: aws-secret
namespace: kubeflow
type: Opaque
data:
AWS_ACCESS_KEY_ID: YOUR_BASE64_ACCESS_KEY
AWS_SECRET_ACCESS_KEY: YOUR_BASE64_SECRET_ACCESS
```
> Note: To get base64 string, try `echo -n $AWS_ACCESS_KEY_ID | base64`
## Compiling the pipeline template

View File

@ -59,7 +59,7 @@ def ground_truth_test(region='us-west-2',
training_input_mode='Pipe',
training_hyperparameters={"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"},
training_output_location='s3://your-bucket-name/mini-image-classification/training-output',
training_instance_type='ml.p2.xlarge',
training_instance_type='ml.m5.2xlarge',
training_instance_count=1,
training_volume_size=50,
training_max_run_time=3600,
@ -73,7 +73,7 @@ def ground_truth_test(region='us-west-2',
user_pool=user_pool,
user_groups=user_groups,
client_id=client_id
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)
ground_truth_train = sagemaker_gt_op(
region=region,
@ -93,7 +93,7 @@ def ground_truth_test(region='us-west-2',
time_limit=ground_truth_time_limit,
task_availibility=ground_truth_task_availibility,
max_concurrent_tasks=ground_truth_max_concurrent_tasks
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)
ground_truth_validation = sagemaker_gt_op(
region=region,
@ -113,7 +113,7 @@ def ground_truth_test(region='us-west-2',
time_limit=ground_truth_time_limit,
task_availibility=ground_truth_task_availibility,
max_concurrent_tasks=ground_truth_max_concurrent_tasks
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)
channelObj['ChannelName'] = 'train'
channelObj['DataSource']['S3DataSource']['S3Uri'] = str(ground_truth_train.outputs['output_manifest_location'])
@ -134,7 +134,8 @@ def ground_truth_test(region='us-west-2',
max_run_time=training_max_run_time,
model_artifact_path=training_output_location,
role=role_arn
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)
if __name__ == '__main__':
kfp.compiler.Compiler().compile(ground_truth_test, __file__ + '.zip')

View File

@ -1,93 +1,10 @@
The `mnist-classification-pipeline.py` sample runs a pipeline to train a classification model using K-Means with the MNIST dataset on SageMaker.
The `kmeans-hpo-pipeline.py` is a single-component hyperparameter optimization pipeline with default values set to use K-Means.
If you do not have `train_data`, `test_data`, and `valid_data`, you can use the following code to get sample data
(this data can be used for both of these pipelines).
## The sample dataset
## Prerequisites
This sample is based on [Train a Model with a Built-in Algorithm and Deploy it](https://docs.aws.amazon.com/sagemaker/latest/dg/ex1.html).
The sample trains and deploys a model based on the [MNIST dataset](http://www.deeplearning.net/tutorial/gettingstarted.html).
Create an S3 bucket and use the following python script to copy `train_data`, `test_data`, and `valid_data.csv` to your buckets.
(create the bucket in `us-west-2` region if you are gonna use default values of the pipeline)
https://docs.aws.amazon.com/AmazonS3/latest/gsg/CreatingABucket.html
Create a new file named `s3_sample_data_creator.py` with following content :
```python
import pickle, gzip, numpy, urllib.request, json
from urllib.parse import urlparse
# Load the dataset
urllib.request.urlretrieve("http://deeplearning.net/data/mnist/mnist.pkl.gz", "mnist.pkl.gz")
with gzip.open('mnist.pkl.gz', 'rb') as f:
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
# Upload dataset to S3
from sagemaker.amazon.common import write_numpy_to_dense_tensor
import io
import boto3
###################################################################
# This is the only thing that you need to change to run this code
# Give the name of your S3 bucket
bucket = 'bucket-name'
# If you are gonna use the default values of the pipeline then
# give a bucket name which is in us-west-2 region
###################################################################
train_data_key = 'mnist_kmeans_example/train_data'
test_data_key = 'mnist_kmeans_example/test_data'
train_data_location = 's3://{}/{}'.format(bucket, train_data_key)
test_data_location = 's3://{}/{}'.format(bucket, test_data_key)
print('training data will be uploaded to: {}'.format(train_data_location))
print('training data will be uploaded to: {}'.format(test_data_location))
# Convert the training data into the format required by the SageMaker KMeans algorithm
buf = io.BytesIO()
write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])
buf.seek(0)
boto3.resource('s3').Bucket(bucket).Object(train_data_key).upload_fileobj(buf)
# Convert the test data into the format required by the SageMaker KMeans algorithm
write_numpy_to_dense_tensor(buf, test_set[0], test_set[1])
buf.seek(0)
boto3.resource('s3').Bucket(bucket).Object(test_data_key).upload_fileobj(buf)
# Convert the valid data into the format required by the SageMaker KMeans algorithm
numpy.savetxt('valid-data.csv', valid_set[0], delimiter=',', fmt='%g')
s3_client = boto3.client('s3')
input_key = "{}/valid_data.csv".format("mnist_kmeans_example/input")
s3_client.upload_file('valid-data.csv', bucket, input_key)
```
Run this file `python s3_sample_data_creator.py`
## SageMaker permission
In order to run this pipeline, we need to prepare an IAM Role to run Sagemaker jobs. You need this `role_arn` to run a pipeline. Check [here](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) for details.
This pipeline also use aws-secret to get access to Sagemaker services, please also make sure you have a `aws-secret` in the kubeflow namespace.
```yaml
apiVersion: v1
kind: Secret
metadata:
name: aws-secret
namespace: kubeflow
type: Opaque
data:
AWS_ACCESS_KEY_ID: YOUR_BASE64_ACCESS_KEY
AWS_SECRET_ACCESS_KEY: YOUR_BASE64_SECRET_ACCESS
```
> Note: To get base64 string, try `echo -n $AWS_ACCESS_KEY_ID | base64`
Make sure you have the setup explained in this [README.md](https://github.com/kubeflow/pipelines/blob/master/samples/contrib/aws-samples/README.md)
## Compiling the pipeline template
@ -98,6 +15,7 @@ Follow the guide to [building a pipeline](https://www.kubeflow.org/docs/guides/p
dsl-compile --py mnist-classification-pipeline.py --output mnist-classification-pipeline.tar.gz
```
## Deploying the pipeline
Open the Kubeflow pipelines UI. Create a new pipeline, and then upload the compiled specification (`.tar.gz` file) as a new pipeline template.
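If you prefer the SDK to the UI for this step, here is a sketch of uploading the compiled package programmatically (the host and names are placeholders):
```python
# Sketch: upload the compiled pipeline package with the KFP SDK instead of the web UI.
import kfp

client = kfp.Client(host="http://<your-kfp-endpoint>")
client.upload_pipeline(
    pipeline_package_path="mnist-classification-pipeline.tar.gz",
    pipeline_name="mnist-classification-pipeline",
)
```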

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3
import kfp
import json
import copy
@ -38,7 +39,7 @@ channelObjList.append(copy.deepcopy(channelObj))
name='MNIST HPO test pipeline',
description='SageMaker hyperparameter tuning job test'
)
def hpo_test(region='us-west-2',
def hpo_test(region='us-east-1',
hpo_job_name='HPO-kmeans-sample',
image='',
algorithm_name='K-Means',
@ -56,7 +57,7 @@ def hpo_test(region='us-west-2',
channels=channelObjList,
output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
output_encryption_key='',
instance_type='ml.p2.16xlarge',
instance_type='ml.m5.2xlarge',
instance_count=1,
volume_size=50,
max_num_jobs=1,
@ -114,7 +115,8 @@ def hpo_test(region='us-west-2',
checkpoint_config=checkpoint_config,
tags=tags,
role=role_arn,
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)
if __name__ == '__main__':
kfp.compiler.Compiler().compile(hpo_test, __file__ + '.zip')

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3
import kfp
import json
import copy
@ -44,8 +45,8 @@ hpoChannels.append(copy.deepcopy(channelObj))
name='MNIST Classification pipeline',
description='MNIST Classification using KMEANS in SageMaker'
)
def mnist_classification(region='us-west-2',
image='174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:1',
def mnist_classification(region='us-east-1',
image='382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:1',
training_input_mode='File',
hpo_strategy='Bayesian',
hpo_metric_name='test:msd',
@ -61,7 +62,7 @@ def mnist_classification(region='us-west-2',
hpo_checkpoint_config={},
output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
output_encryption_key='',
instance_type='ml.p2.16xlarge',
instance_type='ml.m5.2xlarge',
instance_count=1,
volume_size=50,
hpo_max_num_jobs=9,
@ -115,7 +116,7 @@ def mnist_classification(region='us-west-2',
max_wait_time=hpo_max_wait_time,
checkpoint_config=hpo_checkpoint_config,
role=role_arn,
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)
training = sagemaker_train_op(
region=region,
@ -136,7 +137,7 @@ def mnist_classification(region='us-west-2',
max_wait_time=train_max_wait_time,
checkpoint_config=train_checkpoint_config,
role=role_arn,
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)
create_model = sagemaker_model_op(
region=region,
@ -146,13 +147,13 @@ def mnist_classification(region='us-west-2',
model_artifact_url=training.outputs['model_artifact_url'],
network_isolation=network_isolation,
role=role_arn
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)
prediction = sagemaker_deploy_op(
region=region,
endpoint_url=endpoint_url,
model_name_1=create_model.output,
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)
batch_transform = sagemaker_batch_transform_op(
region=region,
@ -169,7 +170,7 @@ def mnist_classification(region='us-west-2',
split_type=batch_transform_split_type,
compression_type=batch_transform_compression_type,
output_location=batch_transform_ouput
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)
if __name__ == '__main__':
kfp.compiler.Compiler().compile(mnist_classification, __file__ + '.zip')

View File

@ -2,100 +2,17 @@
An example pipeline with only [train component](https://github.com/kubeflow/pipelines/tree/master/components/aws/sagemaker/train).
# Prerequisites
1. Install Kubeflow on an EKS cluster in AWS. https://www.kubeflow.org/docs/aws/deploy/install-kubeflow/
2. Get and store data in S3 buckets. You can get sample data using this code.
Create a new file `s3_sample_data_creator.py` with following content :
```buildoutcfg
import io
import boto3
import pickle, gzip, numpy, urllib.request, json
from urllib.parse import urlparse
from sagemaker.amazon.common import write_numpy_to_dense_tensor
###########################################################################################
# This is the only thing that you need to change in this code
# Give the name of your S3 bucket
# To use the example input below give a bucket name which is in us-east-1 region
bucket = '<bucket-name>'
## Prerequisites
###########################################################################################
# Load the dataset
urllib.request.urlretrieve("http://deeplearning.net/data/mnist/mnist.pkl.gz", "mnist.pkl.gz")
with gzip.open('mnist.pkl.gz', 'rb') as f:
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
Make sure you have the setup explained in this [README.md](https://github.com/kubeflow/pipelines/blob/master/samples/contrib/aws-samples/README.md)
# Upload dataset to S3
data_key = 'mnist_kmeans_example/data'
data_location = 's3://{}/{}'.format(bucket, data_key)
print('Data will be uploaded to: {}'.format(data_location))
# Convert the training data into the format required by the SageMaker KMeans algorithm
buf = io.BytesIO()
write_numpy_to_dense_tensor(buf, train_set[0], train_set[1])
buf.seek(0)
boto3.resource('s3').Bucket(bucket).Object(data_key).upload_fileobj(buf)
```
Run this file `python s3_sample_data_creator.py`
3. Prepare an IAM role with permissions to run SageMaker jobs and access to S3 buckets.
create a new file "trust.json" with following content
```buildoutcfg
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "sagemaker.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
```
```buildoutcfg
# run these commands to create a role named "SageMakerExecutorKFP" with SageMaker and S3 access
aws iam create-role --role-name SageMakerExecutorKFP --assume-role-policy-document file://trust.json
aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess --role-name SageMakerExecutorKFP
aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess --role-name SageMakerExecutorKFP
# Note down the role ARN
aws iam get-role --role-name SageMakerExecutorKFP # | jq .Role.Arn
```
4. Add 'aws-secret' to your Kubeflow namespace.
```
# 1. get aws key and secret in base64 format:
echo -n "<AWS_ACCESS_KEY_ID>" | base64
echo -n "<AWS_SECRET_ACCESS_KEY>" | base64
# 2. Create new file secret.yaml with following content
apiVersion: v1
kind: Secret
metadata:
name: aws-secret
namespace: kubeflow
type: Opaque
data:
AWS_ACCESS_KEY_ID: <base64_AWS_ACCESS_KEY_ID>
AWS_SECRET_ACCESS_KEY: <base64_AWS_SECRET_ACCESS_KEY>
# 3. Now apply to the cluster's kubeflow namespace:
kubectl -n kubeflow apply -f secret.yaml
```
5. Compile the pipeline:
## Steps
1. Compile the pipeline:
`dsl-compile --py training-pipeline.py --output training-pipeline.tar.gz`
6. In the Kubeflow UI, upload this compiled pipeline specification (the .tar.gz file) and click on create run.
7. Once the pipeline completes, you can see the outputs under 'Output parameters' in the HPO component's Input/Output section.
2. In the Kubeflow UI, upload this compiled pipeline specification (the `.tar.gz` file) and click on create run.
3. Once the pipeline completes, you can see the outputs under 'Output parameters' in the training component's Input/Output section.
Example inputs to this pipeline :
```buildoutcfg
@ -111,7 +28,7 @@ channels : In this JSON, along with other parameters you need to pass the S3 Uri
"ChannelName": "train",
"DataSource": {
"S3DataSource": {
"S3Uri": "s3://<your_bucket_name>/mnist_kmeans_example/data",
"S3Uri": "s3://<your_bucket_name>/mnist_kmeans_example/train_data",
"S3DataType": "S3Prefix",
"S3DataDistributionType": "FullyReplicated"
}
@ -123,7 +40,7 @@ channels : In this JSON, along with other parameters you need to pass the S3 Uri
}
]
instance_type : ml.p2.xlarge
instance_type : ml.m5.2xlarge
instance_count : 1
volume_size : 50
max_run_time : 3600

View File

@ -1,5 +1,8 @@
#!/usr/bin/env python3
# Uncomment the apply(use_aws_secret()) below if you are not using OIDC
# more info : https://github.com/kubeflow/pipelines/tree/master/samples/contrib/aws-samples/README.md
import kfp
import json
import copy
@ -41,7 +44,7 @@ def training(
training_input_mode='File',
hyperparameters={"k": "10", "feature_dim": "784"},
channels=channelObjList,
instance_type='ml.p2.xlarge',
instance_type='ml.m5.2xlarge',
instance_count=1,
volume_size=50,
max_run_time=3600,
@ -73,7 +76,8 @@ def training(
max_wait_time=max_wait_time,
checkpoint_config=checkpoint_config,
role=role,
).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
)#.apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
if __name__ == '__main__':
kfp.compiler.Compiler().compile(training, __file__ + '.zip')