feat(components): New sagemaker training job parameters (#8538)

* unit tests

* feature: generated new sagemaker features

* update unit test

* remove unit tests

* Release: Staging component for release

* reformatted files
This commit is contained in:
ananth102 2022-12-12 13:32:28 -08:00 committed by GitHub
parent acd3113454
commit 6a6cfdbafb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 53 additions and 7 deletions

View File

@ -4,6 +4,11 @@ The version of the AWS SageMaker Components is determined by the docker image ta
Repository: [Public ECR](https://gallery.ecr.aws/kubeflow-on-aws/aws-sagemaker-kfp-components) or [Dockerhub](https://hub.docker.com/repository/docker/amazon/aws-sagemaker-kfp-components). New releases after v1.1.1 will be using the public ECR repository.
---------------------------------------------
**Change log for version 2.1.0**
- Adds support for Managed Warm Pool clusters, Instance Groups, and Retry Strategy in the Training Job component.
> Pull request : [#8538](https://github.com/kubeflow/pipelines/pull/8538)
**Change log for version 2.0.0**
- Makes SageMaker TrainingJob component version 2 GA. This release only includes the [TrainingJob component](./TrainingJob/). Open sources version 2 component code.
- Errors out when the component receives an Invalid Parameter error.

View File

@ -1,4 +1,4 @@
** Amazon SageMaker Components for Kubeflow Pipelines; version 2.0.0 --
** Amazon SageMaker Components for Kubeflow Pipelines; version 2.1.0 --
https://github.com/kubeflow/pipelines/tree/master/components/aws/sagemaker
Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
** pathlib2; version 2.3.5 --

View File

@ -98,11 +98,17 @@ inputs:
default: '{}',
description: "The resources, including the ML compute instances and ML storage volumes, to use for model training.",
}
- {
name: retry_strategy,
type: JsonObject,
default: '{}',
description: "The number of times to retry the job when the job fails due to an InternalServerError.",
}
- {
name: role_arn,
type: String,
default: '',
description: "The Amazon Resource Name (ARN) of an IAM role that Amazon SageMaker can assume to perform tasks on your behalf.",
description: "The Amazon Resource Name (ARN) of an IAM role that SageMaker can assume to perform tasks on your behalf.",
}
- {
name: stopping_condition,
@ -169,6 +175,11 @@ outputs:
type: JsonArray,
description: "Evaluation status of Debugger rules for profiling on a training job.",
}
- {
name: profiling_status,
type: String,
description: "Profiling status of a training job.",
}
- {
name: secondary_status,
type: String,
@ -179,11 +190,16 @@ outputs:
type: String,
description: "The status of the training job.",
}
- {
name: warm_pool_status,
type: JsonObject,
description: "The status of the warm pool associated with the training job.",
}
###########################GENERATED SECTION ABOVE############################
implementation:
container:
image: public.ecr.aws/kubeflow-on-aws/aws-sagemaker-kfp-components:2.0.0
image: public.ecr.aws/kubeflow-on-aws/aws-sagemaker-kfp-components:2.1.0
command: [python3]
args:
- TrainingJob/src/TrainingJob_component.py
@ -220,6 +236,8 @@ implementation:
- { inputValue: profiler_rule_configurations }
- --resource_config
- { inputValue: resource_config }
- --retry_strategy
- { inputValue: retry_strategy }
- --role_arn
- { inputValue: role_arn }
- --stopping_condition

View File

@ -216,6 +216,11 @@ class SageMakerTrainingJobComponent(SageMakerComponent):
if "profilerRuleEvaluationStatuses" in ack_statuses
else None
)
outputs.profiling_status = str(
ack_statuses["profilingStatus"]
if "profilingStatus" in ack_statuses
else None
)
outputs.secondary_status = str(
ack_statuses["secondaryStatus"]
if "secondaryStatus" in ack_statuses
@ -226,6 +231,9 @@ class SageMakerTrainingJobComponent(SageMakerComponent):
if "trainingJobStatus" in ack_statuses
else None
)
outputs.warm_pool_status = str(
ack_statuses["warmPoolStatus"] if "warmPoolStatus" in ack_statuses else None
)
############GENERATED SECTION ABOVE############

View File

@ -20,6 +20,7 @@ spec:
profilerConfig:
profilerRuleConfigurations:
resourceConfig:
retryStrategy:
roleARN:
stoppingCondition:
tags:

View File

@ -49,6 +49,7 @@ class SageMakerTrainingJobInputs(SageMakerComponentCommonInputs):
profiler_config: Input
profiler_rule_configurations: Input
resource_config: Input
retry_strategy: Input
role_arn: Input
stopping_condition: Input
tags: Input
@ -67,8 +68,10 @@ class SageMakerTrainingJobOutputs(SageMakerComponentBaseOutputs):
failure_reason: Output
model_artifacts: Output
profiler_rule_evaluation_statuses: Output
profiling_status: Output
secondary_status: Output
training_job_status: Output
warm_pool_status: Output
class SageMakerTrainingJobSpec(
@ -132,7 +135,7 @@ class SageMakerTrainingJobSpec(
),
output_data_config=InputValidator(
input_type=SpecInputParsers.yaml_or_json_dict,
description="Specifies the path to the S3 location where you want to store model artifacts. Amazon SageMaker crea",
description="Specifies the path to the S3 location where you want to store model artifacts. SageMaker creates sub",
required=True,
),
profiler_config=InputValidator(
@ -150,9 +153,14 @@ class SageMakerTrainingJobSpec(
description="The resources, including the ML compute instances and ML storage volumes, to use for model training.",
required=True,
),
retry_strategy=InputValidator(
input_type=SpecInputParsers.yaml_or_json_dict,
description="The number of times to retry the job when the job fails due to an InternalServerError.",
required=False,
),
role_arn=InputValidator(
input_type=str,
description="The Amazon Resource Name (ARN) of an IAM role that Amazon SageMaker can assume to perform tasks on y",
description="The Amazon Resource Name (ARN) of an IAM role that SageMaker can assume to perform tasks on your beh",
required=True,
),
stopping_condition=InputValidator(
@ -202,11 +210,17 @@ class SageMakerTrainingJobSpec(
profiler_rule_evaluation_statuses=OutputValidator(
description="Evaluation status of Debugger rules for profiling on a training job.",
),
profiling_status=OutputValidator(
description="Profiling status of a training job.",
),
secondary_status=OutputValidator(
description="Provides detailed information about the state of the training job. For detailed information on the s",
),
training_job_status=OutputValidator(
description="The status of the training job. Amazon SageMaker provides the following training job statuses:",
description="The status of the training job. SageMaker provides the following training job statuses: * InProg",
),
warm_pool_status=OutputValidator(
description="The status of the warm pool associated with the training job.",
),
)

View File

@ -38,7 +38,7 @@ ROBOMAKER_EXECUTION_ROLE_ARN=${ROBOMAKER_EXECUTION_ROLE_ARN:-""}
SKIP_FSX_TESTS=${SKIP_FSX_TESTS:-"false"}
ACK_RELEASE_VERSION=${ACK_RELEASE_VERSION:-"v0.4.3"}
ACK_RELEASE_VERSION=${ACK_RELEASE_VERSION:-"v0.5.0"}
HELM_EXPERIMENTAL_OCI=1
SERVICE=sagemaker
CHART_EXPORT_PATH=/tmp/chart