feat(components): New sagemaker training job parameters (#8538)
* unit tests * feature: generated new sagemaker features * update unit test * remove unit tests * Release: Staging component for release * reformatted files
This commit is contained in:
parent
acd3113454
commit
6a6cfdbafb
|
|
@ -4,6 +4,11 @@ The version of the AWS SageMaker Components is determined by the docker image ta
|
|||
Repository: [Public ECR](https://gallery.ecr.aws/kubeflow-on-aws/aws-sagemaker-kfp-components) or [Dockerhub](https://hub.docker.com/repository/docker/amazon/aws-sagemaker-kfp-components). New releases after v1.1.1 will be using the public ECR repository.
|
||||
|
||||
---------------------------------------------
|
||||
**Change log for version 2.1.0**
|
||||
- Adds support for Managed Warm Pool clusters, Instance Groups, and Retry Strategy in the Training Job component.
|
||||
|
||||
> Pull request : [#8538](https://github.com/kubeflow/pipelines/pull/8538)
|
||||
|
||||
**Change log for version 2.0.0**
|
||||
- Makes SageMaker TrainingJob component version 2 GA. This release only includes the [TrainingJob component](./TrainingJob/). Open sources version 2 component code.
|
||||
- Errors out when the component receives an Invalid Parameter error.
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
** Amazon SageMaker Components for Kubeflow Pipelines; version 2.0.0 --
|
||||
** Amazon SageMaker Components for Kubeflow Pipelines; version 2.1.0 --
|
||||
https://github.com/kubeflow/pipelines/tree/master/components/aws/sagemaker
|
||||
Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
** pathlib2; version 2.3.5 --
|
||||
|
|
|
|||
|
|
@ -98,11 +98,17 @@ inputs:
|
|||
default: '{}',
|
||||
description: "The resources, including the ML compute instances and ML storage volumes, to use for model training.",
|
||||
}
|
||||
- {
|
||||
name: retry_strategy,
|
||||
type: JsonObject,
|
||||
default: '{}',
|
||||
description: "The number of times to retry the job when the job fails due to an InternalServerError.",
|
||||
}
|
||||
- {
|
||||
name: role_arn,
|
||||
type: String,
|
||||
default: '',
|
||||
description: "The Amazon Resource Name (ARN) of an IAM role that Amazon SageMaker can assume to perform tasks on your behalf.",
|
||||
description: "The Amazon Resource Name (ARN) of an IAM role that SageMaker can assume to perform tasks on your behalf.",
|
||||
}
|
||||
- {
|
||||
name: stopping_condition,
|
||||
|
|
@ -169,6 +175,11 @@ outputs:
|
|||
type: JsonArray,
|
||||
description: "Evaluation status of Debugger rules for profiling on a training job.",
|
||||
}
|
||||
- {
|
||||
name: profiling_status,
|
||||
type: String,
|
||||
description: "Profiling status of a training job.",
|
||||
}
|
||||
- {
|
||||
name: secondary_status,
|
||||
type: String,
|
||||
|
|
@ -179,11 +190,16 @@ outputs:
|
|||
type: String,
|
||||
description: "The status of the training job.",
|
||||
}
|
||||
- {
|
||||
name: warm_pool_status,
|
||||
type: JsonObject,
|
||||
description: "The status of the warm pool associated with the training job.",
|
||||
}
|
||||
###########################GENERATED SECTION ABOVE############################
|
||||
|
||||
implementation:
|
||||
container:
|
||||
image: public.ecr.aws/kubeflow-on-aws/aws-sagemaker-kfp-components:2.0.0
|
||||
image: public.ecr.aws/kubeflow-on-aws/aws-sagemaker-kfp-components:2.1.0
|
||||
command: [python3]
|
||||
args:
|
||||
- TrainingJob/src/TrainingJob_component.py
|
||||
|
|
@ -220,6 +236,8 @@ implementation:
|
|||
- { inputValue: profiler_rule_configurations }
|
||||
- --resource_config
|
||||
- { inputValue: resource_config }
|
||||
- --retry_strategy
|
||||
- { inputValue: retry_strategy }
|
||||
- --role_arn
|
||||
- { inputValue: role_arn }
|
||||
- --stopping_condition
|
||||
|
|
|
|||
|
|
@ -216,6 +216,11 @@ class SageMakerTrainingJobComponent(SageMakerComponent):
|
|||
if "profilerRuleEvaluationStatuses" in ack_statuses
|
||||
else None
|
||||
)
|
||||
outputs.profiling_status = str(
|
||||
ack_statuses["profilingStatus"]
|
||||
if "profilingStatus" in ack_statuses
|
||||
else None
|
||||
)
|
||||
outputs.secondary_status = str(
|
||||
ack_statuses["secondaryStatus"]
|
||||
if "secondaryStatus" in ack_statuses
|
||||
|
|
@ -226,6 +231,9 @@ class SageMakerTrainingJobComponent(SageMakerComponent):
|
|||
if "trainingJobStatus" in ack_statuses
|
||||
else None
|
||||
)
|
||||
outputs.warm_pool_status = str(
|
||||
ack_statuses["warmPoolStatus"] if "warmPoolStatus" in ack_statuses else None
|
||||
)
|
||||
############GENERATED SECTION ABOVE############
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ spec:
|
|||
profilerConfig:
|
||||
profilerRuleConfigurations:
|
||||
resourceConfig:
|
||||
retryStrategy:
|
||||
roleARN:
|
||||
stoppingCondition:
|
||||
tags:
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ class SageMakerTrainingJobInputs(SageMakerComponentCommonInputs):
|
|||
profiler_config: Input
|
||||
profiler_rule_configurations: Input
|
||||
resource_config: Input
|
||||
retry_strategy: Input
|
||||
role_arn: Input
|
||||
stopping_condition: Input
|
||||
tags: Input
|
||||
|
|
@ -67,8 +68,10 @@ class SageMakerTrainingJobOutputs(SageMakerComponentBaseOutputs):
|
|||
failure_reason: Output
|
||||
model_artifacts: Output
|
||||
profiler_rule_evaluation_statuses: Output
|
||||
profiling_status: Output
|
||||
secondary_status: Output
|
||||
training_job_status: Output
|
||||
warm_pool_status: Output
|
||||
|
||||
|
||||
class SageMakerTrainingJobSpec(
|
||||
|
|
@ -132,7 +135,7 @@ class SageMakerTrainingJobSpec(
|
|||
),
|
||||
output_data_config=InputValidator(
|
||||
input_type=SpecInputParsers.yaml_or_json_dict,
|
||||
description="Specifies the path to the S3 location where you want to store model artifacts. Amazon SageMaker crea",
|
||||
description="Specifies the path to the S3 location where you want to store model artifacts. SageMaker creates sub",
|
||||
required=True,
|
||||
),
|
||||
profiler_config=InputValidator(
|
||||
|
|
@ -150,9 +153,14 @@ class SageMakerTrainingJobSpec(
|
|||
description="The resources, including the ML compute instances and ML storage volumes, to use for model training.",
|
||||
required=True,
|
||||
),
|
||||
retry_strategy=InputValidator(
|
||||
input_type=SpecInputParsers.yaml_or_json_dict,
|
||||
description="The number of times to retry the job when the job fails due to an InternalServerError.",
|
||||
required=False,
|
||||
),
|
||||
role_arn=InputValidator(
|
||||
input_type=str,
|
||||
description="The Amazon Resource Name (ARN) of an IAM role that Amazon SageMaker can assume to perform tasks on y",
|
||||
description="The Amazon Resource Name (ARN) of an IAM role that SageMaker can assume to perform tasks on your beh",
|
||||
required=True,
|
||||
),
|
||||
stopping_condition=InputValidator(
|
||||
|
|
@ -202,11 +210,17 @@ class SageMakerTrainingJobSpec(
|
|||
profiler_rule_evaluation_statuses=OutputValidator(
|
||||
description="Evaluation status of Debugger rules for profiling on a training job.",
|
||||
),
|
||||
profiling_status=OutputValidator(
|
||||
description="Profiling status of a training job.",
|
||||
),
|
||||
secondary_status=OutputValidator(
|
||||
description="Provides detailed information about the state of the training job. For detailed information on the s",
|
||||
),
|
||||
training_job_status=OutputValidator(
|
||||
description="The status of the training job. Amazon SageMaker provides the following training job statuses:",
|
||||
description="The status of the training job. SageMaker provides the following training job statuses: * InProg",
|
||||
),
|
||||
warm_pool_status=OutputValidator(
|
||||
description="The status of the warm pool associated with the training job.",
|
||||
),
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ ROBOMAKER_EXECUTION_ROLE_ARN=${ROBOMAKER_EXECUTION_ROLE_ARN:-""}
|
|||
|
||||
SKIP_FSX_TESTS=${SKIP_FSX_TESTS:-"false"}
|
||||
|
||||
ACK_RELEASE_VERSION=${ACK_RELEASE_VERSION:-"v0.4.3"}
|
||||
ACK_RELEASE_VERSION=${ACK_RELEASE_VERSION:-"v0.5.0"}
|
||||
HELM_EXPERIMENTAL_OCI=1
|
||||
SERVICE=sagemaker
|
||||
CHART_EXPORT_PATH=/tmp/chart
|
||||
|
|
|
|||
Loading…
Reference in New Issue