pipelines/api/v2alpha1/pipeline_spec.proto

677 lines
24 KiB
Protocol Buffer

syntax = "proto3";
package ml_pipelines;
import "google/protobuf/any.proto";
import "google/protobuf/struct.proto";
// The spec of a pipeline job.
message PipelineJob {
string name = 1; // Name of the job.
// User friendly display name
string display_name = 2;
reserved 3, 4, 5, 6;
// Definition of the pipeline that is being executed.
google.protobuf.Struct pipeline_spec = 7;
reserved 8, 9, 10;
// The labels with user-defined metadata to organize PipelineJob.
map<string, string> labels = 11;
// The runtime config of a PipelineJob.
message RuntimeConfig {
// The runtime parameters of the PipelineJob. The parameters will be
// passed into [PipelineJob.pipeline_spec][] to replace the placeholders
// at runtime.
map<string, Value> parameters = 1;
// A path in a Cloud Storage bucket which will be treated as the root
// output directory of the pipeline. It is used by the system to
// generate the paths of output artifacts.
// This is a GCP-specific optimization.
string gcs_output_directory = 2;
}
// Runtime config of the pipeline.
RuntimeConfig runtime_config = 12;
}
// The spec of a pipeline.
message PipelineSpec {
// The metadata of the pipeline.
PipelineInfo pipeline_info = 1;
// A list of pipeline tasks, which form a DAG.
// Deprecated, use [PipelineSpec.root][] instead.
repeated PipelineTaskSpec tasks = 2 [deprecated = true];
// The deployment config of the pipeline.
// The deployment config can be extended to provide platform specific configs.
// The supported config is [PipelineDeploymentConifg]().
// Deprecated in favor of deployment_spec.
google.protobuf.Any deployment_config = 3 [deprecated = true];
// The deployment config of the pipeline.
// The deployment config can be extended to provide platform specific configs.
google.protobuf.Struct deployment_spec = 7;
// The version of the sdk, which compiles the spec.
string sdk_version = 4;
// The version of the schema.
string schema_version = 5;
// The definition of the runtime parameter.
message RuntimeParameter {
// Required field. The type of the runtime parameter.
PrimitiveType.PrimitiveTypeEnum type = 1;
// Optional field. Default value of the runtime parameter. If not set and
// the runtime parameter value is not provided during runtime, an error will
// be raised.
Value default_value = 2;
}
// The runtime parameters of the pipeline. Keyed by parameter name.
// Deprecated, instead of defining the runtime_parameters, user can define
// [ComponentSpec.input_definitions][] field of the [PipelineSpec.root][] to
// define the pipeline parameters.
map<string, RuntimeParameter> runtime_parameters = 6 [deprecated = true];
// The map of name to definition of all components used in this pipeline.
map<string, ComponentSpec> components = 8;
// The definition of the main pipeline. Execution of the pipeline is
// completed upon the completion of this component.
ComponentSpec root = 9;
}
// Definition of a component.
message ComponentSpec {
// Definition of the input parameters and artifacts of the component.
ComponentInputsSpec input_definitions = 1;
// Definition of the output parameters and artifacts of the component.
ComponentOutputsSpec output_definitions = 2;
// Either a DAG or a single execution.
oneof implementation {
DagSpec dag = 3;
string executor_label = 4;
}
}
// A DAG contains multiple tasks.
message DagSpec {
// The tasks inside the dag.
map<string, PipelineTaskSpec> tasks = 1;
// Defines how the outputs of the dag are linked to the sub tasks.
DagOutputsSpec outputs = 2;
}
// Definition of the output artifacts and parameters of the DAG component.
message DagOutputsSpec {
// Selects a defined output artifact from a sub task of the DAG.
message ArtifactSelectorSpec {
// The name of the sub task which produces the output that matches with
// the `output_artifact_key`.
string producer_subtask = 1;
// The key of [ComponentOutputsSpec.artifacts][] map of the producer task.
string output_artifact_key = 2;
}
// Selects a list of output artifacts that will be aggregated to the single
// output artifact channel of the DAG.
message DagOutputArtifactSpec {
// The selected artifacts will be aggregated as output as a single
// output channel of the DAG.
repeated ArtifactSelectorSpec artifact_selectors = 1;
}
// Name to the output artifact channel of the DAG.
map<string, DagOutputArtifactSpec> artifacts = 1;
// Selects a defined output parameter from a sub task of the DAG.
message ParameterSelectorSpec {
// The name of the sub task which produces the output that matches with
// the `output_parameter_key`.
string producer_subtask = 1;
// The key of [ComponentOutputsSpec.parameters][] map of the producer task.
string output_parameter_key = 2;
}
// Aggregate output parameters from sub tasks into a list object.
message ParameterSelectorsSpec {
repeated ParameterSelectorSpec parameter_selectors = 1;
}
// Aggregates output parameters from sub tasks into a map object.
message MapParameterSelectorsSpec {
map<string, ParameterSelectorSpec> mapped_parameters = 2;
}
// We support four ways to fan-in output parameters from sub tasks to the DAG
// parent task.
// 1. Directly expose a single output parameter from a sub task,
// 2. (Conditional flow) Expose a list of output from multiple tasks
// (some might be skipped) but allows only one of the output being generated.
// 3. Expose a list of outputs from multiple tasks (e.g. iterator flow).
// 4. Expose the aggregation of output parameters as a name-value map.
message DagOutputParameterSpec {
oneof kind {
// Returns the sub-task parameter as a DAG parameter. The selected
// parameter must have the same type as the DAG parameter type.
ParameterSelectorSpec value_from_parameter = 1;
// Returns one of the sub-task parameters as a DAG parameter. If there are
// multiple values are available to select, the DAG will fail. All the
// selected parameters must have the same type as the DAG parameter type.
ParameterSelectorsSpec value_from_oneof = 2;
}
}
// The name to the output parameter.
map<string, DagOutputParameterSpec> parameters = 2;
}
// Definition specification of the component input parameters and artifacts.
message ComponentInputsSpec {
// Definition of an artifact input.
message ArtifactSpec {
ArtifactTypeSchema artifact_type = 1;
}
// Definition of a parameter input.
message ParameterSpec {
PrimitiveType.PrimitiveTypeEnum type = 1;
}
// Name to artifact input.
map<string, ArtifactSpec> artifacts = 1;
// Name to parameter input.
map<string, ParameterSpec> parameters = 2;
}
// Definition specification of the component output parameters and artifacts.
message ComponentOutputsSpec {
// Definition of an artifact output.
message ArtifactSpec {
ArtifactTypeSchema artifact_type = 1;
map<string, ValueOrRuntimeParameter> properties = 2;
map<string, ValueOrRuntimeParameter> custom_properties = 3;
}
// Definition of a parameter output.
message ParameterSpec {
PrimitiveType.PrimitiveTypeEnum type = 1;
}
// Name to artifact output.
map<string, ArtifactSpec> artifacts = 1;
// Name to parameter output.
map<string, ParameterSpec> parameters = 2;
}
// The spec of task inputs.
message TaskInputsSpec {
// The specification of a task input artifact.
message InputArtifactSpec {
// The name of the upstream task which produces the output that matches with
// the `output_artifact_key`.
// Deprecated, use
// [TaskInputSpec.InputArtifactSpec.TaskOutputArtifactSpec][] instead.
string producer_task = 1 [deprecated = true];
// The key of [TaskOutputsSpec.artifacts][] map of the producer task.
// Deprecated, use
// [TaskInputSpec.InputArtifactSpec.TaskOutputArtifactSpec][] instead.
string output_artifact_key = 2 [deprecated = true];
message TaskOutputArtifactSpec {
// The name of the upstream task which produces the output that matches
// with the `output_artifact_key`.
string producer_task = 1;
// The key of [TaskOutputsSpec.artifacts][] map of the producer task.
string output_artifact_key = 2;
}
oneof kind {
// Pass the input artifact from another task within the same parent
// component.
TaskOutputArtifactSpec task_output_artifact = 3;
// Pass the input artifact from parent component input artifact.
string component_input_artifact = 4;
}
}
// Represents an input parameter. The value can be taken from an upstream
// task's output parameter (if specifying `producer_task` and
// `output_parameter_key`, or it can be a runtime value, which can either be
// determined at compile-time, or from a pipeline parameter.
message InputParameterSpec {
// Represents an upstream task's output parameter.
message TaskOutputParameterSpec {
// The name of the upstream task which produces the output parameter that
// matches with the `output_parameter_key`.
string producer_task = 1;
// The key of [TaskOutputsSpec.parameters][] map of the producer task.
string output_parameter_key = 2;
}
oneof kind {
// Output parameter from an upstream task.
TaskOutputParameterSpec task_output_parameter = 1;
// A constant value or runtime parameter.
ValueOrRuntimeParameter runtime_value = 2;
// Pass the input parameter from parent component input parameter.
string component_input_parameter = 3;
}
}
// A map of input parameters which are small values, stored by the system and
// can be queriable.
map<string, InputParameterSpec> parameters = 1;
// A map of input artifacts.
map<string, InputArtifactSpec> artifacts = 2;
}
// The spec of task outputs.
message TaskOutputsSpec {
// The specification of a task output artifact.
message OutputArtifactSpec {
// The type of the artifact.
ArtifactTypeSchema artifact_type = 1;
// The properties of the artifact, which are determined either at
// compile-time, or at pipeline submission time through runtime parameters
map<string, ValueOrRuntimeParameter> properties = 2;
// The custom properties of the artifact, which are determined either at
// compile-time, or at pipeline submission time through runtime parameters
map<string, ValueOrRuntimeParameter> custom_properties = 3;
}
// Specification for output parameters produced by the task.
message OutputParameterSpec {
// Required field. The type of the output parameter.
PrimitiveType.PrimitiveTypeEnum type = 1;
}
// A map of output parameters which are small values, stored by the system and
// can be queriable. The output key is used
// by [TaskInputsSpec.InputParameterSpec][] of the downstream task to specify
// the data dependency. The same key will also be used by
// [ExecutorInput.Inputs][] to reference the output parameter.
map<string, OutputParameterSpec> parameters = 1;
// A map of output artifacts. Keyed by output key. The output key is used
// by [TaskInputsSpec.InputArtifactSpec][] of the downstream task to specify
// the data dependency. The same key will also be used by
// [ExecutorInput.Inputs][] to reference the output artifact.
map<string, OutputArtifactSpec> artifacts = 2;
}
// Represent primitive types. The wrapper is needed to give a namespace of
// enum value so we don't need add `PRIMITIVE_TYPE_` prefix of each enum value.
message PrimitiveType {
// The primitive types.
enum PrimitiveTypeEnum {
PRIMITIVE_TYPE_UNSPECIFIED = 0;
INT = 1;
DOUBLE = 2;
STRING = 3;
}
}
// The spec of a pipeline task.
message PipelineTaskSpec {
// Basic info of a pipeline task.
PipelineTaskInfo task_info = 1;
// Specification for task inputs which contains parameters and artifacts.
TaskInputsSpec inputs = 2;
// Specification for task outputs.
// Deprecated, the output definition is moved to [ComponentSpec.outputs][].
TaskOutputsSpec outputs = 3 [deprecated = true];
// Label for the executor of the task.
// The specification will be specified in the deployment config.
// For example:
// ```
// tasks:
// - task_info:
// name: trainer
// executor_label: trainer
// deployment_config:
// @type: cloud.ml.pipelines.v1alpha3.proto.PipelineDeploymentConfig
// executors:
// trainer:
// container:
// image: gcr.io/tfx:latest
// args: []
// ```
// Deprecated, the executor_label is moved to
// [ComponentSpec.executor_label][].
string executor_label = 4 [deprecated = true];
// A list of names of upstream tasks that do not provide input
// artifacts for this task, but nonetheless whose completion this task depends
// on.
repeated string dependent_tasks = 5;
message CachingOptions {
// Whether or not to enable cache for this task. Defaults to false.
bool enable_cache = 1;
}
CachingOptions caching_options = 6;
// Reference to a component. Use this field to define either a DAG or an
// executor.
ComponentRef component_ref = 7;
}
message ComponentRef {
// The name of a component. Refer to the key of the
// [PipelineSpec.components][] map.
string name = 1;
}
// Basic info of a pipeline.
message PipelineInfo {
// Required field. The name of the pipeline.
// The name will be used to create or find pipeline context in MLMD.
string name = 1;
}
// The definition of a artifact type in MLMD.
message ArtifactTypeSchema {
oneof kind {
// The name of the type. The format of the title must be:
// `<namespace>.<title>.<version>`.
// Examples:
// - `aiplatform.Model.v1`
// - `acme.CustomModel.v2`
// When this field is set, the type must be pre-registered in the MLMD
// store.
string schema_title = 1;
// Points to a YAML file stored on Google Cloud Storage describing the
// format.
string schema_uri = 2;
// Contains a raw YAML string, describing the format of
// the properties of the type.
string instance_schema = 3;
}
}
// The basic info of a task.
message PipelineTaskInfo {
// The unique name of the task within the pipeline definition. This name
// will be used in downstream tasks to indicate task and data dependencies.
string name = 1;
}
// Definition for a value or reference to a runtime parameter. A
// ValueOrRuntimeParameter instance can be either a field value that is
// determined during compilation time, or a runtime parameter which will be
// determined during runtime.
message ValueOrRuntimeParameter {
oneof value {
// Constant value which is determined in compile time.
Value constant_value = 1;
// Name of the runtime parameter.
string runtime_parameter = 2;
}
}
// The definition of the deployment config of the pipeline. It contains the
// the platform specific executor configs for KFP OSS.
message PipelineDeploymentConfig {
// The specification on a container invocation.
// The string fields of the message support string based placeholder contract
// defined in [ExecutorInput](). The output of the container follows the
// contract of [ExecutorOutput]().
message PipelineContainerSpec {
// The image uri of the container.
string image = 1;
// The main entrypoint commands of the container to run. If not provided,
// fallback to use the entry point command defined in the container image.
repeated string command = 2;
// The arguments to pass into the main entrypoint of the container.
repeated string args = 3;
// The lifecycle hooks of the container.
// Each hook follows the same I/O contract as the main container entrypoint.
// See [ExecutorInput]() and [ExecutorOutput]() for details.
// (-- TODO(b/165323565): add more documentation on caching and lifecycle
// hooks. --)
message Lifecycle {
// The command and args to execute a program.
message Exec {
// The command of the exec program.
repeated string command = 2;
// The args of the exec program.
repeated string args = 3;
}
// This hook is invoked before caching check. It can change the properties
// of the execution and output artifacts before they are used to compute
// the cache key. The updated metadata will be passed into the main
// container entrypoint.
Exec pre_cache_check = 1;
}
// The lifecycle hooks of the container executor.
Lifecycle lifecycle = 4;
// The specification on the resource requirements of a container execution.
// This can include specification of vCPU, memory requirements, as well as
// accelerator types and counts.
message ResourceSpec {
// The limit of the number of vCPU cores. This container execution needs
// at most cpu_limit vCPU to run.
double cpu_limit = 1;
// The memory limit in GB. This container execution needs at most
// memory_limit RAM to run.
double memory_limit = 2;
// The specification on the accelerators being attached to this container.
message AcceleratorConfig {
// The type of accelerators.
string type = 1;
// The number of accelerators.
int64 count = 2;
}
AcceleratorConfig accelerator = 3;
}
ResourceSpec resources = 5;
}
// The specification to import or reimport a new artifact to the pipeline.
message ImporterSpec {
// The URI of the artifact.
ValueOrRuntimeParameter artifact_uri = 1;
// The type of the artifact.
ArtifactTypeSchema type_schema = 2;
// The properties of the artifact.
map<string, ValueOrRuntimeParameter> properties = 3;
// The custom properties of the artifact.
map<string, ValueOrRuntimeParameter> custom_properties = 4;
// Whether or not import an artifact regardless it has been imported before.
bool reimport = 5;
}
// ResolverSpec resolves artifacts from historical metadata and returns them
// to the pipeline as output artifacts of the resolver task. The downstream
// tasks can consume them as their input artifacts.
message ResolverSpec {
// The query to fetch artifacts.
message ArtifactQuerySpec {
// The filter of the artifact query. The supported syntax are:
// - `contexts.name='<context name>'`
// - `artifact_type='<artifact type name>'`
// - `uri='<uri>'`
// - `state=<state>`
// - `properties['key']='value'`
// - `custom_properties['key']='value'`
// - `name='value'`
// - `and` to combine two conditions and returns when both are true.
// If no `contexts.name` filter is set, the query will be scoped to the
// the current pipeline context.
string filter = 1;
// The maximum number of the artifacts to be returned from the
// query. If not defined, the default limit is `1`.
int32 limit = 2;
}
// A list of resolver output definitions. The
// key of the map must be exactly the same as
// the keys in the [TaskOutputsSpec.artifacts][] map.
// At least one output must be defined.
map<string, ArtifactQuerySpec> output_artifact_queries = 1;
}
message AIPlatformCustomJobSpec {
// API Specification for invoking a Google Cloud AI Platform CustomJob.
// The fields must match the field names and structures of CustomJob
// defined in
// https://cloud.google.com/ai-platform-unified/docs/reference/rest/v1beta1/projects.locations.customJobs.
// The field types must be either the same, or be a string containing the
// string based placeholder contract defined in [ExecutorInput](). The
// placeholders will be replaced with the actual value during the runtime
// before the job is launched.
google.protobuf.Struct custom_job = 1;
}
// The specification of the executor.
message ExecutorSpec {
oneof spec {
// Starts a container.
PipelineContainerSpec container = 1;
// Import an artifact.
ImporterSpec importer = 2;
// Resolves an existing artifact.
ResolverSpec resolver = 3;
// Starts a Google Cloud AI Platform CustomJob.
AIPlatformCustomJobSpec custom_job = 4;
}
}
// Map from executor label to executor spec.
map<string, ExecutorSpec> executors = 1;
}
// Value is the value of the field.
message Value {
oneof value {
// An integer value
int64 int_value = 1;
// A double value
double double_value = 2;
// A string value
string string_value = 3;
}
}
// The definition of a runtime artifact.
message RuntimeArtifact {
// The name of an artifact.
string name = 1;
// The type of the artifact.
ArtifactTypeSchema type = 2;
// The URI of the artifact.
string uri = 3;
// The properties of the artifact.
map<string, Value> properties = 4;
// The custom properties of the artifact.
map<string, Value> custom_properties = 5;
}
// Message that represents a list of artifacts.
message ArtifactList {
// A list of artifacts.
repeated RuntimeArtifact artifacts = 1;
}
// The input of an executor, which includes all the data that
// can be passed into the executor spec by a string based placeholder.
//
// The string based placeholder uses a JSON path to reference to the data
// in the [ExecutionInput]().
//
// `{{$}}`: prints the full [ExecutorInput]() as a JSON string.
// `{{$.inputs.artifacts['<name>'].uri}}`: prints the URI of an input
// artifact.
// `{{$.inputs.artifacts['<name>'].properties['<property name>']}}`: prints
// the
// property of an input artifact.
// `{{$.inputs.parameters['<name>']}}`: prints the value of an input
// parameter.
// `{{$.outputs.artifacts['<name>'].uri}}: prints the URI of an output artifact.
// `{{$.outputs.artifacts['<name>'].properties['<property name>']}}`: prints the
// property of an output artifact.
// `{{$.outputs.parameters['<name>'].output_file}}`: prints a file path which
// points to a file and container can write to it to return the value of the
// parameter..
// `{{$.outputs.output_file}}`: prints a file path of the output metadata file
// which is used to send output metadata from executor to orchestrator. The
// contract of the output metadata is [ExecutorOutput](). When both parameter
// output file and executor output metadata files are set by the container, the
// output metadata file will have higher precedence to set output parameters.
message ExecutorInput {
// The runtime inputs data of the execution.
message Inputs {
// Input parameters of the execution.
map<string, Value> parameters = 1;
// Input artifacts of the execution.
map<string, ArtifactList> artifacts = 2;
}
// The runtime input artifacts of the task invocation.
Inputs inputs = 1;
// The runtime output parameter.
message OutputParameter {
// The file path which is used by the executor to pass the parameter value
// to the system.
string output_file = 1;
}
// The runtime outputs data of the execution.
message Outputs {
// The runtime output parameters.
map<string, OutputParameter> parameters = 1;
// The runtime output artifacts.
map<string, ArtifactList> artifacts = 2;
// The file path of the full output metadata JSON. The schema of the output
// file is [ExecutorOutput][].
//
// When the full output metadata file is set by the container, the output
// parameter files will be ignored.
string output_file = 3;
}
// The runtime output artifacts of the task invocation.
Outputs outputs = 2;
}
// The schema of the output metadata of an execution. It will be used to parse
// the output metadata file.
message ExecutorOutput {
// The values for output parameters.
map<string, Value> parameters = 1;
// The updated metadata for output artifact.
map<string, ArtifactList> artifacts = 2;
}