syntax = "proto3";
package ml_pipelines;

import "google/protobuf/any.proto";
import "google/protobuf/struct.proto";

// The spec of a pipeline job.
message PipelineJob {
  string name = 1;  // Name of the job.

  // User friendly display name.
  string display_name = 2;

  reserved 3, 4, 5, 6;

  // Definition of the pipeline that is being executed.
  google.protobuf.Struct pipeline_spec = 7;

  reserved 8, 9, 10;

  // The labels with user-defined metadata to organize PipelineJob.
  map<string, string> labels = 11;

  // The runtime config of a PipelineJob.
  message RuntimeConfig {
    // The runtime parameters of the PipelineJob. The parameters will be
    // passed into [PipelineJob.pipeline_spec][] to replace the placeholders
    // at runtime.
    map<string, Value> parameters = 1;

    // A path in a Cloud Storage bucket which will be treated as the root
    // output directory of the pipeline. It is used by the system to
    // generate the paths of output artifacts.
    // This is a GCP-specific optimization.
    string gcs_output_directory = 2;
  }

  // Runtime config of the pipeline.
  RuntimeConfig runtime_config = 12;
}

// The spec of a pipeline.
message PipelineSpec {
  // The metadata of the pipeline.
  PipelineInfo pipeline_info = 1;

  // A list of pipeline tasks, which form a DAG.
  // Deprecated, use [PipelineSpec.root][] instead.
  repeated PipelineTaskSpec tasks = 2 [deprecated = true];

  // The deployment config of the pipeline.
  // The deployment config can be extended to provide platform specific
  // configs. The supported config is [PipelineDeploymentConfig]().
  // Deprecated in favor of deployment_spec.
  google.protobuf.Any deployment_config = 3 [deprecated = true];

  // The deployment config of the pipeline.
  // The deployment config can be extended to provide platform specific
  // configs.
  google.protobuf.Struct deployment_spec = 7;

  // The version of the SDK which compiled the spec.
  string sdk_version = 4;

  // The version of the schema.
  string schema_version = 5;

  // The definition of the runtime parameter.
  message RuntimeParameter {
    // Required field. The type of the runtime parameter.
    PrimitiveType.PrimitiveTypeEnum type = 1;
    // Optional field. Default value of the runtime parameter. If not set and
    // the runtime parameter value is not provided during runtime, an error
    // will be raised.
    Value default_value = 2;
  }

  // The runtime parameters of the pipeline. Keyed by parameter name.
  // Deprecated; instead of defining runtime_parameters, users can define the
  // [ComponentSpec.input_definitions][] field of [PipelineSpec.root][] to
  // define the pipeline parameters.
  map<string, RuntimeParameter> runtime_parameters = 6 [deprecated = true];

  // The map of name to definition of all components used in this pipeline.
  map<string, ComponentSpec> components = 8;

  // The definition of the main pipeline. Execution of the pipeline is
  // completed upon the completion of this component.
  ComponentSpec root = 9;
}

// Definition of a component.
message ComponentSpec {
  // Definition of the input parameters and artifacts of the component.
  ComponentInputsSpec input_definitions = 1;
  // Definition of the output parameters and artifacts of the component.
  ComponentOutputsSpec output_definitions = 2;
  // Either a DAG or a single execution.
  oneof implementation {
    DagSpec dag = 3;
    string executor_label = 4;
  }
}

// A DAG contains multiple tasks.
message DagSpec {
  // The tasks inside the dag.
  map<string, PipelineTaskSpec> tasks = 1;

  // Defines how the outputs of the dag are linked to the sub tasks.
  DagOutputsSpec outputs = 2;
}
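// An illustrative sketch (not part of the schema) of how a root DAG
// component might reference components and wire two tasks together, shown
// in the same YAML-like form as the examples elsewhere in this file. Names
// such as `preprocess`, `trainer`, and `comp-trainer` are hypothetical:
// ```
// root:
//   dag:
//     tasks:
//       preprocess:
//         task_info: {name: preprocess}
//         component_ref: {name: comp-preprocess}
//       trainer:
//         task_info: {name: trainer}
//         component_ref: {name: comp-trainer}
//         dependent_tasks: [preprocess]
// components:
//   comp-preprocess: {executor_label: preprocess-executor}
//   comp-trainer: {executor_label: trainer-executor}
// ```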
// Definition of the output artifacts and parameters of the DAG component.
message DagOutputsSpec {
  // Selects a defined output artifact from a sub task of the DAG.
  message ArtifactSelectorSpec {
    // The name of the sub task which produces the output that matches with
    // the `output_artifact_key`.
    string producer_subtask = 1;
    // The key of [ComponentOutputsSpec.artifacts][] map of the producer task.
    string output_artifact_key = 2;
  }

  // Selects a list of output artifacts that will be aggregated to the single
  // output artifact channel of the DAG.
  message DagOutputArtifactSpec {
    // The selected artifacts will be aggregated as output as a single
    // output channel of the DAG.
    repeated ArtifactSelectorSpec artifact_selectors = 1;
  }

  // Name to the output artifact channel of the DAG.
  map<string, DagOutputArtifactSpec> artifacts = 1;

  // Selects a defined output parameter from a sub task of the DAG.
  message ParameterSelectorSpec {
    // The name of the sub task which produces the output that matches with
    // the `output_parameter_key`.
    string producer_subtask = 1;
    // The key of [ComponentOutputsSpec.parameters][] map of the producer task.
    string output_parameter_key = 2;
  }

  // Aggregates output parameters from sub tasks into a list object.
  message ParameterSelectorsSpec {
    repeated ParameterSelectorSpec parameter_selectors = 1;
  }

  // Aggregates output parameters from sub tasks into a map object.
  message MapParameterSelectorsSpec {
    map<string, ParameterSelectorSpec> mapped_parameters = 2;
  }

  // We support four ways to fan-in output parameters from sub tasks to the
  // DAG parent task.
  // 1. Directly expose a single output parameter from a sub task.
  // 2. (Conditional flow) Expose a list of outputs from multiple tasks
  //    (some might be skipped), but allow only one of the outputs to be
  //    generated.
  // 3. Expose a list of outputs from multiple tasks (e.g. iterator flow).
  // 4. Expose the aggregation of output parameters as a name-value map.
  message DagOutputParameterSpec {
    oneof kind {
      // Returns the sub-task parameter as a DAG parameter. The selected
      // parameter must have the same type as the DAG parameter type.
      ParameterSelectorSpec value_from_parameter = 1;
      // Returns one of the sub-task parameters as a DAG parameter. If
      // multiple values are available to select, the DAG will fail. All the
      // selected parameters must have the same type as the DAG parameter
      // type.
      ParameterSelectorsSpec value_from_oneof = 2;
    }
  }

  // The name to the output parameter.
  map<string, DagOutputParameterSpec> parameters = 2;
}

// Definition specification of the component input parameters and artifacts.
message ComponentInputsSpec {
  // Definition of an artifact input.
  message ArtifactSpec {
    ArtifactTypeSchema artifact_type = 1;
  }

  // Definition of a parameter input.
  message ParameterSpec {
    PrimitiveType.PrimitiveTypeEnum type = 1;
  }

  // Name to artifact input.
  map<string, ArtifactSpec> artifacts = 1;

  // Name to parameter input.
  map<string, ParameterSpec> parameters = 2;
}

// Definition specification of the component output parameters and artifacts.
message ComponentOutputsSpec {
  // Definition of an artifact output.
  message ArtifactSpec {
    ArtifactTypeSchema artifact_type = 1;
    map<string, ValueOrRuntimeParameter> properties = 2;
    map<string, ValueOrRuntimeParameter> custom_properties = 3;
  }

  // Definition of a parameter output.
  message ParameterSpec {
    PrimitiveType.PrimitiveTypeEnum type = 1;
  }

  // Name to artifact output.
  map<string, ArtifactSpec> artifacts = 1;

  // Name to parameter output.
  map<string, ParameterSpec> parameters = 2;
}
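// An illustrative sketch (not part of the schema) of component input and
// output definitions for a hypothetical training component, in the same
// YAML-like form as the examples in this file. The names `learning_rate`,
// `examples`, `model`, and the type `acme.Dataset.v1` are hypothetical:
// ```
// input_definitions:
//   parameters:
//     learning_rate: {type: DOUBLE}
//   artifacts:
//     examples: {artifact_type: {schema_title: acme.Dataset.v1}}
// output_definitions:
//   artifacts:
//     model: {artifact_type: {schema_title: aiplatform.Model.v1}}
// ```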
// The spec of task inputs.
message TaskInputsSpec {
  // The specification of a task input artifact.
  message InputArtifactSpec {
    // The name of the upstream task which produces the output that matches
    // with the `output_artifact_key`.
    // Deprecated, use
    // [TaskInputsSpec.InputArtifactSpec.TaskOutputArtifactSpec][] instead.
    string producer_task = 1 [deprecated = true];
    // The key of [TaskOutputsSpec.artifacts][] map of the producer task.
    // Deprecated, use
    // [TaskInputsSpec.InputArtifactSpec.TaskOutputArtifactSpec][] instead.
    string output_artifact_key = 2 [deprecated = true];

    message TaskOutputArtifactSpec {
      // The name of the upstream task which produces the output that matches
      // with the `output_artifact_key`.
      string producer_task = 1;
      // The key of [TaskOutputsSpec.artifacts][] map of the producer task.
      string output_artifact_key = 2;
    }

    oneof kind {
      // Pass the input artifact from another task within the same parent
      // component.
      TaskOutputArtifactSpec task_output_artifact = 3;
      // Pass the input artifact from parent component input artifact.
      string component_input_artifact = 4;
    }
  }

  // Represents an input parameter. The value can be taken from an upstream
  // task's output parameter (if specifying `producer_task` and
  // `output_parameter_key`), or it can be a runtime value, which can either
  // be determined at compile time or come from a pipeline parameter.
  message InputParameterSpec {
    // Represents an upstream task's output parameter.
    message TaskOutputParameterSpec {
      // The name of the upstream task which produces the output parameter
      // that matches with the `output_parameter_key`.
      string producer_task = 1;
      // The key of [TaskOutputsSpec.parameters][] map of the producer task.
      string output_parameter_key = 2;
    }

    oneof kind {
      // Output parameter from an upstream task.
      TaskOutputParameterSpec task_output_parameter = 1;
      // A constant value or runtime parameter.
      ValueOrRuntimeParameter runtime_value = 2;
      // Pass the input parameter from parent component input parameter.
      string component_input_parameter = 3;
    }
  }

  // A map of input parameters which are small values, stored by the system
  // and can be queried.
  map<string, InputParameterSpec> parameters = 1;
  // A map of input artifacts.
  map<string, InputArtifactSpec> artifacts = 2;
}

// The spec of task outputs.
message TaskOutputsSpec {
  // The specification of a task output artifact.
  message OutputArtifactSpec {
    // The type of the artifact.
    ArtifactTypeSchema artifact_type = 1;

    // The properties of the artifact, which are determined either at
    // compile time, or at pipeline submission time through runtime
    // parameters.
    map<string, ValueOrRuntimeParameter> properties = 2;

    // The custom properties of the artifact, which are determined either at
    // compile time, or at pipeline submission time through runtime
    // parameters.
    map<string, ValueOrRuntimeParameter> custom_properties = 3;
  }

  // Specification for output parameters produced by the task.
  message OutputParameterSpec {
    // Required field. The type of the output parameter.
    PrimitiveType.PrimitiveTypeEnum type = 1;
  }

  // A map of output parameters which are small values, stored by the system
  // and can be queried. The output key is used by
  // [TaskInputsSpec.InputParameterSpec][] of the downstream task to specify
  // the data dependency. The same key will also be used by
  // [ExecutorInput.Inputs][] to reference the output parameter.
  map<string, OutputParameterSpec> parameters = 1;
  // A map of output artifacts. Keyed by output key. The output key is used
  // by [TaskInputsSpec.InputArtifactSpec][] of the downstream task to
  // specify the data dependency. The same key will also be used by
  // [ExecutorInput.Inputs][] to reference the output artifact.
  map<string, OutputArtifactSpec> artifacts = 2;
}
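// An illustrative sketch (not part of the schema) of task inputs that read
// an upstream task's outputs, in the same YAML-like form as the examples in
// this file. The task names and keys (`tuner`, `best_num_steps`,
// `preprocess`, `processed_examples`) are hypothetical:
// ```
// inputs:
//   parameters:
//     num_steps:
//       task_output_parameter:
//         producer_task: tuner
//         output_parameter_key: best_num_steps
//   artifacts:
//     examples:
//       task_output_artifact:
//         producer_task: preprocess
//         output_artifact_key: processed_examples
// ```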
// Represents primitive types. The wrapper is needed to give a namespace to
// the enum values so we don't need to add a `PRIMITIVE_TYPE_` prefix to each
// enum value.
message PrimitiveType {
  // The primitive types.
  enum PrimitiveTypeEnum {
    PRIMITIVE_TYPE_UNSPECIFIED = 0;
    INT = 1;
    DOUBLE = 2;
    STRING = 3;
  }
}

// The spec of a pipeline task.
message PipelineTaskSpec {
  // Basic info of a pipeline task.
  PipelineTaskInfo task_info = 1;

  // Specification for task inputs which contains parameters and artifacts.
  TaskInputsSpec inputs = 2;

  // Specification for task outputs.
  // Deprecated, the output definition is moved to [ComponentSpec.outputs][].
  TaskOutputsSpec outputs = 3 [deprecated = true];

  // Label for the executor of the task.
  // The specification will be specified in the deployment config.
  // For example:
  // ```
  // tasks:
  // - task_info:
  //     name: trainer
  //   executor_label: trainer
  // deployment_config:
  //   @type: cloud.ml.pipelines.v1alpha3.proto.PipelineDeploymentConfig
  //   executors:
  //     trainer:
  //       container:
  //         image: gcr.io/tfx:latest
  //         args: []
  // ```
  // Deprecated, the executor_label is moved to
  // [ComponentSpec.executor_label][].
  string executor_label = 4 [deprecated = true];

  // A list of names of upstream tasks that do not provide input artifacts
  // for this task, but whose completion this task nonetheless depends on.
  repeated string dependent_tasks = 5;

  message CachingOptions {
    // Whether or not to enable cache for this task. Defaults to false.
    bool enable_cache = 1;
  }
  CachingOptions caching_options = 6;

  // Reference to a component. Use this field to define either a DAG or an
  // executor.
  ComponentRef component_ref = 7;
}

message ComponentRef {
  // The name of a component. Refer to the key of the
  // [PipelineSpec.components][] map.
  string name = 1;
}

// Basic info of a pipeline.
message PipelineInfo {
  // Required field. The name of the pipeline.
  // The name will be used to create or find pipeline context in MLMD.
  string name = 1;
}

// The definition of an artifact type in MLMD.
message ArtifactTypeSchema {
  oneof kind {
    // The name of the type. The format of the title must be:
    // `<namespace>.<title>.<version>`.
    // Examples:
    //  - `aiplatform.Model.v1`
    //  - `acme.CustomModel.v2`
    // When this field is set, the type must be pre-registered in the MLMD
    // store.
    string schema_title = 1;

    // Points to a YAML file stored on Google Cloud Storage describing the
    // format.
    string schema_uri = 2;

    // Contains a raw YAML string, describing the format of
    // the properties of the type.
    string instance_schema = 3;
  }
}

// The basic info of a task.
message PipelineTaskInfo {
  // The unique name of the task within the pipeline definition. This name
  // will be used in downstream tasks to indicate task and data dependencies.
  string name = 1;
}

// Definition for a value or reference to a runtime parameter. A
// ValueOrRuntimeParameter instance can be either a field value that is
// determined during compilation time, or a runtime parameter which will be
// determined during runtime.
message ValueOrRuntimeParameter {
  oneof value {
    // Constant value which is determined at compile time.
    Value constant_value = 1;
    // Name of the runtime parameter.
    string runtime_parameter = 2;
  }
}
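// An illustrative sketch (not part of the schema): the same input can be
// bound either to a compile-time constant or to a runtime parameter, shown
// in the YAML-like form used elsewhere in this file. The parameter name
// `learning_rate` and the value are hypothetical:
// ```
// # Constant, fixed at compile time:
// runtime_value: {constant_value: {double_value: 0.001}}
// # Or resolved from a runtime parameter at submission time:
// runtime_value: {runtime_parameter: learning_rate}
// ```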
// The definition of the deployment config of the pipeline. It contains the
// platform-specific executor configs for KFP OSS.
message PipelineDeploymentConfig {
  // The specification on a container invocation.
  // The string fields of the message support the string based placeholder
  // contract defined in [ExecutorInput](). The output of the container
  // follows the contract of [ExecutorOutput]().
  message PipelineContainerSpec {
    // The image uri of the container.
    string image = 1;
    // The main entrypoint commands of the container to run. If not provided,
    // falls back to the entrypoint command defined in the container image.
    repeated string command = 2;
    // The arguments to pass into the main entrypoint of the container.
    repeated string args = 3;

    // The lifecycle hooks of the container.
    // Each hook follows the same I/O contract as the main container
    // entrypoint.
    // See [ExecutorInput]() and [ExecutorOutput]() for details.
    // (-- TODO(b/165323565): add more documentation on caching and lifecycle
    // hooks. --)
    message Lifecycle {
      // The command and args to execute a program.
      message Exec {
        // The command of the exec program.
        repeated string command = 2;
        // The args of the exec program.
        repeated string args = 3;
      }
      // This hook is invoked before the caching check. It can change the
      // properties of the execution and output artifacts before they are
      // used to compute the cache key. The updated metadata will be passed
      // into the main container entrypoint.
      Exec pre_cache_check = 1;
    }
    // The lifecycle hooks of the container executor.
    Lifecycle lifecycle = 4;

    // The specification on the resource requirements of a container
    // execution. This can include specification of vCPU, memory
    // requirements, as well as accelerator types and counts.
    message ResourceSpec {
      // The limit of the number of vCPU cores. This container execution
      // needs at most cpu_limit vCPU to run.
      double cpu_limit = 1;

      // The memory limit in GB. This container execution needs at most
      // memory_limit RAM to run.
      double memory_limit = 2;

      // The specification on the accelerators being attached to this
      // container.
      message AcceleratorConfig {
        // The type of accelerators.
        string type = 1;
        // The number of accelerators.
        int64 count = 2;
      }
      AcceleratorConfig accelerator = 3;
    }
    ResourceSpec resources = 5;
  }

  // The specification to import or reimport a new artifact to the pipeline.
  message ImporterSpec {
    // The URI of the artifact.
    ValueOrRuntimeParameter artifact_uri = 1;

    // The type of the artifact.
    ArtifactTypeSchema type_schema = 2;

    // The properties of the artifact.
    map<string, ValueOrRuntimeParameter> properties = 3;

    // The custom properties of the artifact.
    map<string, ValueOrRuntimeParameter> custom_properties = 4;

    // Whether or not to import an artifact regardless of whether it has been
    // imported before.
    bool reimport = 5;
  }

  // ResolverSpec resolves artifacts from historical metadata and returns
  // them to the pipeline as output artifacts of the resolver task. The
  // downstream tasks can consume them as their input artifacts.
  message ResolverSpec {
    // The query to fetch artifacts.
    message ArtifactQuerySpec {
      // The filter of the artifact query. The supported syntax is:
      // - `contexts.name='<context name>'`
      // - `artifact_type='<artifact type name>'`
      // - `uri='<uri>'`
      // - `state=<state>`
      // - `properties['key']='value'`
      // - `custom_properties['key']='value'`
      // - `name='value'`
      // - `and` to combine two conditions and return when both are true.
      // If no `contexts.name` filter is set, the query will be scoped to
      // the current pipeline context.
      string filter = 1;

      // The maximum number of the artifacts to be returned from the
      // query. If not defined, the default limit is `1`.
      int32 limit = 2;
    }

    // A list of resolver output definitions. The
    // key of the map must be exactly the same as
    // the keys in the [TaskOutputsSpec.artifacts][] map.
    // At least one output must be defined.
    map<string, ArtifactQuerySpec> output_artifact_queries = 1;
  }
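  // An illustrative sketch (not part of the schema) of a resolver output
  // query using the filter syntax described above. The output key
  // `latest_model`, the type name, and the state value are hypothetical:
  // ```
  // output_artifact_queries:
  //   latest_model:
  //     filter: "artifact_type='aiplatform.Model.v1' and state=LIVE"
  //     limit: 1
  // ```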
  message AIPlatformCustomJobSpec {
    // API Specification for invoking a Google Cloud AI Platform CustomJob.
    // The fields must match the field names and structures of CustomJob
    // defined in
    // https://cloud.google.com/ai-platform-unified/docs/reference/rest/v1beta1/projects.locations.customJobs.
    // The field types must be either the same, or be a string containing the
    // string based placeholder contract defined in [ExecutorInput](). The
    // placeholders will be replaced with the actual values at runtime before
    // the job is launched.
    google.protobuf.Struct custom_job = 1;
  }

  // The specification of the executor.
  message ExecutorSpec {
    oneof spec {
      // Starts a container.
      PipelineContainerSpec container = 1;
      // Imports an artifact.
      ImporterSpec importer = 2;
      // Resolves an existing artifact.
      ResolverSpec resolver = 3;
      // Starts a Google Cloud AI Platform CustomJob.
      AIPlatformCustomJobSpec custom_job = 4;
    }
  }
  // Map from executor label to executor spec.
  map<string, ExecutorSpec> executors = 1;
}

// Value is the value of the field.
message Value {
  oneof value {
    // An integer value.
    int64 int_value = 1;
    // A double value.
    double double_value = 2;
    // A string value.
    string string_value = 3;
  }
}

// The definition of a runtime artifact.
message RuntimeArtifact {
  // The name of an artifact.
  string name = 1;

  // The type of the artifact.
  ArtifactTypeSchema type = 2;

  // The URI of the artifact.
  string uri = 3;

  // The properties of the artifact.
  map<string, Value> properties = 4;

  // The custom properties of the artifact.
  map<string, Value> custom_properties = 5;
}

// Message that represents a list of artifacts.
message ArtifactList {
  // A list of artifacts.
  repeated RuntimeArtifact artifacts = 1;
}

// The input of an executor, which includes all the data that
// can be passed into the executor spec by a string based placeholder.
//
// The string based placeholder uses a JSON path to reference the data
// in the [ExecutorInput]().
//
// `{{$}}`: prints the full [ExecutorInput]() as a JSON string.
// `{{$.inputs.artifacts['<name>'].uri}}`: prints the URI of an input
// artifact.
// `{{$.inputs.artifacts['<name>'].properties['<property name>']}}`: prints
// the property of an input artifact.
// `{{$.inputs.parameters['<name>']}}`: prints the value of an input
// parameter.
// `{{$.outputs.artifacts['<name>'].uri}}`: prints the URI of an output
// artifact.
// `{{$.outputs.artifacts['<name>'].properties['<property name>']}}`: prints
// the property of an output artifact.
// `{{$.outputs.parameters['<name>'].output_file}}`: prints a file path which
// points to a file the container can write to in order to return the value
// of the parameter.
// `{{$.outputs.output_file}}`: prints a file path of the output metadata
// file, which is used to send output metadata from the executor to the
// orchestrator. The contract of the output metadata is [ExecutorOutput]().
// When both the parameter output files and the executor output metadata
// file are set by the container, the output metadata file takes precedence
// when setting output parameters.
message ExecutorInput {
  // The runtime inputs data of the execution.
  message Inputs {
    // Input parameters of the execution.
    map<string, Value> parameters = 1;
    // Input artifacts of the execution.
    map<string, ArtifactList> artifacts = 2;
  }

  // The runtime input artifacts of the task invocation.
  Inputs inputs = 1;

  // The runtime output parameter.
  message OutputParameter {
    // The file path which is used by the executor to pass the parameter
    // value to the system.
    string output_file = 1;
  }
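  // An illustrative sketch (not part of the schema): given the placeholder
  // contract above, hypothetical container args such as
  // ```
  // args: ["--lr", "{{$.inputs.parameters['learning_rate']}}",
  //        "--accuracy_out", "{{$.outputs.parameters['accuracy'].output_file}}"]
  // ```
  // would be expanded at runtime so that the container receives the resolved
  // parameter value and a writable file path; writing a value to that path
  // returns the hypothetical `accuracy` output parameter to the system.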
  // The runtime outputs data of the execution.
  message Outputs {
    // The runtime output parameters.
    map<string, OutputParameter> parameters = 1;
    // The runtime output artifacts.
    map<string, ArtifactList> artifacts = 2;
    // The file path of the full output metadata JSON. The schema of the
    // output file is [ExecutorOutput][].
    //
    // When the full output metadata file is set by the container, the output
    // parameter files will be ignored.
    string output_file = 3;
  }

  // The runtime output artifacts of the task invocation.
  Outputs outputs = 2;
}

// The schema of the output metadata of an execution. It will be used to
// parse the output metadata file.
message ExecutorOutput {
  // The values for output parameters.
  map<string, Value> parameters = 1;
  // The updated metadata for output artifacts.
  map<string, ArtifactList> artifacts = 2;
}
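// An illustrative sketch (not part of the schema) of an output metadata file
// a container might write to `{{$.outputs.output_file}}`, assuming the
// standard proto3 JSON mapping of [ExecutorOutput](). The parameter name,
// artifact name, and URI are hypothetical:
// ```
// {
//   "parameters": {"accuracy": {"doubleValue": 0.92}},
//   "artifacts": {
//     "model": {
//       "artifacts": [{"name": "model", "uri": "gs://my-bucket/model"}]
//     }
//   }
// }
// ```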