fix(sdk): add default value for inputs (#7405)
* fix(sdk): add default value for inputs * merge conflict * release * fix sample
This commit is contained in:
parent
09ca1c2ec2
commit
04123280a4
|
|
@ -40,6 +40,7 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
|
|||
{
|
||||
'inputs': {
|
||||
'parameters': {
|
||||
'empty_message': '',
|
||||
'message': 'message',
|
||||
}
|
||||
},
|
||||
|
|
@ -61,8 +62,8 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
|
|||
'parameters': {
|
||||
'output_bool_parameter_path': True,
|
||||
'output_dict_parameter_path': {
|
||||
"A": 1,
|
||||
"B": 2
|
||||
"A": 1.0,
|
||||
"B": 2.0
|
||||
},
|
||||
'output_list_parameter_path': ["a", "b", "c"],
|
||||
'output_parameter_path': 'message'
|
||||
|
|
@ -96,7 +97,8 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
|
|||
"B": 2.0,
|
||||
},
|
||||
'input_list': ["a", "b", "c"],
|
||||
'message': 'message'
|
||||
'message': 'message',
|
||||
'num_steps': 100.0
|
||||
}
|
||||
},
|
||||
'name': 'train',
|
||||
|
|
|
|||
|
|
@ -83,6 +83,8 @@
|
|||
* Depends on `typing-extensions>=3.7.4,<5; python_version<"3.9"` [\#7288](https://github.com/kubeflow/pipelines/pull/7288)
|
||||
* Depends on `google-api-core>=1.31.5, >=2.3.2` [\#7377](https://github.com/kubeflow/pipelines/pull/7377)
|
||||
* Fix bug that required KFP API server for `kfp components build` command to work [\#7430](https://github.com/kubeflow/pipelines/pull/7430)
|
||||
* Pass default value for inputs and remove deprecated items in v1 [\#7405](https://github.com/kubeflow/pipelines/pull/7405)
|
||||
|
||||
|
||||
## Documentation Updates
|
||||
|
||||
|
|
|
|||
|
|
@ -13,8 +13,7 @@
|
|||
# limitations under the License.
|
||||
"""KFP DSL compiler.
|
||||
|
||||
This is an experimental implementation of KFP compiler that compiles KFP
|
||||
pipeline into Pipeline IR:
|
||||
Implementation of KFP compiler that compiles KFP pipeline into Pipeline IR:
|
||||
https://docs.google.com/document/d/1PUDuSQ8vmeKSBloli53mp7GIvzekaY7sggg6ywy35Dk/
|
||||
"""
|
||||
import collections
|
||||
|
|
|
|||
|
|
@ -727,6 +727,37 @@ class TestWriteToFileTypes(parameterized.TestCase):
|
|||
finally:
|
||||
shutil.rmtree(tmpdir)
|
||||
|
||||
def test_compile_pipeline_with_default_value(self):
    """Compile a pipeline whose component declares default input values.

    The component declares three inputs: `location` (string default),
    `name` (integer default) and `noDefault` (no default). The pipeline
    only passes `location`, overriding it with "1". The test checks that
    compilation produces the output file and that the declared default
    for `location` is present in the compiled text.
    """
    producer_op = components.load_component_from_text("""
      name: producer
      inputs:
      - {name: location, type: String, default: 'us-central1'}
      - {name: name, type: Integer, default: 1}
      - {name: noDefault, type: String}
      implementation:
        container:
          image: gcr.io/my-project/my-image:tag
          args:
          - {inputValue: location}
      """)

    @dsl.pipeline(name='test-pipeline')
    def simple_pipeline():
        # The task object itself is not needed; instantiating the op is
        # enough to register the task with the pipeline.
        producer_op(location="1")

    # TemporaryDirectory removes the directory on exit, even if an
    # assertion below fails.
    with tempfile.TemporaryDirectory() as tmpdir:
        target_json_file = os.path.join(tmpdir, 'result.json')
        compiler.Compiler().compile(
            pipeline_func=simple_pipeline, package_path=target_json_file)

        self.assertTrue(os.path.exists(target_json_file))
        with open(target_json_file, 'r') as f:
            compiled_spec = f.read()

    # `location` is overridden with "1" at the call site, so the literal
    # 'us-central1' can only reach the output through the component's
    # declared default value.
    # NOTE(review): assumes the default is serialized verbatim into the
    # compiled file - confirm against the compiler's output format.
    self.assertIn('us-central1', compiled_spec)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -339,13 +339,17 @@ def build_component_spec_for_task(
|
|||
continue
|
||||
|
||||
# Skip inputs that are neither passed to the task nor given a default value, as a workaround to support optional inputs.
|
||||
if input_name not in task.inputs:
|
||||
if input_name not in task.inputs and input_spec.default is None:
|
||||
continue
|
||||
|
||||
if type_utils.is_parameter_type(input_spec.type):
|
||||
component_spec.input_definitions.parameters[
|
||||
input_name].parameter_type = type_utils.get_parameter_type(
|
||||
input_spec.type)
|
||||
if input_spec.default is not None:
|
||||
component_spec.input_definitions.parameters[
|
||||
input_name].default_value.CopyFrom(_to_protobuf_value(input_spec.default))
|
||||
|
||||
else:
|
||||
component_spec.input_definitions.artifacts[
|
||||
input_name].artifact_type.CopyFrom(
|
||||
|
|
@ -503,7 +507,6 @@ def build_component_spec_for_group(
|
|||
input_name].parameter_type = type_utils.get_parameter_type(
|
||||
channel.channel_type)
|
||||
|
||||
# TODO: should we fill in default value for all groups and tasks?
|
||||
if is_root_group:
|
||||
_fill_in_component_input_default_value(
|
||||
component_spec=component_spec,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,11 @@ deploymentSpec:
|
|||
exec-preprocess:
|
||||
container:
|
||||
image: python:3.7
|
||||
args:
|
||||
- --executor_input
|
||||
- '{{$}}'
|
||||
- --function_to_execute
|
||||
- preprocess
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
|
|
@ -51,14 +56,13 @@ deploymentSpec:
|
|||
\ 'w') as f:\n f.write(json.dumps(input_dict_parameter))\n\n with\
|
||||
\ open(output_list_parameter_path, 'w') as f:\n f.write(json.dumps(input_list_parameter))\n\
|
||||
\n"
|
||||
exec-train:
|
||||
container:
|
||||
args:
|
||||
- --executor_input
|
||||
- '{{$}}'
|
||||
- --function_to_execute
|
||||
- preprocess
|
||||
exec-train:
|
||||
container:
|
||||
image: python:3.7
|
||||
- train
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
|
|
@ -101,40 +105,36 @@ deploymentSpec:
|
|||
\ Model artifact, which has a .metadata dictionary\n # to store arbitrary\
|
||||
\ metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\
|
||||
\n"
|
||||
args:
|
||||
- --executor_input
|
||||
- '{{$}}'
|
||||
- --function_to_execute
|
||||
- train
|
||||
image: python:3.7
|
||||
components:
|
||||
comp-preprocess:
|
||||
inputDefinitions:
|
||||
parameters:
|
||||
message:
|
||||
parameterType: STRING
|
||||
input_dict_parameter:
|
||||
parameterType: STRUCT
|
||||
message:
|
||||
parameterType: STRING
|
||||
input_list_parameter:
|
||||
parameterType: LIST
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
output_dataset_one:
|
||||
artifactType:
|
||||
schemaTitle: system.Dataset
|
||||
schemaVersion: 0.0.1
|
||||
output_dataset_two_path:
|
||||
artifactType:
|
||||
schemaTitle: system.Dataset
|
||||
schemaVersion: 0.0.1
|
||||
output_dataset_one:
|
||||
artifactType:
|
||||
schemaTitle: system.Dataset
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
output_parameter_path:
|
||||
parameterType: STRING
|
||||
output_bool_parameter_path:
|
||||
parameterType: BOOLEAN
|
||||
output_dict_parameter_path:
|
||||
parameterType: STRUCT
|
||||
output_list_parameter_path:
|
||||
parameterType: LIST
|
||||
output_parameter_path:
|
||||
parameterType: STRING
|
||||
output_dict_parameter_path:
|
||||
parameterType: STRUCT
|
||||
executorLabel: exec-preprocess
|
||||
comp-train:
|
||||
inputDefinitions:
|
||||
|
|
@ -148,12 +148,15 @@ components:
|
|||
schemaTitle: system.Dataset
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
message:
|
||||
parameterType: STRING
|
||||
input_bool:
|
||||
parameterType: BOOLEAN
|
||||
message:
|
||||
parameterType: STRING
|
||||
input_dict:
|
||||
parameterType: STRUCT
|
||||
num_steps:
|
||||
parameterType: NUMBER_INTEGER
|
||||
defaultValue: 100.0
|
||||
input_list:
|
||||
parameterType: LIST
|
||||
outputDefinitions:
|
||||
|
|
@ -175,30 +178,15 @@ root:
|
|||
B: 2.0
|
||||
dag:
|
||||
tasks:
|
||||
preprocess:
|
||||
taskInfo:
|
||||
name: preprocess
|
||||
inputs:
|
||||
parameters:
|
||||
message:
|
||||
componentInputParameter: message
|
||||
input_dict_parameter:
|
||||
componentInputParameter: input_dict
|
||||
input_list_parameter:
|
||||
runtimeValue:
|
||||
constant:
|
||||
- a
|
||||
- b
|
||||
- c
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-preprocess
|
||||
train:
|
||||
taskInfo:
|
||||
name: train
|
||||
inputs:
|
||||
parameters:
|
||||
input_list:
|
||||
taskOutputParameter:
|
||||
producerTask: preprocess
|
||||
outputParameterKey: output_list_parameter_path
|
||||
message:
|
||||
taskOutputParameter:
|
||||
producerTask: preprocess
|
||||
|
|
@ -211,23 +199,38 @@ root:
|
|||
taskOutputParameter:
|
||||
producerTask: preprocess
|
||||
outputParameterKey: output_dict_parameter_path
|
||||
input_list:
|
||||
taskOutputParameter:
|
||||
producerTask: preprocess
|
||||
outputParameterKey: output_list_parameter_path
|
||||
artifacts:
|
||||
dataset_one_path:
|
||||
taskOutputArtifact:
|
||||
producerTask: preprocess
|
||||
outputArtifactKey: output_dataset_one
|
||||
dataset_two:
|
||||
taskOutputArtifact:
|
||||
producerTask: preprocess
|
||||
outputArtifactKey: output_dataset_two_path
|
||||
dataset_one_path:
|
||||
taskOutputArtifact:
|
||||
producerTask: preprocess
|
||||
outputArtifactKey: output_dataset_one
|
||||
dependentTasks:
|
||||
- preprocess
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-train
|
||||
preprocess:
|
||||
taskInfo:
|
||||
name: preprocess
|
||||
inputs:
|
||||
parameters:
|
||||
input_list_parameter:
|
||||
runtimeValue:
|
||||
constant:
|
||||
- a
|
||||
- b
|
||||
- c
|
||||
message:
|
||||
componentInputParameter: message
|
||||
input_dict_parameter:
|
||||
componentInputParameter: input_dict
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-preprocess
|
||||
defaultPipelineRoot: dummy_root
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ deploymentSpec:
|
|||
executors:
|
||||
exec-component-op:
|
||||
container:
|
||||
image: python:3.7
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
|
|
@ -29,6 +28,7 @@ deploymentSpec:
|
|||
\ {input1}, type: {type(input1)}')\n print(f'input2: {input2}, type:\
|
||||
\ {type(input2)}')\n print(f'input3: {input3}, type: {type(input3)}')\n\
|
||||
\n"
|
||||
image: python:3.7
|
||||
args:
|
||||
- --executor_input
|
||||
- '{{$}}'
|
||||
|
|
@ -38,10 +38,11 @@ components:
|
|||
comp-component-op:
|
||||
inputDefinitions:
|
||||
parameters:
|
||||
input1:
|
||||
parameterType: STRING
|
||||
input2:
|
||||
parameterType: STRING
|
||||
input1:
|
||||
parameterType: STRING
|
||||
defaultValue: default value
|
||||
executorLabel: exec-component-op
|
||||
root:
|
||||
dag:
|
||||
|
|
@ -51,12 +52,12 @@ root:
|
|||
name: component-op
|
||||
inputs:
|
||||
parameters:
|
||||
input1:
|
||||
runtimeValue:
|
||||
constant: Hello
|
||||
input2:
|
||||
runtimeValue:
|
||||
constant: World
|
||||
input1:
|
||||
runtimeValue:
|
||||
constant: Hello
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
|
|
|
|||
|
|
@ -4,26 +4,62 @@ sdkVersion: kfp-2.0.0-alpha.1
|
|||
schemaVersion: 2.1.0
|
||||
deploymentSpec:
|
||||
executors:
|
||||
exec-chicago-taxi-trips-dataset:
|
||||
exec-xgboost-predict-2:
|
||||
container:
|
||||
image: byrnedo/alpine-curl@sha256:548379d0a4a0c08b9e55d9d87a592b7d35d9ab3037f4936f5ccd09d0b625a342
|
||||
image: python:3.7
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --model
|
||||
- '{{$.inputs.artifacts[''model''].path}}'
|
||||
- --label-column-name
|
||||
- '{{$.inputs.parameters[''label_column_name'']}}'
|
||||
- --predictions
|
||||
- '{{$.outputs.artifacts[''predictions''].path}}'
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "set -e -x -o pipefail\noutput_path=\"$0\"\nselect=\"$1\"\nwhere=\"$2\"\n\
|
||||
limit=\"$3\"\nformat=\"$4\"\nmkdir -p \"$(dirname \"$output_path\")\"\n\
|
||||
curl --get 'https://data.cityofchicago.org/resource/wrvz-psew.'\"${format}\"\
|
||||
\ \\\n --data-urlencode '$limit='\"${limit}\" \\\n --data-urlencode\
|
||||
\ '$where='\"${where}\" \\\n --data-urlencode '$select='\"${select}\"\
|
||||
\ \\\n | tr -d '\"' > \"$output_path\" # Removing unneeded quotes around\
|
||||
\ all numbers\n"
|
||||
- '{{$.outputs.artifacts[''table''].path}}'
|
||||
- '{{$.inputs.parameters[''select'']}}'
|
||||
- '{{$.inputs.parameters[''where'']}}'
|
||||
- '{{$.inputs.parameters[''limit'']}}'
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
|
||||
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n \
|
||||
\ predictions_path,\n label_column_name = None,\n):\n '''Make predictions\
|
||||
\ using a trained XGBoost model.\n\n Args:\n data_path: Path for\
|
||||
\ the feature data in Apache Parquet format.\n model_path: Path for\
|
||||
\ the trained model in binary XGBoost format.\n predictions_path:\
|
||||
\ Output path for the predictions.\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
|
||||
\ import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n\
|
||||
\ if label_column_name:\n df = df.drop(columns=[label_column_name])\n\
|
||||
\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n \
|
||||
\ # Training\n model = xgboost.Booster(model_file=model_path)\n\n \
|
||||
\ predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True,\
|
||||
\ exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport\
|
||||
\ argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make\
|
||||
\ predictions using a trained XGBoost model.\\n\\n Args:\\n data_path:\
|
||||
\ Path for the feature data in Apache Parquet format.\\n model_path:\
|
||||
\ Path for the trained model in binary XGBoost format.\\n predictions_path:\
|
||||
\ Output path for the predictions.\\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
|
||||
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
|
||||
--label-column-name\", dest=\"label_column_name\", type=str, required=False,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"\
|
||||
predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_predict(**_parsed_args)\n"
|
||||
exec-xgboost-train:
|
||||
container:
|
||||
image: python:3.7
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
|
|
@ -129,9 +165,116 @@ deploymentSpec:
|
|||
- '{{$.outputs.artifacts[''model''].path}}'
|
||||
- --model-config
|
||||
- '{{$.outputs.artifacts[''model_config''].path}}'
|
||||
exec-xgboost-predict:
|
||||
image: python:3.7
|
||||
exec-xgboost-predict-3:
|
||||
container:
|
||||
image: python:3.7
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
|
||||
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n \
|
||||
\ predictions_path,\n label_column_name = None,\n):\n '''Make predictions\
|
||||
\ using a trained XGBoost model.\n\n Args:\n data_path: Path for\
|
||||
\ the feature data in Apache Parquet format.\n model_path: Path for\
|
||||
\ the trained model in binary XGBoost format.\n predictions_path:\
|
||||
\ Output path for the predictions.\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
|
||||
\ import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n\
|
||||
\ if label_column_name:\n df = df.drop(columns=[label_column_name])\n\
|
||||
\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n \
|
||||
\ # Training\n model = xgboost.Booster(model_file=model_path)\n\n \
|
||||
\ predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True,\
|
||||
\ exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport\
|
||||
\ argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make\
|
||||
\ predictions using a trained XGBoost model.\\n\\n Args:\\n data_path:\
|
||||
\ Path for the feature data in Apache Parquet format.\\n model_path:\
|
||||
\ Path for the trained model in binary XGBoost format.\\n predictions_path:\
|
||||
\ Output path for the predictions.\\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
|
||||
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
|
||||
--label-column-name\", dest=\"label_column_name\", type=str, required=False,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"\
|
||||
predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_predict(**_parsed_args)\n"
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --model
|
||||
- '{{$.inputs.artifacts[''model''].path}}'
|
||||
- --label-column-name
|
||||
- '{{$.inputs.parameters[''label_column_name'']}}'
|
||||
- --predictions
|
||||
- '{{$.outputs.artifacts[''predictions''].path}}'
|
||||
exec-chicago-taxi-trips-dataset:
|
||||
container:
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "set -e -x -o pipefail\noutput_path=\"$0\"\nselect=\"$1\"\nwhere=\"$2\"\n\
|
||||
limit=\"$3\"\nformat=\"$4\"\nmkdir -p \"$(dirname \"$output_path\")\"\n\
|
||||
curl --get 'https://data.cityofchicago.org/resource/wrvz-psew.'\"${format}\"\
|
||||
\ \\\n --data-urlencode '$limit='\"${limit}\" \\\n --data-urlencode\
|
||||
\ '$where='\"${where}\" \\\n --data-urlencode '$select='\"${select}\"\
|
||||
\ \\\n | tr -d '\"' > \"$output_path\" # Removing unneeded quotes around\
|
||||
\ all numbers\n"
|
||||
- '{{$.outputs.artifacts[''table''].path}}'
|
||||
- '{{$.inputs.parameters[''select'']}}'
|
||||
- '{{$.inputs.parameters[''where'']}}'
|
||||
- '{{$.inputs.parameters[''limit'']}}'
|
||||
image: byrnedo/alpine-curl@sha256:548379d0a4a0c08b9e55d9d87a592b7d35d9ab3037f4936f5ccd09d0b625a342
|
||||
exec-convert-csv-to-apache-parquet:
|
||||
container:
|
||||
image: python:3.7
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --output-data
|
||||
- '{{$.outputs.artifacts[''output_data''].path}}'
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
|
||||
--quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef convert_csv_to_apache_parquet(\n data_path,\n output_data_path,\n\
|
||||
):\n '''Converts CSV table to Apache Parquet.\n\n [Apache Parquet](https://parquet.apache.org/)\n\
|
||||
\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pyarrow import csv, parquet\n\n table = csv.read_csv(data_path)\n\
|
||||
\ parquet.write_table(table, output_data_path)\n\nimport argparse\n_parser\
|
||||
\ = argparse.ArgumentParser(prog='Convert csv to apache parquet', description='Converts\
|
||||
\ CSV table to Apache Parquet.\\n\\n [Apache Parquet](https://parquet.apache.org/)\\\
|
||||
n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--output-data\", dest=\"\
|
||||
output_data_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = convert_csv_to_apache_parquet(**_parsed_args)\n\
|
||||
\n_output_serializers = [\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n\
|
||||
\ try:\n os.makedirs(os.path.dirname(output_file))\n except\
|
||||
\ OSError:\n pass\n with open(output_file, 'w') as f:\n \
|
||||
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
|
||||
exec-xgboost-predict-4:
|
||||
container:
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
|
|
@ -171,6 +314,7 @@ deploymentSpec:
|
|||
_parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path,\
|
||||
\ required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_predict(**_parsed_args)\n"
|
||||
image: python:3.7
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
|
|
@ -180,45 +324,21 @@ deploymentSpec:
|
|||
- '{{$.inputs.parameters[''label_column'']}}'
|
||||
- --predictions
|
||||
- '{{$.outputs.artifacts[''predictions''].path}}'
|
||||
exec-convert-csv-to-apache-parquet:
|
||||
container:
|
||||
image: python:3.7
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
|
||||
--quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef convert_csv_to_apache_parquet(\n data_path,\n output_data_path,\n\
|
||||
):\n '''Converts CSV table to Apache Parquet.\n\n [Apache Parquet](https://parquet.apache.org/)\n\
|
||||
\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pyarrow import csv, parquet\n\n table = csv.read_csv(data_path)\n\
|
||||
\ parquet.write_table(table, output_data_path)\n\nimport argparse\n_parser\
|
||||
\ = argparse.ArgumentParser(prog='Convert csv to apache parquet', description='Converts\
|
||||
\ CSV table to Apache Parquet.\\n\\n [Apache Parquet](https://parquet.apache.org/)\\\
|
||||
n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--output-data\", dest=\"\
|
||||
output_data_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = convert_csv_to_apache_parquet(**_parsed_args)\n\
|
||||
\n_output_serializers = [\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n\
|
||||
\ try:\n os.makedirs(os.path.dirname(output_file))\n except\
|
||||
\ OSError:\n pass\n with open(output_file, 'w') as f:\n \
|
||||
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --output-data
|
||||
- '{{$.outputs.artifacts[''output_data''].path}}'
|
||||
exec-xgboost-train-2:
|
||||
container:
|
||||
image: python:3.7
|
||||
args:
|
||||
- --training-data
|
||||
- '{{$.inputs.artifacts[''training_data''].path}}'
|
||||
- --label-column-name
|
||||
- '{{$.inputs.parameters[''label_column_name'']}}'
|
||||
- --num-iterations
|
||||
- '{{$.inputs.parameters[''num_iterations'']}}'
|
||||
- --objective
|
||||
- '{{$.inputs.parameters[''objective'']}}'
|
||||
- --model
|
||||
- '{{$.outputs.artifacts[''model''].path}}'
|
||||
- --model-config
|
||||
- '{{$.outputs.artifacts[''model_config''].path}}'
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
|
|
@ -311,130 +431,9 @@ deploymentSpec:
|
|||
, dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_train(**_parsed_args)\n"
|
||||
args:
|
||||
- --training-data
|
||||
- '{{$.inputs.artifacts[''training_data''].path}}'
|
||||
- --label-column-name
|
||||
- '{{$.inputs.parameters[''label_column_name'']}}'
|
||||
- --num-iterations
|
||||
- '{{$.inputs.parameters[''num_iterations'']}}'
|
||||
- --objective
|
||||
- '{{$.inputs.parameters[''objective'']}}'
|
||||
- --model
|
||||
- '{{$.outputs.artifacts[''model''].path}}'
|
||||
- --model-config
|
||||
- '{{$.outputs.artifacts[''model_config''].path}}'
|
||||
exec-xgboost-predict-2:
|
||||
container:
|
||||
image: python:3.7
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
|
||||
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n \
|
||||
\ predictions_path,\n label_column_name = None,\n):\n '''Make predictions\
|
||||
\ using a trained XGBoost model.\n\n Args:\n data_path: Path for\
|
||||
\ the feature data in Apache Parquet format.\n model_path: Path for\
|
||||
\ the trained model in binary XGBoost format.\n predictions_path:\
|
||||
\ Output path for the predictions.\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
|
||||
\ import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n\
|
||||
\ if label_column_name:\n df = df.drop(columns=[label_column_name])\n\
|
||||
\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n \
|
||||
\ # Training\n model = xgboost.Booster(model_file=model_path)\n\n \
|
||||
\ predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True,\
|
||||
\ exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport\
|
||||
\ argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make\
|
||||
\ predictions using a trained XGBoost model.\\n\\n Args:\\n data_path:\
|
||||
\ Path for the feature data in Apache Parquet format.\\n model_path:\
|
||||
\ Path for the trained model in binary XGBoost format.\\n predictions_path:\
|
||||
\ Output path for the predictions.\\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
|
||||
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
|
||||
--label-column-name\", dest=\"label_column_name\", type=str, required=False,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"\
|
||||
predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_predict(**_parsed_args)\n"
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --model
|
||||
- '{{$.inputs.artifacts[''model''].path}}'
|
||||
- --label-column-name
|
||||
- '{{$.inputs.parameters[''label_column_name'']}}'
|
||||
- --predictions
|
||||
- '{{$.outputs.artifacts[''predictions''].path}}'
|
||||
exec-xgboost-predict-3:
|
||||
exec-xgboost-predict:
|
||||
container:
|
||||
image: python:3.7
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
|
||||
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n \
|
||||
\ predictions_path,\n label_column_name = None,\n):\n '''Make predictions\
|
||||
\ using a trained XGBoost model.\n\n Args:\n data_path: Path for\
|
||||
\ the feature data in Apache Parquet format.\n model_path: Path for\
|
||||
\ the trained model in binary XGBoost format.\n predictions_path:\
|
||||
\ Output path for the predictions.\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
|
||||
\ import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n\
|
||||
\ if label_column_name:\n df = df.drop(columns=[label_column_name])\n\
|
||||
\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n \
|
||||
\ # Training\n model = xgboost.Booster(model_file=model_path)\n\n \
|
||||
\ predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True,\
|
||||
\ exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport\
|
||||
\ argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make\
|
||||
\ predictions using a trained XGBoost model.\\n\\n Args:\\n data_path:\
|
||||
\ Path for the feature data in Apache Parquet format.\\n model_path:\
|
||||
\ Path for the trained model in binary XGBoost format.\\n predictions_path:\
|
||||
\ Output path for the predictions.\\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
|
||||
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
|
||||
--label-column-name\", dest=\"label_column_name\", type=str, required=False,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"\
|
||||
predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_predict(**_parsed_args)\n"
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --model
|
||||
- '{{$.inputs.artifacts[''model''].path}}'
|
||||
- --label-column-name
|
||||
- '{{$.inputs.parameters[''label_column_name'']}}'
|
||||
- --predictions
|
||||
- '{{$.outputs.artifacts[''predictions''].path}}'
|
||||
exec-xgboost-predict-4:
|
||||
container:
|
||||
image: python:3.7
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
|
|
@ -474,6 +473,7 @@ deploymentSpec:
|
|||
_parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path,\
|
||||
\ required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_predict(**_parsed_args)\n"
|
||||
image: python:3.7
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
|
|
@ -487,12 +487,18 @@ components:
|
|||
comp-chicago-taxi-trips-dataset:
|
||||
inputDefinitions:
|
||||
parameters:
|
||||
where:
|
||||
parameterType: STRING
|
||||
limit:
|
||||
parameterType: NUMBER_INTEGER
|
||||
select:
|
||||
parameterType: STRING
|
||||
defaultValue: trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,payment_type,company,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid_location
|
||||
limit:
|
||||
parameterType: NUMBER_INTEGER
|
||||
defaultValue: '1000'
|
||||
format:
|
||||
parameterType: STRING
|
||||
defaultValue: csv
|
||||
where:
|
||||
parameterType: STRING
|
||||
defaultValue: trip_start_timestamp>="1900-01-01" AND trip_start_timestamp<"2100-01-01"
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
table:
|
||||
|
|
@ -500,52 +506,6 @@ components:
|
|||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-chicago-taxi-trips-dataset
|
||||
comp-xgboost-train:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
training_data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column:
|
||||
parameterType: NUMBER_INTEGER
|
||||
num_iterations:
|
||||
parameterType: NUMBER_INTEGER
|
||||
objective:
|
||||
parameterType: STRING
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model_config:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-xgboost-train
|
||||
comp-xgboost-predict:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column:
|
||||
parameterType: NUMBER_INTEGER
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
predictions:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-xgboost-predict
|
||||
comp-convert-csv-to-apache-parquet:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
|
|
@ -560,7 +520,7 @@ components:
|
|||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-convert-csv-to-apache-parquet
|
||||
comp-xgboost-train-2:
|
||||
comp-xgboost-train:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
training_data:
|
||||
|
|
@ -568,65 +528,38 @@ components:
|
|||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column_name:
|
||||
booster:
|
||||
parameterType: STRING
|
||||
num_iterations:
|
||||
defaultValue: gbtree
|
||||
learning_rate:
|
||||
parameterType: NUMBER_DOUBLE
|
||||
defaultValue: '0.3'
|
||||
max_depth:
|
||||
parameterType: NUMBER_INTEGER
|
||||
defaultValue: '6'
|
||||
objective:
|
||||
parameterType: STRING
|
||||
defaultValue: reg:squarederror
|
||||
num_iterations:
|
||||
parameterType: NUMBER_INTEGER
|
||||
defaultValue: '10'
|
||||
label_column:
|
||||
parameterType: NUMBER_INTEGER
|
||||
defaultValue: '0'
|
||||
min_split_loss:
|
||||
parameterType: NUMBER_DOUBLE
|
||||
defaultValue: '0'
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model_config:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-xgboost-train-2
|
||||
comp-xgboost-predict-2:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column_name:
|
||||
parameterType: STRING
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
predictions:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-xgboost-predict-2
|
||||
comp-xgboost-predict-3:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column_name:
|
||||
parameterType: STRING
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
predictions:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-xgboost-predict-3
|
||||
executorLabel: exec-xgboost-train
|
||||
comp-xgboost-predict-4:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
|
|
@ -648,53 +581,111 @@ components:
|
|||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-xgboost-predict-4
|
||||
comp-xgboost-predict-2:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column_name:
|
||||
parameterType: STRING
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
predictions:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-xgboost-predict-2
|
||||
comp-xgboost-train-2:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
training_data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
learning_rate:
|
||||
parameterType: NUMBER_DOUBLE
|
||||
defaultValue: '0.3'
|
||||
objective:
|
||||
parameterType: STRING
|
||||
defaultValue: reg:squarederror
|
||||
min_split_loss:
|
||||
parameterType: NUMBER_DOUBLE
|
||||
defaultValue: '0'
|
||||
max_depth:
|
||||
parameterType: NUMBER_INTEGER
|
||||
defaultValue: '6'
|
||||
num_iterations:
|
||||
parameterType: NUMBER_INTEGER
|
||||
defaultValue: '10'
|
||||
label_column_name:
|
||||
parameterType: STRING
|
||||
booster:
|
||||
parameterType: STRING
|
||||
defaultValue: gbtree
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model_config:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-xgboost-train-2
|
||||
comp-xgboost-predict-3:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column_name:
|
||||
parameterType: STRING
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
predictions:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-xgboost-predict-3
|
||||
comp-xgboost-predict:
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column:
|
||||
parameterType: NUMBER_INTEGER
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
predictions:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
executorLabel: exec-xgboost-predict
|
||||
root:
|
||||
dag:
|
||||
tasks:
|
||||
chicago-taxi-trips-dataset:
|
||||
taskInfo:
|
||||
name: chicago-taxi-trips-dataset
|
||||
inputs:
|
||||
parameters:
|
||||
where:
|
||||
runtimeValue:
|
||||
constant: trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp
|
||||
< "2019-02-01"
|
||||
select:
|
||||
runtimeValue:
|
||||
constant: tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total
|
||||
limit:
|
||||
runtimeValue:
|
||||
constant: 10000.0
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-chicago-taxi-trips-dataset
|
||||
xgboost-train:
|
||||
taskInfo:
|
||||
name: xgboost-train
|
||||
inputs:
|
||||
parameters:
|
||||
label_column:
|
||||
runtimeValue:
|
||||
constant: 0.0
|
||||
objective:
|
||||
runtimeValue:
|
||||
constant: reg:squarederror
|
||||
num_iterations:
|
||||
runtimeValue:
|
||||
constant: 200.0
|
||||
artifacts:
|
||||
training_data:
|
||||
taskOutputArtifact:
|
||||
producerTask: chicago-taxi-trips-dataset
|
||||
outputArtifactKey: table
|
||||
dependentTasks:
|
||||
- chicago-taxi-trips-dataset
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-train
|
||||
xgboost-predict:
|
||||
taskInfo:
|
||||
name: xgboost-predict
|
||||
|
|
@ -719,6 +710,55 @@ root:
|
|||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-predict
|
||||
xgboost-predict-3:
|
||||
taskInfo:
|
||||
name: xgboost-predict-3
|
||||
inputs:
|
||||
parameters:
|
||||
label_column_name:
|
||||
runtimeValue:
|
||||
constant: tips
|
||||
artifacts:
|
||||
model:
|
||||
taskOutputArtifact:
|
||||
producerTask: xgboost-train
|
||||
outputArtifactKey: model
|
||||
data:
|
||||
taskOutputArtifact:
|
||||
producerTask: convert-csv-to-apache-parquet
|
||||
outputArtifactKey: output_data
|
||||
dependentTasks:
|
||||
- convert-csv-to-apache-parquet
|
||||
- xgboost-train
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-predict-3
|
||||
xgboost-train:
|
||||
taskInfo:
|
||||
name: xgboost-train
|
||||
inputs:
|
||||
parameters:
|
||||
num_iterations:
|
||||
runtimeValue:
|
||||
constant: 200.0
|
||||
label_column:
|
||||
runtimeValue:
|
||||
constant: 0.0
|
||||
objective:
|
||||
runtimeValue:
|
||||
constant: reg:squarederror
|
||||
artifacts:
|
||||
training_data:
|
||||
taskOutputArtifact:
|
||||
producerTask: chicago-taxi-trips-dataset
|
||||
outputArtifactKey: table
|
||||
dependentTasks:
|
||||
- chicago-taxi-trips-dataset
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-train
|
||||
convert-csv-to-apache-parquet:
|
||||
taskInfo:
|
||||
name: convert-csv-to-apache-parquet
|
||||
|
|
@ -734,79 +774,6 @@ root:
|
|||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-convert-csv-to-apache-parquet
|
||||
xgboost-train-2:
|
||||
taskInfo:
|
||||
name: xgboost-train-2
|
||||
inputs:
|
||||
parameters:
|
||||
label_column_name:
|
||||
runtimeValue:
|
||||
constant: tips
|
||||
objective:
|
||||
runtimeValue:
|
||||
constant: reg:squarederror
|
||||
num_iterations:
|
||||
runtimeValue:
|
||||
constant: 200.0
|
||||
artifacts:
|
||||
training_data:
|
||||
taskOutputArtifact:
|
||||
producerTask: convert-csv-to-apache-parquet
|
||||
outputArtifactKey: output_data
|
||||
dependentTasks:
|
||||
- convert-csv-to-apache-parquet
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-train-2
|
||||
xgboost-predict-2:
|
||||
taskInfo:
|
||||
name: xgboost-predict-2
|
||||
inputs:
|
||||
parameters:
|
||||
label_column_name:
|
||||
runtimeValue:
|
||||
constant: tips
|
||||
artifacts:
|
||||
data:
|
||||
taskOutputArtifact:
|
||||
producerTask: convert-csv-to-apache-parquet
|
||||
outputArtifactKey: output_data
|
||||
model:
|
||||
taskOutputArtifact:
|
||||
producerTask: xgboost-train-2
|
||||
outputArtifactKey: model
|
||||
dependentTasks:
|
||||
- convert-csv-to-apache-parquet
|
||||
- xgboost-train-2
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-predict-2
|
||||
xgboost-predict-3:
|
||||
taskInfo:
|
||||
name: xgboost-predict-3
|
||||
inputs:
|
||||
parameters:
|
||||
label_column_name:
|
||||
runtimeValue:
|
||||
constant: tips
|
||||
artifacts:
|
||||
data:
|
||||
taskOutputArtifact:
|
||||
producerTask: convert-csv-to-apache-parquet
|
||||
outputArtifactKey: output_data
|
||||
model:
|
||||
taskOutputArtifact:
|
||||
producerTask: xgboost-train
|
||||
outputArtifactKey: model
|
||||
dependentTasks:
|
||||
- convert-csv-to-apache-parquet
|
||||
- xgboost-train
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-predict-3
|
||||
xgboost-predict-4:
|
||||
taskInfo:
|
||||
name: xgboost-predict-4
|
||||
|
|
@ -831,4 +798,72 @@ root:
|
|||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-predict-4
|
||||
chicago-taxi-trips-dataset:
|
||||
taskInfo:
|
||||
name: chicago-taxi-trips-dataset
|
||||
inputs:
|
||||
parameters:
|
||||
limit:
|
||||
runtimeValue:
|
||||
constant: 10000.0
|
||||
where:
|
||||
runtimeValue:
|
||||
constant: trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp
|
||||
< "2019-02-01"
|
||||
select:
|
||||
runtimeValue:
|
||||
constant: tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-chicago-taxi-trips-dataset
|
||||
xgboost-train-2:
|
||||
taskInfo:
|
||||
name: xgboost-train-2
|
||||
inputs:
|
||||
parameters:
|
||||
objective:
|
||||
runtimeValue:
|
||||
constant: reg:squarederror
|
||||
num_iterations:
|
||||
runtimeValue:
|
||||
constant: 200.0
|
||||
label_column_name:
|
||||
runtimeValue:
|
||||
constant: tips
|
||||
artifacts:
|
||||
training_data:
|
||||
taskOutputArtifact:
|
||||
producerTask: convert-csv-to-apache-parquet
|
||||
outputArtifactKey: output_data
|
||||
dependentTasks:
|
||||
- convert-csv-to-apache-parquet
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-train-2
|
||||
xgboost-predict-2:
|
||||
taskInfo:
|
||||
name: xgboost-predict-2
|
||||
inputs:
|
||||
parameters:
|
||||
label_column_name:
|
||||
runtimeValue:
|
||||
constant: tips
|
||||
artifacts:
|
||||
model:
|
||||
taskOutputArtifact:
|
||||
producerTask: xgboost-train-2
|
||||
outputArtifactKey: model
|
||||
data:
|
||||
taskOutputArtifact:
|
||||
producerTask: convert-csv-to-apache-parquet
|
||||
outputArtifactKey: output_data
|
||||
dependentTasks:
|
||||
- convert-csv-to-apache-parquet
|
||||
- xgboost-train-2
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-predict-2
|
||||
defaultPipelineRoot: dummy_root
|
||||
|
|
|
|||
Loading…
Reference in New Issue