examples/titanic-kaggle-competition/titanic-kaggle-competition-...

190 lines
7.1 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import kfp\n",
"from kfp import dsl\n",
"\n",
"def preprocess_op():\n",
"    \"\"\"Create the 'Preprocess Data' pipeline step.\n",
"\n",
"    The container is launched with an empty argument list and declares two\n",
"    file outputs: ``train_pickle`` (``/app/train``) and ``test_pickle``\n",
"    (``/app/test``).\n",
"\n",
"    Returns:\n",
"        The configured ``dsl.ContainerOp``.\n",
"    \"\"\"\n",
"    produced_files = {\n",
"        'train_pickle': '/app/train',\n",
"        'test_pickle': '/app/test',\n",
"    }\n",
"    step = dsl.ContainerOp(\n",
"        name='Preprocess Data',\n",
"        image='hubdocker76/titanic-pre-process-data:v9',\n",
"        arguments=[],\n",
"        file_outputs=produced_files,\n",
"    )\n",
"    return step\n",
"\n",
"def featureengineering_op(train_pickle, test_pickle):\n",
"    \"\"\"Create the 'featureengineering' pipeline step.\n",
"\n",
"    Args:\n",
"        train_pickle: Value handed to the container after ``--train_pickle``.\n",
"        test_pickle: Value handed to the container after ``--test_pickle``.\n",
"\n",
"    Returns:\n",
"        A ``dsl.ContainerOp`` declaring the file outputs ``train_pickle_out``\n",
"        (``/app/train_v2``) and ``train_label_out`` (``/app/train_label_v2``).\n",
"    \"\"\"\n",
"    cli_args = ['--train_pickle', train_pickle]\n",
"    cli_args += ['--test_pickle', test_pickle]\n",
"    produced_files = {\n",
"        'train_pickle_out': '/app/train_v2',\n",
"        'train_label_out': '/app/train_label_v2',\n",
"    }\n",
"    return dsl.ContainerOp(\n",
"        name='featureengineering',\n",
"        image='hubdocker76/titanic-feature-engineering:v8',\n",
"        arguments=cli_args,\n",
"        file_outputs=produced_files,\n",
"    )\n",
"\n",
"def regression_op(train_pickle_out, train_label_out):\n",
"    \"\"\"Create the 'regression' pipeline step.\n",
"\n",
"    Args:\n",
"        train_pickle_out: Value handed to the container after ``--train_pickle``.\n",
"        train_label_out: Value handed to the container after ``--train_label``.\n",
"\n",
"    Returns:\n",
"        A ``dsl.ContainerOp`` whose ``regression_acc`` output is read from\n",
"        ``/app/regression_acc.txt``.\n",
"    \"\"\"\n",
"    cli_args = ['--train_pickle', train_pickle_out]\n",
"    cli_args += ['--train_label', train_label_out]\n",
"    return dsl.ContainerOp(\n",
"        name='regression',\n",
"        image='hubdocker76/titanic-logistic-regression:v5',\n",
"        arguments=cli_args,\n",
"        file_outputs={'regression_acc': '/app/regression_acc.txt'},\n",
"    )\n",
"\n",
"def bayes_op(train_pickle_out, train_label_out):\n",
"    \"\"\"Create the 'bayes' pipeline step.\n",
"\n",
"    Args:\n",
"        train_pickle_out: Value handed to the container after ``--train_pickle``.\n",
"        train_label_out: Value handed to the container after ``--train_label``.\n",
"\n",
"    Returns:\n",
"        A ``dsl.ContainerOp`` whose ``bayes_acc`` output is read from\n",
"        ``/app/bayes_acc.txt``.\n",
"    \"\"\"\n",
"    cli_args = ['--train_pickle', train_pickle_out]\n",
"    cli_args += ['--train_label', train_label_out]\n",
"    return dsl.ContainerOp(\n",
"        name='bayes',\n",
"        image='hubdocker76/titanic-bayes:v6',\n",
"        arguments=cli_args,\n",
"        file_outputs={'bayes_acc': '/app/bayes_acc.txt'},\n",
"    )\n",
"\n",
"def random_forest_op(train_pickle_out, train_label_out):\n",
"    \"\"\"Create the 'random_forest' pipeline step.\n",
"\n",
"    Args:\n",
"        train_pickle_out: Value handed to the container after ``--train_pickle``.\n",
"        train_label_out: Value handed to the container after ``--train_label``.\n",
"\n",
"    Returns:\n",
"        A ``dsl.ContainerOp`` whose ``random_forest_acc`` output is read from\n",
"        ``/app/random_forest_acc.txt``.\n",
"    \"\"\"\n",
"    cli_args = ['--train_pickle', train_pickle_out]\n",
"    cli_args += ['--train_label', train_label_out]\n",
"    return dsl.ContainerOp(\n",
"        name='random_forest',\n",
"        image='hubdocker76/titanic-randomforest:v4',\n",
"        arguments=cli_args,\n",
"        file_outputs={'random_forest_acc': '/app/random_forest_acc.txt'},\n",
"    )\n",
"\n",
"def decision_tree_op(train_pickle_out, train_label_out):\n",
"    \"\"\"Create the 'decision_tree' pipeline step.\n",
"\n",
"    Args:\n",
"        train_pickle_out: Value handed to the container after ``--train_pickle``.\n",
"        train_label_out: Value handed to the container after ``--train_label``.\n",
"\n",
"    Returns:\n",
"        A ``dsl.ContainerOp`` whose ``decision_tree_acc`` output is read from\n",
"        ``/app/decision_tree_acc.txt``.\n",
"    \"\"\"\n",
"    cli_args = ['--train_pickle', train_pickle_out]\n",
"    cli_args += ['--train_label', train_label_out]\n",
"    return dsl.ContainerOp(\n",
"        name='decision_tree',\n",
"        image='hubdocker76/titanic-decision-tree:v1',\n",
"        arguments=cli_args,\n",
"        file_outputs={'decision_tree_acc': '/app/decision_tree_acc.txt'},\n",
"    )\n",
"\n",
"def svm_op(train_pickle_out, train_label_out):\n",
"    \"\"\"Create the 'svm' pipeline step.\n",
"\n",
"    Args:\n",
"        train_pickle_out: Value handed to the container after ``--train_pickle``.\n",
"        train_label_out: Value handed to the container after ``--train_label``.\n",
"\n",
"    Returns:\n",
"        A ``dsl.ContainerOp`` whose ``svm_acc`` output is read from\n",
"        ``/app/svm_acc.txt``.\n",
"    \"\"\"\n",
"    cli_args = ['--train_pickle', train_pickle_out]\n",
"    cli_args += ['--train_label', train_label_out]\n",
"    return dsl.ContainerOp(\n",
"        name='svm',\n",
"        image='hubdocker76/titanic-svm:v2',\n",
"        arguments=cli_args,\n",
"        file_outputs={'svm_acc': '/app/svm_acc.txt'},\n",
"    )\n",
"\n",
"def result_model_op(bayes_acc, regression_acc, random_forest_acc, decision_tree_acc, svm_acc):\n",
"    \"\"\"Create the 'results' pipeline step.\n",
"\n",
"    Each of the five ``*_acc`` parameters is forwarded to the container\n",
"    behind the command-line flag of the same name (``--bayes_acc``,\n",
"    ``--regression_acc``, ...). The step declares no file outputs.\n",
"\n",
"    Returns:\n",
"        The configured ``dsl.ContainerOp``.\n",
"    \"\"\"\n",
"    flag_values = (\n",
"        ('--bayes_acc', bayes_acc),\n",
"        ('--regression_acc', regression_acc),\n",
"        ('--random_forest_acc', random_forest_acc),\n",
"        ('--decision_tree_acc', decision_tree_acc),\n",
"        ('--svm_acc', svm_acc),\n",
"    )\n",
"    # Flatten the pairs into the flat [flag, value, flag, value, ...] list\n",
"    # that the container receives.\n",
"    cli_args = []\n",
"    for flag, value in flag_values:\n",
"        cli_args.extend((flag, value))\n",
"    return dsl.ContainerOp(\n",
"        name='results',\n",
"        image='hubdocker76/titanic-results:v9',\n",
"        arguments=cli_args,\n",
"    )\n",
"\n",
"@dsl.pipeline(\n",
"    name='Titanic',\n",
"    description='Kubeflow pipeline of kaggle Titanic competition '\n",
")\n",
"def boston_pipeline():\n",
"    # Wiring: preprocess -> feature engineering -> five model steps -> results.\n",
"    # The function name is kept as-is because the compile call at the end of\n",
"    # this cell refers to `boston_pipeline`.\n",
"    preprocess_step = preprocess_op().add_pod_label(\"kaggle-secret\", \"true\")\n",
"\n",
"    features_step = featureengineering_op(\n",
"        dsl.InputArgumentPath(preprocess_step.outputs['train_pickle']),\n",
"        dsl.InputArgumentPath(preprocess_step.outputs['test_pickle']),\n",
"    ).after(preprocess_step)\n",
"\n",
"    def training_inputs():\n",
"        # Every model step consumes the same two feature-engineering outputs;\n",
"        # build a fresh pair of InputArgumentPath wrappers for each consumer.\n",
"        return (\n",
"            dsl.InputArgumentPath(features_step.outputs['train_pickle_out']),\n",
"            dsl.InputArgumentPath(features_step.outputs['train_label_out']),\n",
"        )\n",
"\n",
"    # The five model steps all hang off the feature-engineering step.\n",
"    regression_step = regression_op(*training_inputs()).after(features_step)\n",
"    bayes_step = bayes_op(*training_inputs()).after(features_step)\n",
"    random_forest_step = random_forest_op(*training_inputs()).after(features_step)\n",
"    decision_tree_step = decision_tree_op(*training_inputs()).after(features_step)\n",
"    svm_step = svm_op(*training_inputs()).after(features_step)\n",
"\n",
"    # The results step takes each model's accuracy output and runs last.\n",
"    results_step = result_model_op(\n",
"        dsl.InputArgumentPath(bayes_step.outputs['bayes_acc']),\n",
"        dsl.InputArgumentPath(regression_step.outputs['regression_acc']),\n",
"        dsl.InputArgumentPath(random_forest_step.outputs['random_forest_acc']),\n",
"        dsl.InputArgumentPath(decision_tree_step.outputs['decision_tree_acc']),\n",
"        dsl.InputArgumentPath(svm_step.outputs['svm_acc']),\n",
"    )\n",
"    results_step.after(\n",
"        regression_step,\n",
"        bayes_step,\n",
"        random_forest_step,\n",
"        decision_tree_step,\n",
"        svm_step,\n",
"    )\n",
"\n",
"# client = kfp.Client()\n",
"# client.create_run_from_pipeline_func(boston_pipeline, arguments={})\n",
"\n",
"if __name__ == '__main__':\n",
"    import os\n",
"    import kfp.compiler as compiler\n",
"\n",
"    # Compile the pipeline to a YAML package next to the source file.\n",
"    # `__file__` is not defined when this cell runs inside a Jupyter kernel\n",
"    # (while `__name__` is still '__main__'), so look it up defensively and\n",
"    # fall back to a fixed file name instead of raising NameError.\n",
"    source_path = globals().get('__file__')\n",
"    if source_path:\n",
"        # splitext drops the real extension rather than assuming it is\n",
"        # exactly three characters long.\n",
"        package_path = os.path.splitext(source_path)[0] + '.yaml'\n",
"    else:\n",
"        package_path = 'titanic_pipeline.yaml'\n",
"    compiler.Compiler().compile(boston_pipeline, package_path)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}