examples/named_entity_recognition/notebooks/Pipeline.ipynb

272 lines
7.5 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Named Entity Recognition pipeline"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"EXPERIMENT_NAME = 'named-entity-recognition'\n",
"BUCKET = \"your-bucket-name\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"import kfp\n",
"from kfp import compiler\n",
"import kfp.components as comp\n",
"import kfp.dsl as dsl\n",
"from kfp import gcp"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load components"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on function preprocess:\n",
"\n",
"preprocess(input_1_uri:'GCSPath', output_x_uri_template:'GCSPath', output_y_uri_template:'GCSPath', output_preprocessing_state_uri_template:'GCSPath')\n",
" Performs the IOB preprocessing.\n",
"\n",
"Help on function train:\n",
"\n",
"train(input_x_uri:'GCSPath', input_y_uri:'GCSPath', input_job_dir_uri:'GCSPath', input_tags:'Integer', input_words:'Integer', input_dropout, output_model_uri_template:'GCSPath')\n",
" Trains the NER Bi-LSTM.\n",
"\n",
"Help on function deploy:\n",
"\n",
"deploy(model_path:'GCSPath', model_name:'String', model_region:'String', model_version:'String', model_runtime_version:'String', model_prediction_class:'String', model_python_version:'String', model_package_uris:'String')\n",
" Deploy the model with custom prediction route\n",
"\n"
]
}
],
"source": [
"# Each component.yaml is fetched from the bucket configured in BUCKET, loaded as a\n",
"# task factory, and its generated signature is printed with help().\n",
"\n",
"# Preprocessing component\n",
"preprocess_operation = comp.load_component_from_url(\n",
"    'https://storage.googleapis.com/{}/components/preprocess/component.yaml'.format(BUCKET)\n",
")\n",
"help(preprocess_operation)\n",
"\n",
"# Training component\n",
"train_operation = comp.load_component_from_url(\n",
"    'https://storage.googleapis.com/{}/components/train/component.yaml'.format(BUCKET)\n",
")\n",
"help(train_operation)\n",
"\n",
"# Deployment component\n",
"ai_platform_deploy_operation = comp.load_component_from_url(\n",
"    'https://storage.googleapis.com/{}/components/deploy/component.yaml'.format(BUCKET)\n",
")\n",
"help(ai_platform_deploy_operation)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build the Pipeline"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"@dsl.pipeline(\n",
"    name='Named Entity Recognition Pipeline',\n",
"    description='Performs preprocessing, training and deployment.'\n",
")\n",
"def pipeline():\n",
"    \"\"\"Chain the preprocess, train and deploy components into one pipeline.\n",
"\n",
"    The train task consumes the preprocess task's outputs, and the deploy task\n",
"    consumes the train task's output. Every task has\n",
"    ``kfp.gcp.use_gcp_secret('user-gcp-sa')`` applied. Takes no parameters.\n",
"    \"\"\"\n",
"    # NOTE(review): str.format() collapses '{{workflow.uid}}' to '{workflow.uid}' in the\n",
"    # URIs below. If the Argo placeholder '{{workflow.uid}}' is what is intended, the\n",
"    # braces would need to be doubled once more -- confirm before changing.\n",
"\n",
"    # Preprocess the raw CSV; x, y and the preprocessing state go to the bucket.\n",
"    preprocess_task = preprocess_operation(\n",
"        input_1_uri='gs://kubeflow-examples-data/named_entity_recognition_dataset/ner.csv',\n",
"        output_y_uri_template=\"gs://{}/{{workflow.uid}}/preprocess/y/data\".format(BUCKET),\n",
"        output_x_uri_template=\"gs://{}/{{workflow.uid}}/preprocess/x/data\".format(BUCKET),\n",
"        output_preprocessing_state_uri_template=\"gs://{}/{{workflow.uid}}/model\".format(BUCKET)\n",
"    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))\n",
"\n",
"    # Train on the preprocess outputs; the model is written to the same '/model'\n",
"    # URI template that the preprocessing state uses above.\n",
"    train_task = train_operation(\n",
"        input_x_uri=preprocess_task.outputs['output-x-uri'],\n",
"        input_y_uri=preprocess_task.outputs['output-y-uri'],\n",
"        input_job_dir_uri=\"gs://{}/{{workflow.uid}}/job\".format(BUCKET),\n",
"        input_tags=preprocess_task.outputs['output-tags'],\n",
"        input_words=preprocess_task.outputs['output-words'],\n",
"        input_dropout=0.1,\n",
"        output_model_uri_template=\"gs://{}/{{workflow.uid}}/model\".format(BUCKET)\n",
"    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))\n",
"\n",
"    # Deploy the trained model together with the custom prediction routine package.\n",
"    deploy_task = ai_platform_deploy_operation(\n",
"        model_path=train_task.output,\n",
"        model_name=\"named_entity_recognition_kubeflow\",\n",
"        model_region=\"us-central1\",\n",
"        model_version=\"version1\",\n",
"        model_runtime_version=\"1.13\",\n",
"        model_prediction_class=\"model_prediction.CustomModelPrediction\",\n",
"        model_python_version=\"3.5\",\n",
"        model_package_uris=\"gs://{}/routine/custom_prediction_routine-0.2.tar.gz\".format(BUCKET)\n",
"    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compile the Pipeline"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"# Compile the pipeline function into the package file that the run cell submits.\n",
"# `compiler` is already imported in the Imports cell near the top of the notebook,\n",
"# so it is not re-imported here.\n",
"pipeline_func = pipeline\n",
"pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'\n",
"\n",
"compiler.Compiler().compile(pipeline_func, pipeline_filename)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a Kubeflow Experiment"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'created_at': datetime.datetime(2019, 7, 5, 10, 32, 13, tzinfo=tzlocal()),\n",
" 'description': None,\n",
" 'id': '84e88563-7774-4bae-aa33-4a67649c136a',\n",
" 'name': 'named-entity-recognition'}\n"
]
}
],
"source": [
"client = kfp.Client()\n",
"\n",
"# Look the experiment up by name; if the lookup fails for any reason, create it.\n",
"# `except Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit propagate.\n",
"try:\n",
"    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)\n",
"except Exception:\n",
"    experiment = client.create_experiment(EXPERIMENT_NAME)\n",
"\n",
"print(experiment)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run the Pipeline"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Run link <a href=\"/pipeline/#/runs/details/705a2bc2-9f1c-11e9-9120-42010a800045\" target=\"_blank\" >here</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"84e88563-7774-4bae-aa33-4a67649c136a\n",
"pipeline run\n",
"pipeline.pipeline.zip\n",
"{}\n"
]
}
],
"source": [
"# pipeline() declares no parameters, so the argument mapping is empty.\n",
"arguments = dict()\n",
"run_name = pipeline_func.__name__ + ' run'\n",
"\n",
"# Submit the compiled package as a run of the experiment.\n",
"run_result = client.run_pipeline(\n",
"    experiment.id,\n",
"    run_name,\n",
"    pipeline_filename,\n",
"    arguments,\n",
")\n",
"\n",
"# Echo the submission details, one value per line.\n",
"print(experiment.id, run_name, pipeline_filename, arguments, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}