{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Named Entity Recognition pipeline\n",
    "\n",
    "Builds, compiles and runs a Kubeflow pipeline that performs preprocessing, training and deployment of a named entity recognition model.\n",
    "\n",
    "Set `BUCKET` below to the Google Cloud Storage bucket that holds the component definitions before running the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "EXPERIMENT_NAME = 'named-entity-recognition'\n",
    "BUCKET = \"your-bucket-name\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "import kfp\n",
    "from kfp import compiler\n",
    "import kfp.components as comp\n",
    "import kfp.dsl as dsl\n",
    "from kfp import gcp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load components"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Help on function preprocess:\n",
      "\n",
      "preprocess(input_1_uri:'GCSPath', output_x_uri_template:'GCSPath', output_y_uri_template:'GCSPath', output_preprocessing_state_uri_template:'GCSPath')\n",
      " Performs the IOB preprocessing.\n",
      "\n",
      "Help on function train:\n",
      "\n",
      "train(input_x_uri:'GCSPath', input_y_uri:'GCSPath', input_job_dir_uri:'GCSPath', input_tags:'Integer', input_words:'Integer', input_dropout, output_model_uri_template:'GCSPath')\n",
      " Trains the NER Bi-LSTM.\n",
      "\n",
      "Help on function deploy:\n",
      "\n",
      "deploy(model_path:'GCSPath', model_name:'String', model_region:'String', model_version:'String', model_runtime_version:'String', model_prediction_class:'String', model_python_version:'String', model_package_uris:'String')\n",
      " Deploy the model with custom prediction route\n",
      "\n"
     ]
    }
   ],
   "source": [
    "def load_component(name):\n",
    "    \"\"\"Load the component.yaml stored under components/<name>/ in BUCKET.\"\"\"\n",
    "    url = 'https://storage.googleapis.com/{}/components/{}/component.yaml'.format(BUCKET, name)\n",
    "    return comp.load_component_from_url(url)\n",
    "\n",
    "\n",
    "preprocess_operation = load_component('preprocess')\n",
    "help(preprocess_operation)\n",
    "\n",
    "train_operation = load_component('train')\n",
    "help(train_operation)\n",
    "\n",
    "ai_platform_deploy_operation = load_component('deploy')\n",
    "help(ai_platform_deploy_operation)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dsl.pipeline(\n",
    "    name='Named Entity Recognition Pipeline',\n",
    "    description='Performs preprocessing, training and deployment.'\n",
    ")\n",
    "def pipeline():\n",
    "    \"\"\"Chain the preprocess, train and deploy components into one pipeline.\"\"\"\n",
    "    # '{{workflow.uid}}' is the Argo workflow placeholder and must reach the\n",
    "    # compiled pipeline with both pairs of braces intact. It is passed to\n",
    "    # str.format() as an argument: written inside the template itself,\n",
    "    # format() would collapse the doubled braces to '{workflow.uid}'.\n",
    "    run_root = 'gs://{}/{}'.format(BUCKET, '{{workflow.uid}}')\n",
    "\n",
    "    preprocess_task = preprocess_operation(\n",
    "        input_1_uri='gs://kubeflow-examples-data/named_entity_recognition_dataset/ner.csv',\n",
    "        output_y_uri_template=run_root + '/preprocess/y/data',\n",
    "        output_x_uri_template=run_root + '/preprocess/x/data',\n",
    "        output_preprocessing_state_uri_template=run_root + '/model'\n",
    "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
    "\n",
    "    train_task = train_operation(\n",
    "        input_x_uri=preprocess_task.outputs['output-x-uri'],\n",
    "        input_y_uri=preprocess_task.outputs['output-y-uri'],\n",
    "        input_job_dir_uri=run_root + '/job',\n",
    "        input_tags=preprocess_task.outputs['output-tags'],\n",
    "        input_words=preprocess_task.outputs['output-words'],\n",
    "        input_dropout=0.1,\n",
    "        output_model_uri_template=run_root + '/model'\n",
    "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
    "\n",
    "    deploy_task = ai_platform_deploy_operation(\n",
    "        model_path=train_task.output,\n",
    "        model_name='named_entity_recognition_kubeflow',\n",
    "        model_region='us-central1',\n",
    "        model_version='version1',\n",
    "        model_runtime_version='1.13',\n",
    "        model_prediction_class='model_prediction.CustomModelPrediction',\n",
    "        model_python_version='3.5',\n",
    "        model_package_uris='gs://{}/routine/custom_prediction_routine-0.2.tar.gz'.format(BUCKET)\n",
    "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compile the Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipeline_func = pipeline\n",
    "pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'\n",
    "\n",
    "compiler.Compiler().compile(pipeline_func, pipeline_filename)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create a Kubeflow Experiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'created_at': datetime.datetime(2019, 7, 5, 10, 32, 13, tzinfo=tzlocal()),\n",
      " 'description': None,\n",
      " 'id': '84e88563-7774-4bae-aa33-4a67649c136a',\n",
      " 'name': 'named-entity-recognition'}\n"
     ]
    }
   ],
   "source": [
    "client = kfp.Client()\n",
    "\n",
    "try:\n",
    "    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)\n",
    "except Exception:\n",
    "    # The lookup failed, so fall back to creating the experiment.\n",
    "    # Catching Exception instead of using a bare except keeps\n",
    "    # KeyboardInterrupt and SystemExit from being swallowed.\n",
    "    experiment = client.create_experiment(EXPERIMENT_NAME)\n",
    "\n",
    "print(experiment)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run the Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Run link here"
      ],
      "text/plain": [
       ""
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "84e88563-7774-4bae-aa33-4a67649c136a\n",
      "pipeline run\n",
      "pipeline.pipeline.zip\n",
      "{}\n"
     ]
    }
   ],
   "source": [
    "arguments = {}\n",
    "\n",
    "run_name = pipeline_func.__name__ + ' run'\n",
    "run_result = client.run_pipeline(\n",
    "    experiment.id,\n",
    "    run_name,\n",
    "    pipeline_filename,\n",
    "    arguments)\n",
    "\n",
    "print(experiment.id)\n",
    "print(run_name)\n",
    "print(pipeline_filename)\n",
    "print(arguments)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}