981 lines
35 KiB
Plaintext
981 lines
35 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Copyright (c) Facebook, Inc. and its affiliates.\n",
|
|
"# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
|
|
"# you may not use this file except in compliance with the License.\n",
|
|
"# You may obtain a copy of the License at\n",
|
|
"#\n",
|
|
"# http://www.apache.org/licenses/LICENSE-2.0\n",
|
|
"#\n",
|
|
"# Unless required by applicable law or agreed to in writing, software\n",
|
|
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
|
|
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
|
|
"# See the License for the specific language governing permissions and\n",
|
|
"# limitations under the License."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Bert Pipeline : PyTorch BERT News Classfication\n",
|
|
"\n",
|
|
"This notebook shows PyTorch BERT end-to-end news classification example using Kubeflow Pipelines.\n",
|
|
"\n",
|
|
"\n",
|
|
"An example notebook that demonstrates how to:\n",
|
|
"\n",
|
|
"* Get different tasks needed for the pipeline\n",
|
|
"* Create a Kubeflow pipeline\n",
|
|
"* Include Pytorch KFP components to preprocess, train, visualize and deploy the model in the pipeline\n",
|
|
"* Submit a job for execution\n",
|
|
"* Query(prediction and explain) the final deployed model\n",
|
|
"* Interpretation of the model using the Captum Insights\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"! pip uninstall -y kfp\n",
|
|
"! pip install --no-cache-dir kfp torch captum"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 72,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'1.6.4'"
|
|
]
|
|
},
|
|
"execution_count": 63,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import kfp\n",
|
|
"import json\n",
|
|
"import os\n",
|
|
"from kfp.onprem import use_k8s_secret\n",
|
|
"from kfp import components\n",
|
|
"from kfp.components import load_component_from_file, load_component_from_url, InputPath\n",
|
|
"from kfp import dsl\n",
|
|
"from kfp import compiler\n",
|
|
"\n",
|
|
"kfp.__version__"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Enter your gateway and the cookie\n",
|
|
"[Use this extension on chrome to get token]( https://chrome.google.com/webstore/detail/editthiscookie/fngmhnnpilhplaeedifhccceomclgfbg?hl=en)\n",
|
|
"\n",
|
|
""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Update values for the ingress gateway and auth session"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 92,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"INGRESS_GATEWAY='http://istio-ingressgateway.istio-system.svc.cluster.local'\n",
|
|
"AUTH=\"<enter your token here>\"\n",
|
|
"NAMESPACE=\"kubeflow-user-example-com\"\n",
|
|
"COOKIE=\"authservice_session=\"+AUTH\n",
|
|
"EXPERIMENT=\"Default\"\n",
|
|
"dist_volume = 'dist-vol'\n",
|
|
"volume_mount_path =\"/model\"\n",
|
|
"dataset_path = volume_mount_path+\"/dataset\"\n",
|
|
"checkpoint_dir = volume_mount_path+\"/checkpoint\"\n",
|
|
"tensorboard_root = volume_mount_path+\"/tensorboard\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Set Log bucket and Tensorboard Image"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 93,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"MINIO_ENDPOINT=\"http://minio-service.kubeflow:9000\"\n",
|
|
"LOG_BUCKET=\"mlpipeline\"\n",
|
|
"TENSORBOARD_IMAGE=\"public.ecr.aws/pytorch-samples/tboard:latest\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 94,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"client = kfp.Client(host=INGRESS_GATEWAY+\"/pipeline\", cookies=COOKIE)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 95,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<a href=\"http://istio-ingressgateway.istio-system.svc.cluster.local/pipeline/#/experiments/details/ba9b7266-2b1c-4729-afcd-be808c25c5af\" target=\"_blank\" >Experiment details</a>."
|
|
],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{'created_at': datetime.datetime(2021, 6, 21, 13, 13, 6, tzinfo=tzlocal()),\n",
|
|
" 'description': None,\n",
|
|
" 'id': 'ba9b7266-2b1c-4729-afcd-be808c25c5af',\n",
|
|
" 'name': 'Default',\n",
|
|
" 'resource_references': [{'key': {'id': 'kubeflow-user-example-com',\n",
|
|
" 'type': 'NAMESPACE'},\n",
|
|
" 'name': None,\n",
|
|
" 'relationship': 'OWNER'}],\n",
|
|
" 'storage_state': 'STORAGESTATE_AVAILABLE'}"
|
|
]
|
|
},
|
|
"execution_count": 67,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"client.create_experiment(EXPERIMENT)\n",
|
|
"experiments = client.list_experiments(namespace=NAMESPACE)\n",
|
|
"my_experiment = experiments.experiments[0]\n",
|
|
"my_experiment"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 96,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"DEPLOY_NAME=\"bert-dist\"\n",
|
|
"MODEL_NAME=\"bert\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 97,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"! python utils/generate_templates.py bert/template_mapping.json"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 98,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prepare_tensorboard_op = load_component_from_file(\n",
|
|
" \"yaml/tensorboard_component.yaml\"\n",
|
|
")\n",
|
|
"prep_op = components.load_component_from_file(\n",
|
|
" \"yaml/preprocess_component.yaml\"\n",
|
|
")\n",
|
|
"# Use GPU image in train component\n",
|
|
"train_op = components.load_component_from_file(\n",
|
|
" \"yaml/train_component.yaml\"\n",
|
|
")\n",
|
|
"deploy_op = load_component_from_file(\"../../../components/kserve/component.yaml\")\n",
|
|
"minio_op = components.load_component_from_file(\n",
|
|
" \"yaml/minio_component.yaml\"\n",
|
|
")\n",
|
|
"pytorch_job_op = load_component_from_file(\"../../../components/kubeflow/pytorch-launcher/component.yaml\")\n",
|
|
"kubernetes_create_pvc_op = load_component_from_file(\n",
|
|
" \"../../../components/contrib/kubernetes/Create_PersistentVolumeClaim/component.yaml\"\n",
|
|
")\n",
|
|
"cp_op = load_component_from_file(\n",
|
|
" \"yaml/copy_component.yaml\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 99,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from kubernetes.client.models import V1Volume, V1PersistentVolumeClaimVolumeSource\n",
|
|
"def create_dist_pipeline():\n",
|
|
" kubernetes_create_pvc_op(name=dist_volume, storage_size= \"2Gi\", namespace=NAMESPACE)\n",
|
|
"\n",
|
|
"create_volume_run = client.create_run_from_pipeline_func(create_dist_pipeline, arguments={})\n",
|
|
"create_volume_run.wait_for_run_completion()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Define pipeline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 100,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"@dsl.pipeline(name=\"Training pipeline\", description=\"Sample training job test\")\n",
|
|
"def pytorch_bert(\n",
|
|
" minio_endpoint=MINIO_ENDPOINT,\n",
|
|
" log_bucket=LOG_BUCKET,\n",
|
|
" log_dir=f\"tensorboard/logs/{dsl.RUN_ID_PLACEHOLDER}\",\n",
|
|
" confusion_matrix_log_dir=f\"confusion_matrix/{dsl.RUN_ID_PLACEHOLDER}/\",\n",
|
|
" mar_path=f\"mar/{dsl.RUN_ID_PLACEHOLDER}/model-store/\",\n",
|
|
" config_prop_path=f\"mar/{dsl.RUN_ID_PLACEHOLDER}/config/\",\n",
|
|
" model_uri=f\"pvc://{dist_volume}/mar/{dsl.RUN_ID_PLACEHOLDER}\",\n",
|
|
" tf_image=TENSORBOARD_IMAGE,\n",
|
|
" deploy=DEPLOY_NAME,\n",
|
|
" namespace=NAMESPACE,\n",
|
|
" num_samples=1000,\n",
|
|
" max_epochs=1,\n",
|
|
" gpus=2,\n",
|
|
" num_nodes=2\n",
|
|
"):\n",
|
|
" \n",
|
|
" prepare_tb_task = prepare_tensorboard_op(\n",
|
|
" log_dir_uri=f\"s3://{log_bucket}/{log_dir}\",\n",
|
|
" image=tf_image,\n",
|
|
" pod_template_spec=json.dumps({\n",
|
|
" \"spec\": {\n",
|
|
" \"containers\": [{\n",
|
|
" \"env\": [\n",
|
|
" {\n",
|
|
" \"name\": \"AWS_ACCESS_KEY_ID\",\n",
|
|
" \"valueFrom\": {\n",
|
|
" \"secretKeyRef\": {\n",
|
|
" \"name\": \"mlpipeline-minio-artifact\",\n",
|
|
" \"key\": \"accesskey\",\n",
|
|
" }\n",
|
|
" },\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"name\": \"AWS_SECRET_ACCESS_KEY\",\n",
|
|
" \"valueFrom\": {\n",
|
|
" \"secretKeyRef\": {\n",
|
|
" \"name\": \"mlpipeline-minio-artifact\",\n",
|
|
" \"key\": \"secretkey\",\n",
|
|
" }\n",
|
|
" },\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"name\": \"AWS_REGION\",\n",
|
|
" \"value\": \"minio\"\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"name\": \"S3_ENDPOINT\",\n",
|
|
" \"value\": f\"{minio_endpoint}\",\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"name\": \"S3_USE_HTTPS\",\n",
|
|
" \"value\": \"0\"\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"name\": \"S3_VERIFY_SSL\",\n",
|
|
" \"value\": \"0\"\n",
|
|
" },\n",
|
|
" ]\n",
|
|
" }]\n",
|
|
" }\n",
|
|
" }),\n",
|
|
" ).set_display_name(\"Visualization\")\n",
|
|
"\n",
|
|
" prep_task = prep_op().after(prepare_tb_task).set_display_name(\"Preprocess & Transform\")\n",
|
|
" copy_task = cp_op(\"true\", prep_task.outputs['output_data'], dataset_path,\"\").add_pvolumes({volume_mount_path: dsl.PipelineVolume(pvc=dist_volume)}).after(prep_task).set_display_name(\"Copy Dataset\")\n",
|
|
" confusion_matrix_url = f\"minio://{log_bucket}/{confusion_matrix_log_dir}\"\n",
|
|
" train_task = pytorch_job_op(\n",
|
|
" name=\"pytorch-bert-dist\", \n",
|
|
" namespace=namespace, \n",
|
|
" master_spec=\n",
|
|
" {\n",
|
|
" \"replicas\": 1,\n",
|
|
" \"imagePullPolicy\": \"Always\",\n",
|
|
" \"restartPolicy\": \"OnFailure\",\n",
|
|
" \"template\": {\n",
|
|
" \"metadata\": {\n",
|
|
" \"annotations\": {\n",
|
|
" \"sidecar.istio.io/inject\": \"false\"\n",
|
|
" }\n",
|
|
" },\n",
|
|
" \"spec\": {\n",
|
|
" \"containers\": [\n",
|
|
" {\n",
|
|
" \"name\": \"pytorch\",\n",
|
|
" \"image\": \"public.ecr.aws/pytorch-samples/kfp_samples:latest-gpu\",\n",
|
|
" \"command\": [\"python\", \"bert/agnews_classification_pytorch.py\"],\n",
|
|
" \"args\": [\n",
|
|
" \"--dataset_path\", dataset_path,\n",
|
|
" \"--checkpoint_dir\", checkpoint_dir,\n",
|
|
" \"--script_args\", f\"model_name=bert.pth,num_samples={num_samples}\",\n",
|
|
" \"--tensorboard_root\", tensorboard_root,\n",
|
|
" \"--ptl_args\", f\"max_epochs={max_epochs},profiler=pytorch,devices={gpus},accelerator=gpu,strategy=ddp,num_nodes={num_nodes},confusion_matrix_url={confusion_matrix_url}\"\n",
|
|
" ],\n",
|
|
" \"env\": [\n",
|
|
" {\n",
|
|
" \"name\": \"MINIO_ACCESS_KEY\",\n",
|
|
" \"valueFrom\": {\n",
|
|
" \"secretKeyRef\": {\n",
|
|
" \"name\": \"mlpipeline-minio-artifact\",\n",
|
|
" \"key\": \"accesskey\",\n",
|
|
" }\n",
|
|
" },\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"name\": \"MINIO_SECRET_KEY\",\n",
|
|
" \"valueFrom\": {\n",
|
|
" \"secretKeyRef\": {\n",
|
|
" \"name\": \"mlpipeline-minio-artifact\",\n",
|
|
" \"key\": \"secretkey\",\n",
|
|
" }\n",
|
|
" },\n",
|
|
" }\n",
|
|
" ],\n",
|
|
" \"ports\": [\n",
|
|
" {\n",
|
|
" \"containerPort\": 24456,\n",
|
|
" \"name\": \"pytorchjob-port\"\n",
|
|
" }\n",
|
|
" ],\n",
|
|
" \"resources\": {\n",
|
|
" \"limits\": {\n",
|
|
" \"nvidia.com/gpu\": 2\n",
|
|
" }\n",
|
|
" },\n",
|
|
" \"volumeMounts\": [\n",
|
|
" {\n",
|
|
" \"mountPath\": volume_mount_path,\n",
|
|
" \"name\": \"model-volume\"\n",
|
|
" }\n",
|
|
" ]\n",
|
|
" }\n",
|
|
" ],\n",
|
|
" \"volumes\": [\n",
|
|
" {\n",
|
|
" \"name\": \"model-volume\",\n",
|
|
" \"persistentVolumeClaim\": {\n",
|
|
" \"claimName\": dist_volume\n",
|
|
" }\n",
|
|
" }\n",
|
|
" ]\n",
|
|
" }\n",
|
|
" }\n",
|
|
" }, \n",
|
|
" worker_spec=\n",
|
|
" {\n",
|
|
" \"replicas\": 1,\n",
|
|
" \"imagePullPolicy\": \"Always\",\n",
|
|
" \"restartPolicy\": \"OnFailure\",\n",
|
|
" \"template\": {\n",
|
|
" \"metadata\": {\n",
|
|
" \"annotations\": {\n",
|
|
" \"sidecar.istio.io/inject\": \"false\"\n",
|
|
" }\n",
|
|
" },\n",
|
|
" \"spec\": {\n",
|
|
" \"containers\": [\n",
|
|
" {\n",
|
|
" \"name\": \"pytorch\",\n",
|
|
" \"image\": \"public.ecr.aws/pytorch-samples/kfp_samples:latest-gpu\",\n",
|
|
" \"command\": [\"python\", \"bert/agnews_classification_pytorch.py\"],\n",
|
|
" \"args\": [\n",
|
|
" \"--dataset_path\", dataset_path,\n",
|
|
" \"--checkpoint_dir\", checkpoint_dir,\n",
|
|
" \"--script_args\", f\"model_name=bert.pth,num_samples={num_samples}\",\n",
|
|
" \"--tensorboard_root\", tensorboard_root,\n",
|
|
" \"--ptl_args\", f\"max_epochs={max_epochs},profiler=pytorch,devices={gpus},strategy=ddp,accelerator=gpu,num_nodes={num_nodes},confusion_matrix_url={confusion_matrix_url}\"\n",
|
|
" ],\n",
|
|
" \"env\": [\n",
|
|
" {\n",
|
|
" \"name\": \"MINIO_ACCESS_KEY\",\n",
|
|
" \"valueFrom\": {\n",
|
|
" \"secretKeyRef\": {\n",
|
|
" \"name\": \"mlpipeline-minio-artifact\",\n",
|
|
" \"key\": \"accesskey\",\n",
|
|
" }\n",
|
|
" },\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"name\": \"MINIO_SECRET_KEY\",\n",
|
|
" \"valueFrom\": {\n",
|
|
" \"secretKeyRef\": {\n",
|
|
" \"name\": \"mlpipeline-minio-artifact\",\n",
|
|
" \"key\": \"secretkey\",\n",
|
|
" }\n",
|
|
" },\n",
|
|
" }\n",
|
|
" ],\n",
|
|
" \"ports\": [\n",
|
|
" {\n",
|
|
" \"containerPort\": 24456,\n",
|
|
" \"name\": \"pytorchjob-port\"\n",
|
|
" }\n",
|
|
" ],\n",
|
|
" \"resources\": {\n",
|
|
" \"limits\": {\n",
|
|
" \"nvidia.com/gpu\": 2\n",
|
|
" }\n",
|
|
" },\n",
|
|
" \"volumeMounts\": [\n",
|
|
" {\n",
|
|
" \"mountPath\": volume_mount_path,\n",
|
|
" \"name\": \"model-volume\"\n",
|
|
" }\n",
|
|
" ]\n",
|
|
" }\n",
|
|
" ],\n",
|
|
" \"volumes\": [\n",
|
|
" {\n",
|
|
" \"name\": \"model-volume\",\n",
|
|
" \"persistentVolumeClaim\": {\n",
|
|
" \"claimName\": dist_volume\n",
|
|
" }\n",
|
|
" }\n",
|
|
" ]\n",
|
|
" }\n",
|
|
" }\n",
|
|
" },\n",
|
|
" delete_after_done=False\n",
|
|
" ).after(copy_task)\n",
|
|
" \n",
|
|
" mar_folder_restructure_task = dsl.ContainerOp(\n",
|
|
" name='mar restructure',\n",
|
|
" image='library/bash:4.4.23',\n",
|
|
" command=['sh', '-c'],\n",
|
|
" arguments=[f'mkdir -p {volume_mount_path}/{mar_path}; mkdir -p {volume_mount_path}/{config_prop_path}; cp {checkpoint_dir}/*.mar {volume_mount_path}/{mar_path}; cp {checkpoint_dir}/config.properties {volume_mount_path}/{config_prop_path}']).add_pvolumes({volume_mount_path: dsl.PipelineVolume(pvc=dist_volume)}).after(train_task).set_display_name(\"Restructure MAR and config.properties path\")\n",
|
|
" mar_folder_restructure_task.execution_options.caching_strategy.max_cache_staleness = \"P0D\"\n",
|
|
" copy_tensorboard = cp_op(\"false\", \"\", \"\", tensorboard_root).add_pvolumes({volume_mount_path: dsl.PipelineVolume(pvc=dist_volume)}).after(mar_folder_restructure_task).set_display_name(\"Copy Tensorboard Logs\")\n",
|
|
" copy_tensorboard.execution_options.caching_strategy.max_cache_staleness = \"P0D\"\n",
|
|
"\n",
|
|
" minio_tb_upload = (\n",
|
|
" minio_op(\n",
|
|
" bucket_name=log_bucket,\n",
|
|
" folder_name=log_dir,\n",
|
|
" input_path=copy_tensorboard.outputs[\"destination_path\"],\n",
|
|
" filename=\"\",\n",
|
|
" ).after(copy_tensorboard)\n",
|
|
" .set_display_name(\"Tensorboard Events Pusher\")\n",
|
|
" )\n",
|
|
" \n",
|
|
" # Deploy inferenceservice in gpu\n",
|
|
" gpu_count = \"1\"\n",
|
|
" isvc_gpu_yaml = \"\"\"\n",
|
|
" apiVersion: \"serving.kserve.io/v1beta1\"\n",
|
|
" kind: \"InferenceService\"\n",
|
|
" metadata:\n",
|
|
" name: {}\n",
|
|
" namespace: {}\n",
|
|
" spec:\n",
|
|
" predictor:\n",
|
|
" serviceAccountName: sa\n",
|
|
" pytorch:\n",
|
|
" storageUri: {}\n",
|
|
" protocolVersion: v2\n",
|
|
" resources:\n",
|
|
" requests: \n",
|
|
" cpu: 4\n",
|
|
" memory: 8Gi\n",
|
|
" limits:\n",
|
|
" cpu: 4\n",
|
|
" memory: 8Gi\n",
|
|
" nvidia.com/gpu: {}\n",
|
|
" \"\"\".format(\n",
|
|
" deploy, namespace, model_uri, gpu_count\n",
|
|
" )\n",
|
|
" \n",
|
|
" deploy_task = (\n",
|
|
" deploy_op(action=\"apply\", inferenceservice_yaml=isvc_gpu_yaml)\n",
|
|
" .after(minio_tb_upload)\n",
|
|
" .set_display_name(\"Deployer\")\n",
|
|
" )\n",
|
|
" deploy_task.execution_options.caching_strategy.max_cache_staleness = \"P0D\"\n",
|
|
" \n",
|
|
" dsl.get_pipeline_conf().add_op_transformer(\n",
|
|
" use_k8s_secret(\n",
|
|
" secret_name=\"mlpipeline-minio-artifact\",\n",
|
|
" k8s_secret_key_to_env={\n",
|
|
" \"secretkey\": \"MINIO_SECRET_KEY\",\n",
|
|
" \"accesskey\": \"MINIO_ACCESS_KEY\",\n",
|
|
" },\n",
|
|
" )\n",
|
|
" )"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 101,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Compile pipeline\n",
|
|
"compiler.Compiler().compile(pytorch_bert, 'pytorch.tar.gz', type_check=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 102,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<a href=\"http://istio-ingressgateway.istio-system.svc.cluster.local/pipeline/#/runs/details/12583e76-4814-4ff1-9661-47c0f4cb1b14\" target=\"_blank\" >Run details</a>."
|
|
],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Execute pipeline\n",
|
|
"run = client.run_pipeline(my_experiment.id, 'pytorch-bert', 'pytorch.tar.gz')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Wait for inference service below to go to `READY True` state."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 103,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"NAME URL READY PREV LATEST PREVROLLEDOUTREVISION LATESTREADYREVISION AGE\n",
|
|
"bert-dist http://bert-dist.kubeflow-user-example-com.example.com True 100 bert-dist-predictor-default-00001 4m12s\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"!kubectl get isvc $DEPLOY"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Get Inferenceservice name"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 104,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'bert-dist.kubeflow-user-example-com.example.com'"
|
|
]
|
|
},
|
|
"execution_count": 104,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"INFERENCE_SERVICE_LIST = ! kubectl get isvc {DEPLOY_NAME} -n {NAMESPACE} -o json | python3 -c \"import sys, json; print(json.load(sys.stdin)['status']['url'])\"| tr -d '\"' | cut -d \"/\" -f 3\n",
|
|
"INFERENCE_SERVICE_NAME = INFERENCE_SERVICE_LIST[0]\n",
|
|
"INFERENCE_SERVICE_NAME"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Prediction Request"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 105,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
|
" Dload Upload Total Spent Left Speed\n",
|
|
" 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0* Trying 10.100.176.44:80...\n",
|
|
"* TCP_NODELAY set\n",
|
|
"* Connected to istio-ingressgateway.istio-system.svc.cluster.local (10.100.176.44) port 80 (#0)\n",
|
|
"> POST /v1/models/bert:predict HTTP/1.1\n",
|
|
"> Host: bert-dist.kubeflow-user-example-com.example.com\n",
|
|
"> User-Agent: curl/7.68.0\n",
|
|
"> Accept: */*\n",
|
|
 Cookie: authservice">
"> Cookie: authservice_session=<redacted>\n",
|
|
"> Content-Length: 84\n",
|
|
"> Content-Type: application/x-www-form-urlencoded\n",
|
|
"> \n",
|
|
"} [84 bytes data]\n",
|
|
"* upload completely sent off: 84 out of 84 bytes\n",
|
|
"* Mark bundle as not supporting multiuse\n",
|
|
"< HTTP/1.1 200 OK\n",
|
|
"< content-length: 33\n",
|
|
"< content-type: application/json; charset=UTF-8\n",
|
|
"< date: Mon, 02 May 2022 08:40:23 GMT\n",
|
|
"< server: istio-envoy\n",
|
|
"< x-envoy-upstream-service-time: 176\n",
|
|
"< \n",
|
|
"{ [33 bytes data]\n",
|
|
"100 117 100 33 100 84 162 413 --:--:-- --:--:-- --:--:-- 576\n",
|
|
"* Connection #0 to host istio-ingressgateway.istio-system.svc.cluster.local left intact\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"!curl -v -H \"Host: $INFERENCE_SERVICE_NAME\" -H \"Cookie: $COOKIE\" \"$INGRESS_GATEWAY/v2/models/$MODEL_NAME/infer\" -d @./bert/sample.txt > bert_prediction_output.json"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 106,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{\"predictions\": [\"\\\"Sci/Tech\\\"\"]}"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"! cat bert_prediction_output.json"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Explanation Request"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 107,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
|
" Dload Upload Total Spent Left Speed\n",
|
|
" 0 0 0 0 0 0 0 0 --:--:-- 0:00:04 --:--:-- 0* Trying 10.100.176.44:80...\n",
|
|
"* TCP_NODELAY set\n",
|
|
"* Connected to istio-ingressgateway.istio-system.svc.cluster.local (10.100.176.44) port 80 (#0)\n",
|
|
"> POST /v1/models/bert:explain HTTP/1.1\n",
|
|
"> Host: bert-dist.kubeflow-user-example-com.example.com\n",
|
|
"> User-Agent: curl/7.68.0\n",
|
|
"> Accept: */*\n",
|
|
 Cookie: authservice">
"> Cookie: authservice_session=<redacted>\n",
|
|
"> Content-Length: 84\n",
|
|
"> Content-Type: application/x-www-form-urlencoded\n",
|
|
"> \n",
|
|
"} [84 bytes data]\n",
|
|
"* upload completely sent off: 84 out of 84 bytes\n",
|
|
"* Mark bundle as not supporting multiuse\n",
|
|
"< HTTP/1.1 200 OK\n",
|
|
"< content-length: 264\n",
|
|
"< content-type: application/json; charset=UTF-8\n",
|
|
"< date: Mon, 02 May 2022 08:40:44 GMT\n",
|
|
"< server: istio-envoy\n",
|
|
"< x-envoy-upstream-service-time: 284\n",
|
|
"< \n",
|
|
"{ [264 bytes data]\n",
|
|
"100 348 100 264 100 84 49 15 0:00:05 0:00:05 --:--:-- 75\n",
|
|
"* Connection #0 to host istio-ingressgateway.istio-system.svc.cluster.local left intact\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"!curl -v -H \"Host: $INFERENCE_SERVICE_NAME\" -H \"Cookie: $COOKIE\" \"$INGRESS_GATEWAY/v2/models/$MODEL_NAME/explain\" -d @./bert/sample.txt > bert_explaination_output.json"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 108,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{\"explanations\": [{\"words\": [\"bloomberg\", \"has\", \"reported\", \"on\", \"the\", \"economy\"], \"importances\": [-0.49426081646662806, 0.09581777446473196, -0.09546984597236165, -0.19612933767921537, -0.2438196769639178, 0.7996849104110348], \"delta\": -0.005089809745116192}]}"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"! cat bert_explaination_output.json"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {
|
|
"scrolled": true,
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{'explanations': [{'words': ['[CLS]',\n",
|
|
" 'bloomberg',\n",
|
|
" 'has',\n",
|
|
" 'reported',\n",
|
|
" 'on',\n",
|
|
" 'the',\n",
|
|
" 'economy',\n",
|
|
" '[SEP]'],\n",
|
|
" 'importances': [0.18556156547587432,\n",
|
|
" -0.04754466449824699,\n",
|
|
" -0.09005958599003015,\n",
|
|
" 0.056995451538874545,\n",
|
|
" 0.10996221573727777,\n",
|
|
" 0.148971232294231,\n",
|
|
" 0.398128678194734,\n",
|
|
" -0.8712959534101352],\n",
|
|
" 'delta': 0.008833148050828438}]}"
|
|
]
|
|
},
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"explanations_json = json.loads(open(\"./bert_explaination_output.json\", \"r\").read())\n",
|
|
"explanations_json"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"prediction_json = json.loads(open(\"./bert_prediction_output.json\", \"r\").read())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import torch\n",
|
|
"attributions = explanations_json[\"outputs\"][0][\"data\"][0]['importances']\n",
|
|
"tokens = explanations_json[\"outputs\"][0][\"data\"][0]['words']\n",
|
|
"delta = explanations_json[\"outputs\"][0][\"data\"][0]['delta']\n",
|
|
"\n",
|
|
"attributions = torch.tensor(attributions)\n",
|
|
"pred_prob = 0.75\n",
|
|
"pred_class = str(prediction_json[\"outputs\"][0][\"data\"][0]).strip('\"\"')\n",
|
|
"true_class = \"Business\"\n",
|
|
"attr_class =\"world\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Visualization of Predictions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from captum.attr import visualization\n",
|
|
"vis_data_records =[]\n",
|
|
"vis_data_records.append(visualization.VisualizationDataRecord(\n",
|
|
" attributions,\n",
|
|
" pred_prob,\n",
|
|
" pred_class,\n",
|
|
" true_class,\n",
|
|
" attr_class,\n",
|
|
" attributions.sum(), \n",
|
|
" tokens,\n",
|
|
" delta))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<table width: 100%><div style=\"border-top: 1px solid; margin-top: 5px; padding-top: 5px; display: inline-block\"><b>Legend: </b><span style=\"display: inline-block; width: 10px; height: 10px; border: 1px solid; background-color: hsl(0, 75%, 60%)\"></span> Negative <span style=\"display: inline-block; width: 10px; height: 10px; border: 1px solid; background-color: hsl(0, 75%, 100%)\"></span> Neutral <span style=\"display: inline-block; width: 10px; height: 10px; border: 1px solid; background-color: hsl(120, 75%, 50%)\"></span> Positive </div><tr><th>True Label</th><th>Predicted Label</th><th>Attribution Label</th><th>Attribution Score</th><th>Word Importance</th><tr><td><text style=\"padding-right:2em\"><b>Business</b></text></td><td><text style=\"padding-right:2em\"><b>\"Sci/Tech\" (0.75)</b></text></td><td><text style=\"padding-right:2em\"><b>world</b></text></td><td><text style=\"padding-right:2em\"><b>-0.11</b></text></td><td><mark style=\"background-color: hsl(120, 75%, 91%); opacity:1.0; line-height:1.75\"><font color=\"black\"> [CLS] </font></mark><mark style=\"background-color: hsl(0, 75%, 99%); opacity:1.0; line-height:1.75\"><font color=\"black\"> bloomberg </font></mark><mark style=\"background-color: hsl(0, 75%, 97%); opacity:1.0; line-height:1.75\"><font color=\"black\"> has </font></mark><mark style=\"background-color: hsl(120, 75%, 98%); opacity:1.0; line-height:1.75\"><font color=\"black\"> reported </font></mark><mark style=\"background-color: hsl(120, 75%, 95%); opacity:1.0; line-height:1.75\"><font color=\"black\"> on </font></mark><mark style=\"background-color: hsl(120, 75%, 93%); opacity:1.0; line-height:1.75\"><font color=\"black\"> the </font></mark><mark style=\"background-color: hsl(120, 75%, 81%); opacity:1.0; line-height:1.75\"><font color=\"black\"> economy </font></mark><mark style=\"background-color: hsl(0, 75%, 66%); opacity:1.0; line-height:1.75\"><font color=\"black\"> [SEP] </font></mark></td><tr></table>"
|
|
],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"vis = visualization.visualize_text(vis_data_records)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### visualization appreas as below\n",
|
|
""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Cleanup Script"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 85,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"inferenceservice.serving.kserve.io \"bert-dist\" deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"! kubectl delete --all isvc -n $NAMESPACE"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 84,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"pod \"create-dist-pipeline-444nk-3959473792\" deleted\n",
|
|
"pod \"training-pipeline-trb5h-1876153621\" deleted\n",
|
|
"pod \"training-pipeline-trb5h-284914308\" deleted\n",
|
|
"pod \"training-pipeline-trb5h-3177383612\" deleted\n",
|
|
"pod \"training-pipeline-trb5h-3252145113\" deleted\n",
|
|
"pod \"training-pipeline-trb5h-3265872190\" deleted\n",
|
|
"pod \"training-pipeline-trb5h-3331631297\" deleted\n",
|
|
"pod \"training-pipeline-trb5h-3651310105\" deleted\n",
|
|
"pod \"training-pipeline-trb5h-3914481085\" deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"! kubectl delete pod --field-selector=status.phase==Succeeded -n $NAMESPACE"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.10"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|