Merge branch 'kubeflow:master' into master

Vedant Padwal 2022-03-06 02:42:27 +05:30 committed by GitHub
commit 3cb59b1888
33 changed files with 638 additions and 94 deletions


@@ -139,6 +139,6 @@ After starting, confirm whether the Pod is running. After running, you can execu
# Relevant part
* [About Version](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/README.md)
* [About Docker](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/2.%20Docker/Docker.md)
* [About Jupyter Notebook](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/3.%20Jupyter%20Notebook/Jupyter%20Notebook.md)
* [About Version](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/README.md)
* [About Docker](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/2.%20Docker/Docker.md)
* [About Jupyter Notebook](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/3.%20Jupyter%20Notebook/Jupyter%20Notebook.md)


@@ -3,13 +3,15 @@
> I have provided a public docker image. < m10913018/nltk_env:2.3.0 > \
> There are only python packages and html files inside, no code related to natural language processing.
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/NLP.png" alt="NLP"/><br/>
> Note: The docker image was recently updated to add the SVM prediction result to the html file; please use the image <dfm871002/nltk_env:2.4.2> instead.
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/NLP.png" alt="NLP"/><br/>
> The font may render differently depending on the environment, but this does not affect functionality. \
> You can customize the web page and remake the docker image.
# Relevant part
* [About Version](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/README.md)
* [About Install](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/1.%20Install/Install.md)
* [About Jupyter Notebook](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/3.%20Jupyter%20Notebook/Jupyter%20Notebook.md)
* [About Version](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/README.md)
* [About Install](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/1.%20Install/Install.md)
* [About Jupyter Notebook](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/3.%20Jupyter%20Notebook/Jupyter%20Notebook.md)


@@ -1,4 +1,3 @@
<!DOCTYPE html>
<html>
<head>
@@ -26,6 +25,7 @@
<td class="predict-title">Numpy</td>
<td class="predict-title">SKlearn</td>
<td class="predict-title">Pytorch</td>
<td class="predict-title">SVM</td>
</tr>
<tr>
<td>
@@ -49,6 +49,13 @@
<div style="font-size: 8vh; color: blue;">-</div>
{% endif %}
</td>
<td>
{% if my_prediction_svm == 1 %}
<div style="font-size: 8vh; color: red;">+</div>
{% elif my_prediction_svm == 0 %}
<div style="font-size: 8vh; color: blue;">-</div>
{% endif %}
</td>
</tr>
</table>
</div>
@@ -142,7 +149,7 @@
}
.predict-title {
width: 33.33%;
width: 25%;
height: 20%;
color: green;
}
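The new SVM column is filled in from a `my_prediction_svm` template variable. A minimal, hypothetical Flask sketch of how such a variable reaches `home.html` (the route and the hard-coded value are illustrative, not taken from this commit):

```python
from flask import Flask, render_template

app = Flask(__name__)

@app.route('/')
def home():
    # my_prediction_svm mirrors the other predictors: 1 renders "+", 0 renders "-"
    return render_template('home.html', my_prediction_svm=1)

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)  # port 5000 matches the port-forward below
```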


@@ -369,7 +369,42 @@
"metadata": {},
"outputs": [],
"source": [
"def accuracy(sklearn_score:float,logistic_score:float,torch_score:float) -> NamedTuple('Outputs', [('mlpipeline_metrics', 'Metrics')]):\n",
"def svm_process(log_folder:str, numpy_folder:str) -> NamedTuple('Outputs', [('svmdir',str), ('svmscore',float)]):\n",
" import joblib\n",
" import os\n",
" import numpy as np\n",
" from sklearn.metrics import accuracy_score\n",
" from sklearn.svm import SVC\n",
" from sklearn.preprocessing import StandardScaler\n",
"\n",
" train_X = joblib.load(open(numpy_folder + '/train_X.pkl','rb'))\n",
" test_X = joblib.load(open(numpy_folder + '/test_X.pkl','rb'))\n",
" train_Y = joblib.load(open(log_folder + '/train_Y.pkl','rb'))\n",
" test_Y = joblib.load(open(log_folder + '/test_Y.pkl','rb'))\n",
" \n",
" scaler = StandardScaler()\n",
" train_X_s = scaler.fit(train_X).transform(train_X)\n",
" \n",
" clf = SVC(kernel='linear')\n",
" t = clf.fit(train_X_s, np.array(train_Y).reshape(-1,1))\n",
" y_pred = clf.predict(test_X)\n",
" svm_score = accuracy_score(test_Y , y_pred)\n",
" \n",
" if not os.path.isdir(numpy_folder + '/svm'):\n",
" os.makedirs(numpy_folder + '/svm')\n",
" svm_folder = numpy_folder + '/svm'\n",
" joblib.dump(t, svm_folder + '/svm.pkl')\n",
"\n",
" return ([svm_folder, svm_score])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def accuracy(sklearn_score:float,logistic_score:float,torch_score:float,svm_score:float) -> NamedTuple('Outputs', [('mlpipeline_metrics', 'Metrics')]):\n",
" import json\n",
"\n",
" metrics = {\n",
@@ -389,6 +424,11 @@
" 'numberValue': torch_score,\n",
" 'format': \"PERCENTAGE\",\n",
" },\n",
" {\n",
" 'name': 'svm_score',\n",
" 'numberValue': svm_score,\n",
" 'format': \"PERCENTAGE\",\n",
" },\n",
" ]\n",
" }\n",
" return [json.dumps(metrics)]"
@@ -396,11 +436,11 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def http_port(log_folder:str, sklearn_folder:str, logistic_folder:str, torch_folder:str):\n",
"def http_port(log_folder:str, sklearn_folder:str, logistic_folder:str, torch_folder:str, svm_folder:str):\n",
" \n",
" import re\n",
" import string\n",
@@ -506,6 +546,7 @@
" theta_ns = joblib.load(open(logistic_folder + '/logistic.pkl','rb'))\n",
" clf = joblib.load(open(sklearn_folder + '/sklearn.pkl','rb'))\n",
" theta_toc = joblib.load(open(torch_folder + '/torch.pkl','rb'))\n",
" svm = joblib.load(open(svm_folder + '/svm.pkl','rb'))\n",
"\n",
" if request.method == 'POST':\n",
" message = request.form['message']\n",
@@ -531,12 +572,15 @@
" my_prediction_toc = torch.where(\n",
" predicted_probs_toc > 0.5, torch.tensor(1), torch.tensor(0))\n",
" \n",
" my_prediction_svm = svm.predict(vect)\n",
" \n",
" return render_template('home.html',\n",
" message = message,\n",
" data = data_o,\n",
" my_prediction_np = my_prediction_np,\n",
" my_prediction_skl = my_prediction_skl,\n",
" my_prediction_toc = my_prediction_toc)\n",
" my_prediction_toc = my_prediction_toc,\n",
" my_prediction_svm = my_prediction_svm)\n",
"\n",
" if __name__ == '__main__':\n",
" \n",
@@ -545,7 +589,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -559,7 +603,7 @@
" log_folder = '/information'\n",
" pvc_name = \"cornell-1000\"\n",
"\n",
" image = \"m10913018/nltk_env:2.3.0\"\n",
" image = \"dfm871002/nltk_env:2.4.2\"\n",
" \n",
" vop = dsl.VolumeOp(\n",
" name=pvc_name,\n",
@@ -593,6 +637,11 @@
" base_image = image,\n",
" )\n",
" \n",
" svm_op = func_to_container_op(\n",
" func = svm_process,\n",
" base_image = image,\n",
" )\n",
" \n",
" accuracy_op = func_to_container_op(\n",
" func = accuracy,\n",
" base_image = image,\n",
@@ -607,6 +656,8 @@
" \n",
" numpy_task = numpy_op(dowload_task.outputs['logdir']).add_pvolumes({ log_folder:vop.volume, })\n",
" \n",
" svm_task = svm_op(numpy_task.outputs['logdir'], numpy_task.outputs['numpydir']).add_pvolumes({ log_folder:vop.volume, })\n",
" \n",
" sklearn_task = sklearn_op(\n",
" numpy_task.outputs['logdir'],\n",
" numpy_task.outputs['numpydir']\n",
@@ -624,20 +675,22 @@
" accuracy_task = accuracy_op(\n",
" sklearn_task.outputs['sklearnscore'],\n",
" logistic_task.outputs['logisticscore'],\n",
" torch_task.outputs['torchscore']\n",
" torch_task.outputs['torchscore'],\n",
" svm_task.outputs['svmscore']\n",
" )\n",
" \n",
" http_task = http_op(\n",
" sklearn_task.outputs['logdir'],\n",
" sklearn_task.outputs['sklearndir'],\n",
" logistic_task.outputs['logisticdir'],\n",
" torch_task.outputs['torchdir']\n",
" torch_task.outputs['torchdir'],\n",
" svm_task.outputs['svmdir']\n",
" ).add_pvolumes({ log_folder:vop.volume, })"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [

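For context, the `accuracy` component above uses KFP v1's `mlpipeline-metrics` convention: the function returns a JSON-encoded dict under an output named `mlpipeline_metrics`, and the run UI renders each entry. A self-contained sketch of just that contract (the default score is illustrative):

```python
import json
from typing import NamedTuple

def accuracy_demo(svm_score: float = 0.85) -> NamedTuple('Outputs', [('mlpipeline_metrics', 'Metrics')]):
    # Each entry in 'metrics' becomes one metric in the KFP run UI.
    metrics = {
        'metrics': [
            {
                'name': 'svm_score',       # displayed metric name
                'numberValue': svm_score,  # must be numeric
                'format': "PERCENTAGE",    # rendered as a percentage
            },
        ]
    }
    return [json.dumps(metrics)]
```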

@@ -9,24 +9,24 @@
> Cornell-1000.zip and twitter-5000.zip are archives generated by executing Cornell-1000-nltk.ipynb and Twitter-5000-nltk.ipynb. \
> Each archive contains the pipeline's yaml file.
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/pipeline.png" alt="pipeline"/><br/>
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/pipeline.png" alt="pipeline"/><br/>
## Custom data
> Twitter-5000-nltk and Cornell-1000-nltk use similar code; they differ only in how the data is downloaded and read. \
> To use other data, you only need to classify it and save each example in str format into pos_tweets and neg_tweets, as in the sketch below.
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/data%20list.png" alt="data list"/><br/>
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/data%20list.png" alt="data list"/><br/>
# Port Forward
### Step 1: Find the pod name of the HTTP port
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/nltk.jpg" alt="nltk pod"/><br/>
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/nltk.jpg" alt="nltk pod"/><br/>
### Step 2: Port-forward
```Bash
kubectl port-forward -n kubeflow-user-example-com <pod name> 3000:5000
```
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/port%20forward.png" alt="nltk pod port forward"/><br/>
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/port%20forward.png" alt="nltk pod port forward"/><br/>
### Step 3: Enter the address in the browser
```Bash
@@ -36,18 +36,19 @@ or
```Bash
127.0.0.1:3000
```
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/NLP.png" alt="NLP"/><br/>
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/NLP.png" alt="NLP"/><br/>
### Step 4: Predict
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/nice%20to%20meet%20you.png" alt="nice to meet you"/><br/>
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/NLP%20N.png" alt="i hate you"/><br/>
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/nice%20to%20meet%20you.png" alt="nice to meet you"/><br/>
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/NLP%20N.png" alt="i hate you"/><br/>
# Accuracy
You can check the accuracy of each NLP model individually, \
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/twitter-5000%20accuracy.png" alt="twitter"/>
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/cornell-1000%20accuracy.png" alt="cornell"/><br/>
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/twitter-5000%20accuracy.png" alt="twitter"/>
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/cornell-1000%20accuracy.png" alt="cornell"/><br/>
or you can compare several runs side by side. \
<img src="https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/4.%20Image/compare%20runs.png" alt="compare"/><br/>
<br>
<img src="https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/4.%20Image/compare%20runs.png" alt="compare"/><br/>
# Disabling caching in your Kubeflow Pipelines deployment
> If you delete the pvc and execute the pipeline again and it does not work properly, it may be a cache problem. \
@@ -60,7 +61,7 @@ kubectl patch mutatingwebhookconfiguration cache-webhook-${NAMESPACE} --type='js
# Relevant part
* [About Version](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/README.md)
* [About Install](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/1.%20Install/Install.md)
* [About Docker](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/2.%20Docker/Docker.md)
* [About Version](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/README.md)
* [About Install](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/1.%20Install/Install.md)
* [About Docker](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/2.%20Docker/Docker.md)


@@ -359,7 +359,42 @@
"metadata": {},
"outputs": [],
"source": [
"def accuracy(sklearn_score:float,logistic_score:float,torch_score:float) -> NamedTuple('Outputs', [('mlpipeline_metrics', 'Metrics')]):\n",
"def svm_process(log_folder:str, numpy_folder:str) -> NamedTuple('Outputs', [('svmdir',str), ('svmscore',float)]):\n",
" import joblib\n",
" import os\n",
" import numpy as np\n",
" from sklearn.metrics import accuracy_score\n",
" from sklearn.svm import SVC\n",
" from sklearn.preprocessing import StandardScaler\n",
"\n",
" train_X = joblib.load(open(numpy_folder + '/train_X.pkl','rb'))\n",
" test_X = joblib.load(open(numpy_folder + '/test_X.pkl','rb'))\n",
" train_Y = joblib.load(open(log_folder + '/train_Y.pkl','rb'))\n",
" test_Y = joblib.load(open(log_folder + '/test_Y.pkl','rb'))\n",
" \n",
" scaler = StandardScaler()\n",
" train_X_s = scaler.fit(train_X).transform(train_X)\n",
" \n",
" clf = SVC(kernel='linear')\n",
" t = clf.fit(train_X_s, np.array(train_Y).reshape(-1,1))\n",
" y_pred = clf.predict(test_X)\n",
" svm_score = accuracy_score(test_Y , y_pred)\n",
" \n",
" if not os.path.isdir(numpy_folder + '/svm'):\n",
" os.makedirs(numpy_folder + '/svm')\n",
" svm_folder = numpy_folder + '/svm'\n",
" joblib.dump(t, svm_folder + '/svm.pkl')\n",
"\n",
" return ([svm_folder, svm_score])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def accuracy(sklearn_score:float,logistic_score:float,torch_score:float,svm_score:float) -> NamedTuple('Outputs', [('mlpipeline_metrics', 'Metrics')]):\n",
" import json\n",
"\n",
" metrics = {\n",
@@ -379,6 +414,11 @@
" 'numberValue': torch_score,\n",
" 'format': \"PERCENTAGE\",\n",
" },\n",
" {\n",
" 'name': 'svm_score',\n",
" 'numberValue': svm_score,\n",
" 'format': \"PERCENTAGE\",\n",
" },\n",
" ]\n",
" }\n",
" return [json.dumps(metrics)]"
@@ -386,11 +426,11 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def http_port(log_folder:str, sklearn_folder:str, logistic_folder:str, torch_folder:str):\n",
"def http_port(log_folder:str, sklearn_folder:str, logistic_folder:str, torch_folder:str, svm_folder:str):\n",
" \n",
" import re\n",
" import string\n",
@@ -496,6 +536,7 @@
" theta_ns = joblib.load(open(logistic_folder + '/logistic.pkl','rb'))\n",
" clf = joblib.load(open(sklearn_folder + '/sklearn.pkl','rb'))\n",
" theta_toc = joblib.load(open(torch_folder + '/torch.pkl','rb'))\n",
" svm = joblib.load(open(svm_folder + '/svm.pkl','rb'))\n",
"\n",
" if request.method == 'POST':\n",
" message = request.form['message']\n",
@@ -521,12 +562,15 @@
" my_prediction_toc = torch.where(\n",
" predicted_probs_toc > 0.5, torch.tensor(1), torch.tensor(0))\n",
" \n",
" my_prediction_svm = svm.predict(vect)\n",
" \n",
" return render_template('home.html',\n",
" message = message,\n",
" data = data_o,\n",
" my_prediction_np = my_prediction_np,\n",
" my_prediction_skl = my_prediction_skl,\n",
" my_prediction_toc = my_prediction_toc)\n",
" my_prediction_toc = my_prediction_toc,\n",
" my_prediction_svm = my_prediction_svm)\n",
"\n",
" if __name__ == '__main__':\n",
" \n",
@@ -535,7 +579,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -549,7 +593,7 @@
" log_folder = '/information'\n",
" pvc_name = \"twitter-5000\"\n",
"\n",
" image = \"m10913018/nltk_env:2.3.0\"\n",
" image = \"dfm871002/nltk_env:2.4.2\"\n",
" \n",
" vop = dsl.VolumeOp(\n",
" name=pvc_name,\n",
@@ -583,6 +627,11 @@
" base_image = image,\n",
" )\n",
" \n",
" svm_op = func_to_container_op(\n",
" func = svm_process,\n",
" base_image = image,\n",
" )\n",
" \n",
" accuracy_op = func_to_container_op(\n",
" func = accuracy,\n",
" base_image = image,\n",
@@ -597,6 +646,8 @@
" \n",
" numpy_task = numpy_op(dowload_task.outputs['logdir']).add_pvolumes({ log_folder:vop.volume, })\n",
" \n",
" svm_task = svm_op(numpy_task.outputs['logdir'], numpy_task.outputs['numpydir']).add_pvolumes({ log_folder:vop.volume, })\n",
" \n",
" sklearn_task = sklearn_op(\n",
" numpy_task.outputs['logdir'],\n",
" numpy_task.outputs['numpydir']\n",
@@ -614,20 +665,22 @@
" accuracy_task = accuracy_op(\n",
" sklearn_task.outputs['sklearnscore'],\n",
" logistic_task.outputs['logisticscore'],\n",
" torch_task.outputs['torchscore']\n",
" torch_task.outputs['torchscore'],\n",
" svm_task.outputs['svmscore']\n",
" )\n",
" \n",
" http_task = http_op(\n",
" sklearn_task.outputs['logdir'],\n",
" sklearn_task.outputs['sklearndir'],\n",
" logistic_task.outputs['logisticdir'],\n",
" torch_task.outputs['torchdir']\n",
" torch_task.outputs['torchdir'],\n",
" svm_task.outputs['svmdir']\n",
" ).add_pvolumes({ log_folder:vop.volume, })"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [

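Both notebooks wire the new SVM step into the pipeline with the same two KFP v1 idioms: `func_to_container_op` turns a plain Python function into a container component, and `add_pvolumes` mounts the shared PVC so steps can exchange `.pkl` files. A condensed, hedged sketch of that pattern (the volume size and the literal paths are illustrative; in the real pipelines the paths come from upstream task outputs):

```python
import kfp.dsl as dsl
from kfp.components import func_to_container_op

def svm_process(log_folder: str, numpy_folder: str):
    ...  # body as in the notebooks above

@dsl.pipeline(name='nlp-pipeline')
def nlp_pipeline():
    # Shared PersistentVolumeClaim that every step mounts.
    vop = dsl.VolumeOp(
        name='twitter-5000',
        resource_name='twitter-5000',
        size='1Gi',
        modes=dsl.VOLUME_MODE_RWO,
    )
    # Wrap the function in a container component that runs in the NLP image.
    svm_op = func_to_container_op(
        func=svm_process,
        base_image='dfm871002/nltk_env:2.4.2',
    )
    # Mount the volume at /information so the step sees the upstream .pkl files.
    svm_task = svm_op('/information', '/information/numpy').add_pvolumes(
        {'/information': vop.volume}
    )
```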
Binary image files changed but not shown: 9 images (8 updated, 1 added); sizes before 21–67 KiB, after 30–116 KiB.


@@ -33,6 +33,6 @@ kustomize version
# Relevant part
* [About Install](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/1.%20Install/Install.md)
* [About Docker](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/2.%20Docker/Docker.md)
* [About Jupyter Notebook](https://github.com/WEICHINLIN/Kubeflow---Natural-Language-Processing/blob/main/3.%20Jupyter%20Notebook/Jupyter%20Notebook.md)
* [About Install](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/1.%20Install/Install.md)
* [About Docker](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/2.%20Docker/Docker.md)
* [About Jupyter Notebook](https://github.com/dfm871002/examples/blob/master/Natural-Language-Processing/3.%20Jupyter%20Notebook/Jupyter%20Notebook.md)

OWNERS

@@ -1,3 +1,5 @@
approvers:
- aronchick
- jinchihe
- js-ts
- connor-mccarthy


@@ -1,3 +1,11 @@
## Notice
Blog post: [HELP WANTED: Repackaging Kaggle Getting Started into Kubeflow Examples](https://www.arrikto.com/blog/help-wanted-kaggle-competitors-to-contribute-to-the-open-source-kubeflow-machine-learning-project/)
Highlights:
- We'd like to help bolster the kubeflow/examples repo
- Help people get involved in open source and the Kubeflow project/community
- Give people an opportunity to make a little side-hustle income
# kubeflow-examples
A repository to share extended Kubeflow examples and tutorials to demonstrate machine learning


@@ -3994,22 +3994,9 @@
"integrity": "sha1-2uRqnXj74lKSJYzB54CkHZXAN4I="
},
"follow-redirects": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.5.1.tgz",
"integrity": "sha512-v9GI1hpaqq1ZZR6pBD1+kI7O24PhDvNGNodjS3MdcEqyrahCp8zbtpv+2B/krUnSmUH80lbAS7MrdeK5IylgKg==",
"requires": {
"debug": "^3.1.0"
},
"dependencies": {
"debug": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz",
"integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==",
"requires": {
"ms": "2.0.0"
}
}
}
"version": "1.14.8",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.8.tgz",
"integrity": "sha512-1x0S9UVJHsQprFcEC/qnNzBLcIxsjAV905f/UkQxbclCsoTWlacCNOpQa/anodLl2uaEKFhfWOvM2Qg77+15zA=="
},
"for-in": {
"version": "1.0.2",
@@ -4104,7 +4091,8 @@
},
"ansi-regex": {
"version": "2.1.1",
"bundled": true
"bundled": true,
"optional": true
},
"aproba": {
"version": "1.2.0",
@@ -4122,11 +4110,13 @@
},
"balanced-match": {
"version": "1.0.0",
"bundled": true
"bundled": true,
"optional": true
},
"brace-expansion": {
"version": "1.1.11",
"bundled": true,
"optional": true,
"requires": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
@@ -4139,15 +4129,18 @@
},
"code-point-at": {
"version": "1.1.0",
"bundled": true
"bundled": true,
"optional": true
},
"concat-map": {
"version": "0.0.1",
"bundled": true
"bundled": true,
"optional": true
},
"console-control-strings": {
"version": "1.1.0",
"bundled": true
"bundled": true,
"optional": true
},
"core-util-is": {
"version": "1.0.2",
@@ -4250,7 +4243,8 @@
},
"inherits": {
"version": "2.0.3",
"bundled": true
"bundled": true,
"optional": true
},
"ini": {
"version": "1.3.5",
@@ -4260,6 +4254,7 @@
"is-fullwidth-code-point": {
"version": "1.0.0",
"bundled": true,
"optional": true,
"requires": {
"number-is-nan": "^1.0.0"
}
@@ -4272,17 +4267,20 @@
"minimatch": {
"version": "3.0.4",
"bundled": true,
"optional": true,
"requires": {
"brace-expansion": "^1.1.7"
}
},
"minimist": {
"version": "0.0.8",
"bundled": true
"bundled": true,
"optional": true
},
"minipass": {
"version": "2.2.4",
"bundled": true,
"optional": true,
"requires": {
"safe-buffer": "^5.1.1",
"yallist": "^3.0.0"
@@ -4299,6 +4297,7 @@
"mkdirp": {
"version": "0.5.1",
"bundled": true,
"optional": true,
"requires": {
"minimist": "0.0.8"
}
@@ -4371,7 +4370,8 @@
},
"number-is-nan": {
"version": "1.0.1",
"bundled": true
"bundled": true,
"optional": true
},
"object-assign": {
"version": "4.1.1",
@@ -4381,6 +4381,7 @@
"once": {
"version": "1.4.0",
"bundled": true,
"optional": true,
"requires": {
"wrappy": "1"
}
@@ -4456,7 +4457,8 @@
},
"safe-buffer": {
"version": "5.1.1",
"bundled": true
"bundled": true,
"optional": true
},
"safer-buffer": {
"version": "2.1.2",
@@ -4486,6 +4488,7 @@
"string-width": {
"version": "1.0.2",
"bundled": true,
"optional": true,
"requires": {
"code-point-at": "^1.0.0",
"is-fullwidth-code-point": "^1.0.0",
@@ -4503,6 +4506,7 @@
"strip-ansi": {
"version": "3.0.1",
"bundled": true,
"optional": true,
"requires": {
"ansi-regex": "^2.0.0"
}
@@ -4541,11 +4545,13 @@
},
"wrappy": {
"version": "1.0.2",
"bundled": true
"bundled": true,
"optional": true
},
"yallist": {
"version": "3.0.2",
"bundled": true
"bundled": true,
"optional": true
}
}
},
@@ -7700,9 +7706,9 @@
"integrity": "sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A="
},
"path-parse": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.5.tgz",
"integrity": "sha1-PBrfhx6pzWyUMbbqK9dKD/BVxME="
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
"integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw=="
},
"path-to-regexp": {
"version": "1.7.0",
@@ -10759,9 +10765,9 @@
}
},
"tmpl": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.4.tgz",
"integrity": "sha1-I2QN17QtAEM5ERQIIOXPRA5SHdE="
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz",
"integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw=="
},
"to-arraybuffer": {
"version": "1.0.1",
@@ -11100,9 +11106,9 @@
}
},
"urijs": {
"version": "1.19.1",
"resolved": "https://registry.npmjs.org/urijs/-/urijs-1.19.1.tgz",
"integrity": "sha512-xVrGVi94ueCJNrBSTjWqjvtgvl3cyOTThp2zaMaFNGp3F542TR6sM3f2o8RqZl+AwteClSVmoCyt0ka4RjQOQg=="
"version": "1.19.7",
"resolved": "https://registry.npmjs.org/urijs/-/urijs-1.19.7.tgz",
"integrity": "sha512-Id+IKjdU0Hx+7Zx717jwLPsPeUqz7rAtuVBRLLs+qn+J2nf9NGITWVCxcijgYxBqe83C7sqsQPs6H1pyz3x9gA=="
},
"urix": {
"version": "0.1.0",


@@ -11,3 +11,7 @@ that's suitable for presentation to public audiences.
* [Simple pipeline](simple_pipeline/): highlights the use of pipelines and
hyperparameter tuning on a GKE cluster with node autoprovisioning.
* [Recurring Run](recurring/): A simple demo that illustrates how to use
  the Kubeflow Pipelines SDK to provision [recurring
  runs](https://www.kubeflow.org/docs/components/pipelines/concepts/run/).

demos/recurring/README.md

@@ -0,0 +1,27 @@
# Kubeflow demo - Recurring runs with the KFP SDK
## 1. Set up your environment
This demo assumes that you have a functioning Kubeflow Pipelines deployment. If
not, follow the instructions
[here](https://www.kubeflow.org/docs/components/pipelines/installation/) and
[here](https://www.kubeflow.org/docs/components/pipelines/sdk/install-sdk/).
This demo has been verified to work with:
- KFP version `1.7.1`
- KFP SDK version `1.8.11`
Activate the conda environment you created following the above steps.
Create a Jupyter kernel for your conda environment.
```bash
ipython kernel install --name "kfp" --user
```
## 2. Run the KFP SDK script
Step through the provided [notebook](recurring.ipynb) to create a recurring run
using the KFP SDK. Make sure to select the `kfp` kernel that you created
earlier.


@@ -0,0 +1,19 @@
name: Download
inputs:
- {name: Url, type: URI}
metadata:
  annotations:
    author: Alexander Perlman <mythicalsunlight@gmail.com>
implementation:
  container:
    image: alpine/curl
    command:
    - sh
    - -exc
    - |
      url="$0"
      path='/tmp/script'
      curl "$url" -o "$path"
      chmod 700 "$path"
      /bin/sh "$path"
    - inputValue: Url


@@ -0,0 +1,56 @@
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: pipeline-
  annotations: {pipelines.kubeflow.org/kfp_sdk_version: 1.8.11, pipelines.kubeflow.org/pipeline_compilation_time: '2022-01-31T21:51:10.599476',
    pipelines.kubeflow.org/pipeline_spec: '{"inputs": [{"name": "url"}], "name": "Pipeline"}'}
  labels: {pipelines.kubeflow.org/kfp_sdk_version: 1.8.11}
spec:
  entrypoint: pipeline
  templates:
  - name: download
    container:
      args: []
      command:
      - sh
      - -exc
      - |
        url="$0"
        path='/tmp/script'
        curl "$url" -o "$path"
        chmod 700 "$path"
        /bin/sh "$path"
      - '{{inputs.parameters.url}}'
      image: alpine/curl
    inputs:
      parameters:
      - {name: url}
    metadata:
      annotations: {author: Alexander Perlman <mythicalsunlight@gmail.com>, pipelines.kubeflow.org/component_spec: '{"implementation":
          {"container": {"command": ["sh", "-exc", "url=\"$0\"\npath=''/tmp/script''\ncurl
          \"$url\" -o \"$path\"\nchmod 700 \"$path\"\n/bin/sh \"$path\"\n", {"inputValue":
          "Url"}], "image": "alpine/curl"}}, "inputs": [{"name": "Url", "type": "URI"}],
          "metadata": {"annotations": {"author": "Alexander Perlman <mythicalsunlight@gmail.com>"}},
          "name": "Download"}', pipelines.kubeflow.org/component_ref: '{"digest":
          "1bb47e384d056817b16202398d1e5fc8ce02daf1e40f69e3103218402c05437b", "url":
          "https://raw.githubusercontent.com/droctothorpe/examples/master/demos/recurring/component.yaml"}',
        pipelines.kubeflow.org/arguments.parameters: '{"Url": "{{inputs.parameters.url}}"}'}
      labels:
        pipelines.kubeflow.org/kfp_sdk_version: 1.8.11
        pipelines.kubeflow.org/pipeline-sdk-type: kfp
        pipelines.kubeflow.org/enable_caching: "true"
  - name: pipeline
    inputs:
      parameters:
      - {name: url}
    dag:
      tasks:
      - name: download
        template: download
        arguments:
          parameters:
          - {name: url, value: '{{inputs.parameters.url}}'}
  arguments:
    parameters:
    - {name: url}
  serviceAccountName: pipeline-runner


@@ -0,0 +1,302 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Recurring runs with the KFP SDK"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you're running on a local cluster, expose the GUI and API, respectively, with\n",
"the following commands:\n",
"\n",
"```\n",
"kubectl port-forward -n kubeflow svc/ml-pipeline-ui 8080:80\n",
"kubectl port-forward -n kubeflow svc/ml-pipeline-ui 3000:80\n",
"```\n",
"\n",
"The rest of this demo assumes that you're running locally.\n",
"\n",
"Instantiate the KFP SDK client. Set the host variable to the url and port where\n",
"you expose the KFP API. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import kfp\n",
"\n",
"host = 'http://localhost:3000'\n",
"client = kfp.Client(host=host)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a pipeline component from the provided component file. This component\n",
"retrieves and executes a script from a provided URL."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run_script = kfp.components.load_component_from_url(\n",
" 'https://raw.githubusercontent.com/kubeflow/examples/master/demos/recurring/component.yaml'\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a pipeline function."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def pipeline(url):\n",
" run_script_task = run_script(url=url)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Compile the pipeline function. We will pass the resulting yaml to the pipeline\n",
"execution invocations."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"kfp.compiler.Compiler().compile(\n",
" pipeline_func=pipeline,\n",
" package_path='download.yaml',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a parameters dictionary with the url key. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"parameters = {\n",
" 'url': 'https://raw.githubusercontent.com/kubeflow/examples/master/demos/recurring/success.sh'\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can _optionally_ validate the pipeline with a single run before creating a recurring run."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result = client.create_run_from_pipeline_func(\n",
" pipeline_func=pipeline,\n",
" arguments=parameters,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can retrieve the result of the pipeline run through the Kubeflow GUI, which\n",
"is the recommended approach. That being said, we can also interrogate the result\n",
"programmatically."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"result.wait_for_run_completion()\n",
"print(result.run_info)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that we've validated a single run, let's create a recurring run.\n",
"\n",
"We first need to create an experiment since the `create_recurring_run` method\n",
"requires an `experiment_id`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment = client.create_experiment('test')\n",
"\n",
"job = client.create_recurring_run(\n",
" experiment_id=experiment.id,\n",
" job_name='test',\n",
" cron_expression='*/2 * * * *', # Runs once every two minutes.\n",
" pipeline_package_path='download.yaml', # Pass in compiled output.\n",
" params=parameters,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The Kubeflow Pipelines GUI provides an excellent interface for interacting with\n",
"recurring runs, but you can interrogate the job programmatically if you prefer."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(job)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the GUI, you can retrieve the logs of an individual run. They should\n",
"culminate with `Success!`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To disable the recurring run:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client.disable_job(job.id)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To list recurring runs:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client.list_recurring_runs()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To get details about an individual recurring run:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client.get_recurring_run(job.id)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To delete a recurring run programmatically:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result = client.delete_job(job.id)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Additional recurring run interactions via the SDK are documented [here](https://kubeflow-pipelines.readthedocs.io/en/stable/)."
]
}
],
"metadata": {
"interpreter": {
"hash": "8d1899d3d453529ab54a548c453eb03872168ef6a9900e12952b62a455030e12"
},
"kernelspec": {
"display_name": "Python 3.7.9 64-bit ('base': conda)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}


@@ -0,0 +1 @@
echo "Success!"


@@ -2,4 +2,4 @@ google-cloud-storage==1.17.0
google-cloud-bigquery==1.6.0
pandas==0.23.4
numpy==1.16.0
tensorflow==1.15.0
tensorflow==2.5.3


@@ -71,7 +71,7 @@ ijson==2.3
imageio==2.2.0
imagesize==0.7.1
ipykernel==4.6.1
ipython==6.2.1
ipython==7.16.3
ipython-genutils==0.2.0
ipywidgets==7.0.0
isort==4.2.15
@@ -85,14 +85,14 @@ jsonschema==2.6.0
jupyter-client==5.1.0
jupyter-console==5.2.0
jupyter-core==4.3.0
jupyterlab==0.27.0
jupyterlab==1.2.21
jupyterlab-launcher==0.4.0
Keras==2.1.2
ktext==0.27
lazy-object-proxy==1.3.1
llvmlite==0.20.0
locket==0.2.0
lxml==3.8.0
lxml==4.6.5
Markdown==2.6.9
MarkupSafe==1.0
matplotlib==2.1.0
@@ -134,7 +134,7 @@ patsy==0.4.1
pep8==1.7.0
pexpect==4.3.0
pickleshare==0.7.4
Pillow==4.3.0
Pillow==9.0.0
pkginfo==1.4.1
plac==0.9.6
ply==3.10
@@ -142,7 +142,7 @@ pox==0.2.3
ppft==1.6.4.7.1
preshed==1.0.0
prompt-toolkit==1.0.15
protobuf==3.5.0
protobuf==3.15.0
psutil==5.2.2
ptyprocess==0.5.2
py==1.4.34
@@ -201,7 +201,7 @@ sympy==1.1.1
tables==3.4.2
tabulate==0.8.2
tblib==1.3.2
tensorflow-gpu==1.15.0
tensorflow-gpu==2.5.3
tensorflow-tensorboard==0.1.8
termcolor==1.1.0
terminado==0.6


@@ -25,26 +25,26 @@ kubectl apply -f ./scripts/spark-rbac.yaml
## Access Kubeflow/KFP UI
![image](/images/central-ui.png)
![image](./images/central-ui.png)
## OR
![image](/images/pipelines-ui.png)
![image](./images/pipelines-ui.png)
## Upload pipeline
Upload the spark_job_pipeline.yaml file
![image](/images/upload-pipeline.png)
![image](./images/upload-pipeline.png)
# Create Run
![image](/images/create-run.png)
![image](./images/create-run.png)
# Start the pipeline and add the service account `spark-sa`
![image](/images/start_run.png)
![image](./images/start_run.png)
# Wait until the execution is finished, then check the `print-message` logs to view the result
![image](/images/final-output.png)
![image](./images/final-output.png)

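The upload-and-run steps can also be scripted with the KFP SDK instead of the UI; a hedged sketch (the host assumes a local port-forward, the file name matches the compiled pipeline uploaded above, and the `service_account` parameter requires a reasonably recent KFP v1 SDK):

```python
import kfp

client = kfp.Client(host='http://localhost:3000')  # adjust to your KFP endpoint

# Start a run of the compiled pipeline under the spark-sa service account.
run = client.create_run_from_pipeline_package(
    'spark_job_pipeline.yaml',
    arguments={},
    service_account='spark-sa',
)
print(run.run_id)
```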

@@ -1,4 +1,4 @@
tensorflow==2.2.0
tensorflow==2.5.3
tensorflow-datasets==3.1.0
scikit-learn==0.23.1
numpy==1.19.0


@@ -1,2 +1,2 @@
tensorflow==2.2.0
tensorflow==2.5.3
tensorflow-datasets==3.1.0


@@ -9,3 +9,6 @@ Kubeflow fairing, pipelines demo using synthetic data. This notebook `build-trai
```
1. In the directory `xgboost_synthetic`, open the notebook `build-train-deploy.ipynb`
```
cd xgboost_synthetic
```