From fd7a98fed192237aada54bafd03ef15f54a154a9 Mon Sep 17 00:00:00 2001 From: Amy Date: Mon, 24 Feb 2020 08:10:48 -0800 Subject: [PATCH] GIS KFP example: kfp-related syntax changes; use gcs client libs instead of gsutil (#749) * some mods to accommodate (perhaps temporary) changes in how the kfp sdk works * Use gcs client libs rather than gsutil for a gcs copy; required due to changes in node service account permissions. * more mods to address kfp syntax changes --- .../components/t2t/t2t-proc/datagen.py | 19 ++++++---- .../pipelines/example_pipelines/gh_summ.py | 14 ++++---- .../example_pipelines/gh_summ.py.tar.gz | Bin 2150 -> 2165 bytes .../example_pipelines/gh_summ_serve.py | 4 +-- .../example_pipelines/gh_summ_serve.py.tar.gz | Bin 849 -> 888 bytes .../pipelines-notebook.ipynb | 34 +++++++++--------- 6 files changed, 38 insertions(+), 33 deletions(-) diff --git a/github_issue_summarization/pipelines/components/t2t/t2t-proc/datagen.py b/github_issue_summarization/pipelines/components/t2t/t2t-proc/datagen.py index 74e8c394..ac5d6b19 100644 --- a/github_issue_summarization/pipelines/components/t2t/t2t-proc/datagen.py +++ b/github_issue_summarization/pipelines/components/t2t/t2t-proc/datagen.py @@ -39,6 +39,16 @@ def copy_local_directory_to_gcs(project, local_path, bucket_name, gcs_path): blob = bucket.blob(remote_path) blob.upload_from_filename(local_file) +def download_blob(bucket_name, source_blob_name, destination_file_name): + """Downloads a blob from the bucket.""" + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(source_blob_name) + blob.download_to_filename(destination_file_name) + print("Blob {} downloaded to {}.".format( + source_blob_name, destination_file_name) + ) + def main(): parser = argparse.ArgumentParser(description='ML Trainer') parser.add_argument( @@ -57,13 +67,8 @@ def main(): local_data_dir = '/ml/t2t_gh_data' local_source_data_file = '/ml/gh_data/github_issues.csv' - data_copy_command1 = ['gsutil', 'cp', - 'gs://aju-dev-demos-codelabs/kubecon/gh_data/github_issues.csv', - local_source_data_file - ] - print(data_copy_command1) - result = subprocess.call(data_copy_command1) - print(result) + download_blob('aju-dev-demos-codelabs', 'kubecon/gh_data/github_issues.csv', + local_source_data_file) datagen_command = ['t2t-datagen', '--data_dir', local_data_dir, '--t2t_usr_dir', '/ml/ghsumm/trainer', diff --git a/github_issue_summarization/pipelines/example_pipelines/gh_summ.py b/github_issue_summarization/pipelines/example_pipelines/gh_summ.py index 183ef871..d2104534 100644 --- a/github_issue_summarization/pipelines/example_pipelines/gh_summ.py +++ b/github_issue_summarization/pipelines/example_pipelines/gh_summ.py @@ -16,7 +16,7 @@ import kfp.dsl as dsl import kfp.gcp as gcp import kfp.components as comp -from kfp.dsl.types import GCSPath, String +# from kfp.dsl.types import GCSPath, String COPY_ACTION = 'copy_data' @@ -43,12 +43,12 @@ metadata_log_op = comp.load_component_from_url( ) def gh_summ( #pylint: disable=unused-argument train_steps: 'Integer' = 2019300, - project: String = 'YOUR_PROJECT_HERE', - github_token: String = 'YOUR_GITHUB_TOKEN_HERE', - working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE', - checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/', - deploy_webapp: String = 'true', - data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/' + project: str = 'YOUR_PROJECT_HERE', + github_token: str = 'YOUR_GITHUB_TOKEN_HERE', + working_dir: 'GCSPath' = 'gs://YOUR_GCS_DIR_HERE', + checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/', + deploy_webapp: str = 'true', + data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/' ): diff --git a/github_issue_summarization/pipelines/example_pipelines/gh_summ.py.tar.gz b/github_issue_summarization/pipelines/example_pipelines/gh_summ.py.tar.gz index 1f04240d3ba5105be1a6b072b535bc98a9d867ee..1ff03e97201a09d78524d533799b57e6906c529c 100644 GIT binary patch literal 2165 zcmV-*2#WU~iwFp_@KRm^|7U1lb9HTPE^v7+bYXG;?Hg-T+c=QV{0i5f>zF{VyLI;yW0YFfi zK{xb*!}ynS3qoi}_=)9Ot|bZ?(?av;Rzu!BbXzT9hR3QJuXA#( zosOoZB_8m3V@%Dkg6b78feAo!{u1Cvq!@64e;hAVbkBe`xD8dJ7rN)F4w=xr2;oE1 z5INl5Ex0F#h0RH2+0o}vzxO@MMeR8vU@T1o`nLC|J-}Pw`|ZV8vqyA@S;pp~nR5-; zc5YX6zWIK>GpOfO1tcpBBPfNDkpo?{Hfa!GItV+BWJbw2$;?=i7?Voob~ zHYX;_m`4RwzL|-s=C!7_hI(0Qnr=^u4FSALRnfu zT@_0&iJi+h;`P?6^;j~iRffP_Hh0R*)+^w$7`rdts9=+&4VIwrnHv`MNBvmzcmiRs zg5z(t9+o$|60?G$5M5E$YL|P;lU1swnVbsr))u)5)w~^xXahab&(bs9^B<|&^AV|K zZ^94btyU{_xqL`2bG^ti@C55A1Y*DpDKUWJ<5t2$zV1b zUuh@P;py;dHX2-B*DRxkP!AwdP2E@U9|m>;7O+D#!VX6vlOgE+2$?hHvT$-azB#=< zz0d|Xvro+G+Sv$mHvTf4PDiK1XJMs6#Aku$0JA%X!8f;@ zh31y-^?ITVK#;#wp_kL57DQtA-yT1XV=^aQA_>d-XfYvtw}W;_{{)7iU?;6m$Dbr; zs})1`r0NW4#uUezCSopzBkoSWr=cQabFFuIZgbj|-Ge!o85}j!9ptVOX)s*xVSTwl z)*Y5;y;W8bSBe{EEQsfE8i(!;yhcm@1n%AEdhmvfCqB3`d98DxG>ssBnN^E;=8CiZ zJo-!cK1zDDqJXMl!LD5616C_l|_t@&Nmd^)jk`ylR!yLLrKzX^OC(tYK z(o0l{l|%tVu|eeObB%5h-=<~hxQ!8@osyjl3oSsrOJWcr4WlY9TzTn;iyi!t=&jz z$<0q=BS*?gR5{%21Ty&U47NIhd{`8Uo(0=!Fll!qYbO3m;=IGyjGGr6v3T|JR_n5K znl4^`<B`=VumS-~EwA_h_X|(Pp?5(K6&b)?3XIjy~ zexn9E>+T>R4XU@;;^hY2C-v0gI;@D?R<%?j_Y8U0WzIsfw%ahmUJPDC5Rv7k(V-^X zG*4E#=+Z@(n#*Htp529=F6_MGutO_Un+KkF>!Ju?&Pn7@ngxQGl&CjmO9$V;ng37w zSlW5n)N?85juzXF7l%Jhw?;#MUqoFO9bdxB(*L;YPu!VDrMaW7yp|Qv!m{ys=Ziy4 zJWrwASClBNV=09?R-IV66-+L_oAeTvG`MRf?3xMxr_F=`4817ObM<)Hwj8VYddPu} z7d)2PeD~`)3+65?!vfe9IjZSL82rr}(qdIW>i;~wjBVb5J=Yt7Vx9lRC1sRB3FJS@ z`{vTf$IHpmEF}5lr#2NSe3K{m0`Cd?DW9$tvH|PG&(U_;O*x}SH#?lgsq}{Zlewm+ zcH1jOaiNG)jL(;u7T%09@5-XF)4Olq~V}nQMh%`4xm;{8CwQ+AD4i z>_aYbQ(x0KNSs|PwuQjRjI2gqkSGVaApd=bq$HCPCCf@0V>>-4MAf%WA zZo(cBiZWwLMz`rTVe>~}?3nqml|;mzpy9({NfJADBZfMdpusWmfd-8+C1^o{B#aNl zk8zC?vgEW8g7V)RWob)75{h#r*U1tb*^jbd_FPG`uB2g|m}POjFP}y0ngG7a4j^J|}Vq`vbbL zmTs)4fxoB5lC0sMA0~z!E{TV)6O-+e9R{Y)2COippht@`t>pEz=*^vRUP4uZVpc zg>FE#ENPq)d5udFdEvcvN3L-c)o7}XnDhYnf%RF=%EF#!Dm>UY7v@_<+!L;2N4 z8i0YpEK2;y14H9xVt}YlZvPY~w@LnBlfu57$K;mnXY#|KSFv<#!Py@q~W`K-;F7_Qc4 zYbdC5RXW(tc3lkEVRX+8N1DRTdpj5D1U^2R4-a=)pdF>|5(eZ$qAUbB8e-UpFkKa| zOzKCbc#)V~uIz-dS*9eT9fJL`(Ej(gY=i{s@}7PEcy67~kFB%AFVnrMIYr@e#3sth z5r`-A`N`?jIy^r=ogE!6X7e-a=wf<2JzLBUPcO@)%!Jr6p{8R;2K=!EyLYjdm?<18 zgAFEl^gWRjE|?sh&aaLyk3U<7SBp;)ee2^4c+dZsUR=zMr_aQ!UKJnX(3j8SjZ3`c z1-Y3hhmen+!zN5nou$?n=eeAFkX~!(pbHdQ^z@cF{+$1r5 z&nWyBI7A-7ChK>&aJ-=hp6Ltoa?-i<*|T6ox{IT}xV)Pz_OXEYjV)26qZJQ!j=+e%2(B%0d@l~O`2xiS%1iSDAmO&cA&Yd zXdA5>&VM)lT93MuP{I$E^8IaIjpd2VIc~U$XOS1UQM5K9ZC7<&zS$7ZwiB(U!Ohpd?G%-Sby(TZZEmH!#oOn1+Jk3NRGpExTcr@h z3mqd5F&YJEUUyn=*~rDS&he>==zNMQ4B9BJ*&E2ZFb=jTtC;Z1s!MgX8!gG>-TYU# zh^o_6BPpHUcH{jbPEuEVYC%)R(i5)M2Dg)yaGu*!)mhudvUD?kUDeLJJAHW!a7IhamdwQ~^LdOntMcrgO;SRMAorc*? z6=XUVBT7kd7nduCte1AUTU=^=7;lvx_MZ5)dE$1Lhu%f^F8b$m(fsP&!byu|D&D|Z zG2d&H{c8nD`K0+&532E(KhNjM)=n>XFb2Kh6@tn7;B|h`Yv*3eHINsm#{baHgi=QV zU-a(X1YOG~jD_yn+`b0E*RQ>=?@}Jw$g#DzK{WF9%?*8X!*ez_#3TvR*p{nkp6k16 z`P3(V7~fCO`+uKEqruu;x)_e2Z#q#rLE~52FVUsC`-DPW*{!^qyqF7yu-b6^HQ=6B zmlo#s&q;Admp3-w%+ilz~?QU+@*dgzJT7Si+t73b`-I!u4jlqQ^GSMeAYi#%| zq_+&}1aR{sn6-rm7}XUE&!c?W2zu<*P$DY9PNe*zzKatT5b zM*Xh$8?`(Si-d8(uG@qK%RQX(Io@me3qs?S##bg5S1G$``57o2V}<>$5&M`@rm!-; zWG*lwUiO!4FZ2woQJaY*ScyZ)AzHb$xbtI>ksw?Z8Toz4j5{T;0&fabGSMj>Xoe-k zo;e|u(8R+u_NGVZ9K_PR`X4->P_C89#QSc&USXXo>+#$8ViFz?rzaPm!s+<)U=*GV zCIf3gaRyc+A!@E?p<)Z5R?hNlaB9w!&|oC$mgk!1O4wlcKWf&;xwS{6)Zj^-Wmu43 zIV=9XWhns)wwzI+UzT3PM7b5(utF`rU@50qBsv4C)&yf={3vsUr36)(RHUxF7ZsI4 ze~DB5K&_b?8`p#sx115GUXrr4koF=X=tU9JRBaZ!x_%@yHDp+I1sP8bD@7Tmh!`hj zMfMcSg{)p1OY}S&5vupf<2E()n9(uqt_4IW zV=<(m4bpU;%xwg8cXw0nu(l>~MRTJoj@7);mFxxqzSby)rTJw{qR1NHnGEc*GkFK_ z6B}|NR2>!O-h#p>)5;KPjyV%*RTa?Xb8H@u$H%9Guy=NLIy~%6hvQLrI2rT@qv^1B zdfwXJae;(LfC`d`BlE}fk|H>jfi_KptWe_iwPbXA3WulTi~f24bJ)9>p46g;M?;g@ z_;N6r4Euw(O~oe}P9CnbNZ2n_!dQ};zR7u(L6)Lwxvai8NL}S)(Oq_`-N#Zh^k3{| z+UVfSO3uuMzcAtLllDLv)v9YAh4u878E#+d&$!Q*u+6%*E=tL$A=^#!e>!*#t8_!- z#e4I0jZwbh literal 849 zcmV-X1FrlZiwFqm-oaf0|7U1lb9HTPUvp)0c4aPbc`kHeascg=O>f&c5QcO1uOM{E zrL>wfO#$7!v6~o8B*Y|^77z6pfr|gLVin-?cn|= zFvUNBHe8B`Fr7)I<$a&aaCO9TwO~iRB^PN#K1#j3%jH9_0LxNlS=1wh2_Y@BxYxKL zA!>V>On+IkpkX18JpA z2(EL%MTY05WcHTcfL`G+ctHX#RA~)LeRkS+tjw*`{WQ6q#^>YN#qA&QZ1Qe&6<>^| zBey}Y0)l8O9Aw-s%6V+%5=5~1^KqR7=hX_~#*{!zSrkn3Z&k>_<`B1MAi%kOh4vAt zb!cTx_pE8JirpnmXJrAxHYsBow^}SgJ*Gsor7D-UZmw8f;>y!Op$&@auS#-Z|H>0I zG%j%o43b2a!q%=gHKMu{7!q1pC{&K2RgO`TvaHFMWoBubLF2F!h3(vK^Zq>TMqks- z!M^Ylnb^z6;d)({zrWMzw>NF7O4X#GK%^iN4z0sz$NT!=2beRZo(QS6P@mi={TRqk zgAl*CFjdYIDJr}a3QEu0QruOGH8+b?;#+5E6cF}v7pGEo?^ z`;w-x!e1c`O`H{*o6xl~IugPch6#Y!9^1H*2x}o(TFe11E2-^P5yYySf%D1a{Bjfz zuCFi0XM@>zauuIVN5j$8Y&^KU>2-=48b~zQFiRB0KUgu#S#H7-3q{?eVf}j}Mdy&S z%gOEVX80~1+|Djq`tjQ_o;~?6noh^V(Z4*57c6tNOSF3^Gbytii&rJbghfstTd8>j zslBW2SF}2AT3P1U2Jc<_wdz^8@io@vr!IAFXlIqS9By5*-QBK_?Y=a>5OME?^JUjA z^|*dF_N|}m%C;Zq$LW92_0|p$vE(MJRXn7