kfp-tekton/sdk/python/tests/compiler/testdata/old_kfp_volume.py


# Copyright 2020 kubeflow.org
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
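"""Kale-generated KFP pipeline for the Titanic survival example.

Each step function below wraps the code blocks of one notebook cell group;
the module builds a KFP pipeline from them and, when run directly, compiles
and submits it. It is kept here as test data for the kfp-tekton compiler.
"""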
import kfp.dsl as dsl
import json
import kfp.components as comp
from collections import OrderedDict
from kubernetes import client as k8s_client
def loaddata():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()
    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''
    block2 = '''
path = "data/"
PREDICTION_LABEL = 'Survived'
test_df = pd.read_csv(path + "test.csv")
train_df = pd.read_csv(path + "train.csv")
'''
    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(PREDICTION_LABEL, "PREDICTION_LABEL")
_kale_marshal_utils.save(test_df, "test_df")
_kale_marshal_utils.save(train_df, "train_df")
# -----------------------DATA SAVING END-----------------------------------
'''
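    # The data-saving block above is Kale's marshalling mechanism: objects are
    # serialized under /marshal, a volume shared by every step (see the
    # VolumeOp in the pipeline below), and downstream steps reload them by
    # name with matching data-loading blocks.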
    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        block1,
        block2,
        data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/loaddata.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('loaddata')
    _kale_mlmd_utils.call("mark_execution_complete")
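# Each step function below follows the same Kale template as loaddata():
# initialize MLMD metadata, execute the notebook code blocks in a Jupyter
# kernel, write the rendered output to an HTML artifact, register that
# artifact with the KFP UI, and mark the execution complete.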
def datapreprocessing():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
test_df = _kale_marshal_utils.load("test_df")
train_df = _kale_marshal_utils.load("train_df")
# -----------------------DATA LOADING END----------------------------------
'''
    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''
    block2 = '''
data = [train_df, test_df]
for dataset in data:
    dataset['relatives'] = dataset['SibSp'] + dataset['Parch']
    dataset.loc[dataset['relatives'] > 0, 'not_alone'] = 0
    dataset.loc[dataset['relatives'] == 0, 'not_alone'] = 1
    dataset['not_alone'] = dataset['not_alone'].astype(int)
train_df['not_alone'].value_counts()
'''
    block3 = '''
# This does not contribute to a person's survival probability
train_df = train_df.drop(['PassengerId'], axis=1)
'''
    block4 = '''
import re
deck = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "U": 8}
data = [train_df, test_df]
for dataset in data:
    dataset['Cabin'] = dataset['Cabin'].fillna("U0")
    dataset['Deck'] = dataset['Cabin'].map(lambda x: re.compile("([a-zA-Z]+)").search(x).group())
    dataset['Deck'] = dataset['Deck'].map(deck)
    dataset['Deck'] = dataset['Deck'].fillna(0)
    dataset['Deck'] = dataset['Deck'].astype(int)
# we can now drop the cabin feature
train_df = train_df.drop(['Cabin'], axis=1)
test_df = test_df.drop(['Cabin'], axis=1)
'''
    block5 = '''
data = [train_df, test_df]
for dataset in data:
    mean = train_df["Age"].mean()
    std = test_df["Age"].std()
    is_null = dataset["Age"].isnull().sum()
    # compute random numbers between the mean, std and is_null
    rand_age = np.random.randint(mean - std, mean + std, size=is_null)
    # fill NaN values in Age column with random values generated
    age_slice = dataset["Age"].copy()
    age_slice[np.isnan(age_slice)] = rand_age
    dataset["Age"] = age_slice
dataset["Age"] = train_df["Age"].astype(int)
train_df["Age"].isnull().sum()
'''
block6 = '''
train_df['Embarked'].describe()
'''
block7 = '''
# fill with most common value
common_value = 'S'
data = [train_df, test_df]
for dataset in data:
dataset['Embarked'] = dataset['Embarked'].fillna(common_value)
'''
block8 = '''
train_df.info()
'''
data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(test_df, "test_df")
_kale_marshal_utils.save(train_df, "train_df")
# -----------------------DATA SAVING END-----------------------------------
'''
# run the code blocks inside a jupyter kernel
from kale.utils.jupyter_utils import run_code as _kale_run_code
from kale.utils.kfp_utils import \
update_uimetadata as _kale_update_uimetadata
blocks = (data_loading_block,
block1,
block2,
block3,
block4,
block5,
block6,
block7,
block8,
data_saving_block)
html_artifact = _kale_run_code(blocks)
with open("/datapreprocessing.html", "w") as f:
f.write(html_artifact)
_kale_update_uimetadata('datapreprocessing')
_kale_mlmd_utils.call("mark_execution_complete")
def featureengineering():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
PREDICTION_LABEL = _kale_marshal_utils.load("PREDICTION_LABEL")
test_df = _kale_marshal_utils.load("test_df")
train_df = _kale_marshal_utils.load("train_df")
# -----------------------DATA LOADING END----------------------------------
'''
    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''
    block2 = '''
data = [train_df, test_df]
for dataset in data:
    dataset['Fare'] = dataset['Fare'].fillna(0)
    dataset['Fare'] = dataset['Fare'].astype(int)
'''
    block3 = '''
data = [train_df, test_df]
titles = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
for dataset in data:
    # extract titles
    dataset['Title'] = dataset.Name.str.extract(' ([A-Za-z]+)\\.', expand=False)
    # replace uncommon titles with a more common one, or with Rare
    dataset['Title'] = dataset['Title'].replace(['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr',
                                                 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
    dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')
    # convert titles into numbers
    dataset['Title'] = dataset['Title'].map(titles)
    # filling NaN with 0, to be safe
    dataset['Title'] = dataset['Title'].fillna(0)
train_df = train_df.drop(['Name'], axis=1)
test_df = test_df.drop(['Name'], axis=1)
'''
    block4 = '''
genders = {"male": 0, "female": 1}
data = [train_df, test_df]
for dataset in data:
    dataset['Sex'] = dataset['Sex'].map(genders)
'''
    block5 = '''
train_df = train_df.drop(['Ticket'], axis=1)
test_df = test_df.drop(['Ticket'], axis=1)
'''
    block6 = '''
ports = {"S": 0, "C": 1, "Q": 2}
data = [train_df, test_df]
for dataset in data:
    dataset['Embarked'] = dataset['Embarked'].map(ports)
'''
    block7 = '''
data = [train_df, test_df]
for dataset in data:
    dataset['Age'] = dataset['Age'].astype(int)
    dataset.loc[dataset['Age'] <= 11, 'Age'] = 0
    dataset.loc[(dataset['Age'] > 11) & (dataset['Age'] <= 18), 'Age'] = 1
    dataset.loc[(dataset['Age'] > 18) & (dataset['Age'] <= 22), 'Age'] = 2
    dataset.loc[(dataset['Age'] > 22) & (dataset['Age'] <= 27), 'Age'] = 3
    dataset.loc[(dataset['Age'] > 27) & (dataset['Age'] <= 33), 'Age'] = 4
    dataset.loc[(dataset['Age'] > 33) & (dataset['Age'] <= 40), 'Age'] = 5
    dataset.loc[(dataset['Age'] > 40) & (dataset['Age'] <= 66), 'Age'] = 6
    dataset.loc[dataset['Age'] > 66, 'Age'] = 6
# let's see how it's distributed
train_df['Age'].value_counts()
'''
    block8 = '''
data = [train_df, test_df]
for dataset in data:
    dataset.loc[dataset['Fare'] <= 7.91, 'Fare'] = 0
    dataset.loc[(dataset['Fare'] > 7.91) & (dataset['Fare'] <= 14.454), 'Fare'] = 1
    dataset.loc[(dataset['Fare'] > 14.454) & (dataset['Fare'] <= 31), 'Fare'] = 2
    dataset.loc[(dataset['Fare'] > 31) & (dataset['Fare'] <= 99), 'Fare'] = 3
    dataset.loc[(dataset['Fare'] > 99) & (dataset['Fare'] <= 250), 'Fare'] = 4
    dataset.loc[dataset['Fare'] > 250, 'Fare'] = 5
    dataset['Fare'] = dataset['Fare'].astype(int)
'''
    block9 = '''
data = [train_df, test_df]
for dataset in data:
    dataset['Age_Class'] = dataset['Age'] * dataset['Pclass']
'''
    block10 = '''
for dataset in data:
    dataset['Fare_Per_Person'] = dataset['Fare'] / (dataset['relatives'] + 1)
    dataset['Fare_Per_Person'] = dataset['Fare_Per_Person'].astype(int)
# Let's take a last look at the training set, before we start training the models.
train_df.head(10)
'''
    block11 = '''
train_labels = train_df[PREDICTION_LABEL]
train_df = train_df.drop(PREDICTION_LABEL, axis=1)
'''
    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(train_df, "train_df")
_kale_marshal_utils.save(train_labels, "train_labels")
# -----------------------DATA SAVING END-----------------------------------
'''
    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              block6,
              block7,
              block8,
              block9,
              block10,
              block11,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/featureengineering.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('featureengineering')
    _kale_mlmd_utils.call("mark_execution_complete")
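# The five model steps below are identical apart from the classifier they fit
# and the accuracy value they save; the pipeline wires them to run in
# parallel after featureengineering and joins them again in results().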
def decisiontree():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
train_df = _kale_marshal_utils.load("train_df")
train_labels = _kale_marshal_utils.load("train_labels")
# -----------------------DATA LOADING END----------------------------------
'''
    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''
    block2 = '''
decision_tree = DecisionTreeClassifier()
decision_tree.fit(train_df, train_labels)
acc_decision_tree = round(decision_tree.score(train_df, train_labels) * 100, 2)
'''
    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(acc_decision_tree, "acc_decision_tree")
# -----------------------DATA SAVING END-----------------------------------
'''
    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/decisiontree.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('decisiontree')
    _kale_mlmd_utils.call("mark_execution_complete")
def svm():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
train_df = _kale_marshal_utils.load("train_df")
train_labels = _kale_marshal_utils.load("train_labels")
# -----------------------DATA LOADING END----------------------------------
'''
    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''
    block2 = '''
# note: despite the variable name, this is scikit-learn's SVC with its
# default RBF kernel, not a linear SVM
linear_svc = SVC(gamma='auto')
linear_svc.fit(train_df, train_labels)
acc_linear_svc = round(linear_svc.score(train_df, train_labels) * 100, 2)
'''
    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(acc_linear_svc, "acc_linear_svc")
# -----------------------DATA SAVING END-----------------------------------
'''
    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/svm.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('svm')
    _kale_mlmd_utils.call("mark_execution_complete")
def naivebayes():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
train_df = _kale_marshal_utils.load("train_df")
train_labels = _kale_marshal_utils.load("train_labels")
# -----------------------DATA LOADING END----------------------------------
'''
    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''
    block2 = '''
gaussian = GaussianNB()
gaussian.fit(train_df, train_labels)
acc_gaussian = round(gaussian.score(train_df, train_labels) * 100, 2)
'''
    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(acc_gaussian, "acc_gaussian")
# -----------------------DATA SAVING END-----------------------------------
'''
    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/naivebayes.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('naivebayes')
    _kale_mlmd_utils.call("mark_execution_complete")
def logisticregression():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
train_df = _kale_marshal_utils.load("train_df")
train_labels = _kale_marshal_utils.load("train_labels")
# -----------------------DATA LOADING END----------------------------------
'''
    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''
    block2 = '''
logreg = LogisticRegression(solver='lbfgs', max_iter=110)
logreg.fit(train_df, train_labels)
acc_log = round(logreg.score(train_df, train_labels) * 100, 2)
'''
    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(acc_log, "acc_log")
# -----------------------DATA SAVING END-----------------------------------
'''
    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/logisticregression.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('logisticregression')
    _kale_mlmd_utils.call("mark_execution_complete")
def randomforest():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
train_df = _kale_marshal_utils.load("train_df")
train_labels = _kale_marshal_utils.load("train_labels")
# -----------------------DATA LOADING END----------------------------------
'''
    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''
    block2 = '''
random_forest = RandomForestClassifier(n_estimators=100)
random_forest.fit(train_df, train_labels)
acc_random_forest = round(random_forest.score(train_df, train_labels) * 100, 2)
'''
    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(acc_random_forest, "acc_random_forest")
# -----------------------DATA SAVING END-----------------------------------
'''
    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/randomforest.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('randomforest')
    _kale_mlmd_utils.call("mark_execution_complete")
def results():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
acc_decision_tree = _kale_marshal_utils.load("acc_decision_tree")
acc_gaussian = _kale_marshal_utils.load("acc_gaussian")
acc_linear_svc = _kale_marshal_utils.load("acc_linear_svc")
acc_log = _kale_marshal_utils.load("acc_log")
acc_random_forest = _kale_marshal_utils.load("acc_random_forest")
# -----------------------DATA LOADING END----------------------------------
'''
    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''
    block2 = '''
results = pd.DataFrame({
    'Model': ['Support Vector Machines', 'Logistic Regression',
              'Random Forest', 'Naive Bayes', 'Decision Tree'],
    'Score': [acc_linear_svc, acc_log,
              acc_random_forest, acc_gaussian, acc_decision_tree]})
result_df = results.sort_values(by='Score', ascending=False)
result_df = result_df.set_index('Score')
print(result_df)
'''
    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/results.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('results')
    _kale_mlmd_utils.call("mark_execution_complete")
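# func_to_container_op turns each step function into a component factory:
# calling e.g. loaddata_op() inside the pipeline yields a ContainerOp that
# executes the function's body in its own container.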
loaddata_op = comp.func_to_container_op(loaddata)
datapreprocessing_op = comp.func_to_container_op(datapreprocessing)
featureengineering_op = comp.func_to_container_op(featureengineering)
decisiontree_op = comp.func_to_container_op(decisiontree)
svm_op = comp.func_to_container_op(svm)
naivebayes_op = comp.func_to_container_op(naivebayes)
logisticregression_op = comp.func_to_container_op(logisticregression)
randomforest_op = comp.func_to_container_op(randomforest)
results_op = comp.func_to_container_op(results)
@dsl.pipeline(
    name='titanic-ml-gxj28',
    description='Predict which passengers survived the Titanic shipwreck'
)
def auto_generated_pipeline():
    pvolumes_dict = OrderedDict()
    volume_step_names = []
    volume_name_parameters = []
    marshal_vop = dsl.VolumeOp(
        name="kale-marshal-volume",
        resource_name="kale-marshal-pvc",
        modes=dsl.VOLUME_MODE_RWM,
        size="1Gi"
    )
    volume_step_names.append(marshal_vop.name)
    volume_name_parameters.append(marshal_vop.outputs["name"].full_name)
    pvolumes_dict['/marshal'] = marshal_vop.volume
    volume_step_names.sort()
    volume_name_parameters.sort()
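    # Every task mounts this single ReadWriteMany volume at /marshal; it is
    # the transport for the _kale_marshal_utils save/load calls in the steps.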
    loaddata_task = loaddata_op()\
        .add_pvolumes(pvolumes_dict)\
        .after()
    loaddata_task.container.working_dir = "/Users/animeshsingh/go/src/github.com/kubeflow/kale/examples/titanic-ml-dataset"
    loaddata_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'loaddata': '/loaddata.html'})
    loaddata_task.output_artifact_paths.update(output_artifacts)
    loaddata_task.add_pod_label(
        "pipelines.kubeflow.org/metadata_written", "true")
    dep_names = loaddata_task.dependent_names + volume_step_names
    loaddata_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        loaddata_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))
    datapreprocessing_task = datapreprocessing_op()\
        .add_pvolumes(pvolumes_dict)\
        .after(loaddata_task)
    datapreprocessing_task.container.working_dir = "/Users/animeshsingh/go/src/github.com/kubeflow/kale/examples/titanic-ml-dataset"
    datapreprocessing_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'datapreprocessing': '/datapreprocessing.html'})
    datapreprocessing_task.output_artifact_paths.update(output_artifacts)
    datapreprocessing_task.add_pod_label(
        "pipelines.kubeflow.org/metadata_written", "true")
    dep_names = datapreprocessing_task.dependent_names + volume_step_names
    datapreprocessing_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        datapreprocessing_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))
    featureengineering_task = featureengineering_op()\
        .add_pvolumes(pvolumes_dict)\
        .after(datapreprocessing_task)
    featureengineering_task.container.working_dir = "/Users/animeshsingh/go/src/github.com/kubeflow/kale/examples/titanic-ml-dataset"
    featureengineering_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'featureengineering': '/featureengineering.html'})
    featureengineering_task.output_artifact_paths.update(output_artifacts)
    featureengineering_task.add_pod_label(
        "pipelines.kubeflow.org/metadata_written", "true")
    dep_names = featureengineering_task.dependent_names + volume_step_names
    featureengineering_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        featureengineering_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))
    decisiontree_task = decisiontree_op()\
        .add_pvolumes(pvolumes_dict)\
        .after(featureengineering_task)
    decisiontree_task.container.working_dir = "/Users/animeshsingh/go/src/github.com/kubeflow/kale/examples/titanic-ml-dataset"
    decisiontree_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'decisiontree': '/decisiontree.html'})
    decisiontree_task.output_artifact_paths.update(output_artifacts)
    decisiontree_task.add_pod_label(
        "pipelines.kubeflow.org/metadata_written", "true")
    dep_names = decisiontree_task.dependent_names + volume_step_names
    decisiontree_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        decisiontree_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))
    svm_task = svm_op()\
        .add_pvolumes(pvolumes_dict)\
        .after(featureengineering_task)
    svm_task.container.working_dir = "/Users/animeshsingh/go/src/github.com/kubeflow/kale/examples/titanic-ml-dataset"
    svm_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'svm': '/svm.html'})
    svm_task.output_artifact_paths.update(output_artifacts)
    svm_task.add_pod_label("pipelines.kubeflow.org/metadata_written", "true")
    dep_names = svm_task.dependent_names + volume_step_names
    svm_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        svm_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))
    naivebayes_task = naivebayes_op()\
        .add_pvolumes(pvolumes_dict)\
        .after(featureengineering_task)
    naivebayes_task.container.working_dir = "/Users/animeshsingh/go/src/github.com/kubeflow/kale/examples/titanic-ml-dataset"
    naivebayes_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'naivebayes': '/naivebayes.html'})
    naivebayes_task.output_artifact_paths.update(output_artifacts)
    naivebayes_task.add_pod_label(
        "pipelines.kubeflow.org/metadata_written", "true")
    dep_names = naivebayes_task.dependent_names + volume_step_names
    naivebayes_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        naivebayes_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))
    logisticregression_task = logisticregression_op()\
        .add_pvolumes(pvolumes_dict)\
        .after(featureengineering_task)
    logisticregression_task.container.working_dir = "/Users/animeshsingh/go/src/github.com/kubeflow/kale/examples/titanic-ml-dataset"
    logisticregression_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'logisticregression': '/logisticregression.html'})
    logisticregression_task.output_artifact_paths.update(output_artifacts)
    logisticregression_task.add_pod_label(
        "pipelines.kubeflow.org/metadata_written", "true")
    dep_names = logisticregression_task.dependent_names + volume_step_names
    logisticregression_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        logisticregression_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))
    randomforest_task = randomforest_op()\
        .add_pvolumes(pvolumes_dict)\
        .after(featureengineering_task)
    randomforest_task.container.working_dir = "/Users/animeshsingh/go/src/github.com/kubeflow/kale/examples/titanic-ml-dataset"
    randomforest_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'randomforest': '/randomforest.html'})
    randomforest_task.output_artifact_paths.update(output_artifacts)
    randomforest_task.add_pod_label(
        "pipelines.kubeflow.org/metadata_written", "true")
    dep_names = randomforest_task.dependent_names + volume_step_names
    randomforest_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        randomforest_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))
    results_task = results_op()\
        .add_pvolumes(pvolumes_dict)\
        .after(randomforest_task, logisticregression_task,
               naivebayes_task, svm_task, decisiontree_task)
    results_task.container.working_dir = "/Users/animeshsingh/go/src/github.com/kubeflow/kale/examples/titanic-ml-dataset"
    results_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'results': '/results.html'})
    results_task.output_artifact_paths.update(output_artifacts)
    results_task.add_pod_label(
        "pipelines.kubeflow.org/metadata_written", "true")
    dep_names = results_task.dependent_names + volume_step_names
    results_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        results_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))
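# Running this file directly compiles the pipeline with the stock KFP
# compiler and submits a run; the kfp-tekton compiler tests that live
# alongside this testdata file instead import auto_generated_pipeline and
# compile it with the Tekton compiler.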
if __name__ == "__main__":
    pipeline_func = auto_generated_pipeline
    pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'
    import kfp.compiler as compiler
    compiler.Compiler().compile(pipeline_func, pipeline_filename)
    # Get or create an experiment and submit a pipeline run
    import kfp
    client = kfp.Client()
    experiment = client.create_experiment('titanic')
    # Submit a pipeline run
    from kale.utils.kfp_utils import generate_run_name
    run_name = generate_run_name('titanic-ml-gxj28')
    run_result = client.run_pipeline(
        experiment.id, run_name, pipeline_filename, {})
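# A minimal sketch of the Tekton path (assuming the kfp-tekton SDK is
# installed; the output filename is illustrative):
#
#     from kfp_tekton.compiler import TektonCompiler
#     TektonCompiler().compile(auto_generated_pipeline,
#                              'titanic-ml.pipeline.yaml')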