182 lines
5.9 KiB
Python
182 lines
5.9 KiB
Python
# Copyright 2020 kubeflow.org
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
import json
|
|
from kfp import dsl, components
|
|
|
|
|
|
@dsl.pipeline(
    name="launch-katib-experiment",
    description="An example to launch katib experiment."
)
def mnist_hpo(
        name: str = "mnist",
        namespace: str = "kubeflow",
        goal: float = 0.99,
        parallelTrialCount: int = 3,
        maxTrialCount: int = 12,
        experimentTimeoutMinutes: int = 60,
        deleteAfterDone: bool = True):
    # Pipeline with two steps: launch a Katib experiment for the MNIST example
    # through katib_experiment_launcher_op, then echo the launcher's
    # 'bestHyperParameter' output from a small bash container.
    #
    # NOTE(review): documented with comments instead of a docstring because
    # kfp may read a pipeline function's docstring into the pipeline
    # metadata -- confirm before converting this into a docstring.

    # Objective: maximize "Validation-accuracy" up to `goal`, also tracking
    # "accuracy".
    objective_spec = {
        "type": "maximize",
        "goal": goal,
        "objectiveMetricName": "Validation-accuracy",
        "additionalMetricNames": ["accuracy"]
    }
    algorithm_spec = {"algorithmName": "random"}

    # Search space; every entry's name is the command-line flag handed to the
    # training script.
    learning_rate = {
        "name": "--lr",
        "parameterType": "double",
        "feasibleSpace": {"min": "0.01", "max": "0.03"},
    }
    layer_count = {
        "name": "--num-layers",
        "parameterType": "int",
        "feasibleSpace": {"min": "2", "max": "5"},
    }
    optimizer_choice = {
        "name": "--optimizer",
        "parameterType": "categorical",
        "feasibleSpace": {"list": ["sgd", "adam", "ftrl"]},
    }
    search_space = [learning_rate, layer_count, optimizer_choice]

    # Single command string; the {{...}} placeholders are left verbatim in the
    # template handed to the launcher.
    training_command = (
        "python /mxnet/example/image-classification/train_mnist.py --batch-size=64 "
        "{{- with .HyperParameters}} {{- range .}} {{.Name}}={{.Value}} {{- end}} {{- end}}"
    )
    trial_container = {
        "name": "{{.Trial}}",
        "image": "docker.io/katib/mxnet-mnist-example",
        "command": [training_command],
    }
    # batch/v1 Job used as the trial template.
    trial_job = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {
            "name": "{{.Trial}}",
            "namespace": "{{.NameSpace}}"
        },
        "spec": {
            "template": {
                "spec": {
                    "restartPolicy": "Never",
                    "containers": [trial_container]
                }
            }
        }
    }
    # The Job manifest is embedded as a JSON string under goTemplate.rawTemplate.
    trial_template = {"goTemplate": {"rawTemplate": json.dumps(trial_job)}}

    # The config structures are handed over via str(), i.e. as the Python
    # repr of each dict/list.
    launcher_task = katib_experiment_launcher_op(
        name,
        namespace,
        parallelTrialCount=parallelTrialCount,
        maxTrialCount=maxTrialCount,
        objectiveConfig=str(objective_spec),
        algorithmConfig=str(algorithm_spec),
        trialTemplate=str(trial_template),
        parameters=str(search_space),
        experimentTimeoutMinutes=experimentTimeoutMinutes,
        deleteAfterDone=deleteAfterDone
    )

    # Follow-up step that prints the launcher's bestHyperParameter output.
    echo_best_op = components.load_component_from_text("""
    name: my-out-cop
    description: output component
    inputs:
    - {name: text, type: String}
    implementation:
      container:
        image: library/bash:4.4.23
        command:
        - sh
        - -c
        - |
          echo hyperparameter: $0
        - {inputValue: text}
    """)
    echo_best_op(launcher_task.outputs['bestHyperParameter'])
|
|
|
|
|
|
def katib_experiment_launcher_op(
        name: str,
        namespace: str,
        maxTrialCount: int = 100,
        parallelTrialCount: int = 3,
        maxFailedTrialCount: int = 3,
        objectiveConfig: str = '{}',
        algorithmConfig: str = '{}',
        metricsCollector: str = '{}',
        trialTemplate: str = '{}',
        parameters: str = '[]',
        experimentTimeoutMinutes: int = 60,
        deleteAfterDone: bool = True):
    """Instantiate the Katib experiment launcher component.

    Loads an inline component spec whose container image is
    ``liuhougangxa/katib-experiment-launcher:latest`` and calls it with the
    given arguments. Each argument is forwarded to the container as the
    command-line flag of the same (camelCase) name; the spec's input names
    are the lower-cased forms of those names.

    Args:
        name: Value for ``--name``.
        namespace: Value for ``--namespace``.
        maxTrialCount: Value for ``--maxTrialCount``.
        parallelTrialCount: Value for ``--parallelTrialCount``.
        maxFailedTrialCount: Value for ``--maxFailedTrialCount``.
        objectiveConfig: String value for ``--objectiveConfig``.
        algorithmConfig: String value for ``--algorithmConfig``.
        metricsCollector: String value for ``--metricsCollector``.
        trialTemplate: String value for ``--trialTemplate``.
        parameters: String value for ``--parameters``.
        experimentTimeoutMinutes: Value for ``--experimentTimeoutMinutes``.
        deleteAfterDone: Value for ``--deleteAfterDone``.

    Returns:
        Whatever the loaded component factory returns when called; the spec
        declares one output, ``bestHyperParameter``, whose path is passed to
        the container as ``--outputFile``.
    """
    launcher_spec = """
    name: mnist-hpo
    description: mnist hpo
    inputs:
    - {name: name, type: String}
    - {name: namespace, type: String}
    - {name: maxtrialcount, type: Integer}
    - {name: maxfailedtrialcount, type: Integer}
    - {name: paralleltrialcount, type: Integer}
    - {name: objectiveconfig, type: String}
    - {name: algorithmconfig, type: String}
    - {name: metricscollector, type: String}
    - {name: trialtemplate, type: String}
    - {name: parameters, type: String}
    - {name: deleteafterdone, type: Boolean}
    - {name: experimenttimeoutminutes, type: Integer}
    outputs:
    - {name: bestHyperParameter, type: String}
    implementation:
      container:
        image: liuhougangxa/katib-experiment-launcher:latest
        args:
        - --name
        - {inputValue: name}
        - --namespace
        - {inputValue: namespace}
        - --maxTrialCount
        - {inputValue: maxtrialcount}
        - --maxFailedTrialCount
        - {inputValue: maxfailedtrialcount}
        - --parallelTrialCount
        - {inputValue: paralleltrialcount}
        - --objectiveConfig
        - {inputValue: objectiveconfig}
        - --algorithmConfig
        - {inputValue: algorithmconfig}
        - --metricsCollector
        - {inputValue: metricscollector}
        - --trialTemplate
        - {inputValue: trialtemplate}
        - --parameters
        - {inputValue: parameters}
        - --outputFile
        - {outputPath: bestHyperParameter}
        - --deleteAfterDone
        - {inputValue: deleteafterdone}
        - --experimentTimeoutMinutes
        - {inputValue: experimenttimeoutminutes}
    """
    launcher_factory = components.load_component_from_text(launcher_spec)

    # Map this function's camelCase arguments onto the spec's lower-case
    # input names.
    launcher_inputs = {
        "name": name,
        "namespace": namespace,
        "maxtrialcount": maxTrialCount,
        "maxfailedtrialcount": maxFailedTrialCount,
        "paralleltrialcount": parallelTrialCount,
        "objectiveconfig": objectiveConfig,
        "algorithmconfig": algorithmConfig,
        "metricscollector": metricsCollector,
        "trialtemplate": trialTemplate,
        "parameters": parameters,
        "deleteafterdone": deleteAfterDone,
        "experimenttimeoutminutes": experimentTimeoutMinutes,
    }
    return launcher_factory(**launcher_inputs)
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: compile the mnist_hpo pipeline with the Tekton
    # compiler into a YAML file that sits next to this source file.
    import os

    from kfp_tekton.compiler import TektonCompiler

    # Swap only the trailing extension. str.replace('.py', '.yaml') would also
    # rewrite any earlier '.py' in the path (e.g. a '.pyenv' directory).
    output_path = os.path.splitext(__file__)[0] + '.yaml'
    TektonCompiler().compile(mnist_hpo, output_path)
|