mirror of https://github.com/kubeflow/katib.git
Compare commits
4 Commits
Author | SHA1 | Date |
---|---|---|
|
0a7453d212 | |
|
12a4896ae0 | |
|
8dcc7d3398 | |
|
73177dc229 |
|
@ -31,8 +31,10 @@ jobs:
|
|||
include:
|
||||
- trial-name: mxnet-mnist
|
||||
dockerfile: examples/v1beta1/trial-images/mxnet-mnist/Dockerfile
|
||||
- trial-name: pytorch-mnist
|
||||
dockerfile: examples/v1beta1/trial-images/pytorch-mnist/Dockerfile
|
||||
- trial-name: pytorch-mnist-cpu
|
||||
dockerfile: examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.cpu
|
||||
- trial-name: pytorch-mnist-gpu
|
||||
dockerfile: examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.gpu
|
||||
- trial-name: tf-mnist-with-summaries
|
||||
dockerfile: examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile
|
||||
- trial-name: enas-cnn-cifar10-gpu
|
||||
|
|
|
@ -24,7 +24,7 @@ jobs:
|
|||
experiments: ${{ matrix.experiments }}
|
||||
training-operator: true
|
||||
# Comma Delimited
|
||||
trial-images: pytorch-mnist
|
||||
trial-images: pytorch-mnist-cpu
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
|
|
|
@ -273,13 +273,24 @@ The following table shows images for training containers which are used in the
|
|||
</tr>
|
||||
<tr align="center">
|
||||
<td>
|
||||
<code>docker.io/kubeflowkatib/pytorch-mnist</code>
|
||||
<code>docker.io/kubeflowkatib/pytorch-mnist-cpu</code>
|
||||
</td>
|
||||
<td>
|
||||
PyTorch MNIST example with printing metrics to the file or StdOut
|
||||
PyTorch MNIST example with printing metrics to the file or StdOut with CPU support
|
||||
</td>
|
||||
<td>
|
||||
<a href="https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile">Dockerfile</a>
|
||||
<a href="https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.cpu">Dockerfile</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr align="center">
|
||||
<td>
|
||||
<code>docker.io/kubeflowkatib/pytorch-mnist-gpu</code>
|
||||
</td>
|
||||
<td>
|
||||
PyTorch MNIST example with printing metrics to the file or StdOut with GPU support
|
||||
</td>
|
||||
<td>
|
||||
<a href="https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.gpu">Dockerfile</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr align="center">
|
||||
|
|
|
@ -76,7 +76,7 @@ spec:
|
|||
- name: num-examples
|
||||
container:
|
||||
name: model-training
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -62,7 +62,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -57,7 +57,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -57,7 +57,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -55,7 +55,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -69,7 +69,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -43,7 +43,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/simple-pbt:latest
|
||||
image: docker.io/kubeflowkatib/simple-pbt:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pbt/pbt_test.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -46,7 +46,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
@ -61,7 +61,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -56,7 +56,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: tensorflow
|
||||
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:latest
|
||||
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:v0.14.0
|
||||
command:
|
||||
- "python"
|
||||
- "/opt/tf-mnist-with-summaries/mnist.py"
|
||||
|
|
|
@ -67,7 +67,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -59,7 +59,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -60,7 +60,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10-cpu:latest
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10-cpu:v0.14.0
|
||||
command:
|
||||
- python3
|
||||
- run_trial.py
|
||||
|
|
|
@ -77,7 +77,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10-gpu:latest
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10-gpu:v0.14.0
|
||||
command:
|
||||
- python3
|
||||
- run_trial.py
|
||||
|
|
|
@ -139,7 +139,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.14.0
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
|
|
@ -136,7 +136,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:v0.14.0
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
|
|
@ -55,7 +55,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -55,7 +55,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -89,7 +89,7 @@ spec:
|
|||
description: Number of training examples
|
||||
steps:
|
||||
- name: model-training
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-runtime
|
||||
|
||||
ADD examples/v1beta1/trial-images/pytorch-mnist /opt/pytorch-mnist
|
||||
WORKDIR /opt/pytorch-mnist
|
||||
|
||||
# Add folder for the logs.
|
||||
RUN mkdir /katib
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
RUN chgrp -R 0 /opt/pytorch-mnist \
|
||||
&& chmod -R g+rwX /opt/pytorch-mnist \
|
||||
&& chgrp -R 0 /katib \
|
||||
&& chmod -R g+rwX /katib
|
||||
|
||||
ENTRYPOINT ["python3", "/opt/pytorch-mnist/mnist.py"]
|
|
@ -60,7 +60,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -8,13 +8,13 @@ data:
|
|||
metrics-collector-sidecar: |-
|
||||
{
|
||||
"StdOut": {
|
||||
"image": "docker.io/kubeflowkatib/file-metrics-collector:latest"
|
||||
"image": "docker.io/kubeflowkatib/file-metrics-collector:v0.14.0"
|
||||
},
|
||||
"File": {
|
||||
"image": "docker.io/kubeflowkatib/file-metrics-collector:latest"
|
||||
"image": "docker.io/kubeflowkatib/file-metrics-collector:v0.14.0"
|
||||
},
|
||||
"TensorFlowEvent": {
|
||||
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector:latest",
|
||||
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.14.0",
|
||||
"resources": {
|
||||
"limits": {
|
||||
"memory": "1Gi"
|
||||
|
@ -25,31 +25,31 @@ data:
|
|||
suggestion: |-
|
||||
{
|
||||
"random": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.14.0"
|
||||
},
|
||||
"tpe": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.14.0"
|
||||
},
|
||||
"grid": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-chocolate:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-chocolate:v0.14.0"
|
||||
},
|
||||
"hyperband": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperband:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperband:v0.14.0"
|
||||
},
|
||||
"bayesianoptimization": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-skopt:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-skopt:v0.14.0"
|
||||
},
|
||||
"cmaes": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-goptuna:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.14.0"
|
||||
},
|
||||
"sobol": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-goptuna:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.14.0"
|
||||
},
|
||||
"multivariate-tpe": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-optuna:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-optuna:v0.14.0"
|
||||
},
|
||||
"enas": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-enas:latest",
|
||||
"image": "docker.io/kubeflowkatib/suggestion-enas:v0.14.0",
|
||||
"resources": {
|
||||
"limits": {
|
||||
"memory": "200Mi"
|
||||
|
@ -57,10 +57,10 @@ data:
|
|||
}
|
||||
},
|
||||
"darts": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-darts:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-darts:v0.14.0"
|
||||
},
|
||||
"pbt": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-pbt:latest",
|
||||
"image": "docker.io/kubeflowkatib/suggestion-pbt:v0.14.0",
|
||||
"persistentVolumeClaimSpec": {
|
||||
"accessModes": [
|
||||
"ReadWriteMany"
|
||||
|
@ -76,6 +76,6 @@ data:
|
|||
early-stopping: |-
|
||||
{
|
||||
"medianstop": {
|
||||
"image": "docker.io/kubeflowkatib/earlystopping-medianstop:latest"
|
||||
"image": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.14.0"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
@ -33,7 +33,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.14.0
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
@ -54,7 +54,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
@ -68,7 +68,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -22,13 +22,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
|
||||
patchesStrategicMerge:
|
||||
- patches/katib-cert-injection.yaml
|
||||
|
|
|
@ -20,16 +20,16 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/cert-generator
|
||||
newName: docker.io/kubeflowkatib/cert-generator
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
patchesStrategicMerge:
|
||||
- patches/db-manager.yaml
|
||||
# Modify katib-mysql-secrets with parameters for the DB.
|
||||
|
|
|
@ -30,13 +30,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
|
||||
patchesJson6902:
|
||||
# Annotate Service to delegate TLS-secret generation to OpenShift service controller
|
||||
|
|
|
@ -22,13 +22,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/cert-generator
|
||||
newName: docker.io/kubeflowkatib/cert-generator
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
|
|
|
@ -10,13 +10,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.14.0
|
||||
|
||||
patchesStrategicMerge:
|
||||
- patches/remove-namespace.yaml
|
||||
|
|
|
@ -5,7 +5,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -5,7 +5,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.14.0
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
|
|
@ -9,7 +9,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:v0.14.0-rc.0
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- "python3"
|
||||
|
@ -24,7 +24,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:v0.14.0-rc.0
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- "python3"
|
||||
|
|
|
@ -123,8 +123,11 @@ else
|
|||
echo -e "\nBuilding mxnet mnist training container example...\n"
|
||||
docker build --platform linux/amd64 -t "${REGISTRY}/mxnet-mnist:${TAG}" -f examples/${VERSION}/trial-images/mxnet-mnist/Dockerfile .
|
||||
|
||||
echo -e "\nBuilding PyTorch mnist training container example...\n"
|
||||
docker build --platform linux/amd64 -t "${REGISTRY}/pytorch-mnist:${TAG}" -f examples/${VERSION}/trial-images/pytorch-mnist/Dockerfile .
|
||||
echo -e "\nBuilding PyTorch mnist training container example with CPU support...\n"
|
||||
docker build --platform linux/amd64 -t "${REGISTRY}/pytorch-mnist-cpu:${TAG}" -f examples/${VERSION}/trial-images/pytorch-mnist/Dockerfile.cpu .
|
||||
|
||||
echo -e "\nBuilding PyTorch mnist training container example with GPU support...\n"
|
||||
docker build --platform linux/amd64 -t "${REGISTRY}/pytorch-mnist-gpu:${TAG}" -f examples/${VERSION}/trial-images/pytorch-mnist/Dockerfile.gpu .
|
||||
|
||||
echo -e "\nBuilding Keras CIFAR-10 CNN training container example for ENAS with GPU support...\n"
|
||||
docker build --platform linux/amd64 -t "${REGISTRY}/enas-cnn-cifar10-gpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.gpu .
|
||||
|
|
|
@ -98,8 +98,11 @@ docker push "${REGISTRY}/mxnet-mnist:${TAG}"
|
|||
echo -e "\nPushing Tensorflow with summaries mnist training container example...\n"
|
||||
docker push "${REGISTRY}/tf-mnist-with-summaries:${TAG}"
|
||||
|
||||
echo -e "\nPushing PyTorch mnist training container example...\n"
|
||||
docker push "${REGISTRY}/pytorch-mnist:${TAG}"
|
||||
echo -e "\nPushing PyTorch mnist training container example with CPU support...\n"
|
||||
docker push "${REGISTRY}/pytorch-mnist-cpu:${TAG}"
|
||||
|
||||
echo -e "\nPushing PyTorch mnist training container example with GPU support...\n"
|
||||
docker push "${REGISTRY}/pytorch-mnist-gpu:${TAG}"
|
||||
|
||||
echo -e "\nPushing Keras CIFAR-10 CNN training container example for ENAS with GPU support...\n"
|
||||
docker push "${REGISTRY}/enas-cnn-cifar10-gpu:${TAG}"
|
||||
|
@ -107,8 +110,11 @@ docker push "${REGISTRY}/enas-cnn-cifar10-gpu:${TAG}"
|
|||
echo -e "\nPushing Keras CIFAR-10 CNN training container example for ENAS with CPU support...\n"
|
||||
docker push "${REGISTRY}/enas-cnn-cifar10-cpu:${TAG}"
|
||||
|
||||
echo -e "\nPushing PyTorch CIFAR-10 CNN training container example for DARTS...\n"
|
||||
docker push "${REGISTRY}/darts-cnn-cifar10:${TAG}"
|
||||
echo -e "\nPushing PyTorch CIFAR-10 CNN training container example for DARTS with CPU support...\n"
|
||||
docker push "${REGISTRY}/darts-cnn-cifar10-cpu:${TAG}"
|
||||
|
||||
echo -e "\nPushing PyTorch CIFAR-10 CNN training container example for DARTS with GPU support...\n"
|
||||
docker push "${REGISTRY}/darts-cnn-cifar10-gpu:${TAG}"
|
||||
|
||||
echo -e "\nPushing dynamic learning rate training container example for PBT...\n"
|
||||
docker push "${REGISTRY}/simple-pbt:${TAG}"
|
||||
|
|
|
@ -83,7 +83,8 @@ update_yaml_files "${CONFIG_PATH}" ":[^[:space:]].*\"" ":${TAG}\""
|
|||
|
||||
# Postfixes for the each Trial image.
|
||||
MXNET_MNIST="mxnet-mnist"
|
||||
PYTORCH_MNIST="pytorch-mnist"
|
||||
PYTORCH_MNIST_CPU="pytorch-mnist-cpu"
|
||||
PYTORCH_MNIST_GPU="pytorch-mnist-gpu"
|
||||
TF_MNIST_WITH_SUMMARIES="tf-mnist-with-summaries"
|
||||
ENAS_GPU="enas-cnn-cifar10-gpu"
|
||||
ENAS_CPU="enas-cnn-cifar10-cpu"
|
||||
|
@ -93,7 +94,8 @@ SIMPLE_PBT="simple-pbt"
|
|||
|
||||
echo -e "Update Katib Trial training container images\n"
|
||||
update_yaml_files "./" "${OLD_PREFIX}${MXNET_MNIST}:.*" "${NEW_PREFIX}${MXNET_MNIST}:${TAG}"
|
||||
update_yaml_files "./" "${OLD_PREFIX}${PYTORCH_MNIST}:.*" "${NEW_PREFIX}${PYTORCH_MNIST}:${TAG}"
|
||||
update_yaml_files "./" "${OLD_PREFIX}${PYTORCH_MNIST_CPU}:.*" "${NEW_PREFIX}${PYTORCH_MNIST_CPU}:${TAG}"
|
||||
update_yaml_files "./" "${OLD_PREFIX}${PYTORCH_MNIST_GPU}:.*" "${NEW_PREFIX}${PYTORCH_MNIST_GPU}:${TAG}"
|
||||
update_yaml_files "./" "${OLD_PREFIX}${TF_MNIST_WITH_SUMMARIES}:.*" "${NEW_PREFIX}${TF_MNIST_WITH_SUMMARIES}:${TAG}"
|
||||
update_yaml_files "./" "${OLD_PREFIX}${ENAS_GPU}:.*" "${NEW_PREFIX}${ENAS_GPU}:${TAG}"
|
||||
update_yaml_files "./" "${OLD_PREFIX}${ENAS_CPU}:.*" "${NEW_PREFIX}${ENAS_CPU}:${TAG}"
|
||||
|
|
|
@ -19,7 +19,7 @@ with open('requirements.txt') as f:
|
|||
|
||||
setuptools.setup(
|
||||
name='kubeflow-katib',
|
||||
version='0.13.0',
|
||||
version='0.14.0',
|
||||
author="Kubeflow Authors",
|
||||
author_email='premnath.vel@gmail.com',
|
||||
license="Apache License Version 2.0",
|
||||
|
|
|
@ -30,7 +30,7 @@ REGISTRY="docker.io/kubeflowkatib"
|
|||
TAG="e2e-test"
|
||||
VERSION="v1beta1"
|
||||
CMD_PREFIX="cmd"
|
||||
SPECIFIED_DEVICE_TYPE_IMAGES=("enas-cnn-cifar10-cpu" "darts-cnn-cifar10-cpu")
|
||||
SPECIFIED_DEVICE_TYPE_IMAGES=("enas-cnn-cifar10-cpu" "darts-cnn-cifar10-cpu" "pytorch-mnist-cpu")
|
||||
|
||||
IFS="," read -r -a TRIAL_IMAGE_ARRAY <<< "$TRIAL_IMAGES"
|
||||
IFS="," read -r -a EXPERIMENT_ARRAY <<< "$EXPERIMENTS"
|
||||
|
@ -51,7 +51,7 @@ _build_containers() {
|
|||
docker build --platform "$(uname -m)" -t "$REGISTRY/$CONTAINER_NAME:$TAG" -f "../../../../../$DOCKERFILE" ../../../../../
|
||||
}
|
||||
|
||||
_load_kind_cluster() {
|
||||
_load_minikube_cluster() {
|
||||
CONTAINER_NAME=${1:-"katib-controller"}
|
||||
|
||||
echo -e "\n\nLoading $CONTAINER_NAME image...\n\n"
|
||||
|
@ -99,7 +99,7 @@ run() {
|
|||
for s in "${suggestions[@]}"; do
|
||||
if [ "$s" == "$CONTAINER_NAME" ]; then
|
||||
_build_containers "$CONTAINER_NAME" "$DOCKERFILE"
|
||||
_load_kind_cluster "$CONTAINER_NAME"
|
||||
_load_minikube_cluster "$CONTAINER_NAME"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
@ -126,7 +126,7 @@ run() {
|
|||
for e in "${earlystoppings[@]}"; do
|
||||
if [ "$e" == "$CONTAINER_NAME" ]; then
|
||||
_build_containers "$CONTAINER_NAME" "$DOCKERFILE"
|
||||
_load_kind_cluster "$CONTAINER_NAME"
|
||||
_load_minikube_cluster "$CONTAINER_NAME"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
@ -134,7 +134,7 @@ run() {
|
|||
# Others
|
||||
else
|
||||
_build_containers "$CONTAINER_NAME" "$DOCKERFILE"
|
||||
_load_kind_cluster "$CONTAINER_NAME"
|
||||
_load_minikube_cluster "$CONTAINER_NAME"
|
||||
fi
|
||||
}
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.14.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
Loading…
Reference in New Issue