mirror of https://github.com/kubeflow/katib.git
Compare commits
4 Commits
master
...
v0.13.0-rc
Author | SHA1 | Date |
---|---|---|
|
1f9dff0307 | |
|
c00cf67074 | |
|
4458e7bdcd | |
|
6329f48685 |
|
@ -123,7 +123,7 @@ In the namespace with `katib.kubeflow.org/metrics-collector-injection=enabled` l
|
||||||
|
|
||||||
In **Pod Level Injecting**,
|
In **Pod Level Injecting**,
|
||||||
|
|
||||||
1. Job operators (_e.g. TFJob/PyTorchJob_) tag the `job-role: master` ([#1064](https://github.com/kubeflow/tf-operator/pull/1064)) label on the master pod.
|
1. Job operators (_e.g. TFJob/PyTorchJob_) tag the `training.kubeflow.org/job-role: master` ([#1064](https://github.com/kubeflow/tf-operator/pull/1064)) label on the master pod.
|
||||||
2. The webhook injects the metrics collector only if it recognizes this label.
|
2. The webhook injects the metrics collector only if it recognizes this label.
|
||||||
3. The webhook uses [ObjectSelector](https://github.com/kubernetes/kubernetes/pull/78505) to skip irrelevant objects in order to optimize performance.
|
3. The webhook uses [ObjectSelector](https://github.com/kubernetes/kubernetes/pull/78505) to skip irrelevant objects in order to optimize performance.
|
||||||
4. ObjectSelector is only supported in _Kubernetes v1.15_ and later. Without this feature, there may be a [performance issue](https://github.com/kubeflow/katib/issues/685#issuecomment-516226070) in the webhook. In this situation, the following **Job Level Injecting** mode may be a better option.
|
4. ObjectSelector is only supported in _Kubernetes v1.15_ and later. Without this feature, there may be a [performance issue](https://github.com/kubeflow/katib/issues/685#issuecomment-516226070) in the webhook. In this situation, the following **Job Level Injecting** mode may be a better option.
|
||||||
|
|
|
@ -124,7 +124,7 @@ For example, for TFJob:
|
||||||
```yaml
|
```yaml
|
||||||
. . .
|
. . .
|
||||||
PrimaryPodLabel:
|
PrimaryPodLabel:
|
||||||
"job-role": "master"
|
"training.kubeflow.org/job-role": "master"
|
||||||
. . .
|
. . .
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -75,7 +75,7 @@ spec:
|
||||||
- name: num-examples
|
- name: num-examples
|
||||||
container:
|
container:
|
||||||
name: model-training
|
name: model-training
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -56,7 +56,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -56,7 +56,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -68,7 +68,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -23,7 +23,7 @@ spec:
|
||||||
primaryContainerName: mxnet
|
primaryContainerName: mxnet
|
||||||
# In this example we can collect metrics only from the Worker pods.
|
# In this example we can collect metrics only from the Worker pods.
|
||||||
primaryPodLabels:
|
primaryPodLabels:
|
||||||
replica-type: worker
|
training.kubeflow.org/replica-type: worker
|
||||||
trialParameters:
|
trialParameters:
|
||||||
- name: learningRate
|
- name: learningRate
|
||||||
description: Learning rate for the training model
|
description: Learning rate for the training model
|
||||||
|
|
|
@ -45,7 +45,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: pytorch
|
- name: pytorch
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
@ -59,7 +59,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: pytorch
|
- name: pytorch
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: tensorflow
|
- name: tensorflow
|
||||||
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:latest
|
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python"
|
- "python"
|
||||||
- "/opt/tf-mnist-with-summaries/mnist.py"
|
- "/opt/tf-mnist-with-summaries/mnist.py"
|
||||||
|
|
|
@ -66,7 +66,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
|
|
@ -58,7 +58,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -59,7 +59,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10:latest
|
image: docker.io/kubeflowkatib/darts-cnn-cifar10:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- run_trial.py
|
- run_trial.py
|
||||||
|
|
|
@ -76,7 +76,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10:latest
|
image: docker.io/kubeflowkatib/darts-cnn-cifar10:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- run_trial.py
|
- run_trial.py
|
||||||
|
|
|
@ -138,7 +138,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- -u
|
- -u
|
||||||
|
|
|
@ -135,7 +135,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:latest
|
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- -u
|
- -u
|
||||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -88,7 +88,7 @@ spec:
|
||||||
description: Number of training examples
|
description: Number of training examples
|
||||||
steps:
|
steps:
|
||||||
- name: model-training
|
- name: model-training
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -59,7 +59,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -7,13 +7,13 @@ data:
|
||||||
metrics-collector-sidecar: |-
|
metrics-collector-sidecar: |-
|
||||||
{
|
{
|
||||||
"StdOut": {
|
"StdOut": {
|
||||||
"image": "docker.io/kubeflowkatib/file-metrics-collector:latest"
|
"image": "docker.io/kubeflowkatib/file-metrics-collector:v0.13.0-rc.1"
|
||||||
},
|
},
|
||||||
"File": {
|
"File": {
|
||||||
"image": "docker.io/kubeflowkatib/file-metrics-collector:latest"
|
"image": "docker.io/kubeflowkatib/file-metrics-collector:v0.13.0-rc.1"
|
||||||
},
|
},
|
||||||
"TensorFlowEvent": {
|
"TensorFlowEvent": {
|
||||||
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector:latest",
|
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.13.0-rc.1",
|
||||||
"resources": {
|
"resources": {
|
||||||
"limits": {
|
"limits": {
|
||||||
"memory": "1Gi"
|
"memory": "1Gi"
|
||||||
|
@ -24,31 +24,31 @@ data:
|
||||||
suggestion: |-
|
suggestion: |-
|
||||||
{
|
{
|
||||||
"random": {
|
"random": {
|
||||||
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:latest"
|
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.13.0-rc.1"
|
||||||
},
|
},
|
||||||
"tpe": {
|
"tpe": {
|
||||||
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:latest"
|
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.13.0-rc.1"
|
||||||
},
|
},
|
||||||
"grid": {
|
"grid": {
|
||||||
"image": "docker.io/kubeflowkatib/suggestion-chocolate:latest"
|
"image": "docker.io/kubeflowkatib/suggestion-chocolate:v0.13.0-rc.1"
|
||||||
},
|
},
|
||||||
"hyperband": {
|
"hyperband": {
|
||||||
"image": "docker.io/kubeflowkatib/suggestion-hyperband:latest"
|
"image": "docker.io/kubeflowkatib/suggestion-hyperband:v0.13.0-rc.1"
|
||||||
},
|
},
|
||||||
"bayesianoptimization": {
|
"bayesianoptimization": {
|
||||||
"image": "docker.io/kubeflowkatib/suggestion-skopt:latest"
|
"image": "docker.io/kubeflowkatib/suggestion-skopt:v0.13.0-rc.1"
|
||||||
},
|
},
|
||||||
"cmaes": {
|
"cmaes": {
|
||||||
"image": "docker.io/kubeflowkatib/suggestion-goptuna:latest"
|
"image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.13.0-rc.1"
|
||||||
},
|
},
|
||||||
"sobol": {
|
"sobol": {
|
||||||
"image": "docker.io/kubeflowkatib/suggestion-goptuna:latest"
|
"image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.13.0-rc.1"
|
||||||
},
|
},
|
||||||
"multivariate-tpe": {
|
"multivariate-tpe": {
|
||||||
"image": "docker.io/kubeflowkatib/suggestion-optuna:latest"
|
"image": "docker.io/kubeflowkatib/suggestion-optuna:v0.13.0-rc.1"
|
||||||
},
|
},
|
||||||
"enas": {
|
"enas": {
|
||||||
"image": "docker.io/kubeflowkatib/suggestion-enas:latest",
|
"image": "docker.io/kubeflowkatib/suggestion-enas:v0.13.0-rc.1",
|
||||||
"resources": {
|
"resources": {
|
||||||
"limits": {
|
"limits": {
|
||||||
"memory": "200Mi"
|
"memory": "200Mi"
|
||||||
|
@ -56,12 +56,12 @@ data:
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"darts": {
|
"darts": {
|
||||||
"image": "docker.io/kubeflowkatib/suggestion-darts:latest"
|
"image": "docker.io/kubeflowkatib/suggestion-darts:v0.13.0-rc.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
early-stopping: |-
|
early-stopping: |-
|
||||||
{
|
{
|
||||||
"medianstop": {
|
"medianstop": {
|
||||||
"image": "docker.io/kubeflowkatib/earlystopping-medianstop:latest"
|
"image": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.13.0-rc.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,7 @@ data:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
@ -32,7 +32,7 @@ data:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- -u
|
- -u
|
||||||
|
@ -53,7 +53,7 @@ data:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: pytorch
|
- name: pytorch
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
|
@ -68,7 +68,7 @@ data:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: pytorch
|
- name: pytorch
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
|
|
|
@ -21,13 +21,13 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
|
|
||||||
patchesStrategicMerge:
|
patchesStrategicMerge:
|
||||||
- patches/katib-cert-injection.yaml
|
- patches/katib-cert-injection.yaml
|
||||||
|
|
|
@ -19,16 +19,16 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/cert-generator
|
- name: docker.io/kubeflowkatib/cert-generator
|
||||||
newName: docker.io/kubeflowkatib/cert-generator
|
newName: docker.io/kubeflowkatib/cert-generator
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
patchesStrategicMerge:
|
patchesStrategicMerge:
|
||||||
- patches/db-manager.yaml
|
- patches/db-manager.yaml
|
||||||
# Modify katib-mysql-secrets with parameters for the DB.
|
# Modify katib-mysql-secrets with parameters for the DB.
|
||||||
|
|
|
@ -30,13 +30,13 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
|
|
||||||
patchesJson6902:
|
patchesJson6902:
|
||||||
# Annotate Service to delegate TLS-secret generation to OpenShift service controller
|
# Annotate Service to delegate TLS-secret generation to OpenShift service controller
|
||||||
|
|
|
@ -21,13 +21,13 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/cert-generator
|
- name: docker.io/kubeflowkatib/cert-generator
|
||||||
newName: docker.io/kubeflowkatib/cert-generator
|
newName: docker.io/kubeflowkatib/cert-generator
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
|
|
|
@ -9,13 +9,13 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.13.0-rc.1
|
||||||
|
|
||||||
patchesStrategicMerge:
|
patchesStrategicMerge:
|
||||||
- patches/remove-namespace.yaml
|
- patches/remove-namespace.yaml
|
||||||
|
|
|
@ -5,7 +5,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -5,7 +5,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- -u
|
- -u
|
||||||
|
|
|
@ -9,7 +9,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: pytorch
|
- name: pytorch
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
|
@ -24,7 +24,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: pytorch
|
- name: pytorch
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
|
|
|
@ -38,7 +38,7 @@ const (
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// DefaultKubeflowJobPrimaryPodLabels is the default value of spec.trialTemplate.primaryPodLabels for Kubeflow Training Job.
|
// DefaultKubeflowJobPrimaryPodLabels is the default value of spec.trialTemplate.primaryPodLabels for Kubeflow Training Job.
|
||||||
DefaultKubeflowJobPrimaryPodLabels = map[string]string{"job-role": "master"}
|
DefaultKubeflowJobPrimaryPodLabels = map[string]string{"training.kubeflow.org/job-role": "master"}
|
||||||
|
|
||||||
// KubeflowJobKinds is the list of Kubeflow Training Job kinds.
|
// KubeflowJobKinds is the list of Kubeflow Training Job kinds.
|
||||||
KubeflowJobKinds = map[string]bool{
|
KubeflowJobKinds = map[string]bool{
|
||||||
|
@ -46,5 +46,6 @@ var (
|
||||||
"PyTorchJob": true,
|
"PyTorchJob": true,
|
||||||
"XGBoostJob": true,
|
"XGBoostJob": true,
|
||||||
"MXJob": true,
|
"MXJob": true,
|
||||||
|
"MPIJob": true,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -50,14 +50,14 @@ const initialState = {
|
||||||
value: 'status.conditions.#(type=="Complete")#|#(status=="True")#',
|
value: 'status.conditions.#(type=="Complete")#|#(status=="True")#',
|
||||||
description: `Condition when Trial custom resource is succeeded.
|
description: `Condition when Trial custom resource is succeeded.
|
||||||
Default value for k8s BatchJob: status.conditions.#(type=="Complete")#|#(status=="True")#.
|
Default value for k8s BatchJob: status.conditions.#(type=="Complete")#|#(status=="True")#.
|
||||||
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob): status.conditions.#(type=="Succeeded")#|#(status=="True")#.`,
|
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob, MPIJob): status.conditions.#(type=="Succeeded")#|#(status=="True")#.`,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'FailureCondition',
|
name: 'FailureCondition',
|
||||||
value: 'status.conditions.#(type=="Failed")#|#(status=="True")#',
|
value: 'status.conditions.#(type=="Failed")#|#(status=="True")#',
|
||||||
description: `Condition when Trial custom resource is failed.
|
description: `Condition when Trial custom resource is failed.
|
||||||
Default value for k8s BatchJob: status.conditions.#(type=="Failed")#|#(status=="True")#.
|
Default value for k8s BatchJob: status.conditions.#(type=="Failed")#|#(status=="True")#.
|
||||||
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob): status.conditions.#(type=="Failed")#|#(status=="True")#.`,
|
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob, MPIJob): status.conditions.#(type=="Failed")#|#(status=="True")#.`,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'Retain',
|
name: 'Retain',
|
||||||
|
|
|
@ -19,7 +19,7 @@ with open('requirements.txt') as f:
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='kubeflow-katib',
|
name='kubeflow-katib',
|
||||||
version='0.12.0',
|
version='0.13.0rc1',
|
||||||
author="Kubeflow Authors",
|
author="Kubeflow Authors",
|
||||||
author_email='premnath.vel@gmail.com',
|
author_email='premnath.vel@gmail.com',
|
||||||
license="Apache License Version 2.0",
|
license="Apache License Version 2.0",
|
||||||
|
@ -35,11 +35,12 @@ setuptools.setup(
|
||||||
'Intended Audience :: Developers',
|
'Intended Audience :: Developers',
|
||||||
'Intended Audience :: Education',
|
'Intended Audience :: Education',
|
||||||
'Intended Audience :: Science/Research',
|
'Intended Audience :: Science/Research',
|
||||||
'Programming Language :: Python :: 2',
|
|
||||||
'Programming Language :: Python :: 2.7',
|
|
||||||
'Programming Language :: Python :: 3',
|
'Programming Language :: Python :: 3',
|
||||||
'Programming Language :: Python :: 3.6',
|
'Programming Language :: Python :: 3 :: Only',
|
||||||
'Programming Language :: Python :: 3.7',
|
'Programming Language :: Python :: 3.7',
|
||||||
|
'Programming Language :: Python :: 3.8',
|
||||||
|
'Programming Language :: Python :: 3.9',
|
||||||
|
'Programming Language :: Python :: 3.10',
|
||||||
"License :: OSI Approved :: Apache Software License",
|
"License :: OSI Approved :: Apache Software License",
|
||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
'Topic :: Scientific/Engineering',
|
'Topic :: Scientific/Engineering',
|
||||||
|
|
|
@ -163,7 +163,7 @@ class WorkflowBuilder(object):
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "EXTRA_REPOS",
|
"name": "EXTRA_REPOS",
|
||||||
"value": "kubeflow/testing@HEAD;kubeflow/manifests@v1.4-branch"
|
"value": "kubeflow/testing@HEAD;kubeflow/manifests@v1.5-branch"
|
||||||
},
|
},
|
||||||
# Set GOPATH to test_dir because Katib repo is located under /src/github.com/kubeflow/katib
|
# Set GOPATH to test_dir because Katib repo is located under /src/github.com/kubeflow/katib
|
||||||
{
|
{
|
||||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -41,8 +41,8 @@ cat "manifests/v1beta1/components/controller/katib-config.yaml"
|
||||||
echo "Creating Kubeflow namespace"
|
echo "Creating Kubeflow namespace"
|
||||||
kubectl create namespace kubeflow
|
kubectl create namespace kubeflow
|
||||||
|
|
||||||
echo "Deploying training-operator from kubeflow/manifests v1.4 branch"
|
|
||||||
cd "${MANIFESTS_DIR}/apps/training-operator/upstream/overlays/kubeflow"
|
cd "${MANIFESTS_DIR}/apps/training-operator/upstream/overlays/kubeflow"
|
||||||
|
echo "Deploying Training Operator from kubeflow/manifests $(git rev-parse --abbrev-ref HEAD)"
|
||||||
kustomize build . | kubectl apply -f -
|
kustomize build . | kubectl apply -f -
|
||||||
|
|
||||||
echo "Deploying Katib"
|
echo "Deploying Katib"
|
||||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
Loading…
Reference in New Issue