mirror of https://github.com/kubeflow/katib.git
Compare commits
4 Commits
master
...
v0.13.0-rc
Author | SHA1 | Date |
---|---|---|
|
1f9dff0307 | |
|
c00cf67074 | |
|
4458e7bdcd | |
|
6329f48685 |
|
@ -123,7 +123,7 @@ In the namespace with `katib.kubeflow.org/metrics-collector-injection=enabled` l
|
|||
|
||||
In **Pod Level Injecting**,
|
||||
|
||||
1. Job operators (_e.g. TFJob/PyTorchJob_) tag the `job-role: master` ([#1064](https://github.com/kubeflow/tf-operator/pull/1064)) label on the master pod.
|
||||
1. Job operators (_e.g. TFJob/PyTorchJob_) tag the `training.kubeflow.org/job-role: master` ([#1064](https://github.com/kubeflow/tf-operator/pull/1064)) label on the master pod.
|
||||
2. The webhook injects the metric collector only if the webhook recognizes this label.
|
||||
3. The webhook uses [ObjectSelector](https://github.com/kubernetes/kubernetes/pull/78505) to skip irrelevant objects, which improves performance.
|
||||
4. ObjectSelector is only supported in _Kubernetes v1.15_ and later. Without this feature, there may be a [performance issue](https://github.com/kubeflow/katib/issues/685#issuecomment-516226070) in the webhook. In this situation, the following **Job Level Injecting** mode may be a better option.
|
||||
|
|
|
@ -124,7 +124,7 @@ For example, for TFJob:
|
|||
```yaml
|
||||
. . .
|
||||
PrimaryPodLabel:
|
||||
"job-role": "master"
|
||||
"training.kubeflow.org/job-role": "master"
|
||||
. . .
|
||||
```
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ spec:
|
|||
- name: num-examples
|
||||
container:
|
||||
name: model-training
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -56,7 +56,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -56,7 +56,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -68,7 +68,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -23,7 +23,7 @@ spec:
|
|||
primaryContainerName: mxnet
|
||||
# In this example we can collect metrics only from the Worker pods.
|
||||
primaryPodLabels:
|
||||
replica-type: worker
|
||||
training.kubeflow.org/replica-type: worker
|
||||
trialParameters:
|
||||
- name: learningRate
|
||||
description: Learning rate for the training model
|
||||
|
|
|
@ -45,7 +45,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
@ -59,7 +59,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: tensorflow
|
||||
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:latest
|
||||
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:v0.13.0-rc.1
|
||||
command:
|
||||
- "python"
|
||||
- "/opt/tf-mnist-with-summaries/mnist.py"
|
||||
|
|
|
@ -66,7 +66,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -53,7 +53,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -58,7 +58,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -59,7 +59,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10:latest
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10:v0.13.0-rc.1
|
||||
command:
|
||||
- python3
|
||||
- run_trial.py
|
||||
|
|
|
@ -76,7 +76,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10:latest
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10:v0.13.0-rc.1
|
||||
command:
|
||||
- python3
|
||||
- run_trial.py
|
||||
|
|
|
@ -138,7 +138,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
|
|
@ -135,7 +135,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:v0.13.0-rc.1
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -88,7 +88,7 @@ spec:
|
|||
description: Number of training examples
|
||||
steps:
|
||||
- name: model-training
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -59,7 +59,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -7,13 +7,13 @@ data:
|
|||
metrics-collector-sidecar: |-
|
||||
{
|
||||
"StdOut": {
|
||||
"image": "docker.io/kubeflowkatib/file-metrics-collector:latest"
|
||||
"image": "docker.io/kubeflowkatib/file-metrics-collector:v0.13.0-rc.1"
|
||||
},
|
||||
"File": {
|
||||
"image": "docker.io/kubeflowkatib/file-metrics-collector:latest"
|
||||
"image": "docker.io/kubeflowkatib/file-metrics-collector:v0.13.0-rc.1"
|
||||
},
|
||||
"TensorFlowEvent": {
|
||||
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector:latest",
|
||||
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.13.0-rc.1",
|
||||
"resources": {
|
||||
"limits": {
|
||||
"memory": "1Gi"
|
||||
|
@ -24,31 +24,31 @@ data:
|
|||
suggestion: |-
|
||||
{
|
||||
"random": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.13.0-rc.1"
|
||||
},
|
||||
"tpe": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.13.0-rc.1"
|
||||
},
|
||||
"grid": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-chocolate:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-chocolate:v0.13.0-rc.1"
|
||||
},
|
||||
"hyperband": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperband:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-hyperband:v0.13.0-rc.1"
|
||||
},
|
||||
"bayesianoptimization": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-skopt:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-skopt:v0.13.0-rc.1"
|
||||
},
|
||||
"cmaes": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-goptuna:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.13.0-rc.1"
|
||||
},
|
||||
"sobol": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-goptuna:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.13.0-rc.1"
|
||||
},
|
||||
"multivariate-tpe": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-optuna:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-optuna:v0.13.0-rc.1"
|
||||
},
|
||||
"enas": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-enas:latest",
|
||||
"image": "docker.io/kubeflowkatib/suggestion-enas:v0.13.0-rc.1",
|
||||
"resources": {
|
||||
"limits": {
|
||||
"memory": "200Mi"
|
||||
|
@ -56,12 +56,12 @@ data:
|
|||
}
|
||||
},
|
||||
"darts": {
|
||||
"image": "docker.io/kubeflowkatib/suggestion-darts:latest"
|
||||
"image": "docker.io/kubeflowkatib/suggestion-darts:v0.13.0-rc.1"
|
||||
}
|
||||
}
|
||||
early-stopping: |-
|
||||
{
|
||||
"medianstop": {
|
||||
"image": "docker.io/kubeflowkatib/earlystopping-medianstop:latest"
|
||||
"image": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.13.0-rc.1"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
@ -32,7 +32,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
@ -53,7 +53,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- "python3"
|
||||
|
@ -68,7 +68,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- "python3"
|
||||
|
|
|
@ -21,13 +21,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
|
||||
patchesStrategicMerge:
|
||||
- patches/katib-cert-injection.yaml
|
||||
|
|
|
@ -19,16 +19,16 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/cert-generator
|
||||
newName: docker.io/kubeflowkatib/cert-generator
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
patchesStrategicMerge:
|
||||
- patches/db-manager.yaml
|
||||
# Modify katib-mysql-secrets with parameters for the DB.
|
||||
|
|
|
@ -30,13 +30,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
|
||||
patchesJson6902:
|
||||
# Annotate Service to delegate TLS-secret generation to OpenShift service controller
|
||||
|
|
|
@ -21,13 +21,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/cert-generator
|
||||
newName: docker.io/kubeflowkatib/cert-generator
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
|
|
|
@ -9,13 +9,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.13.0-rc.1
|
||||
|
||||
patchesStrategicMerge:
|
||||
- patches/remove-namespace.yaml
|
||||
|
|
|
@ -5,7 +5,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -5,7 +5,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
|
|
@ -9,7 +9,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- "python3"
|
||||
|
@ -24,7 +24,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- "python3"
|
||||
|
|
|
@ -38,7 +38,7 @@ const (
|
|||
|
||||
var (
|
||||
// DefaultKubeflowJobPrimaryPodLabels is the default value of spec.trialTemplate.primaryPodLabels for Kubeflow Training Job.
|
||||
DefaultKubeflowJobPrimaryPodLabels = map[string]string{"job-role": "master"}
|
||||
DefaultKubeflowJobPrimaryPodLabels = map[string]string{"training.kubeflow.org/job-role": "master"}
|
||||
|
||||
// KubeflowJobKinds is the list of Kubeflow Training Job kinds.
|
||||
KubeflowJobKinds = map[string]bool{
|
||||
|
@ -46,5 +46,6 @@ var (
|
|||
"PyTorchJob": true,
|
||||
"XGBoostJob": true,
|
||||
"MXJob": true,
|
||||
"MPIJob": true,
|
||||
}
|
||||
)
|
||||
|
|
|
@ -50,14 +50,14 @@ const initialState = {
|
|||
value: 'status.conditions.#(type=="Complete")#|#(status=="True")#',
|
||||
description: `Condition when Trial custom resource is succeeded.
|
||||
Default value for k8s BatchJob: status.conditions.#(type=="Complete")#|#(status=="True")#.
|
||||
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob): status.conditions.#(type=="Succeeded")#|#(status=="True")#.`,
|
||||
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob, MPIJob): status.conditions.#(type=="Succeeded")#|#(status=="True")#.`,
|
||||
},
|
||||
{
|
||||
name: 'FailureCondition',
|
||||
value: 'status.conditions.#(type=="Failed")#|#(status=="True")#',
|
||||
description: `Condition when Trial custom resource is failed.
|
||||
Default value for k8s BatchJob: status.conditions.#(type=="Failed")#|#(status=="True")#.
|
||||
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob): status.conditions.#(type=="Failed")#|#(status=="True")#.`,
|
||||
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob, MPIJob): status.conditions.#(type=="Failed")#|#(status=="True")#.`,
|
||||
},
|
||||
{
|
||||
name: 'Retain',
|
||||
|
|
|
@ -19,7 +19,7 @@ with open('requirements.txt') as f:
|
|||
|
||||
setuptools.setup(
|
||||
name='kubeflow-katib',
|
||||
version='0.12.0',
|
||||
version='0.13.0rc1',
|
||||
author="Kubeflow Authors",
|
||||
author_email='premnath.vel@gmail.com',
|
||||
license="Apache License Version 2.0",
|
||||
|
@ -35,11 +35,12 @@ setuptools.setup(
|
|||
'Intended Audience :: Developers',
|
||||
'Intended Audience :: Education',
|
||||
'Intended Audience :: Science/Research',
|
||||
'Programming Language :: Python :: 2',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3 :: Only',
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Programming Language :: Python :: 3.9',
|
||||
'Programming Language :: Python :: 3.10',
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Operating System :: OS Independent",
|
||||
'Topic :: Scientific/Engineering',
|
||||
|
|
|
@ -163,7 +163,7 @@ class WorkflowBuilder(object):
|
|||
},
|
||||
{
|
||||
"name": "EXTRA_REPOS",
|
||||
"value": "kubeflow/testing@HEAD;kubeflow/manifests@v1.4-branch"
|
||||
"value": "kubeflow/testing@HEAD;kubeflow/manifests@v1.5-branch"
|
||||
},
|
||||
# Set GOPATH to test_dir because Katib repo is located under /src/github.com/kubeflow/katib
|
||||
{
|
||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -41,8 +41,8 @@ cat "manifests/v1beta1/components/controller/katib-config.yaml"
|
|||
echo "Creating Kubeflow namespace"
|
||||
kubectl create namespace kubeflow
|
||||
|
||||
echo "Deploying training-operator from kubeflow/manifests v1.4 branch"
|
||||
cd "${MANIFESTS_DIR}/apps/training-operator/upstream/overlays/kubeflow"
|
||||
echo "Deploying Training Operator from kubeflow/manifests $(git rev-parse --abbrev-ref HEAD)"
|
||||
kustomize build . | kubectl apply -f -
|
||||
|
||||
echo "Deploying Katib"
|
||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
Loading…
Reference in New Issue