Compare commits

...

4 Commits

Author SHA1 Message Date
Andrey Velichkevich 1f9dff0307 Katib official release v0.13.0-rc.1 2022-02-15 16:12:09 +00:00
Andrey Velichkevich c00cf67074
Automated cherry pick of #1808: Fix default label for Training Operators on release-0.13 (#1813)
* Fix default label for Training Operators

* Fix version comment

* Change the docs

* Change git command
2022-02-15 16:05:41 +00:00
Yuki Iwai 4458e7bdcd
[cherry-pick] Update supported Python version for kubeflow-katib SDK (#1798)
* update supported Python version for kubeflow-katib SDK

* stop supporting Python2
2022-01-26 17:53:44 +00:00
Andrey Velichkevich 6329f48685 Katib official release v0.13.0-rc.0 2022-01-25 13:04:06 +00:00
43 changed files with 79 additions and 77 deletions

View File

@ -123,7 +123,7 @@ In the namespace with `katib.kubeflow.org/metrics-collector-injection=enabled` l
In **Pod Level Injecting**, In **Pod Level Injecting**,
1. Job operators (_e.g. TFJob/PyTorchJob_) tag the `job-role: master` ([#1064](https://github.com/kubeflow/tf-operator/pull/1064)) label on the master pod. 1. Job operators (_e.g. TFJob/PyTorchJob_) tag the `training.kubeflow.org/job-role: master` ([#1064](https://github.com/kubeflow/tf-operator/pull/1064)) label on the master pod.
2. The webhook injects the metric collector only if the webhook recognizes this label. 2. The webhook injects the metric collector only if the webhook recognizes this label.
3. The webhook uses [ObjectSelector](https://github.com/kubernetes/kubernetes/pull/78505) to skip irrelevant objects in order to optimize performance. 3. The webhook uses [ObjectSelector](https://github.com/kubernetes/kubernetes/pull/78505) to skip irrelevant objects in order to optimize performance.
4. ObjectSelector is supported only in _Kubernetes v1.15_ and later. Without this new feature, there may be a [performance issue](https://github.com/kubeflow/katib/issues/685#issuecomment-516226070) in the webhook. In this situation, the following **Job Level Injecting** mode may be a better option. 4. ObjectSelector is supported only in _Kubernetes v1.15_ and later. Without this new feature, there may be a [performance issue](https://github.com/kubeflow/katib/issues/685#issuecomment-516226070) in the webhook. In this situation, the following **Job Level Injecting** mode may be a better option.

View File

@ -124,7 +124,7 @@ For example, for TFJob:
```yaml ```yaml
. . . . . .
PrimaryPodLabel: PrimaryPodLabel:
"job-role": "master" "training.kubeflow.org/job-role": "master"
. . . . . .
``` ```

View File

@ -75,7 +75,7 @@ spec:
- name: num-examples - name: num-examples
container: container:
name: model-training name: model-training
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -56,7 +56,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -56,7 +56,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -54,7 +54,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -68,7 +68,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -23,7 +23,7 @@ spec:
primaryContainerName: mxnet primaryContainerName: mxnet
# In this example we can collect metrics only from the Worker pods. # In this example we can collect metrics only from the Worker pods.
primaryPodLabels: primaryPodLabels:
replica-type: worker training.kubeflow.org/replica-type: worker
trialParameters: trialParameters:
- name: learningRate - name: learningRate
description: Learning rate for the training model description: Learning rate for the training model

View File

@ -45,7 +45,7 @@ spec:
spec: spec:
containers: containers:
- name: pytorch - name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/pytorch-mnist/mnist.py" - "/opt/pytorch-mnist/mnist.py"
@ -59,7 +59,7 @@ spec:
spec: spec:
containers: containers:
- name: pytorch - name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/pytorch-mnist/mnist.py" - "/opt/pytorch-mnist/mnist.py"

View File

@ -52,7 +52,7 @@ spec:
spec: spec:
containers: containers:
- name: tensorflow - name: tensorflow
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:latest image: docker.io/kubeflowkatib/tf-mnist-with-summaries:v0.13.0-rc.1
command: command:
- "python" - "python"
- "/opt/tf-mnist-with-summaries/mnist.py" - "/opt/tf-mnist-with-summaries/mnist.py"

View File

@ -66,7 +66,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist:latest image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/pytorch-mnist/mnist.py" - "/opt/pytorch-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist:latest image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/pytorch-mnist/mnist.py" - "/opt/pytorch-mnist/mnist.py"

View File

@ -58,7 +58,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -59,7 +59,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/darts-cnn-cifar10:latest image: docker.io/kubeflowkatib/darts-cnn-cifar10:v0.13.0-rc.1
command: command:
- python3 - python3
- run_trial.py - run_trial.py

View File

@ -76,7 +76,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/darts-cnn-cifar10:latest image: docker.io/kubeflowkatib/darts-cnn-cifar10:v0.13.0-rc.1
command: command:
- python3 - python3
- run_trial.py - run_trial.py

View File

@ -138,7 +138,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
command: command:
- python3 - python3
- -u - -u

View File

@ -135,7 +135,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:latest image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:v0.13.0-rc.1
command: command:
- python3 - python3
- -u - -u

View File

@ -54,7 +54,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -54,7 +54,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -88,7 +88,7 @@ spec:
description: Number of training examples description: Number of training examples
steps: steps:
- name: model-training - name: model-training
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -59,7 +59,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -7,13 +7,13 @@ data:
metrics-collector-sidecar: |- metrics-collector-sidecar: |-
{ {
"StdOut": { "StdOut": {
"image": "docker.io/kubeflowkatib/file-metrics-collector:latest" "image": "docker.io/kubeflowkatib/file-metrics-collector:v0.13.0-rc.1"
}, },
"File": { "File": {
"image": "docker.io/kubeflowkatib/file-metrics-collector:latest" "image": "docker.io/kubeflowkatib/file-metrics-collector:v0.13.0-rc.1"
}, },
"TensorFlowEvent": { "TensorFlowEvent": {
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector:latest", "image": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.13.0-rc.1",
"resources": { "resources": {
"limits": { "limits": {
"memory": "1Gi" "memory": "1Gi"
@ -24,31 +24,31 @@ data:
suggestion: |- suggestion: |-
{ {
"random": { "random": {
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:latest" "image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.13.0-rc.1"
}, },
"tpe": { "tpe": {
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:latest" "image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.13.0-rc.1"
}, },
"grid": { "grid": {
"image": "docker.io/kubeflowkatib/suggestion-chocolate:latest" "image": "docker.io/kubeflowkatib/suggestion-chocolate:v0.13.0-rc.1"
}, },
"hyperband": { "hyperband": {
"image": "docker.io/kubeflowkatib/suggestion-hyperband:latest" "image": "docker.io/kubeflowkatib/suggestion-hyperband:v0.13.0-rc.1"
}, },
"bayesianoptimization": { "bayesianoptimization": {
"image": "docker.io/kubeflowkatib/suggestion-skopt:latest" "image": "docker.io/kubeflowkatib/suggestion-skopt:v0.13.0-rc.1"
}, },
"cmaes": { "cmaes": {
"image": "docker.io/kubeflowkatib/suggestion-goptuna:latest" "image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.13.0-rc.1"
}, },
"sobol": { "sobol": {
"image": "docker.io/kubeflowkatib/suggestion-goptuna:latest" "image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.13.0-rc.1"
}, },
"multivariate-tpe": { "multivariate-tpe": {
"image": "docker.io/kubeflowkatib/suggestion-optuna:latest" "image": "docker.io/kubeflowkatib/suggestion-optuna:v0.13.0-rc.1"
}, },
"enas": { "enas": {
"image": "docker.io/kubeflowkatib/suggestion-enas:latest", "image": "docker.io/kubeflowkatib/suggestion-enas:v0.13.0-rc.1",
"resources": { "resources": {
"limits": { "limits": {
"memory": "200Mi" "memory": "200Mi"
@ -56,12 +56,12 @@ data:
} }
}, },
"darts": { "darts": {
"image": "docker.io/kubeflowkatib/suggestion-darts:latest" "image": "docker.io/kubeflowkatib/suggestion-darts:v0.13.0-rc.1"
} }
} }
early-stopping: |- early-stopping: |-
{ {
"medianstop": { "medianstop": {
"image": "docker.io/kubeflowkatib/earlystopping-medianstop:latest" "image": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.13.0-rc.1"
} }
} }

View File

@ -14,7 +14,7 @@ data:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"
@ -32,7 +32,7 @@ data:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
command: command:
- python3 - python3
- -u - -u
@ -53,7 +53,7 @@ data:
spec: spec:
containers: containers:
- name: pytorch - name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
imagePullPolicy: Always imagePullPolicy: Always
command: command:
- "python3" - "python3"
@ -68,7 +68,7 @@ data:
spec: spec:
containers: containers:
- name: pytorch - name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
imagePullPolicy: Always imagePullPolicy: Always
command: command:
- "python3" - "python3"

View File

@ -21,13 +21,13 @@ resources:
images: images:
- name: docker.io/kubeflowkatib/katib-controller - name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-db-manager - name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-ui - name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui newName: docker.io/kubeflowkatib/katib-ui
newTag: latest newTag: v0.13.0-rc.1
patchesStrategicMerge: patchesStrategicMerge:
- patches/katib-cert-injection.yaml - patches/katib-cert-injection.yaml

View File

@ -19,16 +19,16 @@ resources:
images: images:
- name: docker.io/kubeflowkatib/katib-controller - name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-db-manager - name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-ui - name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui newName: docker.io/kubeflowkatib/katib-ui
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/cert-generator - name: docker.io/kubeflowkatib/cert-generator
newName: docker.io/kubeflowkatib/cert-generator newName: docker.io/kubeflowkatib/cert-generator
newTag: latest newTag: v0.13.0-rc.1
patchesStrategicMerge: patchesStrategicMerge:
- patches/db-manager.yaml - patches/db-manager.yaml
# Modify katib-mysql-secrets with parameters for the DB. # Modify katib-mysql-secrets with parameters for the DB.

View File

@ -30,13 +30,13 @@ resources:
images: images:
- name: docker.io/kubeflowkatib/katib-controller - name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-db-manager - name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-ui - name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui newName: docker.io/kubeflowkatib/katib-ui
newTag: latest newTag: v0.13.0-rc.1
patchesJson6902: patchesJson6902:
# Annotate Service to delegate TLS-secret generation to OpenShift service controller # Annotate Service to delegate TLS-secret generation to OpenShift service controller

View File

@ -21,13 +21,13 @@ resources:
images: images:
- name: docker.io/kubeflowkatib/katib-controller - name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-db-manager - name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-ui - name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui newName: docker.io/kubeflowkatib/katib-ui
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/cert-generator - name: docker.io/kubeflowkatib/cert-generator
newName: docker.io/kubeflowkatib/cert-generator newName: docker.io/kubeflowkatib/cert-generator
newTag: latest newTag: v0.13.0-rc.1

View File

@ -9,13 +9,13 @@ resources:
images: images:
- name: docker.io/kubeflowkatib/katib-controller - name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-db-manager - name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-ui - name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui newName: docker.io/kubeflowkatib/katib-ui
newTag: latest newTag: v0.13.0-rc.1
patchesStrategicMerge: patchesStrategicMerge:
- patches/remove-namespace.yaml - patches/remove-namespace.yaml

View File

@ -5,7 +5,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -5,7 +5,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
command: command:
- python3 - python3
- -u - -u

View File

@ -9,7 +9,7 @@ spec:
spec: spec:
containers: containers:
- name: pytorch - name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
imagePullPolicy: Always imagePullPolicy: Always
command: command:
- "python3" - "python3"
@ -24,7 +24,7 @@ spec:
spec: spec:
containers: containers:
- name: pytorch - name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
imagePullPolicy: Always imagePullPolicy: Always
command: command:
- "python3" - "python3"

View File

@ -38,7 +38,7 @@ const (
var ( var (
// DefaultKubeflowJobPrimaryPodLabels is the default value of spec.trialTemplate.primaryPodLabels for Kubeflow Training Job. // DefaultKubeflowJobPrimaryPodLabels is the default value of spec.trialTemplate.primaryPodLabels for Kubeflow Training Job.
DefaultKubeflowJobPrimaryPodLabels = map[string]string{"job-role": "master"} DefaultKubeflowJobPrimaryPodLabels = map[string]string{"training.kubeflow.org/job-role": "master"}
// KubeflowJobKinds is the list of Kubeflow Training Job kinds. // KubeflowJobKinds is the list of Kubeflow Training Job kinds.
KubeflowJobKinds = map[string]bool{ KubeflowJobKinds = map[string]bool{
@ -46,5 +46,6 @@ var (
"PyTorchJob": true, "PyTorchJob": true,
"XGBoostJob": true, "XGBoostJob": true,
"MXJob": true, "MXJob": true,
"MPIJob": true,
} }
) )

View File

@ -50,14 +50,14 @@ const initialState = {
value: 'status.conditions.#(type=="Complete")#|#(status=="True")#', value: 'status.conditions.#(type=="Complete")#|#(status=="True")#',
description: `Condition when Trial custom resource is succeeded. description: `Condition when Trial custom resource is succeeded.
Default value for k8s BatchJob: status.conditions.#(type=="Complete")#|#(status=="True")#. Default value for k8s BatchJob: status.conditions.#(type=="Complete")#|#(status=="True")#.
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob): status.conditions.#(type=="Succeeded")#|#(status=="True")#.`, Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob, MPIJob): status.conditions.#(type=="Succeeded")#|#(status=="True")#.`,
}, },
{ {
name: 'FailureCondition', name: 'FailureCondition',
value: 'status.conditions.#(type=="Failed")#|#(status=="True")#', value: 'status.conditions.#(type=="Failed")#|#(status=="True")#',
description: `Condition when Trial custom resource is failed. description: `Condition when Trial custom resource is failed.
Default value for k8s BatchJob: status.conditions.#(type=="Failed")#|#(status=="True")#. Default value for k8s BatchJob: status.conditions.#(type=="Failed")#|#(status=="True")#.
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob): status.conditions.#(type=="Failed")#|#(status=="True")#.`, Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob, MPIJob): status.conditions.#(type=="Failed")#|#(status=="True")#.`,
}, },
{ {
name: 'Retain', name: 'Retain',

View File

@ -19,7 +19,7 @@ with open('requirements.txt') as f:
setuptools.setup( setuptools.setup(
name='kubeflow-katib', name='kubeflow-katib',
version='0.12.0', version='0.13.0rc1',
author="Kubeflow Authors", author="Kubeflow Authors",
author_email='premnath.vel@gmail.com', author_email='premnath.vel@gmail.com',
license="Apache License Version 2.0", license="Apache License Version 2.0",
@ -35,11 +35,12 @@ setuptools.setup(
'Intended Audience :: Developers', 'Intended Audience :: Developers',
'Intended Audience :: Education', 'Intended Audience :: Education',
'Intended Audience :: Science/Research', 'Intended Audience :: Science/Research',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
"License :: OSI Approved :: Apache Software License", "License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent", "Operating System :: OS Independent",
'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering',

View File

@ -163,7 +163,7 @@ class WorkflowBuilder(object):
}, },
{ {
"name": "EXTRA_REPOS", "name": "EXTRA_REPOS",
"value": "kubeflow/testing@HEAD;kubeflow/manifests@v1.4-branch" "value": "kubeflow/testing@HEAD;kubeflow/manifests@v1.5-branch"
}, },
# Set GOPATH to test_dir because Katib repo is located under /src/github.com/kubeflow/katib # Set GOPATH to test_dir because Katib repo is located under /src/github.com/kubeflow/katib
{ {

View File

@ -52,7 +52,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"

View File

@ -41,8 +41,8 @@ cat "manifests/v1beta1/components/controller/katib-config.yaml"
echo "Creating Kubeflow namespace" echo "Creating Kubeflow namespace"
kubectl create namespace kubeflow kubectl create namespace kubeflow
echo "Deploying training-operator from kubeflow/manifests v1.4 branch"
cd "${MANIFESTS_DIR}/apps/training-operator/upstream/overlays/kubeflow" cd "${MANIFESTS_DIR}/apps/training-operator/upstream/overlays/kubeflow"
echo "Deploying Training Operator from kubeflow/manifests $(git rev-parse --abbrev-ref HEAD)"
kustomize build . | kubectl apply -f - kustomize build . | kubectl apply -f -
echo "Deploying Katib" echo "Deploying Katib"

View File

@ -52,7 +52,7 @@ spec:
spec: spec:
containers: containers:
- name: training-container - name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command: command:
- "python3" - "python3"
- "/opt/mxnet-mnist/mnist.py" - "/opt/mxnet-mnist/mnist.py"