Compare commits

...

4 Commits

Author SHA1 Message Date
Andrey Velichkevich 1f9dff0307 Katib official release v0.13.0-rc.1 2022-02-15 16:12:09 +00:00
Andrey Velichkevich c00cf67074
Automated cherry pick of #1808: Fix default label for Training Operators on release-0.13 (#1813)
* Fix default label for Training Operators

* Fix version comment

* Change the docs

* Change git command
2022-02-15 16:05:41 +00:00
Yuki Iwai 4458e7bdcd
[cherry-pick] Update supported Python version for kubeflow-katib SDK (#1798)
* update supported Python version for kubeflow-katib SDK

* stop supporting Python2
2022-01-26 17:53:44 +00:00
Andrey Velichkevich 6329f48685 Katib official release v0.13.0-rc.0 2022-01-25 13:04:06 +00:00
43 changed files with 79 additions and 77 deletions

View File

@ -123,7 +123,7 @@ In the namespace with `katib.kubeflow.org/metrics-collector-injection=enabled` l
In **Pod Level Injecting**,
1. Job operators (_e.g. TFJob/PyTorchJob_) tag the `job-role: master` ([#1064](https://github.com/kubeflow/tf-operator/pull/1064)) label on the master pod.
1. Job operators (_e.g. TFJob/PyTorchJob_) tag the `training.kubeflow.org/job-role: master` ([#1064](https://github.com/kubeflow/tf-operator/pull/1064)) label on the master pod.
2. The webhook injects the metrics collector only if it recognizes this label.
3. The webhook uses [ObjectSelector](https://github.com/kubernetes/kubernetes/pull/78505) to skip irrelevant objects, which improves performance.
4. ObjectSelector is only supported in _Kubernetes v1.15_ and later. Without this feature, the webhook may have a [performance issue](https://github.com/kubeflow/katib/issues/685#issuecomment-516226070). In this situation, the following **Job Level Injecting** mode may be a better option.

View File

@ -124,7 +124,7 @@ For example, for TFJob:
```yaml
. . .
PrimaryPodLabel:
"job-role": "master"
"training.kubeflow.org/job-role": "master"
. . .
```

View File

@ -75,7 +75,7 @@ spec:
- name: num-examples
container:
name: model-training
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -56,7 +56,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -56,7 +56,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -54,7 +54,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -68,7 +68,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -23,7 +23,7 @@ spec:
primaryContainerName: mxnet
# In this example we can collect metrics only from the Worker pods.
primaryPodLabels:
replica-type: worker
training.kubeflow.org/replica-type: worker
trialParameters:
- name: learningRate
description: Learning rate for the training model

View File

@ -45,7 +45,7 @@ spec:
spec:
containers:
- name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"
@ -59,7 +59,7 @@ spec:
spec:
containers:
- name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -52,7 +52,7 @@ spec:
spec:
containers:
- name: tensorflow
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:latest
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:v0.13.0-rc.1
command:
- "python"
- "/opt/tf-mnist-with-summaries/mnist.py"

View File

@ -66,7 +66,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -53,7 +53,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -58,7 +58,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -59,7 +59,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/darts-cnn-cifar10:latest
image: docker.io/kubeflowkatib/darts-cnn-cifar10:v0.13.0-rc.1
command:
- python3
- run_trial.py

View File

@ -76,7 +76,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/darts-cnn-cifar10:latest
image: docker.io/kubeflowkatib/darts-cnn-cifar10:v0.13.0-rc.1
command:
- python3
- run_trial.py

View File

@ -138,7 +138,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
command:
- python3
- -u

View File

@ -135,7 +135,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:latest
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:v0.13.0-rc.1
command:
- python3
- -u

View File

@ -54,7 +54,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -54,7 +54,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -88,7 +88,7 @@ spec:
description: Number of training examples
steps:
- name: model-training
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -59,7 +59,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -7,13 +7,13 @@ data:
metrics-collector-sidecar: |-
{
"StdOut": {
"image": "docker.io/kubeflowkatib/file-metrics-collector:latest"
"image": "docker.io/kubeflowkatib/file-metrics-collector:v0.13.0-rc.1"
},
"File": {
"image": "docker.io/kubeflowkatib/file-metrics-collector:latest"
"image": "docker.io/kubeflowkatib/file-metrics-collector:v0.13.0-rc.1"
},
"TensorFlowEvent": {
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector:latest",
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.13.0-rc.1",
"resources": {
"limits": {
"memory": "1Gi"
@ -24,31 +24,31 @@ data:
suggestion: |-
{
"random": {
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:latest"
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.13.0-rc.1"
},
"tpe": {
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:latest"
"image": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.13.0-rc.1"
},
"grid": {
"image": "docker.io/kubeflowkatib/suggestion-chocolate:latest"
"image": "docker.io/kubeflowkatib/suggestion-chocolate:v0.13.0-rc.1"
},
"hyperband": {
"image": "docker.io/kubeflowkatib/suggestion-hyperband:latest"
"image": "docker.io/kubeflowkatib/suggestion-hyperband:v0.13.0-rc.1"
},
"bayesianoptimization": {
"image": "docker.io/kubeflowkatib/suggestion-skopt:latest"
"image": "docker.io/kubeflowkatib/suggestion-skopt:v0.13.0-rc.1"
},
"cmaes": {
"image": "docker.io/kubeflowkatib/suggestion-goptuna:latest"
"image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.13.0-rc.1"
},
"sobol": {
"image": "docker.io/kubeflowkatib/suggestion-goptuna:latest"
"image": "docker.io/kubeflowkatib/suggestion-goptuna:v0.13.0-rc.1"
},
"multivariate-tpe": {
"image": "docker.io/kubeflowkatib/suggestion-optuna:latest"
"image": "docker.io/kubeflowkatib/suggestion-optuna:v0.13.0-rc.1"
},
"enas": {
"image": "docker.io/kubeflowkatib/suggestion-enas:latest",
"image": "docker.io/kubeflowkatib/suggestion-enas:v0.13.0-rc.1",
"resources": {
"limits": {
"memory": "200Mi"
@ -56,12 +56,12 @@ data:
}
},
"darts": {
"image": "docker.io/kubeflowkatib/suggestion-darts:latest"
"image": "docker.io/kubeflowkatib/suggestion-darts:v0.13.0-rc.1"
}
}
early-stopping: |-
{
"medianstop": {
"image": "docker.io/kubeflowkatib/earlystopping-medianstop:latest"
"image": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.13.0-rc.1"
}
}

View File

@ -14,7 +14,7 @@ data:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"
@ -32,7 +32,7 @@ data:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
command:
- python3
- -u
@ -53,7 +53,7 @@ data:
spec:
containers:
- name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
imagePullPolicy: Always
command:
- "python3"
@ -68,7 +68,7 @@ data:
spec:
containers:
- name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
imagePullPolicy: Always
command:
- "python3"

View File

@ -21,13 +21,13 @@ resources:
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
newTag: v0.13.0-rc.1
patchesStrategicMerge:
- patches/katib-cert-injection.yaml

View File

@ -19,16 +19,16 @@ resources:
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/cert-generator
newName: docker.io/kubeflowkatib/cert-generator
newTag: latest
newTag: v0.13.0-rc.1
patchesStrategicMerge:
- patches/db-manager.yaml
# Modify katib-mysql-secrets with parameters for the DB.

View File

@ -30,13 +30,13 @@ resources:
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
newTag: v0.13.0-rc.1
patchesJson6902:
# Annotate Service to delegate TLS-secret generation to OpenShift service controller

View File

@ -21,13 +21,13 @@ resources:
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/cert-generator
newName: docker.io/kubeflowkatib/cert-generator
newTag: latest
newTag: v0.13.0-rc.1

View File

@ -9,13 +9,13 @@ resources:
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
newTag: v0.13.0-rc.1
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
newTag: v0.13.0-rc.1
patchesStrategicMerge:
- patches/remove-namespace.yaml

View File

@ -5,7 +5,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -5,7 +5,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0-rc.1
command:
- python3
- -u

View File

@ -9,7 +9,7 @@ spec:
spec:
containers:
- name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
imagePullPolicy: Always
command:
- "python3"
@ -24,7 +24,7 @@ spec:
spec:
containers:
- name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0-rc.1
imagePullPolicy: Always
command:
- "python3"

View File

@ -38,7 +38,7 @@ const (
var (
// DefaultKubeflowJobPrimaryPodLabels is the default value of spec.trialTemplate.primaryPodLabels for Kubeflow Training Job.
DefaultKubeflowJobPrimaryPodLabels = map[string]string{"job-role": "master"}
DefaultKubeflowJobPrimaryPodLabels = map[string]string{"training.kubeflow.org/job-role": "master"}
// KubeflowJobKinds is the list of Kubeflow Training Job kinds.
KubeflowJobKinds = map[string]bool{
@ -46,5 +46,6 @@ var (
"PyTorchJob": true,
"XGBoostJob": true,
"MXJob": true,
"MPIJob": true,
}
)

View File

@ -50,14 +50,14 @@ const initialState = {
value: 'status.conditions.#(type=="Complete")#|#(status=="True")#',
description: `Condition when Trial custom resource is succeeded.
Default value for k8s BatchJob: status.conditions.#(type=="Complete")#|#(status=="True")#.
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob): status.conditions.#(type=="Succeeded")#|#(status=="True")#.`,
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob, MPIJob): status.conditions.#(type=="Succeeded")#|#(status=="True")#.`,
},
{
name: 'FailureCondition',
value: 'status.conditions.#(type=="Failed")#|#(status=="True")#',
description: `Condition when Trial custom resource is failed.
Default value for k8s BatchJob: status.conditions.#(type=="Failed")#|#(status=="True")#.
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob): status.conditions.#(type=="Failed")#|#(status=="True")#.`,
Default value for Kubeflow Job (TFJob, PyTorchJob, XGBoostJob, MXJob, MPIJob): status.conditions.#(type=="Failed")#|#(status=="True")#.`,
},
{
name: 'Retain',

View File

@ -19,7 +19,7 @@ with open('requirements.txt') as f:
setuptools.setup(
name='kubeflow-katib',
version='0.12.0',
version='0.13.0rc1',
author="Kubeflow Authors",
author_email='premnath.vel@gmail.com',
license="Apache License Version 2.0",
@ -35,11 +35,12 @@ setuptools.setup(
'Intended Audience :: Developers',
'Intended Audience :: Education',
'Intended Audience :: Science/Research',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
'Topic :: Scientific/Engineering',

View File

@ -163,7 +163,7 @@ class WorkflowBuilder(object):
},
{
"name": "EXTRA_REPOS",
"value": "kubeflow/testing@HEAD;kubeflow/manifests@v1.4-branch"
"value": "kubeflow/testing@HEAD;kubeflow/manifests@v1.5-branch"
},
# Set GOPATH to test_dir because Katib repo is located under /src/github.com/kubeflow/katib
{

View File

@ -52,7 +52,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"

View File

@ -41,8 +41,8 @@ cat "manifests/v1beta1/components/controller/katib-config.yaml"
echo "Creating Kubeflow namespace"
kubectl create namespace kubeflow
echo "Deploying training-operator from kubeflow/manifests v1.4 branch"
cd "${MANIFESTS_DIR}/apps/training-operator/upstream/overlays/kubeflow"
echo "Deploying Training Operator from kubeflow/manifests $(git rev-parse --abbrev-ref HEAD)"
kustomize build . | kubectl apply -f -
echo "Deploying Katib"

View File

@ -52,7 +52,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0-rc.1
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"