Compare commits

...

5 Commits

Author SHA1 Message Date
Andrey Velichkevich f09dbf170b Katib official release v0.18.0 2025-03-25 00:38:04 +00:00
saileshd1402 d79b8d279d
[cherrypick-0.18] move manifest image references to ghcr (#2529) (#2535)
* [feature] move manifest image references to ghcr (#2529)

* move to ghcr

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* move images to ghcr

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* manifests

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* change registry in all path

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* update script

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* fix

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* fix

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* slight fix

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

---------

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>
Signed-off-by: Mahdi Khashan <58775404+mahdikhashan@users.noreply.github.com>

* revert changes for ipynb

Signed-off-by: sailesh duddupudi <saileshradar@gmail.com>

---------

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>
Signed-off-by: Mahdi Khashan <58775404+mahdikhashan@users.noreply.github.com>
Signed-off-by: sailesh duddupudi <saileshradar@gmail.com>
Co-authored-by: Mahdi Khashan <58775404+mahdikhashan@users.noreply.github.com>
2025-03-24 21:41:45 +00:00
Google OSS Prow Robot 3d3fb391db
[release-0.18] [feature] migrate docker images to ghcr (#2531)
* update custom action

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* define token as input

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* clean up meta job

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* change build-and-publish-images.yaml

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* remove secret from workflow call

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* remove docker credentials from publish* images

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* revert meta step changes

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* revert changes

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* update

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* add dockerhub as a job

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* revert secrets

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* revert docker secrets

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* revert docker secrets

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* consolidate/merge registries

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* fix inputs

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

* revert docker path name

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>

---------

Signed-off-by: mahdikhashan <mahdikhashan1@gmail.com>
Co-authored-by: mahdikhashan <mahdikhashan1@gmail.com>
2025-03-19 06:14:45 +00:00
Mahdi Khashan 7dcdde7af9
cherry-picked `c18035e1` (#2517)
* Support old-style TensorFlow events (tensorboard)

Fixes: https://github.com/kubeflow/katib/issues/2466


* format



* test



* don't continue loops



* format



---------

Signed-off-by: Gary Miguel <garymm@garymm.org>
Co-authored-by: Gary Miguel <garymm@garymm.org>
2025-02-17 19:13:39 +00:00
Andrey Velichkevich 2daece483c Katib official release v0.18.0-rc.0 2025-02-13 13:59:27 +00:00
67 changed files with 351 additions and 281 deletions

View File

@ -1,5 +1,5 @@
# Reusable workflows for publishing Katib images.
name: Build And Publish Images
name: Build and Publish Images
on:
workflow_call:
@ -21,31 +21,50 @@ on:
jobs:
build-and-publish:
name: Publish Image
name: Build and Publish Images
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Docker Login
# Trigger workflow only for kubeflow/katib repository with specific branch (master, release-.*) or tag (v.*).
if: >-
github.repository == 'kubeflow/katib' &&
(github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release-') || startsWith(github.ref, 'refs/tags/v'))
- name: Set Publish Condition
id: publish-condition
shell: bash
run: |
if [[ "${{ github.repository }}" == 'kubeflow/katib' && \
( "${{ github.ref }}" == 'refs/heads/master' || \
"${{ github.ref }}" =~ ^refs/heads/release- || \
"${{ github.ref }}" =~ ^refs/tags/v ) ]]; then
echo "should_publish=true" >> $GITHUB_OUTPUT
else
echo "should_publish=false" >> $GITHUB_OUTPUT
fi
- name: GHCR Login
if: steps.publish-condition.outputs.should_publish == 'true'
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: DockerHub Login
if: steps.publish-condition.outputs.should_publish == 'true'
uses: docker/login-action@v3
with:
registry: docker.io
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Publish Component ${{ inputs.component-name }}
# Trigger workflow only for kubeflow/katib repository with specific branch (master, release-.*) or tag (v.*).
if: >-
github.repository == 'kubeflow/katib' &&
(github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release-') || startsWith(github.ref, 'refs/tags/v'))
if: steps.publish-condition.outputs.should_publish == 'true'
id: publish
uses: ./.github/workflows/template-publish-image
with:
image: docker.io/kubeflowkatib/${{ inputs.component-name }}
image: |
ghcr.io/kubeflow/katib/${{ inputs.component-name }}
docker.io/kubeflowkatib/${{ inputs.component-name }}
dockerfile: ${{ inputs.dockerfile }}
platforms: ${{ inputs.platforms }}
push: true
@ -54,7 +73,9 @@ jobs:
if: steps.publish.outcome == 'skipped'
uses: ./.github/workflows/template-publish-image
with:
image: docker.io/kubeflowkatib/${{ inputs.component-name }}
image: |
ghcr.io/kubeflow/katib/${{ inputs.component-name }}
docker.io/kubeflowkatib/${{ inputs.component-name }}
dockerfile: ${{ inputs.dockerfile }}
platforms: ${{ inputs.platforms }}
push: false

View File

@ -5,7 +5,7 @@ HAS_SETUP_ENVTEST := $(shell command -v setup-envtest;)
HAS_MOCKGEN := $(shell command -v mockgen;)
COMMIT := v1beta1-$(shell git rev-parse --short=7 HEAD)
KATIB_REGISTRY := docker.io/kubeflowkatib
KATIB_REGISTRY := ghcr.io/kubeflow/katib
CPU_ARCH ?= linux/amd64,linux/arm64
ENVTEST_K8S_VERSION ?= 1.31
MOCKGEN_VERSION ?= $(shell grep 'go.uber.org/mock' go.mod | cut -d ' ' -f 2)

View File

@ -22,7 +22,7 @@ The following table shows images for the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/katib-controller</code>
<code>ghcr.io/kubeflow/katib/katib-controller</code>
</td>
<td>
Katib Controller
@ -33,7 +33,7 @@ The following table shows images for the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/katib-ui</code>
<code>ghcr.io/kubeflow/katib/katib-ui</code>
</td>
<td>
Katib User Interface
@ -44,7 +44,7 @@ The following table shows images for the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/katib-db-manager</code>
<code>ghcr.io/kubeflow/katib/katib-db-manager</code>
</td>
<td>
Katib DB Manager
@ -87,7 +87,7 @@ The following table shows images for the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/file-metrics-collector</code>
<code>ghcr.io/kubeflow/katib/file-metrics-collector</code>
</td>
<td>
File Metrics Collector
@ -98,7 +98,7 @@ The following table shows images for the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/tfevent-metrics-collector</code>
<code>ghcr.io/kubeflow/katib/tfevent-metrics-collector</code>
</td>
<td>
TensorFlow Event Metrics Collector
@ -131,7 +131,7 @@ and the [Katib Early Stopping algorithms](https://www.kubeflow.org/docs/componen
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/suggestion-hyperopt</code>
<code>ghcr.io/kubeflow/katib/suggestion-hyperopt</code>
</td>
<td>
<a href="https://github.com/hyperopt/hyperopt">Hyperopt</a> Suggestion
@ -142,7 +142,7 @@ and the [Katib Early Stopping algorithms](https://www.kubeflow.org/docs/componen
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/suggestion-skopt</code>
<code>ghcr.io/kubeflow/katib/suggestion-skopt</code>
</td>
<td>
<a href="https://github.com/scikit-optimize/scikit-optimize">Skopt</a> Suggestion
@ -153,7 +153,7 @@ and the [Katib Early Stopping algorithms](https://www.kubeflow.org/docs/componen
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/suggestion-optuna</code>
<code>ghcr.io/kubeflow/katib/suggestion-optuna</code>
</td>
<td>
<a href="https://github.com/optuna/optuna">Optuna</a> Suggestion
@ -164,7 +164,7 @@ and the [Katib Early Stopping algorithms](https://www.kubeflow.org/docs/componen
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/suggestion-goptuna</code>
<code>ghcr.io/kubeflow/katib/suggestion-goptuna</code>
</td>
<td>
<a href="https://github.com/c-bata/goptuna">Goptuna</a> Suggestion
@ -175,7 +175,7 @@ and the [Katib Early Stopping algorithms](https://www.kubeflow.org/docs/componen
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/suggestion-hyperband</code>
<code>ghcr.io/kubeflow/katib/suggestion-hyperband</code>
</td>
<td>
<a href="https://www.kubeflow.org/docs/components/katib/experiment/#hyperband">Hyperband</a> Suggestion
@ -186,7 +186,7 @@ and the [Katib Early Stopping algorithms](https://www.kubeflow.org/docs/componen
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/suggestion-enas</code>
<code>ghcr.io/kubeflow/katib/suggestion-enas</code>
</td>
<td>
<a href="https://www.kubeflow.org/docs/components/katib/experiment/#enas">ENAS</a> Suggestion
@ -197,7 +197,7 @@ and the [Katib Early Stopping algorithms](https://www.kubeflow.org/docs/componen
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/suggestion-darts</code>
<code>ghcr.io/kubeflow/katib/suggestion-darts</code>
</td>
<td>
<a href="https://www.kubeflow.org/docs/components/katib/experiment/#differentiable-architecture-search-darts">DARTS</a> Suggestion
@ -208,7 +208,7 @@ and the [Katib Early Stopping algorithms](https://www.kubeflow.org/docs/componen
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/earlystopping-medianstop</code>
<code>ghcr.io/kubeflow/katib/earlystopping-medianstop</code>
</td>
<td>
<a href="https://www.kubeflow.org/docs/components/katib/early-stopping/#median-stopping-rule">Median Stopping Rule</a>
@ -240,7 +240,7 @@ The following table shows images for training containers which are used in the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/pytorch-mnist-cpu</code>
<code>ghcr.io/kubeflow/katib/pytorch-mnist-cpu</code>
</td>
<td>
PyTorch MNIST example with printing metrics to the file or StdOut with CPU support
@ -251,7 +251,7 @@ The following table shows images for training containers which are used in the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/pytorch-mnist-gpu</code>
<code>ghcr.io/kubeflow/katib/pytorch-mnist-gpu</code>
</td>
<td>
PyTorch MNIST example with printing metrics to the file or StdOut with GPU support
@ -262,7 +262,7 @@ The following table shows images for training containers which are used in the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/tf-mnist-with-summaries</code>
<code>ghcr.io/kubeflow/katib/tf-mnist-with-summaries</code>
</td>
<td>
TensorFlow MNIST example with saving metrics in the summaries
@ -273,7 +273,7 @@ The following table shows images for training containers which are used in the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/xgboost-lightgbm</code>
<code>ghcr.io/kubeflow/katib/xgboost-lightgbm</code>
</td>
<td>
Distributed LightGBM example for XGBoostJob
@ -306,7 +306,7 @@ The following table shows images for training containers which are used in the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/enas-cnn-cifar10-gpu</code>
<code>ghcr.io/kubeflow/katib/enas-cnn-cifar10-gpu</code>
</td>
<td>
Keras CIFAR-10 CNN example for ENAS with GPU support
@ -317,7 +317,7 @@ The following table shows images for training containers which are used in the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/enas-cnn-cifar10-cpu</code>
<code>ghcr.io/kubeflow/katib/enas-cnn-cifar10-cpu</code>
</td>
<td>
Keras CIFAR-10 CNN example for ENAS with CPU support
@ -328,7 +328,7 @@ The following table shows images for training containers which are used in the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/darts-cnn-cifar10-gpu</code>
<code>ghcr.io/kubeflow/katib/darts-cnn-cifar10-gpu</code>
</td>
<td>
PyTorch CIFAR-10 CNN example for DARTS with GPU support
@ -339,7 +339,7 @@ The following table shows images for training containers which are used in the
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/darts-cnn-cifar10-cpu</code>
<code>ghcr.io/kubeflow/katib/darts-cnn-cifar10-cpu</code>
</td>
<td>
PyTorch CIFAR-10 CNN example for DARTS with CPU support

View File

@ -74,7 +74,7 @@ spec:
- name: epochs
container:
name: model-training
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -62,7 +62,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -52,7 +52,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -45,7 +45,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -45,7 +45,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -44,7 +44,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -57,7 +57,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -63,7 +63,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -42,7 +42,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -63,7 +63,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -42,7 +42,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -43,7 +43,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/simple-pbt:latest
image: ghcr.io/kubeflow/katib/simple-pbt:v0.18.0
command:
- "python3"
- "/opt/pbt/pbt_test.py"

View File

@ -42,7 +42,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -42,7 +42,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -46,7 +46,7 @@ spec:
spec:
containers:
- name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"
@ -61,7 +61,7 @@ spec:
spec:
containers:
- name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -56,7 +56,7 @@ spec:
spec:
containers:
- name: tensorflow
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:latest
image: ghcr.io/kubeflow/katib/tf-mnist-with-summaries:v0.18.0
command:
- "python"
- "/opt/tf-mnist-with-summaries/mnist.py"

View File

@ -56,7 +56,7 @@ spec:
spec:
containers:
- name: xgboost
image: docker.io/kubeflowkatib/xgboost-lightgbm:1.0
image: ghcr.io/kubeflow/katib/xgboost-lightgbm:1.0
ports:
- containerPort: 9991
name: xgboostjob-port
@ -90,7 +90,7 @@ spec:
spec:
containers:
- name: xgboost
image: docker.io/kubeflowkatib/xgboost-lightgbm:1.0
image: ghcr.io/kubeflow/katib/xgboost-lightgbm:1.0
ports:
- containerPort: 9991
name: xgboostjob-port

View File

@ -26,7 +26,7 @@ spec:
- katib-db-manager.kubeflow:6789
- -path
- /katib/mnist.log
image: kubeflowkatib/custom-metrics-collector:latest
image: ghcr.io/kubeflow/katib/custom-metrics-collector:latest
imagePullPolicy: Always
name: custom-metrics-logger-and-collector
env:
@ -67,7 +67,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -52,7 +52,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -54,7 +54,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -49,7 +49,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -60,7 +60,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/darts-cnn-cifar10-cpu:latest
image: ghcr.io/kubeflow/katib/darts-cnn-cifar10-cpu:v0.18.0
command:
- python3
- run_trial.py

View File

@ -77,7 +77,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/darts-cnn-cifar10-gpu:latest
image: ghcr.io/kubeflow/katib/darts-cnn-cifar10-gpu:v0.18.0
command:
- python3
- run_trial.py

View File

@ -139,7 +139,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
image: ghcr.io/kubeflow/katib/enas-cnn-cifar10-cpu:v0.18.0
command:
- python3
- -u

View File

@ -136,7 +136,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:latest
image: ghcr.io/kubeflow/katib/enas-cnn-cifar10-gpu:v0.18.0
command:
- python3
- -u

View File

@ -43,7 +43,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -43,7 +43,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -30,13 +30,13 @@ set this `nop` image to Metrics Collector image.
For example, if you are using
[StdOut](https://www.kubeflow.org/docs/components/katib/experiment/#metrics-collector) Metrics Collector,
`nop` image must be equal to `docker.io/kubeflowkatib/file-metrics-collector`.
`nop` image must be equal to `ghcr.io/kubeflow/katib/file-metrics-collector`.
Run the following command to modify the `nop` image:
```bash
kubectl patch deploy tekton-pipelines-controller -n tekton-pipelines --type='json' \
-p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/9", "value": "docker.io/kubeflowkatib/file-metrics-collector"}]'
-p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/9", "value": "ghcr.io/kubeflow/katib/file-metrics-collector"}]'
```
Check that Tekton Pipelines Controller's pod was restarted:
@ -54,7 +54,7 @@ Verify that `nop` image was modified:
```bash
$ kubectl get $(kubectl get pods -o name -n tekton-pipelines | grep tekton-pipelines-controller) -n tekton-pipelines -o yaml | grep katib
- docker.io/kubeflowkatib/file-metrics-collector
- ghcr.io/kubeflow/katib/file-metrics-collector
```
### Katib Controller

View File

@ -88,7 +88,7 @@ spec:
description: Number of epochs
steps:
- name: model-training
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -66,7 +66,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -23,7 +23,7 @@ spec:
serviceAccountName: katib-controller
containers:
- name: katib-controller
image: docker.io/kubeflowkatib/katib-controller
image: ghcr.io/kubeflow/katib/katib-controller
command: ["./katib-controller"]
args:
- --katib-config=/katib-config.yaml

View File

@ -15,7 +15,7 @@ data:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"
@ -33,7 +33,7 @@ data:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
image: ghcr.io/kubeflow/katib/enas-cnn-cifar10-cpu:v0.18.0
command:
- python3
- -u
@ -54,7 +54,7 @@ data:
spec:
containers:
- name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"
@ -68,7 +68,7 @@ data:
spec:
containers:
- name: pytorch
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -20,7 +20,7 @@ spec:
spec:
containers:
- name: katib-db-manager
image: docker.io/kubeflowkatib/katib-db-manager
image: ghcr.io/kubeflow/katib/katib-db-manager
env:
- name: DB_NAME
value: "mysql"

View File

@ -20,7 +20,7 @@ spec:
spec:
containers:
- name: katib-ui
image: docker.io/kubeflowkatib/katib-ui
image: ghcr.io/kubeflow/katib/katib-ui
command:
- "./katib-ui"
args:

View File

@ -13,40 +13,40 @@ init:
runtime:
metricsCollectors:
- kind: StdOut
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: File
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: TensorFlowEvent
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
image: ghcr.io/kubeflow/katib/tfevent-metrics-collector:v0.18.0
resources:
limits:
memory: 1Gi
suggestions:
- algorithmName: random
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: tpe
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: grid
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: hyperband
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperband:v0.18.0
- algorithmName: bayesianoptimization
image: docker.io/kubeflowkatib/suggestion-skopt:latest
image: ghcr.io/kubeflow/katib/suggestion-skopt:v0.18.0
- algorithmName: cmaes
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: sobol
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: multivariate-tpe
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: enas
image: docker.io/kubeflowkatib/suggestion-enas:latest
image: ghcr.io/kubeflow/katib/suggestion-enas:v0.18.0
resources:
limits:
memory: 400Mi
- algorithmName: darts
image: docker.io/kubeflowkatib/suggestion-darts:latest
image: ghcr.io/kubeflow/katib/suggestion-darts:v0.18.0
- algorithmName: pbt
image: docker.io/kubeflowkatib/suggestion-pbt:latest
image: ghcr.io/kubeflow/katib/suggestion-pbt:v0.18.0
persistentVolumeClaimSpec:
accessModes:
- ReadWriteMany
@ -55,4 +55,4 @@ runtime:
storage: 5Gi
earlyStoppings:
- algorithmName: medianstop
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
image: ghcr.io/kubeflow/katib/earlystopping-medianstop:v0.18.0

View File

@ -20,15 +20,15 @@ resources:
# Cert-manager certificate for webhooks
- certificate.yaml
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
- name: ghcr.io/kubeflow/katib/katib-controller
newName: ghcr.io/kubeflow/katib/katib-controller
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-db-manager
newName: ghcr.io/kubeflow/katib/katib-db-manager
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-ui
newName: ghcr.io/kubeflow/katib/katib-ui
newTag: v0.18.0
patchesStrategicMerge:
- patches/katib-cert-injection.yaml

View File

@ -15,40 +15,40 @@ init:
runtime:
metricsCollectors:
- kind: StdOut
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: File
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: TensorFlowEvent
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
image: ghcr.io/kubeflow/katib/tfevent-metrics-collector:v0.18.0
resources:
limits:
memory: 1Gi
suggestions:
- algorithmName: random
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: tpe
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: grid
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: hyperband
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperband:v0.18.0
- algorithmName: bayesianoptimization
image: docker.io/kubeflowkatib/suggestion-skopt:latest
image: ghcr.io/kubeflow/katib/suggestion-skopt:v0.18.0
- algorithmName: cmaes
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: sobol
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: multivariate-tpe
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: enas
image: docker.io/kubeflowkatib/suggestion-enas:latest
image: ghcr.io/kubeflow/katib/suggestion-enas:v0.18.0
resources:
limits:
memory: 400Mi
- algorithmName: darts
image: docker.io/kubeflowkatib/suggestion-darts:latest
image: ghcr.io/kubeflow/katib/suggestion-darts:v0.18.0
- algorithmName: pbt
image: docker.io/kubeflowkatib/suggestion-pbt:latest
image: ghcr.io/kubeflow/katib/suggestion-pbt:v0.18.0
persistentVolumeClaimSpec:
accessModes:
- ReadWriteMany
@ -57,4 +57,4 @@ runtime:
storage: 5Gi
earlyStoppings:
- algorithmName: medianstop
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
image: ghcr.io/kubeflow/katib/earlystopping-medianstop:v0.18.0

View File

@ -16,15 +16,15 @@ resources:
# Katib webhooks.
- ../../components/webhook/
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
- name: ghcr.io/kubeflow/katib/katib-controller
newName: ghcr.io/kubeflow/katib/katib-controller
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-db-manager
newName: ghcr.io/kubeflow/katib/katib-db-manager
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-ui
newName: ghcr.io/kubeflow/katib/katib-ui
newTag: v0.18.0
patchesStrategicMerge:
- patches/db-manager.yaml
# Modify katib-mysql-secrets with parameters for the DB.

View File

@ -16,40 +16,40 @@ init:
runtime:
metricsCollectors:
- kind: StdOut
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: File
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: TensorFlowEvent
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
image: ghcr.io/kubeflow/katib/tfevent-metrics-collector:v0.18.0
resources:
limits:
memory: 1Gi
suggestions:
- algorithmName: random
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: tpe
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: grid
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: hyperband
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperband:v0.18.0
- algorithmName: bayesianoptimization
image: docker.io/kubeflowkatib/suggestion-skopt:latest
image: ghcr.io/kubeflow/katib/suggestion-skopt:v0.18.0
- algorithmName: cmaes
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: sobol
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: multivariate-tpe
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: enas
image: docker.io/kubeflowkatib/suggestion-enas:latest
image: ghcr.io/kubeflow/katib/suggestion-enas:v0.18.0
resources:
limits:
memory: 400Mi
- algorithmName: darts
image: docker.io/kubeflowkatib/suggestion-darts:latest
image: ghcr.io/kubeflow/katib/suggestion-darts:v0.18.0
- algorithmName: pbt
image: docker.io/kubeflowkatib/suggestion-pbt:latest
image: ghcr.io/kubeflow/katib/suggestion-pbt:v0.18.0
persistentVolumeClaimSpec:
accessModes:
- ReadWriteMany
@ -58,4 +58,4 @@ runtime:
storage: 5Gi
earlyStoppings:
- algorithmName: medianstop
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
image: ghcr.io/kubeflow/katib/earlystopping-medianstop:v0.18.0

View File

@ -13,40 +13,40 @@ init:
runtime:
metricsCollectors:
- kind: StdOut
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: File
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: TensorFlowEvent
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
image: ghcr.io/kubeflow/katib/tfevent-metrics-collector:v0.18.0
resources:
limits:
memory: 1Gi
suggestions:
- algorithmName: random
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: tpe
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: grid
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: hyperband
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperband:v0.18.0
- algorithmName: bayesianoptimization
image: docker.io/kubeflowkatib/suggestion-skopt:latest
image: ghcr.io/kubeflow/katib/suggestion-skopt:v0.18.0
- algorithmName: cmaes
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: sobol
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: multivariate-tpe
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: enas
image: docker.io/kubeflowkatib/suggestion-enas:latest
image: ghcr.io/kubeflow/katib/suggestion-enas:v0.18.0
resources:
limits:
memory: 400Mi
- algorithmName: darts
image: docker.io/kubeflowkatib/suggestion-darts:latest
image: ghcr.io/kubeflow/katib/suggestion-darts:v0.18.0
- algorithmName: pbt
image: docker.io/kubeflowkatib/suggestion-pbt:latest
image: ghcr.io/kubeflow/katib/suggestion-pbt:v0.18.0
persistentVolumeClaimSpec:
accessModes:
- ReadWriteMany
@ -55,4 +55,4 @@ runtime:
storage: 5Gi
earlyStoppings:
- algorithmName: medianstop
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
image: ghcr.io/kubeflow/katib/earlystopping-medianstop:v0.18.0

View File

@ -28,15 +28,15 @@ resources:
# Katib webhooks.
- ../../components/webhook/
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
- name: ghcr.io/kubeflow/katib/katib-controller
newName: ghcr.io/kubeflow/katib/katib-controller
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-db-manager
newName: ghcr.io/kubeflow/katib/katib-db-manager
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-ui
newName: ghcr.io/kubeflow/katib/katib-ui
newTag: v0.18.0
patchesJson6902:
# Annotate Service to delegate TLS-secret generation to OpenShift service controller

View File

@ -15,40 +15,40 @@ init:
runtime:
metricsCollectors:
- kind: StdOut
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: File
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: TensorFlowEvent
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
image: ghcr.io/kubeflow/katib/tfevent-metrics-collector:v0.18.0
resources:
limits:
memory: 1Gi
suggestions:
- algorithmName: random
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: tpe
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: grid
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: hyperband
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperband:v0.18.0
- algorithmName: bayesianoptimization
image: docker.io/kubeflowkatib/suggestion-skopt:latest
image: ghcr.io/kubeflow/katib/suggestion-skopt:v0.18.0
- algorithmName: cmaes
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: sobol
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: multivariate-tpe
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: enas
image: docker.io/kubeflowkatib/suggestion-enas:latest
image: ghcr.io/kubeflow/katib/suggestion-enas:v0.18.0
resources:
limits:
memory: 400Mi
- algorithmName: darts
image: docker.io/kubeflowkatib/suggestion-darts:latest
image: ghcr.io/kubeflow/katib/suggestion-darts:v0.18.0
- algorithmName: pbt
image: docker.io/kubeflowkatib/suggestion-pbt:latest
image: ghcr.io/kubeflow/katib/suggestion-pbt:v0.18.0
persistentVolumeClaimSpec:
accessModes:
- ReadWriteMany
@ -57,4 +57,4 @@ runtime:
storage: 5Gi
earlyStoppings:
- algorithmName: medianstop
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
image: ghcr.io/kubeflow/katib/earlystopping-medianstop:v0.18.0

View File

@ -18,15 +18,15 @@ resources:
# Katib webhooks.
- ../../components/webhook/
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
- name: ghcr.io/kubeflow/katib/katib-controller
newName: ghcr.io/kubeflow/katib/katib-controller
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-db-manager
newName: ghcr.io/kubeflow/katib/katib-db-manager
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-ui
newName: ghcr.io/kubeflow/katib/katib-ui
newTag: v0.18.0
patchesJson6902:
- target:
group: apps

View File

@ -1,4 +1,3 @@
---
apiVersion: config.kubeflow.org/v1beta1
kind: KatibConfig
init:
@ -15,40 +14,40 @@ init:
runtime:
metricsCollectors:
- kind: StdOut
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: File
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: ghcr.io/kubeflow/katib/file-metrics-collector:v0.18.0
- kind: TensorFlowEvent
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
image: ghcr.io/kubeflow/katib/tfevent-metrics-collector:v0.18.0
resources:
limits:
memory: 1Gi
suggestions:
- algorithmName: random
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: tpe
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:v0.18.0
- algorithmName: grid
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: hyperband
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperband:v0.18.0
- algorithmName: bayesianoptimization
image: docker.io/kubeflowkatib/suggestion-skopt:latest
image: ghcr.io/kubeflow/katib/suggestion-skopt:v0.18.0
- algorithmName: cmaes
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: sobol
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: ghcr.io/kubeflow/katib/suggestion-goptuna:v0.18.0
- algorithmName: multivariate-tpe
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: ghcr.io/kubeflow/katib/suggestion-optuna:v0.18.0
- algorithmName: enas
image: docker.io/kubeflowkatib/suggestion-enas:latest
image: ghcr.io/kubeflow/katib/suggestion-enas:v0.18.0
resources:
limits:
memory: 400Mi
- algorithmName: darts
image: docker.io/kubeflowkatib/suggestion-darts:latest
image: ghcr.io/kubeflow/katib/suggestion-darts:v0.18.0
- algorithmName: pbt
image: docker.io/kubeflowkatib/suggestion-pbt:latest
image: ghcr.io/kubeflow/katib/suggestion-pbt:v0.18.0
persistentVolumeClaimSpec:
accessModes:
- ReadWriteMany
@ -57,4 +56,4 @@ runtime:
storage: 5Gi
earlyStoppings:
- algorithmName: medianstop
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
image: ghcr.io/kubeflow/katib/earlystopping-medianstop:v0.18.0

View File

@ -18,15 +18,15 @@ resources:
# Katib webhooks.
- ../../components/webhook/
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
- name: ghcr.io/kubeflow/katib/katib-controller
newName: ghcr.io/kubeflow/katib/katib-controller
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-db-manager
newName: ghcr.io/kubeflow/katib/katib-db-manager
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-ui
newName: ghcr.io/kubeflow/katib/katib-ui
newTag: v0.18.0
configMapGenerator:
- name: katib-config
behavior: create

View File

@ -9,15 +9,15 @@ resources:
- ui-virtual-service.yaml
- istio-authorizationpolicy.yaml
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
newTag: latest
- name: docker.io/kubeflowkatib/katib-db-manager
newName: docker.io/kubeflowkatib/katib-db-manager
newTag: latest
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
- name: ghcr.io/kubeflow/katib/katib-controller
newName: ghcr.io/kubeflow/katib/katib-controller
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-db-manager
newName: ghcr.io/kubeflow/katib/katib-db-manager
newTag: v0.18.0
- name: ghcr.io/kubeflow/katib/katib-ui
newName: ghcr.io/kubeflow/katib/katib-ui
newTag: v0.18.0
patchesStrategicMerge:
- patches/remove-namespace.yaml

View File

@ -483,7 +483,7 @@ func newFakeInstance() *experimentsv1beta1.Experiment {
Containers: []corev1.Container{
{
Name: primaryContainer,
Image: "docker.io/kubeflowkatib/pytorch-mnist-cpu",
Image: "ghcr.io/kubeflow/katib/pytorch-mnist-cpu",
Command: []string{
"python3",
"/opt/pytorch-mnist/mnist.py",
@ -619,7 +619,7 @@ func newFakeBatchJob() *batchv1.Job {
Containers: []corev1.Container{
{
Name: primaryContainer,
Image: "docker.io/kubeflowkatib/pytorch-mnist-cpu",
Image: "ghcr.io/kubeflow/katib/pytorch-mnist-cpu",
Command: []string{
"python3",
"/opt/pytorch-mnist/mnist.py",

View File

@ -61,7 +61,7 @@ func TestGetRunSpecWithHP(t *testing.T) {
Containers: []v1.Container{
{
Name: "training-container",
Image: "docker.io/kubeflowkatib/pytorch-mnist-cpu",
Image: "ghcr.io/kubeflow/katib/pytorch-mnist-cpu",
Command: []string{
"python3",
"/opt/pytorch-mnist/mnist.py",
@ -170,7 +170,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"
@ -186,7 +186,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu
command:
- python3
- /opt/pytorch-mnist/mnist.py
@ -207,7 +207,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"
@ -337,7 +337,7 @@ func newFakeInstance() *experimentsv1beta1.Experiment {
Containers: []v1.Container{
{
Name: "training-container",
Image: "docker.io/kubeflowkatib/pytorch-mnist-cpu",
Image: "ghcr.io/kubeflow/katib/pytorch-mnist-cpu",
Command: []string{
"python3",
"/opt/pytorch-mnist/mnist.py",

View File

@ -440,7 +440,7 @@ func newFakeTrialBatchJob(mcType commonv1beta1.CollectorKind, trialName string)
Containers: []corev1.Container{
{
Name: primaryContainer,
Image: "docker.io/kubeflowkatib/pytorch-mnist-cpu",
Image: "ghcr.io/kubeflow/katib/pytorch-mnist-cpu",
Command: []string{
"python3",
"/opt/pytorch-mnist/mnist.py",

View File

@ -30,11 +30,23 @@ import api_pb2
import rfc3339
import tensorflow as tf
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
from tensorboard.backend.event_processing.tag_types import TENSORS
from tensorboard.backend.event_processing.tag_types import SCALARS, TENSORS
from pkg.metricscollector.v1beta1.common import const
def _should_consider(tag: str, metric_name: str, tfefile: str) -> bool:
tfefile_parent_dir = (
os.path.dirname(metric_name)
if len(metric_name.split("/")) >= 2
else os.path.dirname(tfefile)
)
basedir_name = os.path.dirname(tfefile)
return tag.startswith(metric_name.split("/")[-1]) and basedir_name.endswith(
tfefile_parent_dir
)
class TFEventFileParser:
def __init__(self, metric_names):
self.metric_names = metric_names
@ -47,31 +59,36 @@ class TFEventFileParser:
def parse_summary(self, tfefile):
metric_logs = []
event_accumulator = EventAccumulator(tfefile, size_guidance={TENSORS: 0})
event_accumulator = EventAccumulator(
tfefile, size_guidance={SCALARS: 0, TENSORS: 0}
)
event_accumulator.Reload()
for tag in event_accumulator.Tags()[TENSORS]:
tags = event_accumulator.Tags()
for tag in tags[TENSORS]:
for m in self.metric_names:
tfefile_parent_dir = (
os.path.dirname(m)
if len(m.split("/")) >= 2
else os.path.dirname(tfefile)
)
basedir_name = os.path.dirname(tfefile)
if not tag.startswith(m.split("/")[-1]) or not basedir_name.endswith(
tfefile_parent_dir
):
continue
for tensor in event_accumulator.Tensors(tag):
ml = api_pb2.MetricLog(
time_stamp=rfc3339.rfc3339(
datetime.fromtimestamp(tensor.wall_time)
),
metric=api_pb2.Metric(
name=m, value=str(tf.make_ndarray(tensor.tensor_proto))
),
)
metric_logs.append(ml)
if _should_consider(tag, m, tfefile):
for tensor in event_accumulator.Tensors(tag):
ml = api_pb2.MetricLog(
time_stamp=rfc3339.rfc3339(
datetime.fromtimestamp(tensor.wall_time)
),
metric=api_pb2.Metric(
name=m, value=str(tf.make_ndarray(tensor.tensor_proto))
),
)
metric_logs.append(ml)
# support old-style tensorboard metrics too
for tag in tags[SCALARS]:
for m in self.metric_names:
if _should_consider(tag, m, tfefile):
for scalar in event_accumulator.Scalars(tag):
ml = api_pb2.MetricLog(
time_stamp=rfc3339.rfc3339(
datetime.fromtimestamp(scalar.wall_time)
),
metric=api_pb2.Metric(name=m, value=str(scalar.value)),
)
metric_logs.append(ml)
return metric_logs

View File

@ -8,15 +8,15 @@
"Templates": [
{
"Path": "defaultTrialTemplate.yaml",
"Yaml": "apiVersion: batch/v1\nkind: Job\nspec:\n template:\n spec:\n containers:\n - name: training-container\n image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727\n command:\n - \"python3\"\n - \"/opt/pytorch-mnist/mnist.py\"\n - \"--epochs=1\"\n - \"--lr=${trialParameters.learningRate}\"\n - \"--momentum=${trialParameters.momentum}\"\n restartPolicy: Never"
"Yaml": "apiVersion: batch/v1\nkind: Job\nspec:\n template:\n spec:\n containers:\n - name: training-container\n image: ghcr.io/kubeflow/katib/pytorch-mnist:v1beta1-45c5727\n command:\n - \"python3\"\n - \"/opt/pytorch-mnist/mnist.py\"\n - \"--epochs=1\"\n - \"--lr=${trialParameters.learningRate}\"\n - \"--momentum=${trialParameters.momentum}\"\n restartPolicy: Never"
},
{
"Path": "enasCPUTemplate",
"Yaml": "apiVersion: batch/v1\nkind: Job\nspec:\n template:\n spec:\n containers:\n - name: training-container\n image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v1beta1-45c5727\n command:\n - python3\n - -u\n - RunTrial.py\n - --num_epochs=1\n - \"--architecture=\\\"${trialParameters.neuralNetworkArchitecture}\\\"\"\n - \"--nn_config=\\\"${trialParameters.neuralNetworkConfig}\\\"\"\n restartPolicy: Never"
"Yaml": "apiVersion: batch/v1\nkind: Job\nspec:\n template:\n spec:\n containers:\n - name: training-container\n image: ghcr.io/kubeflow/katib/enas-cnn-cifar10-cpu:v1beta1-45c5727\n command:\n - python3\n - -u\n - RunTrial.py\n - --num_epochs=1\n - \"--architecture=\\\"${trialParameters.neuralNetworkArchitecture}\\\"\"\n - \"--nn_config=\\\"${trialParameters.neuralNetworkConfig}\\\"\"\n restartPolicy: Never"
},
{
"Path": "pytorchJobTemplate",
"Yaml": "apiVersion: \"kubeflow.org/v1\"\nkind: PyTorchJob\nspec:\n pytorchReplicaSpecs:\n Master:\n replicas: 1\n restartPolicy: OnFailure\n template:\n spec:\n containers:\n - name: pytorch\n image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727\n imagePullPolicy: Always\n command:\n - \"python3\"\n - \"/opt/pytorch-mnist/mnist.py\"\n - \"--epochs=1\"\n - \"--lr=${trialParameters.learningRate}\"\n - \"--momentum=${trialParameters.momentum}\"\n Worker:\n replicas: 2\n restartPolicy: OnFailure\n template:\n spec:\n containers:\n - name: pytorch\n image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727\n imagePullPolicy: Always\n command:\n - \"python3\"\n - \"/opt/pytorch-mnist/mnist.py\"\n - \"--epochs=1\"\n - \"--lr=${trialParameters.learningRate}\"\n - \"--momentum=${trialParameters.momentum}\""
"Yaml": "apiVersion: \"kubeflow.org/v1\"\nkind: PyTorchJob\nspec:\n pytorchReplicaSpecs:\n Master:\n replicas: 1\n restartPolicy: OnFailure\n template:\n spec:\n containers:\n - name: pytorch\n image: ghcr.io/kubeflow/katib/pytorch-mnist:v1beta1-45c5727\n imagePullPolicy: Always\n command:\n - \"python3\"\n - \"/opt/pytorch-mnist/mnist.py\"\n - \"--epochs=1\"\n - \"--lr=${trialParameters.learningRate}\"\n - \"--momentum=${trialParameters.momentum}\"\n Worker:\n replicas: 2\n restartPolicy: OnFailure\n template:\n spec:\n containers:\n - name: pytorch\n image: ghcr.io/kubeflow/katib/pytorch-mnist:v1beta1-45c5727\n imagePullPolicy: Always\n command:\n - \"python3\"\n - \"/opt/pytorch-mnist/mnist.py\"\n - \"--epochs=1\"\n - \"--lr=${trialParameters.learningRate}\"\n - \"--momentum=${trialParameters.momentum}\""
}
]
}

View File

@ -407,7 +407,7 @@ init:
runtime:
suggestions:
- algorithmName: random
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: ghcr.io/kubeflow/katib/suggestion-hyperopt:latest
`), os.FileMode(0600)); err != nil {
t.Fatal(err)
}

View File

@ -1433,7 +1433,7 @@ func newFakeBatchJob() *batchv1.Job {
Containers: []v1.Container{
{
Name: "training-container",
Image: "docker.io/kubeflowkatib/pytorch-mnist-cpu",
Image: "ghcr.io/kubeflow/katib/pytorch-mnist-cpu",
Command: []string{
"python3",
"--epochs=1",

View File

@ -68,7 +68,7 @@ fi
# ------------------ Change image tag ------------------
# Change Katib image tags to the new release tag.
make update-images OLD_PREFIX="docker.io/kubeflowkatib/" NEW_PREFIX="docker.io/kubeflowkatib/" TAG="${TAG}"
make update-images OLD_PREFIX="ghcr.io/kubeflow/katib/" NEW_PREFIX="ghcr.io/kubeflow/katib/" TAG="${TAG}"
# ------------------ Publish Katib SDK ------------------
# Remove first "v" for the SDK version.

View File

@ -28,8 +28,8 @@
# 5. Katib Trial training containers
#
# Run ./scripts/v1beta1/update-images.sh <OLD_PREFIX> <NEW_PREFIX> <TAG> to execute it.
# For example, to update images from: docker.io/kubeflowkatib/ to: docker.io/private/ registry with tag: v0.12.0, run:
# ./scripts/v1beta1/update-images.sh docker.io/kubeflowkatib/ docker.io/private/ v0.12.0
# For example, to update images from: ghcr.io/kubeflow/katib/ to: ghcr.io/private/ registry with tag: v0.12.0, run:
# ./scripts/v1beta1/update-images.sh ghcr.io/kubeflow/katib/ ghcr.io/private/ v0.12.0
set -o errexit
set -o pipefail
@ -42,8 +42,8 @@ TAG=${3:-""}
if [[ -z "$OLD_PREFIX" || -z "$NEW_PREFIX" || -z "$TAG" ]]; then
echo "Image old prefix, new prefix, and tag must be set"
echo -e "Usage: $0 <OLD_PREFIX> <NEW_PREFIX> <TAG>\n" 1>&2
echo "For example, to update images from: docker.io/kubeflowkatib/ to: docker.io/private/ registry with tag: v0.12.0, run:"
echo "$0 docker.io/kubeflowkatib/ docker.io/private/ v0.12.0"
echo "For example, to update images from: ghcr.io/kubeflow/katib/ to: ghcr.io/private/ registry with tag: v0.12.0, run:"
echo "$0 ghcr.io/kubeflow/katib/ ghcr.io/private/ v0.12.0"
exit 1
fi

View File

@ -100,7 +100,7 @@ def generate_trial_template() -> V1beta1TrialTemplate:
"containers": [
{
"name": "training-container",
"image": "docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0",
"image": "ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.14.0",
"command": [
"python3",
"/opt/pytorch-mnist/mnist.py",

View File

@ -56,7 +56,7 @@ if os.path.exists(katib_grpc_svc_file):
setuptools.setup(
name="kubeflow-katib",
version="0.17.0",
version="0.18.0",
author="Kubeflow Authors",
author_email="premnath.vel@gmail.com",
license="Apache License Version 2.0",

View File

@ -32,7 +32,7 @@ kubectl version
kubectl cluster-info
# Update Katib images with the current PULL SHA.
make update-images OLD_PREFIX="docker.io/kubeflowkatib/" NEW_PREFIX="${ECR_REGISTRY}/${REPO_NAME}/v1beta1/" TAG="${PULL_PULL_SHA}"
make update-images OLD_PREFIX="ghcr.io/kubeflow/katib/" NEW_PREFIX="${ECR_REGISTRY}/${REPO_NAME}/v1beta1/" TAG="${PULL_PULL_SHA}"
echo -e "\n The Katib will be deployed with the following configs"
cat "manifests/v1beta1/installs/katib-standalone/kustomization.yaml"

View File

@ -30,7 +30,7 @@ TUNE_API=${2:-false}
TRIAL_IMAGES=${3:-""}
EXPERIMENTS=${4:-""}
REGISTRY="docker.io/kubeflowkatib"
REGISTRY="ghcr.io/kubeflow/katib"
TAG="e2e-test"
VERSION="v1beta1"
CMD_PREFIX="cmd"

View File

@ -30,7 +30,7 @@ TRAINING_OPERATOR_VERSION="v1.9.0"
echo "Start to install Katib"
# Update Katib images with `e2e-test`.
cd ../../../../../ && make update-images OLD_PREFIX="docker.io/kubeflowkatib/" NEW_PREFIX="docker.io/kubeflowkatib/" TAG="$E2E_TEST_IMAGE_TAG" && cd -
cd ../../../../../ && make update-images OLD_PREFIX="ghcr.io/kubeflow/katib/" NEW_PREFIX="ghcr.io/kubeflow/katib/" TAG="$E2E_TEST_IMAGE_TAG" && cd -
# First declare which kustomization file to use; by default use mysql.
KUSTOMIZATION_FILE="../../../../../manifests/v1beta1/installs/katib-standalone/kustomization.yaml"

View File

@ -40,7 +40,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -40,7 +40,7 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
image: ghcr.io/kubeflow/katib/pytorch-mnist-cpu:v0.18.0
command:
- "python3"
- "/opt/pytorch-mnist/mnist.py"

View File

@ -13,10 +13,19 @@
# limitations under the License.
import os
import tempfile
import unittest
import tensorboardX
import utils
# Names of the log sub-directories used by the tests.
METRIC_DIR_NAMES = ("train", "test")
# Bare metric names, without a directory prefix.
METRIC_NAMES = ("accuracy", "loss")
# Every "<dir>/<metric>" combination, ordered by directory first.
QUALIFIED_METRIC_NAMES = tuple(
    f"{dir_name}/{metric}"
    for dir_name in METRIC_DIR_NAMES
    for metric in METRIC_NAMES
)
class TestTFEventMetricsCollector(unittest.TestCase):
def test_parse_file(self):
@ -24,24 +33,47 @@ class TestTFEventMetricsCollector(unittest.TestCase):
current_dir = os.path.dirname(os.path.abspath(__file__))
logs_dir = os.path.join(current_dir, "testdata/tfevent-metricscollector/logs")
# Metric format is "{{dirname}}/{{metrics name}}"
metric_names = ["train/accuracy", "train/loss", "test/loss", "test/accuracy"]
metric_logs = utils.get_metric_logs(logs_dir, metric_names)
metric_logs = utils.get_metric_logs(logs_dir, QUALIFIED_METRIC_NAMES)
self.assertEqual(20, len(metric_logs))
for log in metric_logs:
actual = log["metric"]["name"]
self.assertIn(actual, metric_names)
self.assertIn(actual, QUALIFIED_METRIC_NAMES)
# Metric format is "{{metrics name}}"
metric_names = ["accuracy", "loss"]
metrics_file_dir = os.path.join(logs_dir, "train")
metric_logs = utils.get_metric_logs(metrics_file_dir, metric_names)
self.assertEqual(10, len(metric_logs))
train_metric_logs = utils.get_metric_logs(
os.path.join(logs_dir, "train"), METRIC_NAMES)
self.assertEqual(10, len(train_metric_logs))
for log in train_metric_logs:
actual = log["metric"]["name"]
self.assertIn(actual, METRIC_NAMES)
def test_parse_file_with_tensorboardX(self):
logs_dir = tempfile.mkdtemp()
num_iters = 3
for dir_name in METRIC_DIR_NAMES:
with tensorboardX.SummaryWriter(os.path.join(logs_dir, dir_name)) as writer:
for metric_name in METRIC_NAMES:
for iter in range(num_iters):
writer.add_scalar(metric_name, 0.1, iter)
metric_logs = utils.get_metric_logs(logs_dir, QUALIFIED_METRIC_NAMES)
self.assertEqual(num_iters * len(QUALIFIED_METRIC_NAMES), len(metric_logs))
for log in metric_logs:
actual = log["metric"]["name"]
self.assertIn(actual, metric_names)
self.assertIn(actual, QUALIFIED_METRIC_NAMES)
train_metric_logs = utils.get_metric_logs(
os.path.join(logs_dir, "train"), METRIC_NAMES)
self.assertEqual(num_iters * len(METRIC_NAMES), len(train_metric_logs))
for log in train_metric_logs:
actual = log["metric"]["name"]
self.assertIn(actual, METRIC_NAMES)
if __name__ == '__main__':

View File

@ -1,3 +1,4 @@
grpcio-testing==1.64.1
pytest==7.2.0
tensorboardX==2.6.2.2
kubeflow-training[huggingface]==1.9.0