mirror of https://github.com/kubeflow/examples.git
Delete obsolete HP tuning code. (#451)
* Katib no longer uses custom go programs. Instead it uses the new StudyJobController custom resource. * This code is no longer needed so delete it.
This commit is contained in:
parent
37dd52f49d
commit
7990408207
|
@ -1,4 +0,0 @@
|
|||
FROM golang:1.9
|
||||
|
||||
RUN mkdir -p /opt/kubeflow
|
||||
COPY ./build/git-issue-summarize-demo /opt/kubeflow
|
|
@ -1,100 +0,0 @@
|
|||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
|
||||
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/golang/protobuf"
|
||||
packages = [
|
||||
"proto",
|
||||
"ptypes",
|
||||
"ptypes/any",
|
||||
"ptypes/duration",
|
||||
"ptypes/timestamp"
|
||||
]
|
||||
revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/kubeflow/katib"
|
||||
packages = ["pkg/api"]
|
||||
revision = "f24b520cc52920ae511aeea235636462ebc21d21"
|
||||
version = "v0.1.2-alpha"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "golang.org/x/net"
|
||||
packages = [
|
||||
"context",
|
||||
"http/httpguts",
|
||||
"http2",
|
||||
"http2/hpack",
|
||||
"idna",
|
||||
"internal/timeseries",
|
||||
"trace"
|
||||
]
|
||||
revision = "4cb1c02c05b0e749b0365f61ae859a8e0cfceed9"
|
||||
|
||||
[[projects]]
|
||||
name = "golang.org/x/text"
|
||||
packages = [
|
||||
"collate",
|
||||
"collate/build",
|
||||
"internal/colltab",
|
||||
"internal/gen",
|
||||
"internal/tag",
|
||||
"internal/triegen",
|
||||
"internal/ucd",
|
||||
"language",
|
||||
"secure/bidirule",
|
||||
"transform",
|
||||
"unicode/bidi",
|
||||
"unicode/cldr",
|
||||
"unicode/norm",
|
||||
"unicode/rangetable"
|
||||
]
|
||||
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
|
||||
version = "v0.3.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "google.golang.org/genproto"
|
||||
packages = ["googleapis/rpc/status"]
|
||||
revision = "ff3583edef7de132f219f0efc00e097cabcc0ec0"
|
||||
|
||||
[[projects]]
|
||||
name = "google.golang.org/grpc"
|
||||
packages = [
|
||||
".",
|
||||
"balancer",
|
||||
"balancer/base",
|
||||
"balancer/roundrobin",
|
||||
"codes",
|
||||
"connectivity",
|
||||
"credentials",
|
||||
"encoding",
|
||||
"encoding/proto",
|
||||
"grpclog",
|
||||
"internal",
|
||||
"internal/backoff",
|
||||
"internal/channelz",
|
||||
"internal/grpcrand",
|
||||
"keepalive",
|
||||
"metadata",
|
||||
"naming",
|
||||
"peer",
|
||||
"resolver",
|
||||
"resolver/dns",
|
||||
"resolver/passthrough",
|
||||
"stats",
|
||||
"status",
|
||||
"tap",
|
||||
"transport"
|
||||
]
|
||||
revision = "168a6198bcb0ef175f7dacec0b8691fc141dc9b8"
|
||||
version = "v1.13.0"
|
||||
|
||||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
inputs-digest = "3d9f4c7de4665d6a45accfb3d5a5a6a6ae9b98229cea14e0a8dfba942a4e49f8"
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
|
@ -1,38 +0,0 @@
|
|||
# Gopkg.toml example
|
||||
#
|
||||
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
|
||||
# for detailed Gopkg.toml documentation.
|
||||
#
|
||||
# required = ["github.com/user/thing/cmd/thing"]
|
||||
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project"
|
||||
# version = "1.0.0"
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project2"
|
||||
# branch = "dev"
|
||||
# source = "github.com/myfork/project2"
|
||||
#
|
||||
# [[override]]
|
||||
# name = "github.com/x/y"
|
||||
# version = "2.4.0"
|
||||
#
|
||||
# [prune]
|
||||
# non-go = false
|
||||
# go-tests = true
|
||||
# unused-packages = true
|
||||
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/kubeflow/katib"
|
||||
version = "0.1.2-alpha"
|
||||
|
||||
[[constraint]]
|
||||
name = "google.golang.org/grpc"
|
||||
version = "1.13.0"
|
||||
|
||||
[prune]
|
||||
go-tests = true
|
||||
unused-packages = true
|
|
@ -1,53 +0,0 @@
|
|||
# Copyright 2017 The Kubernetes Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Requirements:
|
||||
# Make sure ${GOPATH}/src/github.com/kubeflow/examples
|
||||
# points at a checked out version of the examples repository.
|
||||
# Image repository the hyperparameter tuner is published to.
IMG = gcr.io/kubeflow-examples/gh-issue-hp-tuner
DIR := ${CURDIR}

# Location of this directory relative to the repository root. It matches the
# package path handed to "go build" in the build target.
REL_DIR := github_issue_summarization/hp-tune

# List any changed (unstaged) files under this directory only.
CHANGED_FILES := $(shell git diff-files --relative=$(REL_DIR))

ifeq ($(strip $(CHANGED_FILES)),)
# Changed files is empty; not dirty
# Don't include --dirty because it could be dirty if files outside the ones we care
# about changed.
TAG := $(shell date +v%Y%m%d)-$(shell git describe --always)
else
# Dirty tree: append a short hash of the diff so distinct local edits get distinct tags.
TAG := $(shell date +v%Y%m%d)-$(shell git describe --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6)
endif

# None of these targets produce a file of the same name. In particular "build"
# collides with the ./build output directory, so without .PHONY make could
# consider the target up to date and skip it.
.PHONY: all build push push-latest

all: build

# Compile the Go binary into ./build, bake it into the docker image and tag the
# image locally as both $(TAG) and latest.
# To build without the cache set the environment variable
# export DOCKER_BUILD_OPTS=--no-cache
build: Dockerfile git-issue-summarize-demo.go
	mkdir -p build
	dep ensure
	go build -i -o ./build/git-issue-summarize-demo ${GOPATH}/src/github.com/kubeflow/examples/$(REL_DIR)/git-issue-summarize-demo.go
	docker build ${DOCKER_BUILD_OPTS} -t $(IMG):$(TAG) .
	docker tag $(IMG):$(TAG) $(IMG):latest
	@echo Built $(IMG):$(TAG)

# Build and push only the versioned tag; the latest tag is not pushed here.
# This allows manual testing/inspection of the image first.
push: build
	gcloud docker -- push $(IMG):$(TAG)
	@echo Pushed $(IMG) with :$(TAG) tags

# Move the remote latest tag onto the image that was just pushed.
push-latest: push
	gcloud container images add-tag --quiet $(IMG):$(TAG) $(IMG):latest --verbosity=info
	echo created $(IMG):latest
|
|
@ -1,134 +0,0 @@
|
|||
# Experimental: HP Tuning for GitHub Issue Summarization
|
||||
|
||||
This directory contains experimental code for adding hyperparameter
|
||||
tuning support to the GitHub issue summarization example using Katib.
|
||||
|
||||
## Instructions
|
||||
|
||||
1. Deploy Kubeflow
|
||||
1. [Deploy Katib](https://github.com/kubeflow/kubeflow/blob/master/kubeflow/katib/README.md)
|
||||
1. Create the katib namespace
|
||||
|
||||
```
|
||||
kubectl create namespace katib
|
||||
```
|
||||
|
||||
* This is a known issue [kubeflow/katib#134](https://github.com/kubeflow/katib/issues/134)
|
||||
|
||||
1. Deploy the hyperparameter tuning job
|
||||
|
||||
```
|
||||
cd kubeflow/examples/github_issue_summarization/ks-kubeflow
|
||||
ks apply ${ENVIRONMENT} -c hp-tune
|
||||
```
|
||||
|
||||
## UI
|
||||
|
||||
You can inspect your models in the Katib web UI.
|
||||
|
||||
Open `http://${ENDPOINT}/katib/projects` in your browser.
|
||||
|
||||
* If you are using GKE and IAP then ENDPOINT is the endpoint you
|
||||
are serving Kubeflow on
|
||||
|
||||
* Otherwise you can port-forward to one of the AMBASSADOR pods
|
||||
and set ENDPOINT to the forwarded address:
|
||||
|
||||
```
|
||||
kubectl port-forward `kubectl get pods --selector=service=ambassador -o jsonpath='{.items[0].metadata.name}'` 8080:80
|
||||
ENDPOINT=localhost:8080
|
||||
```
|
||||
|
||||
The Results will be saved automatically.
|
||||
|
||||
## Description of git-issue-summarize-demo.go
|
||||
You can generate hyperparameters and evaluate them through the Katib API.
|
||||
The Katib APIs are gRPC services, so you can use any language that gRPC supports (e.g. Go, Python, C++).
|
||||
In a typical case, you call the APIs in the order shown below.
|
||||
After starting the trials, git-issue-summarize-demo.go waits until the status of every worker is Completed.
|
||||
|
||||
### CreateStudy
|
||||
First, you should create Study.
|
||||
The input is StudyConfig.
|
||||
It has Study name, owner, optimization info, and Parameter config(parameter name, min, and max).
|
||||
This function generates a unique ID for your study and stores the config to DB.
|
||||
Input:
|
||||
* StudyConfig:
|
||||
* Name: string
|
||||
* Owner: string
|
||||
* OptimizationType: enum(OptimizationType_MAXIMIZE, OptimizationType_MINIMIZE)
|
||||
* OptimizationGoal: float
|
||||
* DefaultSuggestionAlgorithm: string
|
||||
* DefaultEarlyStoppingAlgorithm: string
|
||||
* ObjectiveValueName: string
|
||||
* Metrics: List of Metrics name
|
||||
* ParameterConfigs: List of parameter config.
|
||||
Return:
|
||||
* StudyID
|
||||
|
||||
### SetSuggestionParameters
|
||||
Hyperparameters are generated by suggestion services with Parameter config of Study.
|
||||
You can set the specific config for each suggestion.
|
||||
Input:
|
||||
* StudyID: ID of your study.
|
||||
* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
|
||||
* SuggestionParameters: key-value pairs of parameters for the suggestion service. The expected keys differ for each suggestion algorithm.
|
||||
Return:
|
||||
* ParameterID
|
||||
|
||||
### GetSuggestions
|
||||
This function will create Trials(set of Parameters).
|
||||
Input:
|
||||
* StudyID: ID of your study.
|
||||
* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
|
||||
* RequestNumber: the number of trials you want to evaluate.
|
||||
* ParamID: ParameterID you got from SetSuggestionParameters func.
|
||||
Return:
|
||||
* List of Trials
|
||||
* TrialID
|
||||
* Parameter Sets
|
||||
|
||||
### RunTrial
|
||||
Start to evaluate Trial.
|
||||
When you use the kubernetes runtime, the pods are created with the specified config.
|
||||
Input:
|
||||
* StudyId: ID of your study.
|
||||
* TrialId: ID of Trial.
|
||||
* Runtime: worker type(e.g. kubernetes)
|
||||
* WorkerConfig: runtime config
|
||||
* Image: name of docker image
|
||||
* Command: running commands
|
||||
* GPU: number of GPU
|
||||
* Scheduler: scheduler name
|
||||
Return:
|
||||
* List of WorkerID
|
||||
|
||||
### GetMetrics
|
||||
Get metrics of running workers.
|
||||
Input:
|
||||
* StudyId: ID of your study.
|
||||
* WorkerIDs: List of worker ID you want to get metrics from.
|
||||
Return:
|
||||
* List of Metrics
|
||||
|
||||
### SaveModel
|
||||
Save the model data to KatibDB. After you call this function, you can view the model info in the Katib UI.
|
||||
When you call this API multiple times, only the Metrics will be updated.
|
||||
Input:
|
||||
* ModelInfo
|
||||
* StudyName
|
||||
* WorkerId
|
||||
* Parameters: List of Parameter
|
||||
* Metrics: List of Metrics
|
||||
* ModelPath: path to model saved. (PVCname:mountpath)
|
||||
* DataSet: information about the input data
|
||||
* Name
|
||||
* Path: path to input data.(PVCname:mountpath)
|
||||
|
||||
Return:
|
||||
|
||||
### GetWorkers
|
||||
You can get worker list and status of workers.
|
||||
Input:
|
||||
Return:
|
||||
* List of worker information
|
|
@ -1,210 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/kubeflow/katib/pkg/api"
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
var studyConfig = api.StudyConfig{
|
||||
Name: "grid-demo",
|
||||
Owner: "katib",
|
||||
OptimizationType: api.OptimizationType_MAXIMIZE,
|
||||
OptimizationGoal: 0.99,
|
||||
ObjectiveValueName: "Validation-accuracy",
|
||||
Metrics: []string{
|
||||
"accuracy",
|
||||
},
|
||||
ParameterConfigs: &api.StudyConfig_ParameterConfigs{
|
||||
Configs: []*api.ParameterConfig{
|
||||
&api.ParameterConfig{
|
||||
Name: "--learning_rate",
|
||||
ParameterType: api.ParameterType_DOUBLE,
|
||||
Feasible: &api.FeasibleSpace{
|
||||
Min: "0.005",
|
||||
Max: "0.5",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var gridConfig = []*api.SuggestionParameter{
|
||||
&api.SuggestionParameter{
|
||||
Name: "DefaultGrid",
|
||||
Value: "4",
|
||||
},
|
||||
&api.SuggestionParameter{
|
||||
Name: "--learning_rate",
|
||||
Value: "2",
|
||||
},
|
||||
}
|
||||
|
||||
// Command-line flags read by main after flag.Parse.
var (
	// managerAddr is the address of the Katib manager that main dials over gRPC.
	managerAddr = flag.String(
		"katib_endpoint",
		"127.0.0.1:6789",
		"Endpoint of manager default 127.0.0.1:6789",
	)

	// trainerImage is the docker image main sets as the worker image for every trial.
	trainerImage = flag.String(
		"trainer_image",
		"gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888",
		"The docker image containing the training code",
	)
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
conn, err := grpc.Dial(*managerAddr, grpc.WithInsecure())
|
||||
if err != nil {
|
||||
log.Fatalf("could not connect: %v", err)
|
||||
}
|
||||
|
||||
workerConfig := api.WorkerConfig{
|
||||
Image: *trainerImage,
|
||||
Command: []string{
|
||||
"python",
|
||||
"/workdir/train.py",
|
||||
"--sample_size",
|
||||
"20000",
|
||||
// "--input_data_gcs_bucket",
|
||||
// "katib-gi-example",
|
||||
// "--input_data_gcs_path",
|
||||
// "github-issue-summarization-data/github-issues.zip",
|
||||
// "--output_model_gcs_bucket",
|
||||
// "katib-gi-example",
|
||||
},
|
||||
Gpu: 0,
|
||||
Scheduler: "default-scheduler",
|
||||
}
|
||||
|
||||
defer conn.Close()
|
||||
ctx := context.Background()
|
||||
c := api.NewManagerClient(conn)
|
||||
createStudyreq := &api.CreateStudyRequest{
|
||||
StudyConfig: &studyConfig,
|
||||
}
|
||||
createStudyreply, err := c.CreateStudy(ctx, createStudyreq)
|
||||
if err != nil {
|
||||
log.Fatalf("StudyConfig Error %v", err)
|
||||
}
|
||||
studyId := createStudyreply.StudyId
|
||||
log.Printf("Study ID %s", studyId)
|
||||
getStudyreq := &api.GetStudyRequest{
|
||||
StudyId: studyId,
|
||||
}
|
||||
getStudyReply, err := c.GetStudy(ctx, getStudyreq)
|
||||
if err != nil {
|
||||
log.Fatalf("GetConfig Error %v", err)
|
||||
}
|
||||
log.Printf("Study ID %s StudyConf%v", studyId, getStudyReply.StudyConfig)
|
||||
setSuggesitonParameterRequest := &api.SetSuggestionParametersRequest{
|
||||
StudyId: studyId,
|
||||
SuggestionAlgorithm: "grid",
|
||||
SuggestionParameters: gridConfig,
|
||||
}
|
||||
setSuggesitonParameterReply, err := c.SetSuggestionParameters(ctx, setSuggesitonParameterRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("SetConfig Error %v", err)
|
||||
}
|
||||
log.Printf("Grid Prameter ID %s", setSuggesitonParameterReply.ParamId)
|
||||
getGridSuggestRequest := &api.GetSuggestionsRequest{
|
||||
StudyId: studyId,
|
||||
SuggestionAlgorithm: "grid",
|
||||
RequestNumber: 0,
|
||||
//RequestNumber=0 means get all grids.
|
||||
ParamId: setSuggesitonParameterReply.ParamId,
|
||||
}
|
||||
getGridSuggestReply, err := c.GetSuggestions(ctx, getGridSuggestRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("GetSuggestion Error %v", err)
|
||||
}
|
||||
log.Println("Get Grid Suggestions:")
|
||||
for _, t := range getGridSuggestReply.Trials {
|
||||
log.Printf("%v", t)
|
||||
}
|
||||
workerIds := make([]string, len(getGridSuggestReply.Trials))
|
||||
workerParameter := make(map[string][]*api.Parameter)
|
||||
for i, t := range getGridSuggestReply.Trials {
|
||||
ws := workerConfig
|
||||
rtr := &api.RunTrialRequest{
|
||||
StudyId: studyId,
|
||||
TrialId: t.TrialId,
|
||||
Runtime: "kubernetes",
|
||||
WorkerConfig: &ws,
|
||||
}
|
||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, "--output_model_gcs_path")
|
||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, "github-issue-summarization-data/"+t.TrialId+"output_model.h5")
|
||||
for _, p := range t.ParameterSet {
|
||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Name)
|
||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Value)
|
||||
}
|
||||
workerReply, err := c.RunTrial(ctx, rtr)
|
||||
if err != nil {
|
||||
log.Fatalf("RunTrial Error %v", err)
|
||||
}
|
||||
workerIds[i] = workerReply.WorkerId
|
||||
workerParameter[workerReply.WorkerId] = t.ParameterSet
|
||||
saveModelRequest := &api.SaveModelRequest{
|
||||
Model: &api.ModelInfo{
|
||||
StudyName: studyConfig.Name,
|
||||
WorkerId: workerReply.WorkerId,
|
||||
Parameters: t.ParameterSet,
|
||||
Metrics: []*api.Metrics{},
|
||||
ModelPath: "pvc:/Path/to/Model",
|
||||
},
|
||||
DataSet: &api.DataSetInfo{
|
||||
Name: "GitHub",
|
||||
Path: "/path/to/data",
|
||||
},
|
||||
}
|
||||
_, err = c.SaveModel(ctx, saveModelRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("SaveModel Error %v", err)
|
||||
}
|
||||
log.Printf("WorkerID %s start\n", workerReply.WorkerId)
|
||||
}
|
||||
for true {
|
||||
time.Sleep(10 * time.Second)
|
||||
getMetricsRequest := &api.GetMetricsRequest{
|
||||
StudyId: studyId,
|
||||
WorkerIds: workerIds,
|
||||
}
|
||||
getMetricsReply, err := c.GetMetrics(ctx, getMetricsRequest)
|
||||
if err != nil {
|
||||
log.Printf("GetMetErr %v", err)
|
||||
continue
|
||||
}
|
||||
for _, mls := range getMetricsReply.MetricsLogSets {
|
||||
if len(mls.MetricsLogs) > 0 {
|
||||
//Only Metrics can be updated.
|
||||
saveModelRequest := &api.SaveModelRequest{
|
||||
Model: &api.ModelInfo{
|
||||
StudyName: studyConfig.Name,
|
||||
WorkerId: mls.WorkerId,
|
||||
Metrics: []*api.Metrics{},
|
||||
},
|
||||
}
|
||||
for _, ml := range mls.MetricsLogs {
|
||||
if len(ml.Values) > 0 {
|
||||
log.Printf("WorkerID %s :\t Metrics Name %s Value %v", mls.WorkerId, ml.Name, ml.Values[len(ml.Values)-1])
|
||||
saveModelRequest.Model.Metrics = append(saveModelRequest.Model.Metrics, &api.Metrics{Name: ml.Name, Value: ml.Values[len(ml.Values)-1]})
|
||||
}
|
||||
}
|
||||
_, err = c.SaveModel(ctx, saveModelRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("SaveModel Error %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
getWorkerRequest := &api.GetWorkersRequest{StudyId: studyId}
|
||||
getWorkerReply, err := c.GetWorkers(ctx, getWorkerRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("GetWorker Error %v", err)
|
||||
}
|
||||
completeCount := 0
|
||||
for _, w := range getWorkerReply.Workers {
|
||||
if w.Status == api.State_COMPLETED {
|
||||
completeCount++
|
||||
}
|
||||
}
|
||||
if completeCount == len(getWorkerReply.Workers) {
|
||||
log.Printf("All Worker Completed!")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue