Delete obsolete HP tuning code. (#451)

* Katib no longer uses custom go programs. Instead it uses the new
  StudyJobController custom resource.

* This code is no longer needed so delete it.
This commit is contained in:
Jeremy Lewi 2018-12-29 19:00:14 -08:00 committed by Kubernetes Prow Robot
parent 37dd52f49d
commit 7990408207
6 changed files with 0 additions and 539 deletions

View File

@ -1,4 +0,0 @@
# Base image: the golang 1.9 image.
FROM golang:1.9
# Create the directory that will hold the demo binary.
RUN mkdir -p /opt/kubeflow
# Copy the binary from the local ./build directory (it must already exist
# in the build context) into /opt/kubeflow inside the image.
COPY ./build/git-issue-summarize-demo /opt/kubeflow

View File

@ -1,100 +0,0 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
[[projects]]
name = "github.com/golang/protobuf"
packages = [
"proto",
"ptypes",
"ptypes/any",
"ptypes/duration",
"ptypes/timestamp"
]
revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265"
version = "v1.1.0"
[[projects]]
name = "github.com/kubeflow/katib"
packages = ["pkg/api"]
revision = "f24b520cc52920ae511aeea235636462ebc21d21"
version = "v0.1.2-alpha"
[[projects]]
branch = "master"
name = "golang.org/x/net"
packages = [
"context",
"http/httpguts",
"http2",
"http2/hpack",
"idna",
"internal/timeseries",
"trace"
]
revision = "4cb1c02c05b0e749b0365f61ae859a8e0cfceed9"
[[projects]]
name = "golang.org/x/text"
packages = [
"collate",
"collate/build",
"internal/colltab",
"internal/gen",
"internal/tag",
"internal/triegen",
"internal/ucd",
"language",
"secure/bidirule",
"transform",
"unicode/bidi",
"unicode/cldr",
"unicode/norm",
"unicode/rangetable"
]
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
version = "v0.3.0"
[[projects]]
branch = "master"
name = "google.golang.org/genproto"
packages = ["googleapis/rpc/status"]
revision = "ff3583edef7de132f219f0efc00e097cabcc0ec0"
[[projects]]
name = "google.golang.org/grpc"
packages = [
".",
"balancer",
"balancer/base",
"balancer/roundrobin",
"codes",
"connectivity",
"credentials",
"encoding",
"encoding/proto",
"grpclog",
"internal",
"internal/backoff",
"internal/channelz",
"internal/grpcrand",
"keepalive",
"metadata",
"naming",
"peer",
"resolver",
"resolver/dns",
"resolver/passthrough",
"stats",
"status",
"tap",
"transport"
]
revision = "168a6198bcb0ef175f7dacec0b8691fc141dc9b8"
version = "v1.13.0"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "3d9f4c7de4665d6a45accfb3d5a5a6a6ae9b98229cea14e0a8dfba942a4e49f8"
solver-name = "gps-cdcl"
solver-version = 1

View File

@ -1,38 +0,0 @@
# Gopkg.toml example
#
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
# for detailed Gopkg.toml documentation.
#
# required = ["github.com/user/thing/cmd/thing"]
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
#
# [[constraint]]
# name = "github.com/user/project"
# version = "1.0.0"
#
# [[constraint]]
# name = "github.com/user/project2"
# branch = "dev"
# source = "github.com/myfork/project2"
#
# [[override]]
# name = "github.com/x/y"
# version = "2.4.0"
#
# [prune]
# non-go = false
# go-tests = true
# unused-packages = true
[[constraint]]
name = "github.com/kubeflow/katib"
version = "0.1.2-alpha"
[[constraint]]
name = "google.golang.org/grpc"
version = "1.13.0"
[prune]
go-tests = true
unused-packages = true

View File

@ -1,53 +0,0 @@
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requirements:
# Make sure ${GOPATH}/src/github.com/kubeflow/examples
# points at a checked out version of the examples repository.
# Image name (without tag) used for every docker build/tag/push below.
IMG = gcr.io/kubeflow-examples/gh-issue-hp-tuner
# Directory make was invoked from; not referenced by any rule visible in this file.
DIR := ${CURDIR}
# List any changed files.
# NOTE(review): the filter path examples/GKEDemo does not match the hp-tune
# source path used by the build rule below -- confirm this is intended.
CHANGED_FILES := $(shell git diff-files --relative=examples/GKEDemo)
ifeq ($(strip $(CHANGED_FILES)),)
# Changed files is empty; not dirty
# Don't include --dirty because it could be dirty if files outside the ones we care
# about changed.
# TAG = v<YYYYMMDD>-<git describe>
TAG := $(shell date +v%Y%m%d)-$(shell git describe --always)
else
# Dirty tree: TAG additionally carries git describe's --dirty marker and the
# first 6 characters of the SHA-256 of the current `git diff` output.
TAG := $(shell date +v%Y%m%d)-$(shell git describe --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6)
endif
# Default target.
all: build
# Compile the demo binary into ./build, build the docker image as $(IMG):$(TAG),
# and also tag it $(IMG):latest in the local docker daemon.
# To build without the cache set the environment variable
# export DOCKER_BUILD_OPTS=--no-cache
build: Dockerfile git-issue-summarize-demo.go
mkdir -p build
dep ensure
go build -i -o ./build/git-issue-summarize-demo ${GOPATH}/src/github.com/kubeflow/examples/github_issue_summarization/hp-tune/git-issue-summarize-demo.go
docker build ${DOCKER_BUILD_OPTS} -t $(IMG):$(TAG) .
docker tag $(IMG):$(TAG) $(IMG):latest
@echo Built $(IMG):$(TAG)
# Build and push only the $(TAG) tag; the latest tag is not pushed here.
# This allows manual testing/inspection of the image first (push-latest
# attaches the latest tag afterwards).
push: build
gcloud docker -- push $(IMG):$(TAG)
@echo Pushed $(IMG) with :$(TAG) tags
# Push the image, then add the latest tag to $(IMG):$(TAG) with gcloud.
push-latest: push
gcloud container images add-tag --quiet $(IMG):$(TAG) $(IMG):latest --verbosity=info
echo created $(IMG):latest

View File

@ -1,134 +0,0 @@
# Experimental: HP Tuning for GitHub Issue Summarization
This directory contains experimental code for adding hyperparameter
tuning support to the GitHub issue summarization example using Katib.
## Instructions
1. Deploy Kubeflow
1. [Deploy Katib](https://github.com/kubeflow/kubeflow/blob/master/kubeflow/katib/README.md)
1. Create the katib namespace
```
kubectl create namespace katib
```
* This is a known issue [kubeflow/katib#134](https://github.com/kubeflow/katib/issues/134)
1. Deploy the hyperparameter tuning job
```
cd kubeflow/examples/github_issue_summarization/ks-kubeflow
ks apply ${ENVIRONMENT} -c hp-tune
```
## UI
You can inspect your models in the Katib web UI.
Open `http://${ENDPOINT}/katib/projects` in a browser.
* If you are using GKE and IAP then ENDPOINT is the endpoint you
are serving Kubeflow on
* Otherwise you can port-forward to one of the Ambassador pods
and set ENDPOINT to the forwarded address
```
kubectl port-forward `kubectl get pods --selector=service=ambassador -o jsonpath='{.items[0].metadata.name}'` 8080:80
ENDPOINT=localhost:8080
```
The Results will be saved automatically.
## Description of git-issue-summarize-demo.go
You can generate hyperparameters and evaluate them through the Katib API.
The Katib API is served over gRPC, so you can call it from any language that gRPC supports (e.g. Go, Python, C++).
In a typical case, you call the APIs in the order below.
After starting the trials, git-issue-summarize-demo.go waits until the status of every worker is Completed.
### CreateStudy
First, you should create Study.
The input is StudyConfig.
It has Study name, owner, optimization info, and Parameter config(parameter name, min, and max).
This function generates a unique ID for your study and stores the config to DB.
Input:
* StudyConfig:
* Name: string
* Owner: string
* OptimizationType: enum(OptimizationType_MAXIMIZE, OptimizationType_MINIMIZE)
* OptimizationGoal: float
* DefaultSuggestionAlgorithm: string
* DefaultEarlyStoppingAlgorithm: string
* ObjectiveValueName: string
* Metrics: List of Metrics name
* ParameterConfigs: List of parameter config.
Return:
* StudyID
### SetSuggestionParameters
Hyperparameters are generated by suggestion services with Parameter config of Study.
You can set the specific config for each suggestion.
Input:
* StudyID: ID of your study.
* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
* SuggestionParameters: key-value pairs of parameters for the suggestion service. The expected keys differ for each suggestion algorithm.
Return:
* ParameterID
### GetSuggestions
This function will create Trials(set of Parameters).
Input:
* StudyID: ID of your study.
* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
* RequestNumber: the number you want to evaluate.
* ParamID: ParameterID you got from SetSuggestionParameters func.
Return
* List of Trials
* TrialID
* Parameter Sets
### RunTrial
Start to evaluate Trial.
When you use the kubernetes runtime, pods are created with the specified config.
Input:
* StudyId: ID of your study.
* TrialId: ID of Trial.
* Runtime: worker type(e.g. kubernetes)
* WorkerConfig: runtime config
* Image: name of docker image
* Command: running commands
* GPU: number of GPU
* Scheduler: scheduler name
Return:
* List of WorkerID
### GetMetrics
Get metrics of running workers.
Input:
* StudyId: ID of your study.
* WorkerIDs: List of worker ID you want to get metrics from.
Return:
* List of Metrics
### SaveModel
Save the model data to KatibDB. After you call this function, you can view the model info in the Katib UI.
When you call this API multiple times, only the Metrics are updated.
Input:
* ModelInfo
* StudyName
* WorkerId
* Parameters: List of Parameter
* Metrics: List of Metrics
* ModelPath: path to model saved. (PVCname:mountpath)
* DataSet: information about the input data
* Name
* Path: path to input data.(PVCname:mountpath)
Return:
### GetWorkers
You can get worker list and status of workers.
Input:
Return:
* List of worker information

View File

@ -1,210 +0,0 @@
package main
import (
"context"
"flag"
"log"
"time"
"github.com/kubeflow/katib/pkg/api"
"google.golang.org/grpc"
)
// studyConfig is the study definition that main submits via CreateStudy.
// It asks Katib to maximize "Validation-accuracy" toward a goal of 0.99 and
// declares a single double-typed parameter, --learning_rate, whose feasible
// range is 0.005 to 0.5.
var studyConfig = api.StudyConfig{
	Name:               "grid-demo",
	Owner:              "katib",
	OptimizationType:   api.OptimizationType_MAXIMIZE,
	OptimizationGoal:   0.99,
	ObjectiveValueName: "Validation-accuracy",
	Metrics:            []string{"accuracy"},
	ParameterConfigs: &api.StudyConfig_ParameterConfigs{
		Configs: []*api.ParameterConfig{
			{
				Name:          "--learning_rate",
				ParameterType: api.ParameterType_DOUBLE,
				Feasible:      &api.FeasibleSpace{Min: "0.005", Max: "0.5"},
			},
		},
	},
}
// gridConfig lists the suggestion parameters that main registers for the
// "grid" suggestion algorithm through SetSuggestionParameters.
// NOTE(review): presumably "DefaultGrid" is the default number of grid points
// and the "--learning_rate" entry overrides it for that parameter -- confirm
// against the Katib grid suggestion service.
var gridConfig = []*api.SuggestionParameter{
	{Name: "DefaultGrid", Value: "4"},
	{Name: "--learning_rate", Value: "2"},
}
// Command-line flags; their values are valid once main has called flag.Parse.
var (
	// managerAddr is the address main dials to reach the Katib manager.
	managerAddr = flag.String("katib_endpoint", "127.0.0.1:6789", "Endpoint of manager default 127.0.0.1:6789")

	// trainerImage is the docker image set on every worker's WorkerConfig.
	trainerImage = flag.String("trainer_image", "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888", "The docker image containing the training code")
)
// main runs one grid-search study against the Katib manager over gRPC.
//
// Steps, in order:
//  1. Dial the manager at -katib_endpoint and create the study from studyConfig.
//  2. Read the study back and log its stored config.
//  3. Register gridConfig for the "grid" algorithm and fetch all suggested trials.
//  4. For every trial, start a kubernetes worker whose command line carries the
//     trial's parameters, and save an initial model record for it.
//  5. Every 10 seconds, fetch metrics for the started workers, save the latest
//     value of each metric, and stop once every worker returned by GetWorkers
//     is in State_COMPLETED.
//
// Any RPC error terminates the process via log.Fatalf, except a GetMetrics
// error, which is logged and retried on the next poll.
func main() {
	flag.Parse()

	conn, err := grpc.Dial(*managerAddr, grpc.WithInsecure())
	if err != nil {
		log.Fatalf("could not connect: %v", err)
	}
	defer conn.Close()

	// Template worker config; each trial copies it and appends its own arguments.
	baseWorker := api.WorkerConfig{
		Image: *trainerImage,
		Command: []string{
			"python",
			"/workdir/train.py",
			"--sample_size",
			"20000",
			// Optional arguments, currently disabled:
			// "--input_data_gcs_bucket",
			// "katib-gi-example",
			// "--input_data_gcs_path",
			// "github-issue-summarization-data/github-issues.zip",
			// "--output_model_gcs_bucket",
			// "katib-gi-example",
		},
		Gpu:       0,
		Scheduler: "default-scheduler",
	}

	ctx := context.Background()
	client := api.NewManagerClient(conn)

	// Create the study and remember its ID.
	created, err := client.CreateStudy(ctx, &api.CreateStudyRequest{StudyConfig: &studyConfig})
	if err != nil {
		log.Fatalf("StudyConfig Error %v", err)
	}
	studyID := created.StudyId
	log.Printf("Study ID %s", studyID)

	// Read the study back to log the config as stored by the manager.
	stored, err := client.GetStudy(ctx, &api.GetStudyRequest{StudyId: studyID})
	if err != nil {
		log.Fatalf("GetConfig Error %v", err)
	}
	log.Printf("Study ID %s StudyConf%v", studyID, stored.StudyConfig)

	// Register the grid parameters for this study.
	registered, err := client.SetSuggestionParameters(ctx, &api.SetSuggestionParametersRequest{
		StudyId:              studyID,
		SuggestionAlgorithm:  "grid",
		SuggestionParameters: gridConfig,
	})
	if err != nil {
		log.Fatalf("SetConfig Error %v", err)
	}
	log.Printf("Grid Prameter ID %s", registered.ParamId)

	// RequestNumber=0 means get all grids.
	suggested, err := client.GetSuggestions(ctx, &api.GetSuggestionsRequest{
		StudyId:             studyID,
		SuggestionAlgorithm: "grid",
		RequestNumber:       0,
		ParamId:             registered.ParamId,
	})
	if err != nil {
		log.Fatalf("GetSuggestion Error %v", err)
	}
	trials := suggested.Trials
	log.Println("Get Grid Suggestions:")
	for _, trial := range trials {
		log.Printf("%v", trial)
	}

	// Launch one worker per trial.
	workerIDs := make([]string, len(trials))
	paramsByWorker := make(map[string][]*api.Parameter)
	for idx, trial := range trials {
		cfg := baseWorker
		cfg.Command = append(cfg.Command,
			"--output_model_gcs_path",
			"github-issue-summarization-data/"+trial.TrialId+"output_model.h5",
		)
		for _, param := range trial.ParameterSet {
			cfg.Command = append(cfg.Command, param.Name, param.Value)
		}

		started, err := client.RunTrial(ctx, &api.RunTrialRequest{
			StudyId:      studyID,
			TrialId:      trial.TrialId,
			Runtime:      "kubernetes",
			WorkerConfig: &cfg,
		})
		if err != nil {
			log.Fatalf("RunTrial Error %v", err)
		}
		workerID := started.WorkerId
		workerIDs[idx] = workerID
		paramsByWorker[workerID] = trial.ParameterSet

		// Record the model up front with its parameters and no metrics yet.
		initial := &api.SaveModelRequest{
			Model: &api.ModelInfo{
				StudyName:  studyConfig.Name,
				WorkerId:   workerID,
				Parameters: trial.ParameterSet,
				Metrics:    []*api.Metrics{},
				ModelPath:  "pvc:/Path/to/Model",
			},
			DataSet: &api.DataSetInfo{
				Name: "GitHub",
				Path: "/path/to/data",
			},
		}
		if _, err := client.SaveModel(ctx, initial); err != nil {
			log.Fatalf("SaveModel Error %v", err)
		}
		log.Printf("WorkerID %s start\n", workerID)
	}

	// Poll until every worker of the study has completed.
	for {
		time.Sleep(10 * time.Second)

		polled, err := client.GetMetrics(ctx, &api.GetMetricsRequest{
			StudyId:   studyID,
			WorkerIds: workerIDs,
		})
		if err != nil {
			// Not fatal: try again on the next tick.
			log.Printf("GetMetErr %v", err)
			continue
		}

		for _, logSet := range polled.MetricsLogSets {
			if len(logSet.MetricsLogs) == 0 {
				continue
			}
			// Only Metrics can be updated.
			update := &api.SaveModelRequest{
				Model: &api.ModelInfo{
					StudyName: studyConfig.Name,
					WorkerId:  logSet.WorkerId,
					Metrics:   []*api.Metrics{},
				},
			}
			for _, metricLog := range logSet.MetricsLogs {
				if len(metricLog.Values) == 0 {
					continue
				}
				// Only the most recent value of each metric is saved.
				latest := metricLog.Values[len(metricLog.Values)-1]
				log.Printf("WorkerID %s :\t Metrics Name %s Value %v", logSet.WorkerId, metricLog.Name, latest)
				update.Model.Metrics = append(update.Model.Metrics, &api.Metrics{Name: metricLog.Name, Value: latest})
			}
			if _, err := client.SaveModel(ctx, update); err != nil {
				log.Fatalf("SaveModel Error %v", err)
			}
		}

		listed, err := client.GetWorkers(ctx, &api.GetWorkersRequest{StudyId: studyID})
		if err != nil {
			log.Fatalf("GetWorker Error %v", err)
		}
		finished := 0
		for _, worker := range listed.Workers {
			if worker.Status == api.State_COMPLETED {
				finished++
			}
		}
		if finished == len(listed.Workers) {
			log.Printf("All Worker Completed!")
			break
		}
	}
}