Delete obsolete HP tuning code. (#451)

* Katib no longer uses custom go programs. Instead it uses the new StudyJobController custom resource. * This code is no longer needed so delete it.
2018-12-29 19:00:14 -08:00 · 2018-12-29 19:00:14 -08:00 · 7990408207
parent 37dd52f49d
commit 7990408207
6 changed files with 0 additions and 539 deletions
--- a/github_issue_summarization/hp-tune/Dockerfile
+++ b/github_issue_summarization/hp-tune/Dockerfile
@ -1,4 +0,0 @@
-FROM golang:1.9
-
-RUN mkdir -p /opt/kubeflow
-COPY ./build/git-issue-summarize-demo /opt/kubeflow
--- a/github_issue_summarization/hp-tune/Gopkg.lock
+++ b/github_issue_summarization/hp-tune/Gopkg.lock
@ -1,100 +0,0 @@
-# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
-
-
-[[projects]]
-  name = "github.com/golang/protobuf"
-  packages = [
-    "proto",
-    "ptypes",
-    "ptypes/any",
-    "ptypes/duration",
-    "ptypes/timestamp"
-  ]
-  revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265"
-  version = "v1.1.0"
-
-[[projects]]
-  name = "github.com/kubeflow/katib"
-  packages = ["pkg/api"]
-  revision = "f24b520cc52920ae511aeea235636462ebc21d21"
-  version = "v0.1.2-alpha"
-
-[[projects]]
-  branch = "master"
-  name = "golang.org/x/net"
-  packages = [
-    "context",
-    "http/httpguts",
-    "http2",
-    "http2/hpack",
-    "idna",
-    "internal/timeseries",
-    "trace"
-  ]
-  revision = "4cb1c02c05b0e749b0365f61ae859a8e0cfceed9"
-
-[[projects]]
-  name = "golang.org/x/text"
-  packages = [
-    "collate",
-    "collate/build",
-    "internal/colltab",
-    "internal/gen",
-    "internal/tag",
-    "internal/triegen",
-    "internal/ucd",
-    "language",
-    "secure/bidirule",
-    "transform",
-    "unicode/bidi",
-    "unicode/cldr",
-    "unicode/norm",
-    "unicode/rangetable"
-  ]
-  revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
-  version = "v0.3.0"
-
-[[projects]]
-  branch = "master"
-  name = "google.golang.org/genproto"
-  packages = ["googleapis/rpc/status"]
-  revision = "ff3583edef7de132f219f0efc00e097cabcc0ec0"
-
-[[projects]]
-  name = "google.golang.org/grpc"
-  packages = [
-    ".",
-    "balancer",
-    "balancer/base",
-    "balancer/roundrobin",
-    "codes",
-    "connectivity",
-    "credentials",
-    "encoding",
-    "encoding/proto",
-    "grpclog",
-    "internal",
-    "internal/backoff",
-    "internal/channelz",
-    "internal/grpcrand",
-    "keepalive",
-    "metadata",
-    "naming",
-    "peer",
-    "resolver",
-    "resolver/dns",
-    "resolver/passthrough",
-    "stats",
-    "status",
-    "tap",
-    "transport"
-  ]
-  revision = "168a6198bcb0ef175f7dacec0b8691fc141dc9b8"
-  version = "v1.13.0"
-
-[solve-meta]
-  analyzer-name = "dep"
-  analyzer-version = 1
-  inputs-digest = "3d9f4c7de4665d6a45accfb3d5a5a6a6ae9b98229cea14e0a8dfba942a4e49f8"
-  solver-name = "gps-cdcl"
-  solver-version = 1
--- a/github_issue_summarization/hp-tune/Gopkg.toml
+++ b/github_issue_summarization/hp-tune/Gopkg.toml
@ -1,38 +0,0 @@
-# Gopkg.toml example
-#
-# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
-# for detailed Gopkg.toml documentation.
-#
-# required = ["github.com/user/thing/cmd/thing"]
-# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
-#
-# [[constraint]]
-#   name = "github.com/user/project"
-#   version = "1.0.0"
-#
-# [[constraint]]
-#   name = "github.com/user/project2"
-#   branch = "dev"
-#   source = "github.com/myfork/project2"
-#
-# [[override]]
-#   name = "github.com/x/y"
-#   version = "2.4.0"
-#
-# [prune]
-#   non-go = false
-#   go-tests = true
-#   unused-packages = true
-
-
-[[constraint]]
-  name = "github.com/kubeflow/katib"
-  version = "0.1.2-alpha"
-
-[[constraint]]
-  name = "google.golang.org/grpc"
-  version = "1.13.0"
-
-[prune]
-  go-tests = true
-  unused-packages = true
--- a/github_issue_summarization/hp-tune/Makefile
+++ b/github_issue_summarization/hp-tune/Makefile
@ -1,53 +0,0 @@
-# Copyright 2017 The Kubernetes Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# Requirements:
-#   Make sure ${GOPATH}/src/github.com/kubeflow/examples
-#   points at a checked out version of the examples repository.
-IMG = gcr.io/kubeflow-examples/gh-issue-hp-tuner
-DIR := ${CURDIR}
-
-# List files with unstaged changes, restricted to this example's directory.
-CHANGED_FILES := $(shell git diff-files --relative=github_issue_summarization/hp-tune)
-
-ifeq ($(strip $(CHANGED_FILES)),)
-# Changed files is empty; not dirty, so the tag is just date + commit.
-# Don't include --dirty because it could be dirty if files outside the ones we care
-# about changed.
-TAG := $(shell date +v%Y%m%d)-$(shell git describe --always)
-else
-TAG := $(shell date +v%Y%m%d)-$(shell git describe --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6)
-endif
-
-# `build` is also the name of the output directory, so goals must be phony to always run.
-.PHONY: all build push push-latest
-all: build
-
-# To build without the Docker cache: export DOCKER_BUILD_OPTS=--no-cache
-build: Dockerfile git-issue-summarize-demo.go
-	mkdir -p build
-	dep ensure
-	go build -i -o ./build/git-issue-summarize-demo ${GOPATH}/src/github.com/kubeflow/examples/github_issue_summarization/hp-tune/git-issue-summarize-demo.go
-	docker build ${DOCKER_BUILD_OPTS} -t $(IMG):$(TAG) .
-	docker tag $(IMG):$(TAG) $(IMG):latest
-	@echo Built $(IMG):$(TAG)
-
-# Push only the versioned tag; push-latest adds :latest afterwards so the image can be tested/inspected first.
-push: build
-	gcloud docker -- push $(IMG):$(TAG)
-	@echo Pushed $(IMG) with  :$(TAG) tags
-
-push-latest: push
-	gcloud container images add-tag --quiet $(IMG):$(TAG) $(IMG):latest --verbosity=info
-	@echo created $(IMG):latest
--- a/github_issue_summarization/hp-tune/README.md
+++ b/github_issue_summarization/hp-tune/README.md
@ -1,134 +0,0 @@
-# Experimental: HP Tuning for GitHub Issue Summarization
-
-This directory contains experimental code for adding hyperparameter
-tuning support to the GitHub issue summarization example using Katib.
-
-## Instructions
-
-1. Deploy Kubeflow
-1. [Deploy Katib](https://github.com/kubeflow/kubeflow/blob/master/kubeflow/katib/README.md)
-1. Create the katib namespace
-
-    ```
-    kubectl create namespace katib
-    ```
-
-    * This is a known issue [kubeflow/katib#134](https://github.com/kubeflow/katib/issues/134)
-
-1. Deploy the hyperparameter tuning job 
-
-   ```
-   cd kubeflow/examples/github_issue_summarization/ks-kubeflow
-   ks apply ${ENVIRONMENT} -c hp-tune
-   ```
-
-## UI
-
-You can inspect your models in the Katib web UI.
-
-Open `http://${ENDPOINT}/katib/projects` in a browser.
-
-    * If you are using GKE and IAP then ENDPOINT is the endpoint you
-      are serving Kubeflow on
-
-    * Otherwise you can port-forward to one of the AMBASSADOR pods
-      and set ENDPOINT to the forwarded local address
-
-      ```
-      kubectl port-forward `kubectl get pods --selector=service=ambassador -o jsonpath='{.items[0].metadata.name}'` 8080:80
-      ENDPOINT=localhost:8080
-      ```
-
-The Results will be saved automatically.
-
-## Description of git-issue-summarize-demo.go
-You can generate hyperparameters and evaluate them through the Katib API.
-The Katib API is gRPC-based, so you can use any language that gRPC supports (e.g. Go, Python, C++).
-Typically, you call the APIs in the order listed below.
-git-issue-summarize-demo.go then waits until the status of every worker is Completed.
-
-### CreateStudy
-First, you should create Study.
-The input is StudyConfig.
-It has Study name, owner, optimization info, and Parameter config(parameter name, min, and max).
-This function generates a unique ID for your study and stores the config to DB.
-Input:
-* StudyConfig:
-    * Name: string
-    * Owner: string
-    * OptimizationType: enum(OptimizationType_MAXIMIZE, OptimizationType_MINIMIZE)
-    * OptimizationGoal: float
-    * DefaultSuggestionAlgorithm: string
-    * DefaultEarlyStoppingAlgorithm: string
-    * ObjectiveValueName: string
-    * Metrics: List of Metrics name
-    * ParameterConfigs: List of parameter config.
-Return:
-* StudyID
-
-### SetSuggestionParameters
-Hyperparameters are generated by suggestion services with Parameter config of Study.
-You can set the specific config for each suggestion.
-Input: 
-* StudyID: ID of your study.
-* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
-* SuggestionParameters: key-value pairs of parameters for the suggestion service. The expected keys differ for each suggestion algorithm.
-Return:
-* ParameterID
-
-### GetSuggestions
-This function will create Trials(set of Parameters).
-Input:
-* StudyID: ID of your study.
-* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
-* RequestNumber: the number you want to evaluate.
-* ParamID: ParameterID you got from SetSuggestionParameters func.
-Return
-* List of Trials
-    * TrialID
-    * Parameter Sets
-
-### RunTrial
-Start to evaluate Trial.
-When you use the kubernetes runtime, the pods are created with the specified config.
-Input:
-* StudyId: ID of your study.
-* TrialId: ID of Trial.
-* Runtime: worker type(e.g. kubernetes)
-* WorkerConfig: runtime config
-    * Image: name of docker image
-    * Command: running commands
-    * GPU: number of GPU
-    * Scheduler: scheduler name
-Return:
-* List of WorkerID
-
-### GetMetrics
-Get metrics of running workers.
-Input:
-* StudyId: ID of your study.
-* WorkerIDs: List of worker ID you want to get metrics from.
-Return:
-* List of Metrics
-
-### SaveModel
-Save the model data to KatibDB. After you call this function, you can view the model info in the Katib UI.
-When you call this API multiple times, only Metrics will be updated.
-Input:
-* ModelInfo
-    * StudyName
-    * WorkerId
-    * Parameters: List of Parameter
-    * Metrics: List of Metrics
-    * ModelPath: path to model saved. (PVCname:mountpath)
-* DataSet: information about the input data
-    * Name
-    * Path: path to input data.(PVCname:mountpath)
-
-Return:
-    
-### GetWorkers
-You can get worker list and status of workers.
-Input:
-Return:
-* List of worker information
--- a/github_issue_summarization/hp-tune/git-issue-summarize-demo.go
+++ b/github_issue_summarization/hp-tune/git-issue-summarize-demo.go
@ -1,210 +0,0 @@
-package main
-
-import (
-	"context"
-	"flag"
-	"log"
-	"time"
-
-	"github.com/kubeflow/katib/pkg/api"
-	"google.golang.org/grpc"
-)
-
-var studyConfig = api.StudyConfig{ // Study definition that main sends to Katib in its CreateStudy request.
-	Name:               "grid-demo",
-	Owner:              "katib",
-	OptimizationType:   api.OptimizationType_MAXIMIZE,
-	OptimizationGoal:   0.99,
-	ObjectiveValueName: "Validation-accuracy",
-	Metrics: []string{
-		"accuracy",
-	},
-	ParameterConfigs: &api.StudyConfig_ParameterConfigs{
-		Configs: []*api.ParameterConfig{
-			&api.ParameterConfig{ // the only tunable; main appends each trial's name/value pair to the worker command
-				Name:          "--learning_rate",
-				ParameterType: api.ParameterType_DOUBLE,
-				Feasible: &api.FeasibleSpace{ // bounds are given as strings
-					Min: "0.005",
-					Max: "0.5",
-				},
-			},
-		},
-	},
-}
-
-var gridConfig = []*api.SuggestionParameter{ // Passed by main as SuggestionParameters for the "grid" suggestion algorithm.
-	&api.SuggestionParameter{
-		Name:  "DefaultGrid", // presumably the default number of grid points per parameter — TODO confirm against Katib
-		Value: "4",
-	},
-	&api.SuggestionParameter{
-		Name:  "--learning_rate", // presumably a per-parameter grid size override for --learning_rate — TODO confirm
-		Value: "2",
-	},
-}
-
-var managerAddr = flag.String("katib_endpoint", "127.0.0.1:6789", "Endpoint of manager default 127.0.0.1:6789") // address main dials with grpc.Dial
-var trainerImage = flag.String("trainer_image", "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888", "The docker image containing the training code") // used as WorkerConfig.Image in main
-
-func main() { // Creates a study, requests grid suggestions, starts one worker per trial, then polls metrics until every worker is completed.
-	flag.Parse()
-	conn, err := grpc.Dial(*managerAddr, grpc.WithInsecure()) // connection without transport security
-	if err != nil {
-		log.Fatalf("could not connect: %v", err)
-	}
-
-	workerConfig := api.WorkerConfig{ // template that is copied for every trial below
-		Image: *trainerImage,
-		Command: []string{
-			"python",
-			"/workdir/train.py",
-			"--sample_size",
-			"20000",
-			//		"--input_data_gcs_bucket",
-			//		"katib-gi-example",
-			//		"--input_data_gcs_path",
-			//		"github-issue-summarization-data/github-issues.zip",
-			//		"--output_model_gcs_bucket",
-			//		"katib-gi-example",
-		},
-		Gpu:       0,
-		Scheduler: "default-scheduler",
-	}
-
-	defer conn.Close() // note: the log.Fatalf paths below exit the process, so this close does not run on them
-	ctx := context.Background()
-	c := api.NewManagerClient(conn)
-	createStudyreq := &api.CreateStudyRequest{ // step 1: register the study
-		StudyConfig: &studyConfig,
-	}
-	createStudyreply, err := c.CreateStudy(ctx, createStudyreq)
-	if err != nil {
-		log.Fatalf("StudyConfig Error %v", err)
-	}
-	studyId := createStudyreply.StudyId
-	log.Printf("Study ID %s", studyId)
-	getStudyreq := &api.GetStudyRequest{ // step 2: read the study back and log its stored config
-		StudyId: studyId,
-	}
-	getStudyReply, err := c.GetStudy(ctx, getStudyreq)
-	if err != nil {
-		log.Fatalf("GetConfig Error %v", err)
-	}
-	log.Printf("Study ID %s StudyConf%v", studyId, getStudyReply.StudyConfig)
-	setSuggesitonParameterRequest := &api.SetSuggestionParametersRequest{ // step 3: configure the grid suggestion algorithm
-		StudyId:              studyId,
-		SuggestionAlgorithm:  "grid",
-		SuggestionParameters: gridConfig,
-	}
-	setSuggesitonParameterReply, err := c.SetSuggestionParameters(ctx, setSuggesitonParameterRequest)
-	if err != nil {
-		log.Fatalf("SetConfig Error %v", err)
-	}
-	log.Printf("Grid Prameter ID %s", setSuggesitonParameterReply.ParamId)
-	getGridSuggestRequest := &api.GetSuggestionsRequest{ // step 4: ask for the trials, using the ParamId returned above
-		StudyId:             studyId,
-		SuggestionAlgorithm: "grid",
-		RequestNumber:       0,
-		//RequestNumber=0 means get all grids.
-		ParamId: setSuggesitonParameterReply.ParamId,
-	}
-	getGridSuggestReply, err := c.GetSuggestions(ctx, getGridSuggestRequest)
-	if err != nil {
-		log.Fatalf("GetSuggestion Error %v", err)
-	}
-	log.Println("Get Grid Suggestions:")
-	for _, t := range getGridSuggestReply.Trials {
-		log.Printf("%v", t)
-	}
-	workerIds := make([]string, len(getGridSuggestReply.Trials)) // one worker ID slot per trial
-	workerParameter := make(map[string][]*api.Parameter) // worker ID -> parameter set; written below but not read again in this function
-	for i, t := range getGridSuggestReply.Trials { // step 5: start one worker per trial
-		ws := workerConfig // struct copy, so the appends below change ws.Command and not workerConfig.Command
-		rtr := &api.RunTrialRequest{
-			StudyId:      studyId,
-			TrialId:      t.TrialId,
-			Runtime:      "kubernetes",
-			WorkerConfig: &ws,
-		}
-		rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, "--output_model_gcs_path")
-		rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, "github-issue-summarization-data/"+t.TrialId+"output_model.h5") // output path embeds the trial ID
-		for _, p := range t.ParameterSet { // append each suggested parameter as "name value" arguments
-			rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Name)
-			rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Value)
-		}
-		workerReply, err := c.RunTrial(ctx, rtr)
-		if err != nil {
-			log.Fatalf("RunTrial Error %v", err)
-		}
-		workerIds[i] = workerReply.WorkerId
-		workerParameter[workerReply.WorkerId] = t.ParameterSet
-		saveModelRequest := &api.SaveModelRequest{ // initial model record: parameters set, metrics still empty
-			Model: &api.ModelInfo{
-				StudyName:  studyConfig.Name,
-				WorkerId:   workerReply.WorkerId,
-				Parameters: t.ParameterSet,
-				Metrics:    []*api.Metrics{},
-				ModelPath:  "pvc:/Path/to/Model", // NOTE(review): looks like a placeholder path — confirm
-			},
-			DataSet: &api.DataSetInfo{
-				Name: "GitHub",
-				Path: "/path/to/data", // NOTE(review): looks like a placeholder path — confirm
-			},
-		}
-		_, err = c.SaveModel(ctx, saveModelRequest)
-		if err != nil {
-			log.Fatalf("SaveModel Error %v", err)
-		}
-		log.Printf("WorkerID %s start\n", workerReply.WorkerId)
-	}
-	for true { // step 6: poll every 10 seconds until all workers report completed
-		time.Sleep(10 * time.Second)
-		getMetricsRequest := &api.GetMetricsRequest{
-			StudyId:   studyId,
-			WorkerIds: workerIds,
-		}
-		getMetricsReply, err := c.GetMetrics(ctx, getMetricsRequest)
-		if err != nil { // a metrics failure is only logged; the loop retries on the next iteration
-			log.Printf("GetMetErr %v", err)
-			continue
-		}
-		for _, mls := range getMetricsReply.MetricsLogSets {
-			if len(mls.MetricsLogs) > 0 { // skip workers that have no metric logs yet
-				//Only Metrics can be updated.
-				saveModelRequest := &api.SaveModelRequest{
-					Model: &api.ModelInfo{
-						StudyName: studyConfig.Name,
-						WorkerId:  mls.WorkerId,
-						Metrics:   []*api.Metrics{},
-					},
-				}
-				for _, ml := range mls.MetricsLogs {
-					if len(ml.Values) > 0 { // record only the last value of each metric
-						log.Printf("WorkerID %s :\t Metrics Name %s Value %v", mls.WorkerId, ml.Name, ml.Values[len(ml.Values)-1])
-						saveModelRequest.Model.Metrics = append(saveModelRequest.Model.Metrics, &api.Metrics{Name: ml.Name, Value: ml.Values[len(ml.Values)-1]})
-					}
-				}
-				_, err = c.SaveModel(ctx, saveModelRequest)
-				if err != nil {
-					log.Fatalf("SaveModel Error %v", err)
-				}
-			}
-		}
-		getWorkerRequest := &api.GetWorkersRequest{StudyId: studyId}
-		getWorkerReply, err := c.GetWorkers(ctx, getWorkerRequest)
-		if err != nil {
-			log.Fatalf("GetWorker Error %v", err)
-		}
-		completeCount := 0
-		for _, w := range getWorkerReply.Workers {
-			if w.Status == api.State_COMPLETED {
-				completeCount++
-			}
-		}
-		if completeCount == len(getWorkerReply.Workers) { // also true when the worker list is empty (0 == 0)
-			log.Printf("All Worker Completed!")
-			break
-		}
-	}
-}