mirror of https://github.com/kubeflow/examples.git
Delete obsolete HP tuning code. (#451)
* Katib no longer uses custom go programs. Instead it uses the new StudyJobController custom resource. * This code is no longer needed so delete it.
This commit is contained in:
parent
37dd52f49d
commit
7990408207
|
|
@ -1,4 +0,0 @@
|
||||||
FROM golang:1.9
|
|
||||||
|
|
||||||
RUN mkdir -p /opt/kubeflow
|
|
||||||
COPY ./build/git-issue-summarize-demo /opt/kubeflow
|
|
||||||
|
|
@ -1,100 +0,0 @@
|
||||||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
|
|
||||||
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
name = "github.com/golang/protobuf"
|
|
||||||
packages = [
|
|
||||||
"proto",
|
|
||||||
"ptypes",
|
|
||||||
"ptypes/any",
|
|
||||||
"ptypes/duration",
|
|
||||||
"ptypes/timestamp"
|
|
||||||
]
|
|
||||||
revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265"
|
|
||||||
version = "v1.1.0"
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
name = "github.com/kubeflow/katib"
|
|
||||||
packages = ["pkg/api"]
|
|
||||||
revision = "f24b520cc52920ae511aeea235636462ebc21d21"
|
|
||||||
version = "v0.1.2-alpha"
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
branch = "master"
|
|
||||||
name = "golang.org/x/net"
|
|
||||||
packages = [
|
|
||||||
"context",
|
|
||||||
"http/httpguts",
|
|
||||||
"http2",
|
|
||||||
"http2/hpack",
|
|
||||||
"idna",
|
|
||||||
"internal/timeseries",
|
|
||||||
"trace"
|
|
||||||
]
|
|
||||||
revision = "4cb1c02c05b0e749b0365f61ae859a8e0cfceed9"
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
name = "golang.org/x/text"
|
|
||||||
packages = [
|
|
||||||
"collate",
|
|
||||||
"collate/build",
|
|
||||||
"internal/colltab",
|
|
||||||
"internal/gen",
|
|
||||||
"internal/tag",
|
|
||||||
"internal/triegen",
|
|
||||||
"internal/ucd",
|
|
||||||
"language",
|
|
||||||
"secure/bidirule",
|
|
||||||
"transform",
|
|
||||||
"unicode/bidi",
|
|
||||||
"unicode/cldr",
|
|
||||||
"unicode/norm",
|
|
||||||
"unicode/rangetable"
|
|
||||||
]
|
|
||||||
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
|
|
||||||
version = "v0.3.0"
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
branch = "master"
|
|
||||||
name = "google.golang.org/genproto"
|
|
||||||
packages = ["googleapis/rpc/status"]
|
|
||||||
revision = "ff3583edef7de132f219f0efc00e097cabcc0ec0"
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
name = "google.golang.org/grpc"
|
|
||||||
packages = [
|
|
||||||
".",
|
|
||||||
"balancer",
|
|
||||||
"balancer/base",
|
|
||||||
"balancer/roundrobin",
|
|
||||||
"codes",
|
|
||||||
"connectivity",
|
|
||||||
"credentials",
|
|
||||||
"encoding",
|
|
||||||
"encoding/proto",
|
|
||||||
"grpclog",
|
|
||||||
"internal",
|
|
||||||
"internal/backoff",
|
|
||||||
"internal/channelz",
|
|
||||||
"internal/grpcrand",
|
|
||||||
"keepalive",
|
|
||||||
"metadata",
|
|
||||||
"naming",
|
|
||||||
"peer",
|
|
||||||
"resolver",
|
|
||||||
"resolver/dns",
|
|
||||||
"resolver/passthrough",
|
|
||||||
"stats",
|
|
||||||
"status",
|
|
||||||
"tap",
|
|
||||||
"transport"
|
|
||||||
]
|
|
||||||
revision = "168a6198bcb0ef175f7dacec0b8691fc141dc9b8"
|
|
||||||
version = "v1.13.0"
|
|
||||||
|
|
||||||
[solve-meta]
|
|
||||||
analyzer-name = "dep"
|
|
||||||
analyzer-version = 1
|
|
||||||
inputs-digest = "3d9f4c7de4665d6a45accfb3d5a5a6a6ae9b98229cea14e0a8dfba942a4e49f8"
|
|
||||||
solver-name = "gps-cdcl"
|
|
||||||
solver-version = 1
|
|
||||||
|
|
@ -1,38 +0,0 @@
|
||||||
# Gopkg.toml example
|
|
||||||
#
|
|
||||||
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
|
|
||||||
# for detailed Gopkg.toml documentation.
|
|
||||||
#
|
|
||||||
# required = ["github.com/user/thing/cmd/thing"]
|
|
||||||
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
|
|
||||||
#
|
|
||||||
# [[constraint]]
|
|
||||||
# name = "github.com/user/project"
|
|
||||||
# version = "1.0.0"
|
|
||||||
#
|
|
||||||
# [[constraint]]
|
|
||||||
# name = "github.com/user/project2"
|
|
||||||
# branch = "dev"
|
|
||||||
# source = "github.com/myfork/project2"
|
|
||||||
#
|
|
||||||
# [[override]]
|
|
||||||
# name = "github.com/x/y"
|
|
||||||
# version = "2.4.0"
|
|
||||||
#
|
|
||||||
# [prune]
|
|
||||||
# non-go = false
|
|
||||||
# go-tests = true
|
|
||||||
# unused-packages = true
|
|
||||||
|
|
||||||
|
|
||||||
[[constraint]]
|
|
||||||
name = "github.com/kubeflow/katib"
|
|
||||||
version = "0.1.2-alpha"
|
|
||||||
|
|
||||||
[[constraint]]
|
|
||||||
name = "google.golang.org/grpc"
|
|
||||||
version = "1.13.0"
|
|
||||||
|
|
||||||
[prune]
|
|
||||||
go-tests = true
|
|
||||||
unused-packages = true
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
||||||
# Copyright 2017 The Kubernetes Authors.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
# Requirements:
|
|
||||||
# Make sure ${GOPATH}/src/github.com/kubeflow/examples
|
|
||||||
# points at a checked out version of the examples repository.
|
|
||||||
IMG = gcr.io/kubeflow-examples/gh-issue-hp-tuner
|
|
||||||
DIR := ${CURDIR}
|
|
||||||
|
|
||||||
# List any changed files.
|
|
||||||
CHANGED_FILES := $(shell git diff-files --relative=examples/GKEDemo)
|
|
||||||
|
|
||||||
ifeq ($(strip $(CHANGED_FILES)),)
|
|
||||||
# Changed files is empty; not dirty
|
|
||||||
# Don't include --dirty because it could be dirty if files outside the ones we care
|
|
||||||
# about changed.
|
|
||||||
TAG := $(shell date +v%Y%m%d)-$(shell git describe --always)
|
|
||||||
else
|
|
||||||
TAG := $(shell date +v%Y%m%d)-$(shell git describe --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6)
|
|
||||||
endif
|
|
||||||
|
|
||||||
all: build
|
|
||||||
|
|
||||||
# To build without the cache set the environment variable
|
|
||||||
# export DOCKER_BUILD_OPTS=--no-cache
|
|
||||||
build: Dockerfile git-issue-summarize-demo.go
|
|
||||||
mkdir -p build
|
|
||||||
dep ensure
|
|
||||||
go build -i -o ./build/git-issue-summarize-demo ${GOPATH}/src/github.com/kubeflow/examples/github_issue_summarization/hp-tune/git-issue-summarize-demo.go
|
|
||||||
docker build ${DOCKER_BUILD_OPTS} -t $(IMG):$(TAG) .
|
|
||||||
docker tag $(IMG):$(TAG) $(IMG):latest
|
|
||||||
@echo Built $(IMG):$(TAG)
|
|
||||||
|
|
||||||
# Push only the versioned tag; the latest tag is not updated in the registry. This allows manual testing/inspection of the image
|
|
||||||
# first.
|
|
||||||
push: build
|
|
||||||
gcloud docker -- push $(IMG):$(TAG)
|
|
||||||
@echo Pushed $(IMG) with :$(TAG) tags
|
|
||||||
|
|
||||||
push-latest: push
|
|
||||||
gcloud container images add-tag --quiet $(IMG):$(TAG) $(IMG):latest --verbosity=info
|
|
||||||
echo created $(IMG):latest
|
|
||||||
|
|
@ -1,134 +0,0 @@
|
||||||
# Experimental: HP Tuning for GitHub Issue Summarization
|
|
||||||
|
|
||||||
This directory contains experimental code for adding hyperparameter
|
|
||||||
tuning support to the GitHub issue summarization example using Katib.
|
|
||||||
|
|
||||||
## Instructions
|
|
||||||
|
|
||||||
1. Deploy Kubeflow
|
|
||||||
1. [Deploy Katib](https://github.com/kubeflow/kubeflow/blob/master/kubeflow/katib/README.md)
|
|
||||||
1. Create the katib namespace
|
|
||||||
|
|
||||||
```
|
|
||||||
kubectl create namespace katib
|
|
||||||
```
|
|
||||||
|
|
||||||
* This is a known issue [kubeflow/katib#134](https://github.com/kubeflow/katib/issues/134)
|
|
||||||
|
|
||||||
1. Deploy the hyperparameter tuning job
|
|
||||||
|
|
||||||
```
|
|
||||||
cd kubeflow/examples/github_issue_summarization/ks-kubeflow
|
|
||||||
ks apply ${ENVIRONMENT} -c hp-tune
|
|
||||||
```
|
|
||||||
|
|
||||||
## UI
|
|
||||||
|
|
||||||
You can inspect your models in the Katib web UI.
|
|
||||||
|
|
||||||
Open `http://${ENDPOINT}/katib/projects` in your browser.
|
|
||||||
|
|
||||||
* If you are using GKE and IAP then ENDPOINT is the endpoint you
|
|
||||||
are serving Kubeflow on
|
|
||||||
|
|
||||||
* Otherwise you can port-forward to one of the AMBASSADOR pods
|
|
||||||
and set ENDPOINT to the forwarded local address:
|
|
||||||
|
|
||||||
```
|
|
||||||
kubectl port-forward `kubectl get pods --selector=service=ambassador -o jsonpath='{.items[0].metadata.name}'` 8080:80
|
|
||||||
ENDPOINT=localhost:8080
|
|
||||||
```
|
|
||||||
|
|
||||||
The results are saved automatically.
|
|
||||||
|
|
||||||
## Description of git-issue-summarize-demo.go
|
|
||||||
You can generate hyperparameters and evaluate them through the Katib API.
|
|
||||||
The Katib API is exposed over gRPC, so you can call it from any language that gRPC supports (e.g. Go, Python, C++).
|
|
||||||
In a typical case, you call the APIs in the order listed below.
|
|
||||||
After starting the workers, git-issue-summarize-demo.go waits until the status of every worker is Completed.
|
|
||||||
|
|
||||||
### CreateStudy
|
|
||||||
First, you should create Study.
|
|
||||||
The input is StudyConfig.
|
|
||||||
It has Study name, owner, optimization info, and Parameter config(parameter name, min, and max).
|
|
||||||
This function generates a unique ID for your study and stores the config to DB.
|
|
||||||
Input:
|
|
||||||
* StudyConfig:
|
|
||||||
* Name: string
|
|
||||||
* Owner: string
|
|
||||||
* OptimizationType: enum(OptimizationType_MAXIMIZE, OptimizationType_MINIMIZE)
|
|
||||||
* OptimizationGoal: float
|
|
||||||
* DefaultSuggestionAlgorithm: string
|
|
||||||
* DefaultEarlyStoppingAlgorithm: string
|
|
||||||
* ObjectiveValueName: string
|
|
||||||
* Metrics: List of Metrics name
|
|
||||||
* ParameterConfigs: List of parameter config.
|
|
||||||
Return:
|
|
||||||
* StudyID
|
|
||||||
|
|
||||||
### SetSuggestionParameters
|
|
||||||
Hyperparameters are generated by suggestion services with Parameter config of Study.
|
|
||||||
You can set the specific config for each suggestion.
|
|
||||||
Input:
|
|
||||||
* StudyID: ID of your study.
|
|
||||||
* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
|
|
||||||
* SuggestionParameters: key-value pairs of parameters for the suggestion service. The expected keys differ for each suggestion algorithm.
|
|
||||||
Return:
|
|
||||||
* ParameterID
|
|
||||||
|
|
||||||
### GetSuggestions
|
|
||||||
This function creates Trials (sets of Parameters).
|
|
||||||
Input:
|
|
||||||
* StudyID: ID of your study.
|
|
||||||
* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
|
|
||||||
* RequestNumber: the number of trials you want to evaluate.
|
|
||||||
* ParamID: ParameterID you got from SetSuggestionParameters func.
|
|
||||||
Return
|
|
||||||
* List of Trials
|
|
||||||
* TrialID
|
|
||||||
* Parameter Sets
|
|
||||||
|
|
||||||
### RunTrial
|
|
||||||
Start to evaluate Trial.
|
|
||||||
When you use the kubernetes runtime, the pods are created with the specified config.
|
|
||||||
Input:
|
|
||||||
* StudyId: ID of your study.
|
|
||||||
* TrialId: ID of Trial.
|
|
||||||
* Runtime: worker type(e.g. kubernetes)
|
|
||||||
* WorkerConfig: runtime config
|
|
||||||
* Image: name of docker image
|
|
||||||
* Command: running commands
|
|
||||||
* GPU: number of GPU
|
|
||||||
* Scheduler: scheduler name
|
|
||||||
Return:
|
|
||||||
* List of WorkerID
|
|
||||||
|
|
||||||
### GetMetrics
|
|
||||||
Get metrics of running workers.
|
|
||||||
Input:
|
|
||||||
* StudyId: ID of your study.
|
|
||||||
* WorkerIDs: List of worker ID you want to get metrics from.
|
|
||||||
Return:
|
|
||||||
* List of Metrics
|
|
||||||
|
|
||||||
### SaveModel
|
|
||||||
Save the model data to the Katib DB. After calling this function, you can view the model info in the Katib UI.
|
|
||||||
When you call this API multiple times, only Metrics are updated.
|
|
||||||
Input:
|
|
||||||
* ModelInfo
|
|
||||||
* StudyName
|
|
||||||
* WorkerId
|
|
||||||
* Parameters: List of Parameter
|
|
||||||
* Metrics: List of Metrics
|
|
||||||
* ModelPath: path to model saved. (PVCname:mountpath)
|
|
||||||
* DataSet: information about the input data
|
|
||||||
* Name
|
|
||||||
* Path: path to input data.(PVCname:mountpath)
|
|
||||||
|
|
||||||
Return:
|
|
||||||
|
|
||||||
### GetWorkers
|
|
||||||
You can get worker list and status of workers.
|
|
||||||
Input:
|
|
||||||
Return:
|
|
||||||
* List of worker information
|
|
||||||
|
|
@ -1,210 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"flag"
|
|
||||||
"log"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/kubeflow/katib/pkg/api"
|
|
||||||
"google.golang.org/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var studyConfig = api.StudyConfig{
|
|
||||||
Name: "grid-demo",
|
|
||||||
Owner: "katib",
|
|
||||||
OptimizationType: api.OptimizationType_MAXIMIZE,
|
|
||||||
OptimizationGoal: 0.99,
|
|
||||||
ObjectiveValueName: "Validation-accuracy",
|
|
||||||
Metrics: []string{
|
|
||||||
"accuracy",
|
|
||||||
},
|
|
||||||
ParameterConfigs: &api.StudyConfig_ParameterConfigs{
|
|
||||||
Configs: []*api.ParameterConfig{
|
|
||||||
&api.ParameterConfig{
|
|
||||||
Name: "--learning_rate",
|
|
||||||
ParameterType: api.ParameterType_DOUBLE,
|
|
||||||
Feasible: &api.FeasibleSpace{
|
|
||||||
Min: "0.005",
|
|
||||||
Max: "0.5",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
var gridConfig = []*api.SuggestionParameter{
|
|
||||||
&api.SuggestionParameter{
|
|
||||||
Name: "DefaultGrid",
|
|
||||||
Value: "4",
|
|
||||||
},
|
|
||||||
&api.SuggestionParameter{
|
|
||||||
Name: "--learning_rate",
|
|
||||||
Value: "2",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Command-line flags; the pointers are populated once main calls flag.Parse.
var (
	// managerAddr is the address main dials to reach the Katib manager.
	managerAddr = flag.String("katib_endpoint", "127.0.0.1:6789", "Endpoint of manager default 127.0.0.1:6789")

	// trainerImage is the docker image used for every worker's WorkerConfig.
	trainerImage = flag.String("trainer_image", "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888", "The docker image containing the training code")
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
conn, err := grpc.Dial(*managerAddr, grpc.WithInsecure())
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("could not connect: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
workerConfig := api.WorkerConfig{
|
|
||||||
Image: *trainerImage,
|
|
||||||
Command: []string{
|
|
||||||
"python",
|
|
||||||
"/workdir/train.py",
|
|
||||||
"--sample_size",
|
|
||||||
"20000",
|
|
||||||
// "--input_data_gcs_bucket",
|
|
||||||
// "katib-gi-example",
|
|
||||||
// "--input_data_gcs_path",
|
|
||||||
// "github-issue-summarization-data/github-issues.zip",
|
|
||||||
// "--output_model_gcs_bucket",
|
|
||||||
// "katib-gi-example",
|
|
||||||
},
|
|
||||||
Gpu: 0,
|
|
||||||
Scheduler: "default-scheduler",
|
|
||||||
}
|
|
||||||
|
|
||||||
defer conn.Close()
|
|
||||||
ctx := context.Background()
|
|
||||||
c := api.NewManagerClient(conn)
|
|
||||||
createStudyreq := &api.CreateStudyRequest{
|
|
||||||
StudyConfig: &studyConfig,
|
|
||||||
}
|
|
||||||
createStudyreply, err := c.CreateStudy(ctx, createStudyreq)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("StudyConfig Error %v", err)
|
|
||||||
}
|
|
||||||
studyId := createStudyreply.StudyId
|
|
||||||
log.Printf("Study ID %s", studyId)
|
|
||||||
getStudyreq := &api.GetStudyRequest{
|
|
||||||
StudyId: studyId,
|
|
||||||
}
|
|
||||||
getStudyReply, err := c.GetStudy(ctx, getStudyreq)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("GetConfig Error %v", err)
|
|
||||||
}
|
|
||||||
log.Printf("Study ID %s StudyConf%v", studyId, getStudyReply.StudyConfig)
|
|
||||||
setSuggesitonParameterRequest := &api.SetSuggestionParametersRequest{
|
|
||||||
StudyId: studyId,
|
|
||||||
SuggestionAlgorithm: "grid",
|
|
||||||
SuggestionParameters: gridConfig,
|
|
||||||
}
|
|
||||||
setSuggesitonParameterReply, err := c.SetSuggestionParameters(ctx, setSuggesitonParameterRequest)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("SetConfig Error %v", err)
|
|
||||||
}
|
|
||||||
log.Printf("Grid Prameter ID %s", setSuggesitonParameterReply.ParamId)
|
|
||||||
getGridSuggestRequest := &api.GetSuggestionsRequest{
|
|
||||||
StudyId: studyId,
|
|
||||||
SuggestionAlgorithm: "grid",
|
|
||||||
RequestNumber: 0,
|
|
||||||
//RequestNumber=0 means get all grids.
|
|
||||||
ParamId: setSuggesitonParameterReply.ParamId,
|
|
||||||
}
|
|
||||||
getGridSuggestReply, err := c.GetSuggestions(ctx, getGridSuggestRequest)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("GetSuggestion Error %v", err)
|
|
||||||
}
|
|
||||||
log.Println("Get Grid Suggestions:")
|
|
||||||
for _, t := range getGridSuggestReply.Trials {
|
|
||||||
log.Printf("%v", t)
|
|
||||||
}
|
|
||||||
workerIds := make([]string, len(getGridSuggestReply.Trials))
|
|
||||||
workerParameter := make(map[string][]*api.Parameter)
|
|
||||||
for i, t := range getGridSuggestReply.Trials {
|
|
||||||
ws := workerConfig
|
|
||||||
rtr := &api.RunTrialRequest{
|
|
||||||
StudyId: studyId,
|
|
||||||
TrialId: t.TrialId,
|
|
||||||
Runtime: "kubernetes",
|
|
||||||
WorkerConfig: &ws,
|
|
||||||
}
|
|
||||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, "--output_model_gcs_path")
|
|
||||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, "github-issue-summarization-data/"+t.TrialId+"output_model.h5")
|
|
||||||
for _, p := range t.ParameterSet {
|
|
||||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Name)
|
|
||||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Value)
|
|
||||||
}
|
|
||||||
workerReply, err := c.RunTrial(ctx, rtr)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("RunTrial Error %v", err)
|
|
||||||
}
|
|
||||||
workerIds[i] = workerReply.WorkerId
|
|
||||||
workerParameter[workerReply.WorkerId] = t.ParameterSet
|
|
||||||
saveModelRequest := &api.SaveModelRequest{
|
|
||||||
Model: &api.ModelInfo{
|
|
||||||
StudyName: studyConfig.Name,
|
|
||||||
WorkerId: workerReply.WorkerId,
|
|
||||||
Parameters: t.ParameterSet,
|
|
||||||
Metrics: []*api.Metrics{},
|
|
||||||
ModelPath: "pvc:/Path/to/Model",
|
|
||||||
},
|
|
||||||
DataSet: &api.DataSetInfo{
|
|
||||||
Name: "GitHub",
|
|
||||||
Path: "/path/to/data",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
_, err = c.SaveModel(ctx, saveModelRequest)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("SaveModel Error %v", err)
|
|
||||||
}
|
|
||||||
log.Printf("WorkerID %s start\n", workerReply.WorkerId)
|
|
||||||
}
|
|
||||||
for true {
|
|
||||||
time.Sleep(10 * time.Second)
|
|
||||||
getMetricsRequest := &api.GetMetricsRequest{
|
|
||||||
StudyId: studyId,
|
|
||||||
WorkerIds: workerIds,
|
|
||||||
}
|
|
||||||
getMetricsReply, err := c.GetMetrics(ctx, getMetricsRequest)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("GetMetErr %v", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for _, mls := range getMetricsReply.MetricsLogSets {
|
|
||||||
if len(mls.MetricsLogs) > 0 {
|
|
||||||
//Only Metrics can be updated.
|
|
||||||
saveModelRequest := &api.SaveModelRequest{
|
|
||||||
Model: &api.ModelInfo{
|
|
||||||
StudyName: studyConfig.Name,
|
|
||||||
WorkerId: mls.WorkerId,
|
|
||||||
Metrics: []*api.Metrics{},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, ml := range mls.MetricsLogs {
|
|
||||||
if len(ml.Values) > 0 {
|
|
||||||
log.Printf("WorkerID %s :\t Metrics Name %s Value %v", mls.WorkerId, ml.Name, ml.Values[len(ml.Values)-1])
|
|
||||||
saveModelRequest.Model.Metrics = append(saveModelRequest.Model.Metrics, &api.Metrics{Name: ml.Name, Value: ml.Values[len(ml.Values)-1]})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_, err = c.SaveModel(ctx, saveModelRequest)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("SaveModel Error %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
getWorkerRequest := &api.GetWorkersRequest{StudyId: studyId}
|
|
||||||
getWorkerReply, err := c.GetWorkers(ctx, getWorkerRequest)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("GetWorker Error %v", err)
|
|
||||||
}
|
|
||||||
completeCount := 0
|
|
||||||
for _, w := range getWorkerReply.Workers {
|
|
||||||
if w.Status == api.State_COMPLETED {
|
|
||||||
completeCount++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if completeCount == len(getWorkerReply.Workers) {
|
|
||||||
log.Printf("All Worker Completed!")
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue