mirror of https://github.com/kubeflow/examples.git
Fix model file upload (#160)
* Add component parameters
  Add model_url & port arguments to flask app
  Add service_type, image, and model_url parameters to ui component
  Fix problem argument in tensor2tensor component
* Fix broken UI component
  Fix broken UI component structure by adding all, service, & deployment parts
  Add parameter defaults for tfjob to resolve failures deploying other components
* Add missing imports in flask app
  Fix syntax error in argument parsing
  Remove underscores from parameter names to work around ksonnet bug #554: https://github.com/ksonnet/ksonnet/issues/554
* Fix syntax errors in t2t instructions
  Add CPU image build arg to docker build command for t2t-training
  Fix link to ksonnet app dir
  Correct param names for tensor2tensor component
  Add missing params for tensor2tensor component
  Fix apply command syntax
  Swap out log view pod for t2t-master instead of tf-operator
  Fix link to training with tfjob
* Fix model file upload
  Update default params for tfjob-v1alpha2
  Fix build directory path in Makefile
* Resolve lint issues
  Lines too long
* Add specific image tag to tfjob-v1alpha2 default
* Fix defaults for training output files
  Update image tag
  Add UI image tag
* Revert service account secret details
  Update associated readme
This commit is contained in:
parent
98ed4b4a69
commit
836ad70421
|
@ -53,7 +53,7 @@ and the resulting model.
|
|||
|
||||
* Give the service account the `roles/storage.admin` role so that it can access GCS buckets.
|
||||
|
||||
* Download its key as a json file and create a secret named `gcp-credentials` with the key `key.json`
|
||||
* Download its key as a json file and create a secret named `user-gcp-sa` with the key `user-gcp-sa.json`
|
||||
|
||||
```commandline
|
||||
SERVICE_ACCOUNT=github-issue-summarization
|
||||
|
@ -68,7 +68,7 @@ KEY_FILE=/home/agwl/secrets/${SERVICE_ACCOUNT}@${PROJECT}.iam.gserviceaccount.co
|
|||
gcloud iam service-accounts keys create ${KEY_FILE} \
|
||||
--iam-account ${SERVICE_ACCOUNT}@${PROJECT}.iam.gserviceaccount.com
|
||||
|
||||
kubectl --namespace=${NAMESPACE} create secret generic gcp-credentials --from-file=key.json="${KEY_FILE}"
|
||||
kubectl --namespace=${NAMESPACE} create secret generic user-gcp-sa --from-file=user-gcp-sa.json="${KEY_FILE}"
|
||||
```
|
||||
|
||||
|
||||
|
|
|
@ -53,14 +53,15 @@
|
|||
ui: {
|
||||
namespace: "null",
|
||||
githubToken: "",
|
||||
image: "gcr.io/kubeflow-examples/issue-summarization-ui:v20180629-v0.1-2-g98ed4b4-dirty-182929",
|
||||
},
|
||||
"tfjob-v1alpha2": {
|
||||
name: "tfjob-v1alpha2",
|
||||
image: "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888",
|
||||
name: "tfjob-issue-summarization",
|
||||
image: "gcr.io/kubeflow-examples/tf-job-issue-summarization:v20180629-v0.1-2-g98ed4b4-dirty-182929",
|
||||
input_data_gcs_bucket: "kubeflow-examples",
|
||||
input_data_gcs_path: "github-issue-summarization-data/github-issues.zip",
|
||||
output_model_gcs_bucket: "kubeflow-examples",
|
||||
output_model_gcs_path: "github-issue-summarization-data/output_model.h5",
|
||||
output_model_gcs_path: "github-issue-summarization-data",
|
||||
sample_size: "100000",
|
||||
gcpSecretName: "user-gcp-sa",
|
||||
gcpSecretFile: "user-gcp-sa.json",
|
||||
|
|
|
@ -35,7 +35,7 @@ DIR := $(shell pwd)
|
|||
|
||||
# Use a subdirectory of the root directory
|
||||
# this way it will be excluded by git diff-files
|
||||
BUILD_DIR := $(shell cd ../build/notebook_build && pwd)
|
||||
BUILD_DIR := $(pwd)
|
||||
|
||||
MODEL_GCS := gs://kubeflow-examples-data/gh_issue_summarization/model/v20180426
|
||||
# You can override this on the command line as
|
||||
|
|
|
@ -72,28 +72,26 @@ def main(): # pylint: disable=too-many-statements
|
|||
default="",
|
||||
help="The output location for the model GCS or local file path.")
|
||||
|
||||
# TODO(jlewi): We should get rid of the following arguments and just use
|
||||
# --output_model_h5. If the output is a gs:// location we should use
|
||||
# a local file and then upload it to GCS.
|
||||
parser.add_argument("--output_model_gcs_bucket", type=str, default="")
|
||||
parser.add_argument(
|
||||
"--output_model_gcs_path",
|
||||
type=str,
|
||||
default="github-issue-summarization-data/output_model.h5")
|
||||
default="github-issue-summarization-data")
|
||||
|
||||
parser.add_argument(
|
||||
"--output_body_preprocessor_dpkl",
|
||||
type=str,
|
||||
default="body_preprocessor.dpkl")
|
||||
default="body_pp.dpkl")
|
||||
parser.add_argument(
|
||||
"--output_title_preprocessor_dpkl",
|
||||
type=str,
|
||||
default="title_preprocessor.dpkl")
|
||||
default="title_pp.dpkl")
|
||||
parser.add_argument(
|
||||
"--output_train_title_vecs_npy", type=str, default="train_title_vecs.npy")
|
||||
parser.add_argument(
|
||||
"--output_train_body_vecs_npy", type=str, default="train_body_vecs.npy")
|
||||
parser.add_argument("--output_model_h5", type=str, default="output_model.h5")
|
||||
parser.add_argument(
|
||||
"--output_model_h5", type=str, default="seq2seq_model_tutorial.h5")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -273,11 +271,16 @@ def main(): # pylint: disable=too-many-statements
|
|||
args.output_model)
|
||||
|
||||
if output_model_gcs_bucket:
|
||||
logging.info("Uploading model to bucket %s path %s.",
|
||||
logging.info("Uploading model files to bucket %s path %s.",
|
||||
output_model_gcs_bucket, output_model_gcs_path)
|
||||
bucket = storage.Bucket(storage.Client(), output_model_gcs_bucket)
|
||||
storage.Blob(output_model_gcs_path, bucket).upload_from_filename(
|
||||
storage.Blob(
|
||||
output_model_gcs_path + "/" + args.output_model_h5, bucket).upload_from_filename(
|
||||
args.output_model_h5)
|
||||
storage.Blob(output_model_gcs_path + "/" + args.output_body_preprocessor_dpkl,
|
||||
bucket).upload_from_filename(args.output_body_preprocessor_dpkl)
|
||||
storage.Blob(output_model_gcs_path + "/" + args.output_title_preprocessor_dpkl,
|
||||
bucket).upload_from_filename(args.output_title_preprocessor_dpkl)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in New Issue