Continuously build the docker images used by mnist. (#462)

* This is the first step in adding E2E tests for the mnist example.

* Add a Makefile and .jsonnet file to build the Docker images using GCB

* Define an Argo workflow to trigger the image builds on presubmit & postsubmit.

Related to: #460
Jeremy Lewi 2019-01-08 15:21:49 -08:00 committed by Kubernetes Prow Robot
parent 1cc4550b7d
commit d28ba7c4db
6 changed files with 498 additions and 0 deletions

1
mnist/.gitignore vendored Normal file

@@ -0,0 +1 @@
build/**

63
mnist/Makefile Executable file

@@ -0,0 +1,63 @@
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requirements:
# https://github.com/mattrobenolt/jinja2-cli
# pip install jinja2-cli
#
# To override variables, do
# make ${TARGET} ${VAR}=${VALUE}
#
# IMG is the base path for images.
# Individual images will be
# $(IMG)/$(NAME):$(TAG)
IMG ?= gcr.io/kubeflow-examples/mnist
# List any changed files. We only include files in the mnist directory
# because that is the code that goes into the Docker images.
# In particular we exclude changes to the ksonnet configs.
CHANGED_FILES := $(shell git diff-files --relative=mnist/)
# Whether to use cached images with GCB
USE_IMAGE_CACHE ?= true
ifeq ($(strip $(CHANGED_FILES)),)
# Changed files is empty; the mnist tree is not dirty.
# Don't include --dirty because the repo could be dirty only because of
# files outside the ones we care about.
GIT_VERSION := $(shell git describe --always)
else
GIT_VERSION := $(shell git describe --always)-dirty-$(shell git diff | shasum -a256 | cut -c -6)
endif
TAG := $(shell date +v%Y%m%d)-$(GIT_VERSION)
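# For example (values illustrative): a clean tree on 2019-01-08 would yield
# TAG=v20190108-d28ba7c, and a dirty mnist/ tree something like
# TAG=v20190108-d28ba7c-dirty-3f9a1b.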
all: build
# Build the GCB workflow
build-gcb-spec:
	rm -rf ./build
	mkdir -p build
	jsonnet ./image_build.jsonnet --ext-str imageBase=$(IMG) \
	  --ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
	  --ext-str useImageCache=$(USE_IMAGE_CACHE) \
	  > ./build/image_build.json
# Build using GCB. This is useful if we are on a slow internet connection
# and don't want to pull images locally.
# It's also used to build from our CI system.
build-gcb: build-gcb-spec
	gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci \
	  --config=./build/image_build.json \
	  --timeout=3600 .
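For reference, the CI workflow below invokes this target with variable overrides, along these lines (values illustrative):

	make build-gcb IMG=gcr.io/kubeflow-ci/mnist TAG=build-997a USE_IMAGE_CACHE=false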

93
mnist/image_build.jsonnet Normal file

@@ -0,0 +1,93 @@
// TODO(jlewi): We should tag the image latest and then
// use latest as a cache so that rebuilds are fast
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image
{
// Convert non-boolean types like string and number to a boolean.
// This is primarily intended for dealing with parameters that should be booleans.
local toBool = function(x) {
result::
if std.type(x) == "boolean" then
x
else if std.type(x) == "string" then
std.asciiUpper(x) == "TRUE"
else if std.type(x) == "number" then
x != 0
else
false,
}.result,
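// For example: toBool("TRUE") and toBool("true") yield true, toBool("false")
// yields false, toBool(1) yields true, and toBool(0) yields false.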
local useImageCache = toBool(std.extVar("useImageCache")),
// A template defining the steps for building each image.
//
// TODO(jlewi): This logic is reused across a lot of examples; can we put it
// in a shared location and just import it?
local subGraphTemplate = {
// The following variables must be set:
name: null,
dockerFile: null,
buildArg: null,
contextDir: ".",
local template = self,
local pullStep = if useImageCache then [
{
id: "pull-" + template.name,
name: "gcr.io/cloud-builders/docker",
args: ["pull", std.extVar("imageBase") + "/" + template.name + ":latest"],
waitFor: ["-"],
},
] else [],
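// Note: in GCB, waitFor: ["-"] makes a step start immediately, with no
// dependencies; when the cache is enabled, the build step below instead
// waits on this pull step.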
local image = std.extVar("imageBase") + "/" + template.name + ":" + std.extVar("tag"),
local imageLatest = std.extVar("imageBase") + "/" + template.name + ":latest",
images: [image, imageLatest],
steps: pullStep +
[
{
local buildArgList = if template.buildArg != null then ["--build-arg", template.buildArg] else [],
local cacheList = if useImageCache then ["--cache-from=" + imageLatest] else [],
id: "build-" + template.name,
name: "gcr.io/cloud-builders/docker",
args: [
"build",
"-t",
image,
"--label=git-versions=" + std.extVar("gitVersion"),
]
+ buildArgList
+ [
"--file=" + template.dockerFile,
]
+ cacheList + [template.contextDir],
waitFor: if useImageCache then ["pull-" + template.name] else ["-"],
},
{
id: "tag-" + template.name,
name: "gcr.io/cloud-builders/docker",
args: ["tag", image, imageLatest],
waitFor: ["build-" + template.name],
},
],
},
local modelSteps = subGraphTemplate {
name: "model",
dockerFile: "./Dockerfile.model",
contextDir: "."
},
local ksonnetSteps = subGraphTemplate {
name: "ksonnet",
dockerFile: "./Dockerfile.ksonnet",
contextDir: "."
},
steps: modelSteps.steps + ksonnetSteps.steps,
images: modelSteps.images + ksonnetSteps.images,
}
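For the "model" image with the cache enabled, the spec generated into ./build/image_build.json comes out roughly like this (an abridged, illustrative sketch using the Makefile's default IMG and a made-up tag):

{
  "steps": [
    {
      "id": "pull-model",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["pull", "gcr.io/kubeflow-examples/mnist/model:latest"],
      "waitFor": ["-"]
    },
    {
      "id": "build-model",
      "name": "gcr.io/cloud-builders/docker",
      "args": [
        "build", "-t", "gcr.io/kubeflow-examples/mnist/model:v20190108-d28ba7c",
        "--label=git-versions=d28ba7c",
        "--file=./Dockerfile.model",
        "--cache-from=gcr.io/kubeflow-examples/mnist/model:latest",
        "."
      ],
      "waitFor": ["pull-model"]
    },
    {
      "id": "tag-model",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["tag", "gcr.io/kubeflow-examples/mnist/model:v20190108-d28ba7c",
               "gcr.io/kubeflow-examples/mnist/model:latest"],
      "waitFor": ["build-model"]
    }
  ],
  "images": [
    "gcr.io/kubeflow-examples/mnist/model:v20190108-d28ba7c",
    "gcr.io/kubeflow-examples/mnist/model:latest"
  ]
}

The "ksonnet" image gets the same three steps, and the two images lists are concatenated.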


@@ -17,6 +17,16 @@ workflows:
include_dirs:
- code_search/*
# E2E test for mnist example
- app_dir: kubeflow/examples/test/workflows
component: mnist
name: mnist
job_types:
- presubmit
- postsubmit
include_dirs:
- mnist/*
# E2E test for github issue summarization example
- app_dir: kubeflow/examples/test/workflows
component: gis


@@ -0,0 +1,325 @@
// Test workflow for the mnist example.
//
local env = std.extVar("__ksonnet/environments");
local overrides = std.extVar("__ksonnet/params").components.mnist;
local k = import "k.libsonnet";
local util = import "util.libsonnet";
// Define default params and then combine them with any overrides
local defaultParams = {
// local nfsVolumeClaim: "kubeflow-testing",
nfsVolumeClaim: "nfs-external",
// The name to use for the volume to use to contain test data.
dataVolume: "kubeflow-test-volume",
// Default step image:
stepImage: "gcr.io/kubeflow-ci/test-worker:v20181017-bfeaaf5-dirty-4adcd0",
};
local params = defaultParams + overrides;
local prowEnv = util.parseEnv(params.prow_env);
// Create a dictionary of the different prow variables so we can refer to them in the workflow.
//
// Important: We want to initialize all variables we reference to some value. If we don't,
// and we reference a variable which doesn't get set, we get failure messages that are very
// hard to debug. In particular, we've seen problems where, if we add a new environment and
// evaluate one component, e.g. "workflows", and another component, e.g. "code_search.jsonnet",
// doesn't have a default value for BUILD_ID, then ksonnet fails because BUILD_ID is undefined.
local prowDict = {
BUILD_ID: "notset",
BUILD_NUMBER: "notset",
REPO_OWNER: "notset",
REPO_NAME: "notset",
JOB_NAME: "notset",
JOB_TYPE: "notset",
PULL_NUMBER: "notset",
} + util.listOfDictToMap(prowEnv);
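// For example (assuming util.parseEnv splits the comma/equals-delimited string
// into a list of {name, value} dicts, and util.listOfDictToMap folds that list
// into an object): a prow_env of "BUILD_ID=997a,PULL_NUMBER=374" becomes
//   [{ name: "BUILD_ID", value: "997a" }, { name: "PULL_NUMBER", value: "374" }]
// and then { BUILD_ID: "997a", PULL_NUMBER: "374" }, overriding the "notset"
// defaults above.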
local bucket = params.bucket;
// mountPath is the directory where the volume to store the test data
// should be mounted.
local mountPath = "/mnt/" + "test-data-volume";
// testDir is the root directory for all data for a particular test run.
local testDir = mountPath + "/" + params.name;
// outputDir is the directory to sync to GCS to contain the output for this job.
local outputDir = testDir + "/output";
local artifactsDir = outputDir + "/artifacts";
// Source directory where all repos should be checked out
local srcRootDir = testDir + "/src";
// The directory containing the kubeflow/kubeflow repo
local srcDir = srcRootDir + "/" + prowDict.REPO_OWNER + "/" + prowDict.REPO_NAME;
// These variables control where the docker images get pushed and what
// tag to use
local imageBase = "gcr.io/kubeflow-ci/mnist";
local imageTag = "build-" + prowDict["BUILD_ID"];
// Build template is a template for constructing Argo step templates.
//
// name: Name for the template
// command: List to pass as the container command.
//
// We customize the defaults for each step in the workflow by modifying
// buildTemplate.argoTemplate
local buildTemplate = {
// name & command variables should be overwritten for every test.
// Other variables can be changed per step as needed.
// They are hidden because they shouldn't be included in the Argo template
name: "",
command:: "",
image: params.stepImage,
workingDir:: null,
env_vars:: [],
side_cars: [],
activeDeadlineSeconds: 1800, // Set 30 minute timeout for each template
local template = self,
// The directory within the kubeflow_testing submodule containing
// py scripts to use.
local kubeflowTestingPy = srcRootDir + "/kubeflow/testing/py",
// Actual template for Argo
argoTemplate: {
name: template.name,
container: {
command: template.command,
name: template.name,
image: template.image,
workingDir: template.workingDir,
env: [
{
// Add the source directories to the python path.
name: "PYTHONPATH",
value: kubeflowTestingPy,
},
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/secret/gcp-credentials/key.json",
},
{
name: "GITHUB_TOKEN",
valueFrom: {
secretKeyRef: {
name: "github-token",
key: "github_token",
},
},
},
] + prowEnv + template.env_vars,
volumeMounts: [
{
name: params.dataVolume,
mountPath: mountPath,
},
{
name: "github-token",
mountPath: "/secret/github-token",
},
{
name: "gcp-credentials",
mountPath: "/secret/gcp-credentials",
},
],
},
},
}; // buildTemplate
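// For example (illustrative), a step is declared by overriding the hidden
// fields:
//   buildTemplate { name: "lint", command: ["make", "lint"] }
// whose .argoTemplate manifests as an Argo template named "lint" running
// `make lint` in params.stepImage; hidden (::) fields like command are folded
// into argoTemplate rather than emitted directly.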
// Create a list of dictionaries.
// Each item is a dictionary describing one step in the graph.
local dagTemplates = [
{
template: buildTemplate {
name: "checkout",
command:
["/usr/local/bin/checkout.sh", srcRootDir],
env_vars: [{
name: "EXTRA_REPOS",
value: "kubeflow/testing@HEAD",
}],
},
dependencies: null,
}, // checkout
{
// TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink
// should be done by run_e2e_workflow.py
template: buildTemplate {
name: "create-pr-symlink",
command: [
"python",
"-m",
"kubeflow.testing.prow_artifacts",
"--artifacts_dir=" + outputDir,
"create_pr_symlink",
"--bucket=" + params.bucket,
],
}, // create-pr-symlink
dependencies: ["checkout"],
}, // create-pr-symlink
{
// Submit a GCB job to build the images
template: buildTemplate {
name: "build-images",
command: util.buildCommand([
[
"gcloud",
"auth",
"activate-service-account",
"--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
],
[
"make",
"build-gcb",
"IMG=" + imageBase,
"TAG=" + imageTag,
]]
),
workingDir: srcDir + "/mnist",
},
dependencies: ["checkout"],
}, // build-images
{
// Run the python test to train the model
template: buildTemplate {
name: "train-test",
command: [
"python",
"train_test.py",
],
// Use the newly built image.
image: imageBase + "/trainer-estimator:" + imageTag,
workingDir: "/issues",
},
dependencies: ["build-images"],
}, // train-test
];
// Dag defines the tasks in the graph
local dag = {
name: "e2e",
// Construct tasks from the templates;
// each step gets the same name as its template.
dag: {
tasks: util.toArgoTaskList(dagTemplates),
},
}; // dag
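// Assuming util.toArgoTaskList maps each entry to a task of the form
// { name: <template name>, template: <template name>, dependencies: [...] },
// the dag above expands to tasks like
//   { name: "build-images", template: "build-images", dependencies: ["checkout"] }.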
// Define templates for the steps to be performed when the
// test exits
local exitTemplates =
[
{
// Copy artifacts to GCS for gubernator.
// TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink
// should be done by run_e2e_workflow.py
template: buildTemplate {
name: "copy-artifacts",
command: [
"python",
"-m",
"kubeflow.testing.prow_artifacts",
"--artifacts_dir=" + outputDir,
"copy_artifacts",
"--bucket=" + bucket,
],
}, // copy-artifacts,
},
{
// Delete the test directory in NFS.
// TODO(https://github.com/kubeflow/testing/issues/256): Use an external process to do this.
template:
buildTemplate {
name: "test-dir-delete",
command: [
"rm",
"-rf",
testDir,
],
argoTemplate+: {
retryStrategy: {
limit: 3,
},
},
}, // test-dir-delete
dependencies: ["copy-artifacts"],
},
];
// Create a DAG representing the set of steps to execute on exit
local exitDag = {
name: "exit-handler",
// Construct tasks from the templates;
// each step gets the same name as its template.
dag: {
tasks: util.toArgoTaskList(exitTemplates),
},
};
// A list of templates for the actual steps
local stepTemplates = std.map(function(i) i.template.argoTemplate, dagTemplates) +
                      std.map(function(i) i.template.argoTemplate, exitTemplates);
// Define the Argo Workflow.
local workflow = {
apiVersion: "argoproj.io/v1alpha1",
kind: "Workflow",
metadata: {
name: params.name,
namespace: env.namespace,
labels: {
org: prowDict.REPO_OWNER,
repo: prowDict.REPO_NAME,
workflow: "gis",
[if std.objectHas(prowDict, "PULL_NUMBER") then "pr"]: prowDict.PULL_NUMBER,
},
},
spec: {
entrypoint: "e2e",
volumes: [
{
name: "github-token",
secret: {
secretName: "github-token",
},
},
{
name: "gcp-credentials",
secret: {
secretName: "kubeflow-testing-credentials",
},
},
{
name: params.dataVolume,
persistentVolumeClaim: {
claimName: params.nfsVolumeClaim,
},
},
], // volumes
// onExit specifies the template that should always run when the workflow completes.
onExit: "exit-handler",
// The templates will be a combination of the templates
// defining the dags executed by Argo as well as the templates
// for the individual steps.
templates: [dag, exitDag] + stepTemplates, // templates
}, // spec
}; // workflow
std.prune(k.core.v1.list.new([workflow]))
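The "pr" label above relies on jsonnet's computed, conditional field syntax: a field written [if cond then "key"]: value is emitted only when the condition holds. A minimal self-contained sketch (hypothetical values):

local prowDict = { PULL_NUMBER: "374" };
{
  labels: {
    org: "kubeflow",
    // Present only when prowDict has a PULL_NUMBER key.
    [if std.objectHas(prowDict, "PULL_NUMBER") then "pr"]: prowDict.PULL_NUMBER,
  },
}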


@@ -18,6 +18,12 @@
namespace: "kubeflow-test-infra",
prow_env: "BUILD_NUMBER=997a,BUILD_ID=997a,JOB_NAME=kubeflow-examples-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=374,REPO_NAME=examples,REPO_OWNER=kubeflow",
},
mnist: {
bucket: "kubeflow-ci_temp",
name: "kubeflow-mnist",
namespace: "kubeflow-test-infra",
prow_env: "BUILD_NUMBER=997a,BUILD_ID=997a,JOB_NAME=kubeflow-examples-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=374,REPO_NAME=examples,REPO_OWNER=kubeflow",
},
workflows: {
bucket: "kubeflow-ci_temp",
name: "kubeflow-examples-presubmit-test-374-6e32",