mirror of https://github.com/kubeflow/examples.git
Continuously build the docker images used by mnist. (#462)
* This is the first step in adding E2E tests for the mnist example. * Add a Makefile and .jsonnet file to build the Docker images using GCB * Define an Argo workflow to trigger the image builds on pre & post submit. Related to: #460
This commit is contained in:
parent
1cc4550b7d
commit
d28ba7c4db
|
@ -0,0 +1 @@
|
|||
build/**
|
|
@ -0,0 +1,63 @@
|
|||
# Copyright 2017 The Kubernetes Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Requirements:
|
||||
# https://github.com/mattrobenolt/jinja2-cli
|
||||
# pip install jinja2-cli
|
||||
#
|
||||
# To override variables do
|
||||
# make ${TARGET} ${VAR}=${VALUE}
|
||||
#
|
||||
|
||||
# IMG is the base path for images.
|
||||
# Individual images will be
|
||||
# $(IMG)/$(NAME):$(TAG)
|
||||
IMG ?= gcr.io/kubeflow-examples/mnist
|
||||
|
||||
# List any changed files. We only include files in the notebooks directory
|
||||
# because that is the code in the docker image.
|
||||
# In particular we exclude changes to the ksonnet configs.
|
||||
CHANGED_FILES := $(shell git diff-files --relative=mnist/)
|
||||
|
||||
# Whether to use cached images with GCB
|
||||
USE_IMAGE_CACHE ?= true
|
||||
|
||||
ifeq ($(strip $(CHANGED_FILES)),)
|
||||
# Changed files is empty; not dirty
|
||||
# Don't include --dirty because it could be dirty if files outside the ones we care
|
||||
# about changed.
|
||||
GIT_VERSION := $(shell git describe --always)
|
||||
else
|
||||
GIT_VERSION := $(shell git describe --always)-dirty-$(shell git diff | shasum -a256 | cut -c -6)
|
||||
endif
|
||||
|
||||
TAG := $(shell date +v%Y%m%d)-$(GIT_VERSION)
|
||||
all: build
|
||||
|
||||
# Build the GCB workflow
|
||||
build-gcb-spec:
|
||||
rm -rf ./build
|
||||
mkdir -p build
|
||||
jsonnet ./image_build.jsonnet --ext-str imageBase=$(IMG) \
|
||||
--ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
|
||||
--ext-str useImageCache=$(USE_IMAGE_CACHE) \
|
||||
> ./build/image_build.json
|
||||
|
||||
# Build using GCB. This is useful if we are on a slow internet connection
|
||||
# and don't want to pull images locally.
|
||||
# It's also used to build from our CI system.
|
||||
build-gcb: build-gcb-spec
|
||||
gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci \
|
||||
--config=./build/image_build.json \
|
||||
--timeout=3600 .
|
|
@ -0,0 +1,93 @@
|
|||
// TODO(jlewi): We should tag the image latest and then
|
||||
// use latest as a cache so that rebuilds are fast
|
||||
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image
|
||||
{
|
||||
|
||||
// Convert non-boolean types like string,number to a boolean.
|
||||
// This is primarily intended for dealing with parameters that should be booleans.
|
||||
local toBool = function(x) {
|
||||
result::
|
||||
if std.type(x) == "boolean" then
|
||||
x
|
||||
else if std.type(x) == "string" then
|
||||
std.asciiUpper(x) == "TRUE"
|
||||
else if std.type(x) == "number" then
|
||||
x != 0
|
||||
else
|
||||
false,
|
||||
}.result,
|
||||
|
||||
local useImageCache = toBool(std.extVar("useImageCache")),
|
||||
|
||||
// A template for defining the steps for building each image.
|
||||
//
|
||||
// TODO(jlewi): This logic is reused across a lot of examples; can we put in a shared
|
||||
// location and just import it?
|
||||
local subGraphTemplate = {
|
||||
// following variables must be set
|
||||
name: null,
|
||||
|
||||
dockerFile: null,
|
||||
buildArg: null,
|
||||
contextDir: ".",
|
||||
|
||||
local template = self,
|
||||
|
||||
local pullStep = if useImageCache then [
|
||||
{
|
||||
id: "pull-" + template.name,
|
||||
name: "gcr.io/cloud-builders/docker",
|
||||
args: ["pull", std.extVar("imageBase") + "/" + template.name + ":latest"],
|
||||
waitFor: ["-"],
|
||||
},
|
||||
] else [],
|
||||
|
||||
local image = std.extVar("imageBase") + "/" + template.name + ":" + std.extVar("tag"),
|
||||
local imageLatest = std.extVar("imageBase") + "/" + template.name + ":latest",
|
||||
|
||||
images: [image, imageLatest],
|
||||
steps: pullStep +
|
||||
[
|
||||
{
|
||||
local buildArgList = if template.buildArg != null then ["--build-arg", template.buildArg] else [],
|
||||
local cacheList = if useImageCache then ["--cache-from=" + imageLatest] else [],
|
||||
|
||||
id: "build-" + template.name,
|
||||
name: "gcr.io/cloud-builders/docker",
|
||||
args: [
|
||||
"build",
|
||||
"-t",
|
||||
image,
|
||||
"--label=git-versions=" + std.extVar("gitVersion"),
|
||||
]
|
||||
+ buildArgList
|
||||
+ [
|
||||
"--file=" + template.dockerFile,
|
||||
]
|
||||
+ cacheList + [template.contextDir],
|
||||
waitFor: if useImageCache then ["pull-" + template.name] else ["-"],
|
||||
},
|
||||
{
|
||||
id: "tag-" + template.name,
|
||||
name: "gcr.io/cloud-builders/docker",
|
||||
args: ["tag", image, imageLatest],
|
||||
waitFor: ["build-" + template.name],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
local modelSteps = subGraphTemplate {
|
||||
name: "model",
|
||||
dockerFile: "./Dockerfile.model",
|
||||
contextDir: "."
|
||||
},
|
||||
|
||||
local ksonnetSteps = subGraphTemplate {
|
||||
name: "ksonnet",
|
||||
dockerFile: "./Dockerfile.ksonnet",
|
||||
contextDir: "."
|
||||
},
|
||||
|
||||
steps: modelSteps.steps + ksonnetSteps.steps,
|
||||
images: modelSteps.images + ksonnetSteps.images,
|
||||
}
|
|
@ -17,6 +17,16 @@ workflows:
|
|||
include_dirs:
|
||||
- code_search/*
|
||||
|
||||
# E2E test for mnist example
|
||||
- app_dir: kubeflow/examples/test/workflows
|
||||
component: mnist
|
||||
name: mnist
|
||||
job_types:
|
||||
- presubmit
|
||||
- postsubmit
|
||||
include_dirs:
|
||||
- mnist/*
|
||||
|
||||
# E2E test for github issue summarization example
|
||||
- app_dir: kubeflow/examples/test/workflows
|
||||
component: gis
|
||||
|
|
|
@ -0,0 +1,325 @@
|
|||
// E2E test workflow for the mnist example (adapted from the GitHub Issue Summarization workflow).
|
||||
//
|
||||
local env = std.extVar("__ksonnet/environments");
|
||||
local overrides = std.extVar("__ksonnet/params").components.mnist;
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
local util = import "util.libsonnet";
|
||||
|
||||
// Define default params and then combine them with any overrides
|
||||
local defaultParams = {
|
||||
// local nfsVolumeClaim: "kubeflow-testing",
|
||||
nfsVolumeClaim: "nfs-external",
|
||||
|
||||
// The name to use for the volume to use to contain test data.
|
||||
dataVolume: "kubeflow-test-volume",
|
||||
|
||||
// Default step image:
|
||||
stepImage: "gcr.io/kubeflow-ci/test-worker:v20181017-bfeaaf5-dirty-4adcd0",
|
||||
};
|
||||
|
||||
local params = defaultParams + overrides;
|
||||
|
||||
local prowEnv = util.parseEnv(params.prow_env);
|
||||
|
||||
// Create a dictionary of the different prow variables so we can refer to them in the workflow.
|
||||
//
|
||||
// Important: We want to initialize all variables we reference to some value. If we don't
|
||||
// and we reference a variable which doesn't get set then we get very hard to debug failure messages.
|
||||
// In particular, we've seen problems where if we add a new environment and evaluate one component eg. "workflows"
|
||||
// and another component e.g "code_search.jsonnet" doesn't have a default value for BUILD_ID then ksonnet
|
||||
// fails because BUILD_ID is undefined.
|
||||
local prowDict = {
|
||||
BUILD_ID: "notset",
|
||||
BUILD_NUMBER: "notset",
|
||||
REPO_OWNER: "notset",
|
||||
REPO_NAME: "notset",
|
||||
JOB_NAME: "notset",
|
||||
JOB_TYPE: "notset",
|
||||
PULL_NUMBER: "notset",
|
||||
} + util.listOfDictToMap(prowEnv);
|
||||
|
||||
local bucket = params.bucket;
|
||||
|
||||
// mountPath is the directory where the volume to store the test data
|
||||
// should be mounted.
|
||||
local mountPath = "/mnt/" + "test-data-volume";
|
||||
// testDir is the root directory for all data for a particular test run.
|
||||
local testDir = mountPath + "/" + params.name;
|
||||
// outputDir is the directory to sync to GCS to contain the output for this job.
|
||||
local outputDir = testDir + "/output";
|
||||
local artifactsDir = outputDir + "/artifacts";
|
||||
|
||||
// Source directory where all repos should be checked out
|
||||
local srcRootDir = testDir + "/src";
|
||||
|
||||
// The directory containing the kubeflow/kubeflow repo
|
||||
local srcDir = srcRootDir + "/" + prowDict.REPO_OWNER + "/" + prowDict.REPO_NAME;
|
||||
|
||||
|
||||
// These variables control where the docker images get pushed and what
|
||||
// tag to use
|
||||
local imageBase = "gcr.io/kubeflow-ci/github-issue-summarization";
|
||||
local imageTag = "build-" + prowDict["BUILD_ID"];
|
||||
|
||||
// Build template is a template for constructing Argo step templates.
|
||||
//
|
||||
// step_name: Name for the template
|
||||
// command: List to pass as the container command.
|
||||
//
|
||||
// We customize the defaults for each step in the workflow by modifying
|
||||
// buildTemplate.argoTemplate
|
||||
local buildTemplate = {
|
||||
// name & command variables should be overwritten for every test.
|
||||
// Other variables can be changed per step as needed.
|
||||
// They are hidden because they shouldn't be included in the Argo template
|
||||
name: "",
|
||||
command:: "",
|
||||
image: params.stepImage,
|
||||
workingDir:: null,
|
||||
env_vars:: [],
|
||||
side_cars: [],
|
||||
|
||||
|
||||
activeDeadlineSeconds: 1800, // Set 30 minute timeout for each template
|
||||
|
||||
local template = self,
|
||||
|
||||
// The directory within the kubeflow_testing submodule containing
|
||||
// py scripts to use.
|
||||
local kubeflowTestingPy = srcRootDir + "/kubeflow/testing/py",
|
||||
|
||||
// Actual template for Argo
|
||||
argoTemplate: {
|
||||
name: template.name,
|
||||
container: {
|
||||
command: template.command,
|
||||
name: template.name,
|
||||
image: template.image,
|
||||
workingDir: template.workingDir,
|
||||
env: [
|
||||
{
|
||||
// Add the source directories to the python path.
|
||||
name: "PYTHONPATH",
|
||||
value: kubeflowTestingPy,
|
||||
},
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
value: "/secret/gcp-credentials/key.json",
|
||||
},
|
||||
{
|
||||
name: "GITHUB_TOKEN",
|
||||
valueFrom: {
|
||||
secretKeyRef: {
|
||||
name: "github-token",
|
||||
key: "github_token",
|
||||
},
|
||||
},
|
||||
},
|
||||
] + prowEnv + template.env_vars,
|
||||
volumeMounts: [
|
||||
{
|
||||
name: params.dataVolume,
|
||||
mountPath: mountPath,
|
||||
},
|
||||
{
|
||||
name: "github-token",
|
||||
mountPath: "/secret/github-token",
|
||||
},
|
||||
{
|
||||
name: "gcp-credentials",
|
||||
mountPath: "/secret/gcp-credentials",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}; // buildTemplate
|
||||
|
||||
|
||||
// Create a list of dictionaries.
|
||||
// Each item is a dictionary describing one step in the graph.
|
||||
local dagTemplates = [
|
||||
{
|
||||
template: buildTemplate {
|
||||
name: "checkout",
|
||||
command:
|
||||
["/usr/local/bin/checkout.sh", srcRootDir],
|
||||
|
||||
env_vars: [{
|
||||
name: "EXTRA_REPOS",
|
||||
value: "kubeflow/testing@HEAD",
|
||||
}],
|
||||
},
|
||||
dependencies: null,
|
||||
}, // checkout
|
||||
{
|
||||
// TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink
|
||||
// should be done by run_e2e_workflow.py
|
||||
template: buildTemplate {
|
||||
name: "create-pr-symlink",
|
||||
command: [
|
||||
"python",
|
||||
"-m",
|
||||
"kubeflow.testing.prow_artifacts",
|
||||
"--artifacts_dir=" + outputDir,
|
||||
"create_pr_symlink",
|
||||
"--bucket=" + params.bucket,
|
||||
],
|
||||
}, // create-pr-symlink
|
||||
dependencies: ["checkout"],
|
||||
}, // create-pr-symlink
|
||||
{
|
||||
// Submit a GCB job to build the images
|
||||
template: buildTemplate {
|
||||
name: "build-images",
|
||||
command: util.buildCommand([
|
||||
[
|
||||
"gcloud",
|
||||
"auth",
|
||||
"activate-service-account",
|
||||
"--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
|
||||
],
|
||||
[
|
||||
"make",
|
||||
"build-gcb",
|
||||
"IMG=" + imageBase,
|
||||
"TAG=" + imageTag,
|
||||
]]
|
||||
),
|
||||
workingDir: srcDir + "/github_issue_summarization",
|
||||
},
|
||||
dependencies: ["checkout"],
|
||||
}, // build-images
|
||||
{
|
||||
// Run the python test to train the model
|
||||
template: buildTemplate {
|
||||
name: "train-test",
|
||||
command: [
|
||||
"python",
|
||||
"train_test.py",
|
||||
],
|
||||
// Use the newly built image.
|
||||
image: imageBase + "/trainer-estimator:" + imageTag,
|
||||
workingDir: "/issues",
|
||||
},
|
||||
dependencies: ["build-images"],
|
||||
}, // train-test
|
||||
];
|
||||
|
||||
// Dag defines the tasks in the graph
|
||||
local dag = {
|
||||
name: "e2e",
|
||||
// Construct tasks from the templates
|
||||
// we will give the steps the same name as the template
|
||||
dag: {
|
||||
tasks: util.toArgoTaskList(dagTemplates),
|
||||
},
|
||||
}; // dag
|
||||
|
||||
// Define templates for the steps to be performed when the
|
||||
// test exits
|
||||
local exitTemplates =
|
||||
[
|
||||
{
|
||||
// Copy artifacts to GCS for gubernator.
|
||||
// TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink
|
||||
// should be done by run_e2e_workflow.py
|
||||
template: buildTemplate {
|
||||
name: "copy-artifacts",
|
||||
command: [
|
||||
"python",
|
||||
"-m",
|
||||
"kubeflow.testing.prow_artifacts",
|
||||
"--artifacts_dir=" + outputDir,
|
||||
"copy_artifacts",
|
||||
"--bucket=" + bucket,
|
||||
],
|
||||
}, // copy-artifacts,
|
||||
|
||||
},
|
||||
{
|
||||
// Delete the test directory in NFS.
|
||||
// TODO(https://github.com/kubeflow/testing/issues/256): Use an external process to do this.
|
||||
template:
|
||||
buildTemplate {
|
||||
name: "test-dir-delete",
|
||||
command: [
|
||||
"rm",
|
||||
"-rf",
|
||||
testDir,
|
||||
],
|
||||
|
||||
argoTemplate+: {
|
||||
retryStrategy: {
|
||||
limit: 3,
|
||||
},
|
||||
},
|
||||
}, // test-dir-delete
|
||||
dependencies: ["copy-artifacts"],
|
||||
},
|
||||
];
|
||||
|
||||
// Create a DAG representing the set of steps to execute on exit
|
||||
local exitDag = {
|
||||
name: "exit-handler",
|
||||
// Construct tasks from the templates
|
||||
// we will give the steps the same name as the template
|
||||
dag: {
|
||||
tasks: util.toArgoTaskList(exitTemplates),
|
||||
},
|
||||
};
|
||||
|
||||
// A list of templates for the actual steps
|
||||
local stepTemplates = std.map(function(i) i.template.argoTemplate
|
||||
, dagTemplates) +
|
||||
std.map(function(i) i.template.argoTemplate
|
||||
, exitTemplates);
|
||||
|
||||
// Define the Argo Workflow.
|
||||
local workflow = {
|
||||
apiVersion: "argoproj.io/v1alpha1",
|
||||
kind: "Workflow",
|
||||
metadata: {
|
||||
name: params.name,
|
||||
namespace: env.namespace,
|
||||
labels: {
|
||||
org: prowDict.REPO_OWNER,
|
||||
repo: prowDict.REPO_NAME,
|
||||
workflow: "gis",
|
||||
[if std.objectHas(prowDict, "PULL_NUMBER") then "pr"]: prowDict.PULL_NUMBER,
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
entrypoint: "e2e",
|
||||
volumes: [
|
||||
{
|
||||
name: "github-token",
|
||||
secret: {
|
||||
secretName: "github-token",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "gcp-credentials",
|
||||
secret: {
|
||||
secretName: "kubeflow-testing-credentials",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: params.dataVolume,
|
||||
persistentVolumeClaim: {
|
||||
claimName: params.nfsVolumeClaim,
|
||||
},
|
||||
},
|
||||
], // volumes
|
||||
|
||||
// onExit specifies the template that should always run when the workflow completes.
|
||||
onExit: "exit-handler",
|
||||
|
||||
// The templates will be a combination of the templates
|
||||
// defining the dags executed by Argo as well as the templates
|
||||
// for the individual steps.
|
||||
templates: [dag, exitDag] + stepTemplates, // templates
|
||||
}, // spec
|
||||
}; // workflow
|
||||
|
||||
std.prune(k.core.v1.list.new([workflow]))
|
|
@ -18,6 +18,12 @@
|
|||
namespace: "kubeflow-test-infra",
|
||||
prow_env: "BUILD_NUMBER=997a,BUILD_ID=997a,JOB_NAME=kubeflow-examples-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=374,REPO_NAME=examples,REPO_OWNER=kubeflow",
|
||||
},
|
||||
mnist: {
|
||||
bucket: "kubeflow-ci_temp",
|
||||
name: "kubeflow-mnist",
|
||||
namespace: "kubeflow-test-infra",
|
||||
prow_env: "BUILD_NUMBER=997a,BUILD_ID=997a,JOB_NAME=kubeflow-examples-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=374,REPO_NAME=examples,REPO_OWNER=kubeflow",
|
||||
},
|
||||
workflows: {
|
||||
bucket: "kubeflow-ci_temp",
|
||||
name: "kubeflow-examples-presubmit-test-374-6e32",
|
||||
|
|
Loading…
Reference in New Issue