mirror of https://github.com/kubeflow/examples.git
add testing for xgboost_synthetic (#633)
This commit is contained in:
parent
e37a9d7acd
commit
4f8cf87d4f
@@ -61,3 +61,14 @@ workflows:
       - postsubmit
     include_dirs:
       - pytorch_mnist/*
+
+  # E2E test for xgboost-synthetic
+  - app_dir: kubeflow/examples/test/workflows
+    component: xgboost_synthetic
+    name: xgboost2
+    job_types:
+      - periodic
+      - presubmit
+      - postsubmit
+    include_dirs:
+      - xgboost_synthetic/*
@@ -36,6 +36,12 @@
       namespace: "kubeflow-test-infra",
       prow_env: "BUILD_NUMBER=997a,BUILD_ID=997a,JOB_NAME=kubeflow-examples-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=374,REPO_NAME=examples,REPO_OWNER=kubeflow",
     },
+    xgboost_synthetic: {
+      bucket: "kubeflow-ci_temp",
+      name: "kubeflow-xgboost_synthetic",
+      namespace: "kubeflow-test-infra",
+      prow_env: "BUILD_NUMBER=997a,BUILD_ID=997a,JOB_NAME=kubeflow-examples-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=374,REPO_NAME=examples,REPO_OWNER=kubeflow",
+    },
     workflows: {
       bucket: "kubeflow-ci_temp",
       name: "kubeflow-examples-presubmit-test-374-6e32",
@@ -0,0 +1,439 @@
// Test workflow for the XGBoost synthetic example.
//
local env = std.extVar("__ksonnet/environments");
local overrides = std.extVar("__ksonnet/params").components.xgboost_synthetic;

local k = import "k.libsonnet";
local util = import "util.libsonnet";

// Define default params and then combine them with any overrides
local defaultParams = {
  // local nfsVolumeClaim: "kubeflow-testing",
  nfsVolumeClaim: "nfs-external",

  // The name to use for the volume to use to contain test data.
  dataVolume: "kubeflow-test-volume",

  // Default step image:
  stepImage: "gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4",

  // Which Kubeflow cluster to use for running the test on.
  kfProject: "kubeflow-ci-deployment",
  kfZone: "us-east1-b",
  kfCluster: "kf-vmaster-n00",

  // The bucket where the model should be written.
  // This needs to be writable by the GCP service account in the Kubeflow cluster (not the test cluster).
  modelBucket: "kubeflow-ci_temp",

  // Whether to delete the namespace at the end.
  // Leaving the namespace around can be useful for debugging.
  //
  // TODO(jlewi): We should consider running a cronjob to GC namespaces.
  // But if we leave namespaces up, then we end up leaving the servers up, which
  // uses up CPU.
  //
  deleteNamespace: true,
};

local params = defaultParams + overrides;

local prowEnv = util.parseEnv(params.prow_env);

// Create a dictionary of the different prow variables so we can refer to them in the workflow.
//
// Important: We want to initialize all variables we reference to some value. If we don't,
// and we reference a variable which doesn't get set, then we get very hard to debug failure messages.
// In particular, we've seen problems where if we add a new environment and evaluate one component, e.g. "workflows",
// and another component, e.g. "code_search.jsonnet", doesn't have a default value for BUILD_ID, then ksonnet
// fails because BUILD_ID is undefined.
local prowDict = {
  BUILD_ID: "notset",
  BUILD_NUMBER: "notset",
  REPO_OWNER: "notset",
  REPO_NAME: "notset",
  JOB_NAME: "notset",
  JOB_TYPE: "notset",
  PULL_NUMBER: "notset",
  PULL_BASE_SHA: "notset",
} + util.listOfDictToMap(prowEnv);

local bucket = params.bucket;

// mountPath is the directory where the volume to store the test data
// should be mounted.
local mountPath = "/mnt/" + "test-data-volume";
// testDir is the root directory for all data for a particular test run.
local testDir = mountPath + "/" + params.name;
// outputDir is the directory to sync to GCS to contain the output for this job.
local outputDir = testDir + "/output";
local artifactsDir = outputDir + "/artifacts";

// Source directory where all repos should be checked out.
local srcRootDir = testDir + "/src";

// The directory containing the checked-out REPO_OWNER/REPO_NAME repo.
local srcDir = srcRootDir + "/" + prowDict.REPO_OWNER + "/" + prowDict.REPO_NAME;

// These variables control where the docker images get pushed and what
// tag to use.
local executeImage = "gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0";

// Value of the KUBECONFIG environment variable. This should be a full path.
local kubeConfig = testDir + "/.kube/kubeconfig";

// Namespace where tests should run.
local testNamespace = "xgboost-synthetic-" + prowDict["BUILD_ID"];

// The directory within the kubeflow_testing submodule containing
// py scripts to use.
local kubeflowTestingPy = srcRootDir + "/kubeflow/testing/py";

// Workflow template is the name of the workflow template; typically the name of the ks component.
// This is used as a label to make it easy to identify all Argo workflows created from a given
// template.
local workflow_template = "xgboost_synthetic";

// Build template is a template for constructing Argo step templates.
//
// step_name: Name for the template
// command: List to pass as the container command.
//
// We customize the defaults for each step in the workflow by modifying
// buildTemplate.argoTemplate
local buildTemplate = {
  // name & command variables should be overwritten for every test.
  // Other variables can be changed per step as needed.
  // They are hidden because they shouldn't be included in the Argo template.
  name: "",
  command:: "",
  image: params.stepImage,
  workingDir:: null,
  env_vars:: [],
  side_cars: [],
  pythonPath: kubeflowTestingPy,

  activeDeadlineSeconds: 1800,  // Set 30 minute timeout for each template

  local template = self,

  // Actual template for Argo
  argoTemplate: {
    name: template.name,
    metadata: {
      labels: prowDict + {
        workflow: params.name,
        workflow_template: workflow_template,
        step_name: template.name,
      },
    },
    container: {
      command: template.command,
      name: template.name,
      image: template.image,
      workingDir: template.workingDir,
      env: [
        {
          // Add the source directories to the python path.
          name: "PYTHONPATH",
          value: template.pythonPath,
        },
        {
          name: "GOOGLE_APPLICATION_CREDENTIALS",
          value: "/secret/gcp-credentials/key.json",
        },
        {
          name: "GITHUB_TOKEN",
          valueFrom: {
            secretKeyRef: {
              name: "github-token",
              key: "github_token",
            },
          },
        },
        {
          // We use a directory in our NFS share to store our kube config.
          // This way we can configure it on a single step and reuse it on subsequent steps.
          name: "KUBECONFIG",
          value: kubeConfig,
        },
      ] + prowEnv + template.env_vars,
      volumeMounts: [
        {
          name: params.dataVolume,
          mountPath: mountPath,
        },
        {
          name: "github-token",
          mountPath: "/secret/github-token",
        },
        {
          name: "gcp-credentials",
          mountPath: "/secret/gcp-credentials",
        },
      ],
    },
  },
};  // buildTemplate


// Create a list of dictionaries.
// Each item is a dictionary describing one step in the graph.
local dagTemplates = [
  {
    template: buildTemplate {
      name: "checkout",
      command:
        ["/usr/local/bin/checkout.sh", srcRootDir],

      env_vars: [{
        name: "EXTRA_REPOS",
        // TODO(jlewi): Pin to commit on master when #281 is checked in.
        value: "kubeflow/testing@HEAD:281",
      }],
    },
    dependencies: null,
  },  // checkout
  {
    // TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink
    // should be done by run_e2e_workflow.py
    template: buildTemplate {
      name: "create-pr-symlink",
      command: [
        "python",
        "-m",
        "kubeflow.testing.prow_artifacts",
        "--artifacts_dir=" + outputDir,
        "create_pr_symlink",
        "--bucket=" + params.bucket,
      ],
    },  // create-pr-symlink
    dependencies: ["checkout"],
  },  // create-pr-symlink
  {
    // Configure KUBECONFIG
    template: buildTemplate {
      name: "get-kubeconfig",
      command: util.buildCommand([
        [
          "gcloud",
          "auth",
          "activate-service-account",
          "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
        ],
        [
          "gcloud",
          "--project=" + params.kfProject,
          "container",
          "clusters",
          "get-credentials",
          "--zone=" + params.kfZone,
          params.kfCluster,
        ]]
      ),
    },
    dependencies: ["checkout"],
  },  // get-kubeconfig
  {
    // Create the namespace.
    // TODO(jlewi): We should add some sort of retry.
    template: buildTemplate {
      name: "create-namespace",
      command: util.buildCommand([
        [
          "echo",
          "KUBECONFIG=",
          "${KUBECONFIG}",
        ],
        [
          "gcloud",
          "auth",
          "activate-service-account",
          "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
        ],
        [
          "kubectl",
          "config",
          "current-context",
        ],
        [
          "kubectl",
          "create",
          "namespace",
          testNamespace,
        ],
        // Copy the GCP secret from the kubeflow namespace to the test namespace.
        [
          srcDir + "/test/copy_secret.sh",
          "kubeflow",
          testNamespace,
          "user-gcp-sa",
        ]]
      ),
    },
    dependencies: ["get-kubeconfig"],
  },  // create-namespace
  {
    template: buildTemplate {
      name: "execute-notebook",
      command: [
        "python3",
        "xgboost_test.py",
        "--name=" + "xgboost-test-" + prowDict["BUILD_ID"],
        "--namespace=" + testNamespace,
        "--image=" + executeImage,
        "--jobType=" + prowDict["JOB_TYPE"],
        "--pullNumber=" + prowDict["PULL_NUMBER"],
        "--pullBaseSHA=" + prowDict["PULL_BASE_SHA"],
        "--cluster=" + params.kfCluster,
      ],
      pythonPath: kubeflowTestingPy,
      workingDir: srcDir + "/xgboost_synthetic/testing",
    },
    dependencies: ["create-namespace"],
  },  // execute-notebook
];

// Dag defines the tasks in the graph.
local dag = {
  name: "e2e",
  // Construct tasks from the templates;
  // we will give the steps the same name as the template.
  dag: {
    tasks: util.toArgoTaskList(dagTemplates),
  },
};  // dag

// Define templates for the steps to be performed when the
// test exits.

local deleteTemplates = if params.deleteNamespace then
  [
    {
      // Delete the namespace.
      // TODO(jlewi): We should add some sort of retry.
      template: buildTemplate {
        name: "delete-namespace",
        command: util.buildCommand([
          [
            "gcloud",
            "auth",
            "activate-service-account",
            "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
          ],
          [
            "kubectl",
            "delete",
            "namespace",
            testNamespace,
          ]]
        ),
      },
    },  // delete-namespace
  ] else [];

local exitTemplates =
  deleteTemplates +
  [
    {
      // Copy artifacts to GCS for gubernator.
      // TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink
      // should be done by run_e2e_workflow.py
      template: buildTemplate {
        name: "copy-artifacts",
        command: [
          "python",
          "-m",
          "kubeflow.testing.prow_artifacts",
          "--artifacts_dir=" + outputDir,
          "copy_artifacts",
          "--bucket=" + bucket,
        ],
      },  // copy-artifacts
    },
    {
      // Delete the test directory in NFS.
      // TODO(https://github.com/kubeflow/testing/issues/256): Use an external process to do this.
      template:
        buildTemplate {
          name: "test-dir-delete",
          command: [
            "rm",
            "-rf",
            testDir,
          ],

          argoTemplate+: {
            retryStrategy: {
              limit: 3,
            },
          },
        },  // test-dir-delete
      dependencies: ["copy-artifacts"] + if params.deleteNamespace then ["delete-namespace"] else [],
    },
  ];

// Create a DAG representing the set of steps to execute on exit.
local exitDag = {
  name: "exit-handler",
  // Construct tasks from the templates;
  // we will give the steps the same name as the template.
  dag: {
    tasks: util.toArgoTaskList(exitTemplates),
  },
};

// A list of templates for the actual steps.
local stepTemplates = std.map(function(i) i.template.argoTemplate,
                              dagTemplates) +
                      std.map(function(i) i.template.argoTemplate,
                              exitTemplates);

// Define the Argo Workflow.
local workflow = {
  apiVersion: "argoproj.io/v1alpha1",
  kind: "Workflow",
  metadata: {
    name: params.name,
    namespace: env.namespace,
    labels: prowDict + {
      workflow: params.name,
      workflow_template: workflow_template,
    },
  },
  spec: {
    entrypoint: "e2e",
    // Have argo garbage collect old workflows, otherwise we overload the API server.
    ttlSecondsAfterFinished: 7 * 24 * 60 * 60,
    volumes: [
      {
        name: "github-token",
        secret: {
          secretName: "github-token",
        },
      },
      {
        name: "gcp-credentials",
        secret: {
          secretName: "kubeflow-testing-credentials",
        },
      },
      {
        name: params.dataVolume,
        persistentVolumeClaim: {
          claimName: params.nfsVolumeClaim,
        },
      },
    ],  // volumes

    // onExit specifies the template that should always run when the workflow completes.
    onExit: "exit-handler",

    // The templates will be a combination of the templates
    // defining the dags executed by Argo as well as the templates
    // for the individual steps.
    templates: [dag, exitDag] + stepTemplates,  // templates
  },  // spec
};  // workflow

std.prune(k.core.v1.list.new([workflow]))
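The workflow above leans on two helpers from kubeflow/testing's util.libsonnet: util.parseEnv splits the comma-separated prow_env string into name/value pairs, and util.listOfDictToMap folds those pairs into the prowDict defaults. A rough Python illustration of what those helpers do (the Python names are mine, not part of the PR):

```python
# Illustration only: approximate Python equivalents of util.parseEnv and
# util.listOfDictToMap from kubeflow/testing's util.libsonnet.
def parse_env(prow_env):
  """Parse "k1=v1,k2=v2" into [{"name": k1, "value": v1}, ...]."""
  pairs = [kv.split("=", 1) for kv in prow_env.split(",") if kv]
  return [{"name": k, "value": v} for k, v in pairs]

def list_of_dict_to_map(env_list):
  """Collapse the name/value list into a plain dict, as prowDict expects."""
  return {e["name"]: e["value"] for e in env_list}

env = parse_env("BUILD_NUMBER=997a,BUILD_ID=997a,JOB_TYPE=presubmit")
assert list_of_dict_to_map(env)["JOB_TYPE"] == "presubmit"
```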
@@ -44,7 +44,7 @@ build: build-dir

 build-gcb: build-dir
 	gcloud builds submit --machine-type=n1-highcpu-32 --project=$(PROJECT) --tag=$(IMG):$(TAG) \
-		--timeout=3600 ./build
+		--timeout=3600 ./.build
 	@echo Built $(IMG):$(TAG)

 # Build but don't attach the latest tag. This allows manual testing/inspection of the image
@@ -692,7 +692,7 @@
    ],
    "source": [
     "from fairing.builders import cluster\n",
-    "preprocessor = ConvertNotebookPreprocessorWithFire(\"ModelServe\")\n",
+    "preprocessor = ConvertNotebookPreprocessorWithFire(class_name='ModelServe', notebook_file='build-train-deploy.ipynb')\n",
     "\n",
     "if not preprocessor.input_files:\n",
     "    preprocessor.input_files = set()\n",
@@ -10,3 +10,4 @@ retrying
 seldon-core
 sklearn
 xgboost
+tornado>=6.0.3
@@ -0,0 +1,43 @@
import tempfile
import logging
import os
import subprocess


logger = logging.getLogger(__name__)

def prepare_env():
  subprocess.check_call(["pip3", "install", "-U", "papermill"])
  subprocess.check_call(["pip3", "install", "-r", "../requirements.txt"])


def execute_notebook(notebook_path, parameters=None):
  temp_dir = tempfile.mkdtemp()
  notebook_output_path = os.path.join(temp_dir, "out.ipynb")
  papermill.execute_notebook(notebook_path, notebook_output_path,
                             cwd=os.path.dirname(notebook_path),
                             parameters=parameters,
                             log_output=True)
  return notebook_output_path

def run_notebook_test(notebook_path, expected_messages, parameters=None):
  output_path = execute_notebook(notebook_path, parameters=parameters)
  with open(output_path, "r") as f:
    actual_output = f.read()
  for expected_message in expected_messages:
    if not expected_message in actual_output:
      logger.error(actual_output)
      assert False, "Unable to find expected message in output: " + expected_message

if __name__ == "__main__":
  prepare_env()
  # papermill is imported only after prepare_env() has installed it; this
  # module-level import also binds the name used by execute_notebook above.
  import papermill #pylint: disable=import-error
  FILE_DIR = os.path.dirname(__file__)
  NOTEBOOK_REL_PATH = "../build-train-deploy.ipynb"
  NOTEBOOK_ABS_PATH = os.path.normpath(os.path.join(FILE_DIR, NOTEBOOK_REL_PATH))
  EXPECTED_MSGS = [
    "Finished upload of",
    "Model export success: mockup-model.dat",
    "Pod started running True",
    "Cluster endpoint: http:",
  ]
  run_notebook_test(NOTEBOOK_ABS_PATH, EXPECTED_MSGS)
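For debugging the notebook check locally, the papermill call inside execute_notebook above can be reproduced on its own. A minimal sketch, assuming papermill and the example's requirements are already installed and that it is run from this testing directory:

```python
# Minimal standalone equivalent of execute_notebook() above; assumes
# `pip3 install papermill` and ../requirements.txt have been installed.
import os
import tempfile

import papermill

output_path = os.path.join(tempfile.mkdtemp(), "out.ipynb")
papermill.execute_notebook(
    "../build-train-deploy.ipynb",  # notebook under test
    output_path,
    cwd="..",             # execute with the notebook's own directory as cwd
    log_output=True,      # stream cell output so failures are visible
)
print("Executed notebook written to", output_path)
```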
@@ -0,0 +1,51 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: $(job_name)
spec:
  template:
    metadata:
      labels:
        app: xgboost-synthetics-testing
    spec:
      restartPolicy: Never
      securityContext:
        runAsUser: 0
      initContainers:
      # This init container checks out the source code.
      - command:
        - /usr/local/bin/checkout_repos.sh
        - --repos=kubeflow/examples@$(CHECK_TAG)
        - --src_dir=/src
        - --depth=all
        name: checkout
        image: gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4
        volumeMounts:
        - mountPath: /src
          name: src
        env:
        - name: CHECK_TAG
          valueFrom:
            configMapKeyRef:
              name: xgb-notebooks-tests
              key: checkTag
      containers:
      - name: executing-notebooks
        image: execute-image
        command: ["python3", "execute_notebook.py"]
        workingDir: /src/kubeflow/examples/xgboost_synthetic/testing
        volumeMounts:
        - mountPath: /var/secrets
          name: user-gcp-sa
          readOnly: true
        - mountPath: /src
          name: src
        env:
        - name: GOOGLE_APPLICATION_CREDENTIALS
          value: /var/secrets/user-gcp-sa.json
      volumes:
      - name: user-gcp-sa
        secret:
          secretName: user-gcp-sa
      - name: src
        emptyDir: {}
@@ -0,0 +1,28 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: kubeflow

generatorOptions:
  disableNameSuffixHash: true

resources:
- job.yaml
- role.yaml
- rolebinding.yaml

configurations:
- params.yaml

configMapGenerator:
- name: xgb-notebooks-tests
  literals:

vars:
- fieldref:
    fieldPath: data.name
  name: job_name
  objref:
    apiVersion: v1
    kind: ConfigMap
    name: xgb-notebooks-tests
@@ -0,0 +1,3 @@
varReference:
- path: metadata/name
  kind: Job
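Taken together, the configMapGenerator, the job_name var, and this varReference form a small templating chain: the test driver writes name=&lt;job name&gt; into the xgb-notebooks-tests ConfigMap, the var job_name reads data.name from that ConfigMap, and the varReference tells kustomize to substitute $(job_name) into the Job's metadata/name. A toy Python rendering of that substitution (illustration only, not how kustomize is implemented):

```python
# Toy illustration of the kustomize var substitution chain used here.
configmap = {"data": {"name": "xgboost-test-997a"}}  # from `kustomize edit add configmap`
variables = {"job_name": configmap["data"]["name"]}  # var job_name <- data.name

job_manifest = "metadata:\n  name: $(job_name)\n"    # from job.yaml
for var, value in variables.items():
  job_manifest = job_manifest.replace("$(%s)" % var, value)

print(job_manifest)  # metadata:\n  name: xgboost-test-997a
```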
@@ -0,0 +1,37 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  labels:
    app: xgboost-synthetics-testing
  name: xgboost-synthetics-testing-role
rules:
- apiGroups:
  - ""
  resources:
  - pods
  - pods/log
  - secrets
  - services
  verbs:
  - '*'
- apiGroups:
  - ""
  - apps
  - extensions
  resources:
  - deployments
  - replicasets
  verbs:
  - '*'
- apiGroups:
  - kubeflow.org
  resources:
  - '*'
  verbs:
  - '*'
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - '*'
|
@ -0,0 +1,14 @@
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: xgboost-synthetics-testing
|
||||||
|
name: xgboost-synthetics-testing-role-binding
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: Role
|
||||||
|
name: xgboost-synthetics-testing-role
|
||||||
|
subjects:
|
||||||
|
- kind: Group
|
||||||
|
name: system:serviceaccounts
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
|
@@ -0,0 +1,100 @@
import argparse
import logging
import os
import subprocess
import time

from kubeflow.testing import util

def create_job(args, app_dir): #pylint:disable=redefined-outer-name
  '''Generate the Job and submit it.'''
  util.run(['gcloud', 'auth', 'activate-service-account',
            "--key-file=/secret/gcp-credentials/key.json"], cwd=app_dir)
  util.run(['gcloud', '--project=kubeflow-ci-deployment', 'container',
            "clusters", "get-credentials", "--zone=us-east1-b", args.cluster], cwd=app_dir)

  configmap = 'xgb-notebooks-tests'
  util.run(['kustomize', 'edit', 'add', 'configmap', configmap,
            '--from-literal=name=' + args.name], cwd=app_dir)
  # For presubmit, set the checkout tag to HEAD:$(PULL_NUMBER); otherwise set it to PULL_BASE_SHA.
  if args.jobType == 'presubmit':
    util.run(['kustomize', 'edit', 'add', 'configmap', configmap,
              '--from-literal=checkTag=HEAD:' + args.pullNumber], cwd=app_dir)
  else:
    util.run(['kustomize', 'edit', 'add', 'configmap', configmap,
              '--from-literal=checkTag=' + args.pullBaseSHA], cwd=app_dir)
  util.run(['kustomize', 'edit', 'set', 'namespace', args.namespace], cwd=app_dir)
  util.run(['kustomize', 'edit', 'set', 'image', 'execute-image=' + args.image], cwd=app_dir)
  util.run(['kustomize', 'build', app_dir, '-o', 'generated.yaml'], cwd=app_dir)
  util.run(['kubectl', 'apply', '-f', 'generated.yaml'], cwd=app_dir)
  logging.info("Created job %s in namespace %s", args.name, args.namespace)

def get_pod_logs(name, namespace, app_dir): #pylint:disable=redefined-outer-name
  '''Logs cannot be fetched via the k8s Python API, so use the kubectl command instead.'''
  logging.info("Getting pod %s logs...", name)
  util.run(['kubectl', 'logs', name, '-n', namespace], cwd=app_dir)

def check_job_status(namespace, app_dir): #pylint:disable=redefined-outer-name
  '''The job cannot be fetched via the k8s Python API, so use kubectl to check its status.'''
  succeeded = False
  pod_info, pod_name, pod_status = '', '', ''
  for _ in range(0, 30):
    time.sleep(60)
    sub_cmd = "kubectl get pod -n " + namespace + " | grep -m1 xgboost-test"
    pod_info = subprocess.run(sub_cmd,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              shell=True,
                              universal_newlines=True)
    # We need at least the NAME, READY, and STATUS columns of the kubectl output.
    if len(str(pod_info.stdout).split()) >= 3:
      pod_name = str(pod_info.stdout).split()[0]
      pod_status = str(pod_info.stdout).split()[2]

    if pod_name:
      if pod_status == "Pending":
        logging.info("Pod %s is Pending.", pod_name)
      elif pod_status == "Running":
        logging.info("Pod %s is Running.", pod_name)
      elif pod_status == "Completed":
        logging.info("Pod %s is Completed.", pod_name)
        get_pod_logs(pod_name, namespace, app_dir)
        succeeded = True
        break
      elif pod_status == "Error":
        get_pod_logs(pod_name, namespace, app_dir)
        raise RuntimeError("Failed to execute notebook.")
      else:
        logging.warning("Pod %s status %s.", pod_name, pod_status)
    else:
      logging.warning("Cannot get the pod name; retrying after 60 seconds.")

  if not succeeded:
    raise RuntimeError("Timed out waiting for the notebook-executing pod after 30 minutes.")


if __name__ == "__main__":

  logging.basicConfig(level=logging.INFO)

  parser = argparse.ArgumentParser()
  parser.add_argument(
    "--name", help="The name of the application to deploy", type=str, required=True)
  parser.add_argument(
    "--namespace", help="The namespace for the application", type=str, required=True)
  parser.add_argument(
    "--image", help="Image name for the application", type=str, required=True)
  parser.add_argument(
    "--pullNumber", help="The PR number", type=str, required=True)
  parser.add_argument(
    "--pullBaseSHA", help="The pull base SHA", type=str, required=True)
  parser.add_argument(
    "--jobType", help="The job type, such as presubmit or postsubmit", type=str, required=True)
  parser.add_argument(
    "--cluster", help="The cluster in which the application runs", type=str, required=True)

  app_dir = os.path.dirname(__file__)
  app_dir = os.path.abspath(app_dir)

  args = parser.parse_args()
  create_job(args, app_dir)
  check_job_status(args.namespace, app_dir)
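The docstrings above note that the authors fall back to kubectl for pod and job status. For comparison only, a polling loop using the official kubernetes Python client might look roughly like the sketch below; it assumes kubeconfig credentials are already configured and is not the approach this PR takes:

```python
# Sketch only: poll pod status with the official `kubernetes` client instead of
# shelling out to kubectl. Assumes `pip install kubernetes` and working
# credentials; the function name and prefix argument are illustrative.
import time

from kubernetes import client, config

def wait_for_test_pod(namespace, prefix="xgboost-test", timeout_s=1800):
  config.load_kube_config()  # or config.load_incluster_config() inside a pod
  v1 = client.CoreV1Api()
  deadline = time.time() + timeout_s
  while time.time() < deadline:
    for pod in v1.list_namespaced_pod(namespace).items:
      if pod.metadata.name.startswith(prefix):
        phase = pod.status.phase  # Pending / Running / Succeeded / Failed
        if phase == "Succeeded":
          return pod.metadata.name
        if phase == "Failed":
          raise RuntimeError("Pod %s failed" % pod.metadata.name)
    time.sleep(60)
  raise RuntimeError("Timed out waiting for pod with prefix " + prefix)
```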