mirror of https://github.com/kubeflow/examples.git
				
				
				
			add testing for xgboost_synthetic (#633)
This commit is contained in:
		
							parent
							
								
									e37a9d7acd
								
							
						
					
					
						commit
						4f8cf87d4f
					
				|  | @ -61,3 +61,14 @@ workflows: | |||
|       - postsubmit | ||||
|     include_dirs: | ||||
|       - pytorch_mnist/* | ||||
| 
 | ||||
|   # E2E test for xgboost-synthetic | ||||
|   - app_dir: kubeflow/examples/test/workflows | ||||
|     component: xgboost_synthetic | ||||
|     name: xgboost2 | ||||
|     job_types: | ||||
|       - periodic | ||||
|       - presubmit | ||||
|       - postsubmit | ||||
|     include_dirs: | ||||
|       - xgboost_synthetic/* | ||||
|  |  | |||
|  | @ -36,6 +36,12 @@ | |||
|           namespace: "kubeflow-test-infra", | ||||
|           prow_env: "BUILD_NUMBER=997a,BUILD_ID=997a,JOB_NAME=kubeflow-examples-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=374,REPO_NAME=examples,REPO_OWNER=kubeflow", | ||||
|         }, | ||||
|     xgboost_synthetic: { | ||||
|       bucket: "kubeflow-ci_temp", | ||||
|       name: "kubeflow-xgboost_synthetic", | ||||
|       namespace: "kubeflow-test-infra", | ||||
|       prow_env: "BUILD_NUMBER=997a,BUILD_ID=997a,JOB_NAME=kubeflow-examples-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=374,REPO_NAME=examples,REPO_OWNER=kubeflow", | ||||
|     }, | ||||
|     workflows: { | ||||
|       bucket: "kubeflow-ci_temp", | ||||
|       name: "kubeflow-examples-presubmit-test-374-6e32", | ||||
|  |  | |||
|  | @ -0,0 +1,439 @@ | |||
| // Test workflow for the XGBoost synthetic example (xgboost_synthetic). | ||||
| // | ||||
| local env = std.extVar("__ksonnet/environments"); | ||||
| local overrides = std.extVar("__ksonnet/params").components.xgboost_synthetic; | ||||
| 
 | ||||
| local k = import "k.libsonnet"; | ||||
| local util = import "util.libsonnet"; | ||||
| 
 | ||||
| // Define default params and then combine them with any overrides | ||||
| local defaultParams = { | ||||
|   // local nfsVolumeClaim: "kubeflow-testing", | ||||
|   nfsVolumeClaim: "nfs-external", | ||||
| 
 | ||||
|   // The name to use for the volume to use to contain test data. | ||||
|   dataVolume: "kubeflow-test-volume", | ||||
| 
 | ||||
|   // Default step image: | ||||
|   stepImage: "gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4", | ||||
| 
 | ||||
|   // Which Kubeflow cluster to run the xgboost-synthetic test job on. | ||||
|   kfProject: "kubeflow-ci-deployment", | ||||
|   kfZone: "us-east1-b", | ||||
|   kfCluster: "kf-vmaster-n00", | ||||
| 
 | ||||
|   // The bucket where the model should be written | ||||
|   // This needs to be writable by the GCP service account in the Kubeflow cluster (not the test cluster) | ||||
|   modelBucket: "kubeflow-ci_temp", | ||||
| 
 | ||||
|   // Whether to delete the namespace at the end. | ||||
|   // Leaving the namespace around can be useful for debugging. | ||||
|   // | ||||
|   // TODO(jlewi): We should consider running a cronjob to GC namespaces. | ||||
|   // But if we leave namespaces up; then we end up leaving the servers up which | ||||
|   // uses up CPU. | ||||
|   // | ||||
|   deleteNamespace: true, | ||||
| }; | ||||
| 
 | ||||
| local params = defaultParams + overrides; | ||||
| 
 | ||||
| local prowEnv = util.parseEnv(params.prow_env); | ||||
| 
 | ||||
| // Create a dictionary of the different prow variables so we can refer to them in the workflow. | ||||
| // | ||||
| // Important: We want to initialize all variables we reference to some value. If we don't | ||||
| // and we reference a variable which doesn't get set then we get very hard to debug failure messages. | ||||
| // In particular, we've seen problems where if we add a new environment and evaluate one component eg. "workflows" | ||||
| // and another component e.g "code_search.jsonnet" doesn't have a default value for BUILD_ID then ksonnet | ||||
| // fails because BUILD_ID is undefined. | ||||
| local prowDict = { | ||||
| 	BUILD_ID: "notset", | ||||
| 	BUILD_NUMBER: "notset", | ||||
| 	REPO_OWNER: "notset", | ||||
| 	REPO_NAME: "notset", | ||||
| 	JOB_NAME: "notset", | ||||
| 	JOB_TYPE: "notset", | ||||
| 	PULL_NUMBER: "notset", | ||||
| 	PULL_BASE_SHA: "notset", | ||||
|  } + util.listOfDictToMap(prowEnv); | ||||
| 
 | ||||
| local bucket = params.bucket; | ||||
| 
 | ||||
| // mountPath is the directory where the volume to store the test data | ||||
| // should be mounted. | ||||
| local mountPath = "/mnt/" + "test-data-volume"; | ||||
| // testDir is the root directory for all data for a particular test run. | ||||
| local testDir = mountPath + "/" + params.name; | ||||
| // outputDir is the directory to sync to GCS to contain the output for this job. | ||||
| local outputDir = testDir + "/output"; | ||||
| local artifactsDir = outputDir + "/artifacts"; | ||||
| 
 | ||||
| // Source directory where all repos should be checked out | ||||
| local srcRootDir = testDir + "/src"; | ||||
| 
 | ||||
| // The directory containing the checked-out repo under test (REPO_OWNER/REPO_NAME from the prow environment). | ||||
| local srcDir = srcRootDir + "/" + prowDict.REPO_OWNER + "/" + prowDict.REPO_NAME; | ||||
| 
 | ||||
| // Image in which the notebook is executed; it is passed to xgboost_test.py | ||||
| // through the --image flag. | ||||
| local executeImage = "gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0"; | ||||
| 
 | ||||
| // value of KUBECONFIG environment variable. This should be  a full path. | ||||
| local kubeConfig = testDir + "/.kube/kubeconfig"; | ||||
| 
 | ||||
| // Namespace where tests should run | ||||
| local testNamespace = "xgboost-synthetic-" + prowDict["BUILD_ID"]; | ||||
| 
 | ||||
| // The directory within the kubeflow_testing submodule containing | ||||
| // py scripts to use. | ||||
| local kubeflowTestingPy = srcRootDir + "/kubeflow/testing/py"; | ||||
| 
 | ||||
| // Workflow template is the name of the workflow template; typically the name of the ks component. | ||||
| // This is used as a label to make it easy to identify all Argo workflows created from a given | ||||
| // template. | ||||
| local workflow_template = "xgboost_synthetic"; | ||||
| 
 | ||||
| // Build template is a template for constructing Argo step templates. | ||||
| // | ||||
| // step_name: Name for the template | ||||
| // command: List to pass as the container command. | ||||
| // | ||||
| // We customize the defaults for each step in the workflow by modifying | ||||
| // buildTemplate.argoTemplate | ||||
| local buildTemplate = { | ||||
|   // name & command variables should be overwritten for every test. | ||||
|   // Other variables can be changed per step as needed. | ||||
|   // They are hidden because they shouldn't be included in the Argo template | ||||
|   name: "", | ||||
|   command:: "", | ||||
|   image: params.stepImage, | ||||
|   workingDir:: null, | ||||
|   env_vars:: [], | ||||
|   side_cars: [], | ||||
|   pythonPath: kubeflowTestingPy, | ||||
| 
 | ||||
|   activeDeadlineSeconds: 1800,  // Set 30 minute timeout for each template | ||||
| 
 | ||||
|   local template = self, | ||||
| 
 | ||||
|   // Actual template for Argo | ||||
|   argoTemplate: { | ||||
|     name: template.name, | ||||
|     metadata: { | ||||
|       labels: prowDict + { | ||||
|         workflow: params.name, | ||||
|         workflow_template: workflow_template, | ||||
|         step_name: template.name, | ||||
|       }, | ||||
|     }, | ||||
|     container: { | ||||
|       command: template.command, | ||||
|       name: template.name, | ||||
|       image: template.image, | ||||
|       workingDir: template.workingDir, | ||||
|       env: [ | ||||
|         { | ||||
|           // Add the source directories to the python path. | ||||
|           name: "PYTHONPATH", | ||||
|           value: template.pythonPath, | ||||
|         }, | ||||
|         { | ||||
|           name: "GOOGLE_APPLICATION_CREDENTIALS", | ||||
|           value: "/secret/gcp-credentials/key.json", | ||||
|         }, | ||||
|         { | ||||
|           name: "GITHUB_TOKEN", | ||||
|           valueFrom: { | ||||
|             secretKeyRef: { | ||||
|               name: "github-token", | ||||
|               key: "github_token", | ||||
|             }, | ||||
|           }, | ||||
|         }, | ||||
|         { | ||||
|           // We use a directory in our NFS share to store our kube config. | ||||
|           // This way we can configure it on a single step and reuse it on subsequent steps. | ||||
|           name: "KUBECONFIG", | ||||
|           value: kubeConfig, | ||||
|         }, | ||||
|       ] + prowEnv + template.env_vars, | ||||
|       volumeMounts: [ | ||||
|         { | ||||
|           name: params.dataVolume, | ||||
|           mountPath: mountPath, | ||||
|         }, | ||||
|         { | ||||
|           name: "github-token", | ||||
|           mountPath: "/secret/github-token", | ||||
|         }, | ||||
|         { | ||||
|           name: "gcp-credentials", | ||||
|           mountPath: "/secret/gcp-credentials", | ||||
|         }, | ||||
|       ], | ||||
|     }, | ||||
|   }, | ||||
| };  // buildTemplate | ||||
| 
 | ||||
| 
 | ||||
| // Create a list of dictionaries. | ||||
| // Each item is a dictionary describing one step in the graph. | ||||
| local dagTemplates = [ | ||||
|   { | ||||
|     template: buildTemplate { | ||||
|       name: "checkout", | ||||
|       command: | ||||
|         ["/usr/local/bin/checkout.sh", srcRootDir], | ||||
| 
 | ||||
|       env_vars: [{ | ||||
|         name: "EXTRA_REPOS", | ||||
|         // TODO(jlewi): Pin to commit on master when #281 is checked in. | ||||
|         value: "kubeflow/testing@HEAD:281", | ||||
|       }], | ||||
|     }, | ||||
|     dependencies: null, | ||||
|   },  // checkout | ||||
|   { | ||||
|     // TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink | ||||
|     // should be done by run_e2e_workflow.py | ||||
|     template: buildTemplate { | ||||
|       name: "create-pr-symlink", | ||||
|       command: [ | ||||
|         "python", | ||||
|         "-m", | ||||
|         "kubeflow.testing.prow_artifacts", | ||||
|         "--artifacts_dir=" + outputDir, | ||||
|         "create_pr_symlink", | ||||
|         "--bucket=" + params.bucket, | ||||
|       ], | ||||
|     },  // create-pr-symlink | ||||
|     dependencies: ["checkout"], | ||||
|   },  // create-pr-symlink | ||||
|   { | ||||
|     // Configure KUBECONFIG | ||||
|     template: buildTemplate { | ||||
|       name: "get-kubeconfig", | ||||
|       command: util.buildCommand([ | ||||
|       [ | ||||
|         "gcloud", | ||||
|         "auth", | ||||
|         "activate-service-account", | ||||
|         "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}", | ||||
|       ], | ||||
|       [ | ||||
|         "gcloud", | ||||
|         "--project=" + params.kfProject, | ||||
|         "container", | ||||
|         "clusters", | ||||
|         "get-credentials", | ||||
|         "--zone=" + params.kfZone, | ||||
|         params.kfCluster, | ||||
|       ]] | ||||
|       ), | ||||
|     }, | ||||
|     dependencies: ["checkout"], | ||||
|   }, // get-kubeconfig | ||||
|   { | ||||
|     // Create the namespace | ||||
|     // TODO(jlewi): We should add some sort of retry. | ||||
|     template: buildTemplate { | ||||
|       name: "create-namespace", | ||||
|       command: util.buildCommand([ | ||||
|       [ | ||||
|         "echo", | ||||
|         "KUBECONFIG=", | ||||
|         "${KUBECONFIG}", | ||||
|       ], | ||||
|       [ | ||||
|         "gcloud", | ||||
|         "auth", | ||||
|         "activate-service-account", | ||||
|         "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}", | ||||
|       ], | ||||
|       [ | ||||
|         "kubectl", | ||||
|         "config" , | ||||
|         "current-context", | ||||
|       ], | ||||
|       [ | ||||
|         "kubectl", | ||||
|         "create", | ||||
|         "namespace", | ||||
|         testNamespace, | ||||
|       ], | ||||
|       # Copy the GCP secret from the kubeflow namespace to the test namespace | ||||
|       [ | ||||
|         srcDir + "/test/copy_secret.sh", | ||||
|         "kubeflow", | ||||
|         testNamespace, | ||||
|         "user-gcp-sa", | ||||
|       ]] | ||||
|       ), | ||||
|     }, | ||||
|     dependencies: ["get-kubeconfig"], | ||||
|   }, // create-namespace | ||||
|   { | ||||
|     template: buildTemplate { | ||||
|       name: "execute-notebook", | ||||
|       command: [ | ||||
|         "python3", | ||||
|         "xgboost_test.py", | ||||
|         "--name=" + "xgboost-test-" + prowDict["BUILD_ID"], | ||||
|         "--namespace=" + testNamespace, | ||||
|         "--image=" + executeImage, | ||||
|         "--jobType=" + prowDict["JOB_TYPE"], | ||||
|         "--pullNumber=" + prowDict["PULL_NUMBER"], | ||||
|         "--pullBaseSHA=" + prowDict["PULL_BASE_SHA"], | ||||
|         "--cluster=" + params.kfCluster, | ||||
|       ], | ||||
|       pythonPath: kubeflowTestingPy, | ||||
|       workingDir: srcDir + "/xgboost_synthetic/testing", | ||||
|     }, | ||||
|     dependencies: ["create-namespace"], | ||||
|   },  // execute-notebook | ||||
| ]; | ||||
| 
 | ||||
| // Dag defines the tasks in the graph | ||||
| local dag = { | ||||
|   name: "e2e", | ||||
|   // Construct tasks from the templates | ||||
|   // we will give the steps the same name as the template | ||||
|   dag: { | ||||
|     tasks: util.toArgoTaskList(dagTemplates), | ||||
|   }, | ||||
| };  // dag | ||||
| 
 | ||||
| // Define templates for the steps to be performed when the | ||||
| // test exits | ||||
| 
 | ||||
| local deleteTemplates = if params.deleteNamespace then | ||||
|  [ | ||||
|     { | ||||
|       // Delete the namespace | ||||
|       // TODO(jlewi): We should add some sort of retry. | ||||
|       template: buildTemplate { | ||||
|         name: "delete-namespace", | ||||
|         command: util.buildCommand([ | ||||
|         [ | ||||
|           "gcloud", | ||||
|           "auth", | ||||
|           "activate-service-account", | ||||
|           "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}", | ||||
|         ], | ||||
|         [ | ||||
|           "kubectl", | ||||
|           "delete", | ||||
|           "namespace", | ||||
|           testNamespace, | ||||
|         ]] | ||||
|         ), | ||||
|       }, | ||||
|     }, // delete-namespace | ||||
|   ] else []; | ||||
| 
 | ||||
| local exitTemplates = | ||||
|   deleteTemplates + | ||||
|   [ | ||||
|     { | ||||
|       // Copy artifacts to GCS for gubernator. | ||||
|       // TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink | ||||
|       // should be done by run_e2e_workflow.py | ||||
|       template: buildTemplate { | ||||
|         name: "copy-artifacts", | ||||
|         command: [ | ||||
|           "python", | ||||
|           "-m", | ||||
|           "kubeflow.testing.prow_artifacts", | ||||
|           "--artifacts_dir=" + outputDir, | ||||
|           "copy_artifacts", | ||||
|           "--bucket=" + bucket, | ||||
|         ], | ||||
|       },  // copy-artifacts, | ||||
|     }, | ||||
|     { | ||||
|       // Delete the test directory in NFS. | ||||
|       // TODO(https://github.com/kubeflow/testing/issues/256): Use an external process to do this. | ||||
|       template: | ||||
|         buildTemplate { | ||||
|           name: "test-dir-delete", | ||||
|           command: [            | ||||
|             "rm", | ||||
|             "-rf", | ||||
|             testDir, | ||||
|           ], | ||||
| 
 | ||||
|           argoTemplate+: { | ||||
|         	  retryStrategy: { | ||||
|         	  	limit: 3, | ||||
|         	  }, | ||||
|           }, | ||||
|         },  // test-dir-delete | ||||
|       dependencies: ["copy-artifacts"] + if params.deleteNamespace then ["delete-namespace"] else [], | ||||
|     }, | ||||
|   ]; | ||||
| 
 | ||||
| // Create a DAG representing the set of steps to execute on exit | ||||
| local exitDag = { | ||||
|   name: "exit-handler", | ||||
|   // Construct tasks from the templates | ||||
|   // we will give the steps the same name as the template | ||||
|   dag: {     | ||||
|     tasks: util.toArgoTaskList(exitTemplates),     | ||||
|   }, | ||||
| }; | ||||
| 
 | ||||
| // A list of templates for the actual steps | ||||
| local stepTemplates = std.map(function(i) i.template.argoTemplate | ||||
|                               , dagTemplates) + | ||||
|                       std.map(function(i) i.template.argoTemplate | ||||
|                               , exitTemplates); | ||||
| 
 | ||||
| // Define the Argo Workflow. | ||||
| local workflow = { | ||||
|   apiVersion: "argoproj.io/v1alpha1", | ||||
|   kind: "Workflow", | ||||
|   metadata: { | ||||
|     name: params.name, | ||||
|     namespace: env.namespace, | ||||
|     labels: prowDict + { | ||||
|         workflow: params.name, | ||||
|         workflow_template: workflow_template, | ||||
|     }, | ||||
|   }, | ||||
|   spec: { | ||||
|     entrypoint: "e2e", | ||||
|     // Have argo garbage collect old workflows otherwise we overload the API server. | ||||
|     ttlSecondsAfterFinished: 7 * 24 * 60 * 60, | ||||
|     volumes: [ | ||||
|       { | ||||
|         name: "github-token", | ||||
|         secret: { | ||||
|           secretName: "github-token", | ||||
|         }, | ||||
|       }, | ||||
|       { | ||||
|         name: "gcp-credentials", | ||||
|         secret: { | ||||
|           secretName: "kubeflow-testing-credentials", | ||||
|         }, | ||||
|       }, | ||||
|       { | ||||
|         name: params.dataVolume, | ||||
|         persistentVolumeClaim: { | ||||
|           claimName: params.nfsVolumeClaim, | ||||
|         }, | ||||
|       }, | ||||
|     ],  // volumes | ||||
| 
 | ||||
|     // onExit specifies the template that should always run when the workflow completes. | ||||
|     onExit: "exit-handler", | ||||
| 
 | ||||
|     // The templates will be a combination of the templates | ||||
|     // defining the dags executed by Argo as well as the templates | ||||
|     // for the individual steps. | ||||
|     templates: [dag, exitDag] + stepTemplates,  // templates | ||||
|   },  // spec | ||||
| };  // workflow | ||||
| 
 | ||||
| std.prune(k.core.v1.list.new([workflow])) | ||||
|  | @ -44,7 +44,7 @@ build: build-dir | |||
| 
 | ||||
| build-gcb: build-dir | ||||
| 	gcloud builds submit --machine-type=n1-highcpu-32 --project=$(PROJECT) --tag=$(IMG):$(TAG) \
 | ||||
| 		--timeout=3600 ./build | ||||
| 		--timeout=3600 ./.build | ||||
| 	@echo Built $(IMG):$(TAG) | ||||
| 
 | ||||
| # Build but don't attach the latest tag. This allows manual testing/inspection of the image
 | ||||
|  |  | |||
|  | @ -692,7 +692,7 @@ | |||
|    ], | ||||
|    "source": [ | ||||
|     "from fairing.builders import cluster\n", | ||||
|     "preprocessor = ConvertNotebookPreprocessorWithFire(\"ModelServe\")\n", | ||||
|     "preprocessor = ConvertNotebookPreprocessorWithFire(class_name='ModelServe', notebook_file='build-train-deploy.ipynb')\n", | ||||
|     "\n", | ||||
|     "if not preprocessor.input_files:\n", | ||||
|     "    preprocessor.input_files = set()\n", | ||||
|  |  | |||
|  | @ -10,3 +10,4 @@ retrying | |||
| seldon-core | ||||
| sklearn | ||||
| xgboost | ||||
| tornado>=6.0.3 | ||||
|  |  | |||
|  | @ -0,0 +1,43 @@ | |||
| import tempfile | ||||
| import logging | ||||
| import os | ||||
| import subprocess | ||||
| 
 | ||||
| 
 | ||||
| logger = logging.getLogger(__name__) | ||||
| 
 | ||||
def prepare_env():
  """Install the Python packages the notebook run needs, using pip3.

  papermill is installed (or upgraded) first, then everything listed in
  ../requirements.txt, a path resolved against the current working directory.

  Raises:
    subprocess.CalledProcessError: if either pip3 invocation exits non-zero.
  """
  pip_invocations = (
      ["pip3", "install", "-U", "papermill"],
      ["pip3", "install", "-r", "../requirements.txt"],
  )
  for pip_cmd in pip_invocations:
    subprocess.check_call(pip_cmd)
| 
 | ||||
| 
 | ||||
def execute_notebook(notebook_path, parameters=None):
  """Run a notebook with papermill and return the path of the executed copy.

  Args:
    notebook_path: Path of the notebook to execute. The notebook runs with its
      own directory as the working directory.
    parameters: Optional parameters forwarded to papermill.

  Returns:
    Path of the output notebook, "out.ipynb" inside a fresh temporary
    directory. The directory is left in place so the caller can read the file.
  """
  # papermill is installed at runtime by prepare_env(), so it cannot be a
  # top-level import. Importing it here (instead of relying on the name bound
  # in the __main__ block) keeps this function usable when the module is
  # imported rather than run as a script.
  import papermill #pylint: disable=import-error,import-outside-toplevel

  temp_dir = tempfile.mkdtemp()
  notebook_output_path = os.path.join(temp_dir, "out.ipynb")
  papermill.execute_notebook(notebook_path, notebook_output_path,
                             cwd=os.path.dirname(notebook_path),
                             parameters=parameters,
                             log_output=True)
  return notebook_output_path
| 
 | ||||
def run_notebook_test(notebook_path, expected_messages, parameters=None):
  """Execute a notebook and verify that its output contains every expected message.

  Args:
    notebook_path: Path of the notebook to execute.
    expected_messages: Iterable of substrings that must each appear somewhere
      in the executed notebook file.
    parameters: Optional parameters forwarded to execute_notebook.

  Raises:
    AssertionError: on the first expected message that is missing. The full
      notebook output is logged at error level before raising.
  """
  output_path = execute_notebook(notebook_path, parameters=parameters)
  # Use a context manager so the file handle is closed deterministically.
  with open(output_path, 'r') as output_file:
    actual_output = output_file.read()

  for expected_message in expected_messages:
    if expected_message not in actual_output:
      logger.error(actual_output)
      # Raise explicitly: a bare `assert False` is stripped under `python -O`,
      # which would make a failing notebook pass silently.
      raise AssertionError("Unable to find from output: " + expected_message)
| 
 | ||||
| if __name__ == "__main__": | ||||
|   prepare_env() | ||||
|   import papermill #pylint: disable=import-error | ||||
|   FILE_DIR = os.path.dirname(__file__) | ||||
|   NOTEBOOK_REL_PATH = "../build-train-deploy.ipynb" | ||||
|   NOTEBOOK_ABS_PATH = os.path.normpath(os.path.join(FILE_DIR, NOTEBOOK_REL_PATH)) | ||||
|   EXPECTED_MGS = [ | ||||
|       "Finished upload of", | ||||
|       "Model export success: mockup-model.dat", | ||||
|       "Pod started running True", | ||||
|       "Cluster endpoint: http:", | ||||
|   ] | ||||
|   run_notebook_test(NOTEBOOK_ABS_PATH, EXPECTED_MGS) | ||||
|  | @ -0,0 +1,51 @@ | |||
| apiVersion: batch/v1 | ||||
| kind: Job | ||||
| metadata: | ||||
|   name: $(job_name) | ||||
| spec: | ||||
|   template: | ||||
|     metadata: | ||||
|       labels: | ||||
|         app: xgboost-synthetics-testing | ||||
|     spec: | ||||
|       restartPolicy: Never | ||||
|       securityContext: | ||||
|         runAsUser: 0 | ||||
|       initContainers: | ||||
|       # This init container checks out the source code. | ||||
|       - command: | ||||
|         - /usr/local/bin/checkout_repos.sh | ||||
|         - --repos=kubeflow/examples@$(CHECK_TAG) | ||||
|         - --src_dir=/src | ||||
|         - --depth=all | ||||
|         name: checkout | ||||
|         image: gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4 | ||||
|         volumeMounts: | ||||
|         - mountPath: /src | ||||
|           name: src | ||||
|         env: | ||||
|         - name: CHECK_TAG | ||||
|           valueFrom: | ||||
|             configMapKeyRef: | ||||
|               name: xgb-notebooks-tests | ||||
|               key: checkTag | ||||
|       containers: | ||||
|       - name: executing-notebooks | ||||
|         image: execute-image | ||||
|         command: ["python3", "execute_notebook.py"] | ||||
|         workingDir: /src/kubeflow/examples/xgboost_synthetic/testing | ||||
|         volumeMounts: | ||||
|         - mountPath: /var/secrets | ||||
|           name: user-gcp-sa | ||||
|           readOnly: true | ||||
|         - mountPath: /src | ||||
|           name: src | ||||
|         env: | ||||
|         - name: GOOGLE_APPLICATION_CREDENTIALS | ||||
|           value: /var/secrets/user-gcp-sa.json | ||||
|       volumes: | ||||
|       - name: user-gcp-sa | ||||
|         secret: | ||||
|           secretName: user-gcp-sa | ||||
|       - name: src | ||||
|         emptyDir: {} | ||||
|  | @ -0,0 +1,28 @@ | |||
| apiVersion: kustomize.config.k8s.io/v1beta1 | ||||
| kind: Kustomization | ||||
| 
 | ||||
| namespace: kubeflow | ||||
| 
 | ||||
| generatorOptions: | ||||
|   disableNameSuffixHash: true | ||||
| 
 | ||||
| resources: | ||||
| - job.yaml | ||||
| - role.yaml | ||||
| - rolebinding.yaml | ||||
| 
 | ||||
| configurations: | ||||
| - params.yaml | ||||
| 
 | ||||
| configMapGenerator: | ||||
| - name: xgb-notebooks-tests | ||||
|   literals: | ||||
| 
 | ||||
| vars: | ||||
| - fieldref: | ||||
|     fieldPath: data.name | ||||
|   name: job_name | ||||
|   objref: | ||||
|     apiVersion: v1 | ||||
|     kind: ConfigMap | ||||
|     name: xgb-notebooks-tests | ||||
|  | @ -0,0 +1,3 @@ | |||
| varReference: | ||||
| - path: metadata/name | ||||
|   kind: Job | ||||
|  | @ -0,0 +1,37 @@ | |||
| apiVersion: rbac.authorization.k8s.io/v1 | ||||
| kind: Role | ||||
| metadata: | ||||
|   labels: | ||||
|     app: xgboost-synthetics-testing | ||||
|   name: xgboost-synthetics-testing-role | ||||
| rules: | ||||
| - apiGroups: | ||||
|   - "" | ||||
|   resources: | ||||
|   - pods | ||||
|   - pods/log | ||||
|   - secrets | ||||
|   - services | ||||
|   verbs: | ||||
|   - '*' | ||||
| - apiGroups: | ||||
|   - "" | ||||
|   - apps | ||||
|   - extensions | ||||
|   resources: | ||||
|   - deployments | ||||
|   - replicasets | ||||
|   verbs: | ||||
|   - '*' | ||||
| - apiGroups: | ||||
|   - kubeflow.org | ||||
|   resources: | ||||
|   - '*' | ||||
|   verbs: | ||||
|   - '*' | ||||
| - apiGroups: | ||||
|   - batch | ||||
|   resources: | ||||
|   - jobs | ||||
|   verbs: | ||||
|   - '*' | ||||
|  | @ -0,0 +1,14 @@ | |||
| apiVersion: rbac.authorization.k8s.io/v1 | ||||
| kind: RoleBinding | ||||
| metadata: | ||||
|   labels: | ||||
|     app: xgboost-synthetics-testing | ||||
|   name: xgboost-synthetics-testing-role-binding | ||||
| roleRef: | ||||
|   apiGroup: rbac.authorization.k8s.io | ||||
|   kind: Role | ||||
|   name: xgboost-synthetics-testing-role | ||||
| subjects: | ||||
| - kind: Group | ||||
|   name: system:serviceaccounts | ||||
|   apiGroup: rbac.authorization.k8s.io | ||||
|  | @ -0,0 +1,100 @@ | |||
| import argparse | ||||
| import logging | ||||
| import os | ||||
| import subprocess | ||||
| import time | ||||
| 
 | ||||
| from kubeflow.testing import util | ||||
| 
 | ||||
def create_job(args, app_dir): #pylint:disable=redefined-outer-name
  '''Configure the kustomize package in app_dir for this run and apply it.

  Authenticates gcloud, fetches credentials for args.cluster, records the job
  name and checkout tag in the xgb-notebooks-tests configmap, sets the
  namespace and image, then builds the manifests and applies them with kubectl.

  Args:
    args: Parsed command-line arguments; reads cluster, name, jobType,
      pullNumber, pullBaseSHA, namespace and image.
    app_dir: Directory holding the kustomization; every command runs there.
  '''
  def run_in_app_dir(command):
    util.run(command, cwd=app_dir)

  def add_configmap_literal(literal):
    run_in_app_dir(['kustomize', 'edit', 'add', 'configmap', 'xgb-notebooks-tests',
                    '--from-literal=' + literal])

  run_in_app_dir(['gcloud', 'auth', 'activate-service-account',
                  "--key-file=/secret/gcp-credentials/key.json"])
  run_in_app_dir(['gcloud', '--project=kubeflow-ci-deployment', 'container',
                  "clusters", "get-credentials", "--zone=us-east1-b", args.cluster])

  add_configmap_literal('name=' + args.name)
  # Presubmits check out the PR head (HEAD:<pull number>); every other job
  # type checks out the pull base SHA.
  if args.jobType == 'presubmit':
    check_tag = 'HEAD:' + args.pullNumber
  else:
    check_tag = args.pullBaseSHA
  add_configmap_literal('checkTag=' + check_tag)

  run_in_app_dir(['kustomize', 'edit', 'set', 'namespace', args.namespace])
  run_in_app_dir(['kustomize', 'edit', 'set', 'image', 'execute-image=' + args.image])
  run_in_app_dir(['kustomize', 'build', app_dir, '-o', 'generated.yaml'])
  run_in_app_dir(['kubectl', 'apply', '-f', 'generated.yaml'])
  logging.info("Created job %s in namespaces %s", args.name, args.namespace)
| 
 | ||||
def get_pod_logs(name, namespace, app_dir): #pylint:disable=redefined-outer-name
  '''Fetch a pod's logs by running `kubectl logs` from app_dir.

  Args:
    name: Name of the pod.
    namespace: Namespace the pod lives in.
    app_dir: Working directory for the kubectl invocation.
  '''
  logging.info("Getting pod %s logs...", name)
  kubectl_logs_cmd = ['kubectl', 'logs', name, '-n', namespace]
  util.run(kubectl_logs_cmd, cwd=app_dir)
| 
 | ||||
def check_job_status(namespace, app_dir): #pylint:disable=redefined-outer-name
  '''Wait for the notebook-execution pod to finish, raising if it fails or never completes.

  Sleeps 60 seconds, then lists the pods in `namespace` with kubectl and looks
  at the first listing line that contains "xgboost-test". This repeats for at
  most 30 polls.

  Args:
    namespace: Namespace in which the job's pod runs.
    app_dir: Directory forwarded to get_pod_logs as its working directory.

  Raises:
    RuntimeError: if the pod reports status "Error", or if no pod has reached
      "Completed" after 30 polls.
  '''
  for _ in range(0, 30):
    time.sleep(60)
    # Use an argument list with no shell so the namespace value is never
    # interpreted by a shell. Row filtering is done in Python below.
    pod_info = subprocess.run(["kubectl", "get", "pod", "-n", namespace],
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              universal_newlines=True)

    # Reset on every poll so a failed or empty listing is not reported with
    # the name/status seen on a previous poll.
    pod_name, pod_status = '', ''
    for line in str(pod_info.stdout).splitlines():
      if "xgboost-test" not in line:
        continue
      fields = line.split()
      # A row is NAME READY STATUS ...; the status is read from index 2, so
      # at least three fields are required.
      if len(fields) >= 3:
        pod_name, pod_status = fields[0], fields[2]
      break  # only the first matching row is considered

    if not pod_name:
      logging.warning("Cannot get the pod name, retry after 60 seconds.")
      continue

    if pod_status == "Pending":
      logging.info("Pod %s is Pending.", pod_name)
    elif pod_status == "Running":
      logging.info("Pod %s is Running.", pod_name)
    elif pod_status == "Completed":
      logging.info("Pod %s is Completed.", pod_name)
      get_pod_logs(pod_name, namespace, app_dir)
      return
    elif pod_status == "Error":
      get_pod_logs(pod_name, namespace, app_dir)
      raise RuntimeError("Failed to execute notebook.")
    else:
      logging.warning("Pod %s status %s.", pod_name, pod_status)

  raise RuntimeError("Timeout to get the executing notebook pod after 30 minutes.")
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
| 
 | ||||
|   logging.basicConfig(level=logging.INFO) | ||||
| 
 | ||||
|   parser = argparse.ArgumentParser() | ||||
|   parser.add_argument( | ||||
|     "--name", help="deploy application name", type=str, required=True) | ||||
|   parser.add_argument( | ||||
|     "--namespace", help="The namespace for the application", type=str, required=True) | ||||
|   parser.add_argument( | ||||
|     "--image", help="Image name for the application", type=str, required=True) | ||||
|   parser.add_argument( | ||||
|     "--pullNumber", help="The PR number", type=str, required=True) | ||||
|   parser.add_argument( | ||||
|     "--pullBaseSHA", help="The pull base SHA", type=str, required=True) | ||||
|   parser.add_argument( | ||||
|     "--jobType", help="The job type such as presubmit or postsubmit", type=str, required=True) | ||||
|   parser.add_argument( | ||||
|     "--cluster", help="The cluster which the applition running in", type=str, required=True) | ||||
| 
 | ||||
|   app_dir = os.path.dirname(__file__) | ||||
|   app_dir = os.path.abspath(app_dir) | ||||
| 
 | ||||
|   args = parser.parse_args() | ||||
|   create_job(args, app_dir) | ||||
|   check_job_status(args.namespace, app_dir) | ||||
		Loading…
	
		Reference in New Issue