mirror of https://github.com/kubeflow/examples.git
add testing for xgboost_synthetic (#633)
This commit is contained in:
parent e37a9d7acd
commit 4f8cf87d4f
@@ -61,3 +61,14 @@ workflows:
     - postsubmit
     include_dirs:
     - pytorch_mnist/*
+
+  # E2E test for xgboost-synthetic
+  - app_dir: kubeflow/examples/test/workflows
+    component: xgboost_synthetic
+    name: xgboost2
+    job_types:
+    - periodic
+    - presubmit
+    - postsubmit
+    include_dirs:
+    - xgboost_synthetic/*

@@ -36,6 +36,12 @@
       namespace: "kubeflow-test-infra",
       prow_env: "BUILD_NUMBER=997a,BUILD_ID=997a,JOB_NAME=kubeflow-examples-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=374,REPO_NAME=examples,REPO_OWNER=kubeflow",
     },
+    xgboost_synthetic: {
+      bucket: "kubeflow-ci_temp",
+      name: "kubeflow-xgboost_synthetic",
+      namespace: "kubeflow-test-infra",
+      prow_env: "BUILD_NUMBER=997a,BUILD_ID=997a,JOB_NAME=kubeflow-examples-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=374,REPO_NAME=examples,REPO_OWNER=kubeflow",
+    },
     workflows: {
       bucket: "kubeflow-ci_temp",
       name: "kubeflow-examples-presubmit-test-374-6e32",

@@ -0,0 +1,439 @@
// Test workflow for the XGBoost synthetic example.
//
local env = std.extVar("__ksonnet/environments");
local overrides = std.extVar("__ksonnet/params").components.xgboost_synthetic;

local k = import "k.libsonnet";
local util = import "util.libsonnet";

// Define default params and then combine them with any overrides.
local defaultParams = {
  // local nfsVolumeClaim: "kubeflow-testing",
  nfsVolumeClaim: "nfs-external",

  // The name to use for the volume that contains the test data.
  dataVolume: "kubeflow-test-volume",

  // Default step image:
  stepImage: "gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4",

  // Which Kubeflow cluster to use for running the test job on.
  kfProject: "kubeflow-ci-deployment",
  kfZone: "us-east1-b",
  kfCluster: "kf-vmaster-n00",

  // The bucket where the model should be written.
  // This needs to be writable by the GCP service account in the Kubeflow cluster (not the test cluster).
  modelBucket: "kubeflow-ci_temp",

  // Whether to delete the namespace at the end.
  // Leaving the namespace around can be useful for debugging.
  //
  // TODO(jlewi): We should consider running a cronjob to GC namespaces.
  // But if we leave namespaces up, then we end up leaving the servers up, which
  // uses up CPU.
  //
  deleteNamespace: true,
};

local params = defaultParams + overrides;

local prowEnv = util.parseEnv(params.prow_env);

// Create a dictionary of the different prow variables so we can refer to them in the workflow.
//
// Important: We want to initialize all variables we reference to some value. If we don't,
// and we reference a variable which doesn't get set, then we get very hard-to-debug failure messages.
// In particular, we've seen problems where, if we add a new environment and evaluate one component
// (e.g. "workflows") and another component (e.g. "code_search.jsonnet") doesn't have a default value
// for BUILD_ID, then ksonnet fails because BUILD_ID is undefined.
local prowDict = {
  BUILD_ID: "notset",
  BUILD_NUMBER: "notset",
  REPO_OWNER: "notset",
  REPO_NAME: "notset",
  JOB_NAME: "notset",
  JOB_TYPE: "notset",
  PULL_NUMBER: "notset",
  PULL_BASE_SHA: "notset",
} + util.listOfDictToMap(prowEnv);

local bucket = params.bucket;

// mountPath is the directory where the volume to store the test data
// should be mounted.
local mountPath = "/mnt/" + "test-data-volume";
// testDir is the root directory for all data for a particular test run.
local testDir = mountPath + "/" + params.name;
// outputDir is the directory to sync to GCS to contain the output for this job.
local outputDir = testDir + "/output";
local artifactsDir = outputDir + "/artifacts";

// Source directory where all repos should be checked out.
local srcRootDir = testDir + "/src";

// The directory containing the kubeflow/examples repo.
local srcDir = srcRootDir + "/" + prowDict.REPO_OWNER + "/" + prowDict.REPO_NAME;

// The image used by the execute-notebook step.
local executeImage = "gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0";

// Value of the KUBECONFIG environment variable. This should be a full path.
local kubeConfig = testDir + "/.kube/kubeconfig";

// Namespace where tests should run.
local testNamespace = "xgboost-synthetic-" + prowDict["BUILD_ID"];

// The directory within the kubeflow_testing submodule containing
// py scripts to use.
local kubeflowTestingPy = srcRootDir + "/kubeflow/testing/py";

// workflow_template is the name of the workflow template; typically the name of the ks component.
// This is used as a label to make it easy to identify all Argo workflows created from a given
// template.
local workflow_template = "xgboost_synthetic";

// buildTemplate is a template for constructing Argo step templates.
//
// step_name: Name for the template
// command: List to pass as the container command.
//
// We customize the defaults for each step in the workflow by modifying
// buildTemplate.argoTemplate.
local buildTemplate = {
  // name & command variables should be overwritten for every test.
  // Other variables can be changed per step as needed.
  // They are hidden because they shouldn't be included in the Argo template.
  name: "",
  command:: "",
  image: params.stepImage,
  workingDir:: null,
  env_vars:: [],
  side_cars: [],
  pythonPath: kubeflowTestingPy,

  activeDeadlineSeconds: 1800,  // Set a 30 minute timeout for each template.

  local template = self,

  // Actual template for Argo.
  argoTemplate: {
    name: template.name,
    metadata: {
      labels: prowDict + {
        workflow: params.name,
        workflow_template: workflow_template,
        step_name: template.name,
      },
    },
    container: {
      command: template.command,
      name: template.name,
      image: template.image,
      workingDir: template.workingDir,
      env: [
        {
          // Add the source directories to the python path.
          name: "PYTHONPATH",
          value: template.pythonPath,
        },
        {
          name: "GOOGLE_APPLICATION_CREDENTIALS",
          value: "/secret/gcp-credentials/key.json",
        },
        {
          name: "GITHUB_TOKEN",
          valueFrom: {
            secretKeyRef: {
              name: "github-token",
              key: "github_token",
            },
          },
        },
        {
          // We use a directory in our NFS share to store our kube config.
          // This way we can configure it on a single step and reuse it on subsequent steps.
          name: "KUBECONFIG",
          value: kubeConfig,
        },
      ] + prowEnv + template.env_vars,
      volumeMounts: [
        {
          name: params.dataVolume,
          mountPath: mountPath,
        },
        {
          name: "github-token",
          mountPath: "/secret/github-token",
        },
        {
          name: "gcp-credentials",
          mountPath: "/secret/gcp-credentials",
        },
      ],
    },
  },
};  // buildTemplate


// Create a list of dictionaries.
// Each item is a dictionary describing one step in the graph.
local dagTemplates = [
  {
    template: buildTemplate {
      name: "checkout",
      command:
        ["/usr/local/bin/checkout.sh", srcRootDir],

      env_vars: [{
        name: "EXTRA_REPOS",
        // TODO(jlewi): Pin to a commit on master when #281 is checked in.
        value: "kubeflow/testing@HEAD:281",
      }],
    },
    dependencies: null,
  },  // checkout
  {
    // TODO(https://github.com/kubeflow/testing/issues/257): create-pr-symlink
    // should be done by run_e2e_workflow.py.
    template: buildTemplate {
      name: "create-pr-symlink",
      command: [
        "python",
        "-m",
        "kubeflow.testing.prow_artifacts",
        "--artifacts_dir=" + outputDir,
        "create_pr_symlink",
        "--bucket=" + params.bucket,
      ],
    },  // create-pr-symlink
    dependencies: ["checkout"],
  },  // create-pr-symlink
  {
    // Configure KUBECONFIG.
    template: buildTemplate {
      name: "get-kubeconfig",
      command: util.buildCommand([
        [
          "gcloud",
          "auth",
          "activate-service-account",
          "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
        ],
        [
          "gcloud",
          "--project=" + params.kfProject,
          "container",
          "clusters",
          "get-credentials",
          "--zone=" + params.kfZone,
          params.kfCluster,
        ]]
      ),
    },
    dependencies: ["checkout"],
  },  // get-kubeconfig
  {
    // Create the namespace.
    // TODO(jlewi): We should add some sort of retry.
    template: buildTemplate {
      name: "create-namespace",
      command: util.buildCommand([
        [
          "echo",
          "KUBECONFIG=",
          "${KUBECONFIG}",
        ],
        [
          "gcloud",
          "auth",
          "activate-service-account",
          "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
        ],
        [
          "kubectl",
          "config",
          "current-context",
        ],
        [
          "kubectl",
          "create",
          "namespace",
          testNamespace,
        ],
        // Copy the GCP secret from the kubeflow namespace to the test namespace.
        [
          srcDir + "/test/copy_secret.sh",
          "kubeflow",
          testNamespace,
          "user-gcp-sa",
        ]]
      ),
    },
    dependencies: ["get-kubeconfig"],
  },  // create-namespace
  {
    template: buildTemplate {
      name: "execute-notebook",
      command: [
        "python3",
        "xgboost_test.py",
        "--name=" + "xgboost-test-" + prowDict["BUILD_ID"],
        "--namespace=" + testNamespace,
        "--image=" + executeImage,
        "--jobType=" + prowDict["JOB_TYPE"],
        "--pullNumber=" + prowDict["PULL_NUMBER"],
        "--pullBaseSHA=" + prowDict["PULL_BASE_SHA"],
        "--cluster=" + params.kfCluster,
      ],
      pythonPath: kubeflowTestingPy,
      workingDir: srcDir + "/xgboost_synthetic/testing",
    },
    dependencies: ["create-namespace"],
  },  // execute-notebook
];

// dag defines the tasks in the graph.
local dag = {
  name: "e2e",
  // Construct tasks from the templates;
  // we will give the steps the same name as the template.
  dag: {
    tasks: util.toArgoTaskList(dagTemplates),
  },
};  // dag

// Define templates for the steps to be performed when the
// test exits.

local deleteTemplates = if params.deleteNamespace then
  [
    {
      // Delete the namespace.
      // TODO(jlewi): We should add some sort of retry.
      template: buildTemplate {
        name: "delete-namespace",
        command: util.buildCommand([
          [
            "gcloud",
            "auth",
            "activate-service-account",
            "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
          ],
          [
            "kubectl",
            "delete",
            "namespace",
            testNamespace,
          ]]
        ),
      },
    },  // delete-namespace
  ] else [];

local exitTemplates =
  deleteTemplates +
  [
    {
      // Copy artifacts to GCS for gubernator.
      // TODO(https://github.com/kubeflow/testing/issues/257): create-pr-symlink
      // should be done by run_e2e_workflow.py.
      template: buildTemplate {
        name: "copy-artifacts",
        command: [
          "python",
          "-m",
          "kubeflow.testing.prow_artifacts",
          "--artifacts_dir=" + outputDir,
          "copy_artifacts",
          "--bucket=" + bucket,
        ],
      },  // copy-artifacts
    },
    {
      // Delete the test directory in NFS.
      // TODO(https://github.com/kubeflow/testing/issues/256): Use an external process to do this.
      template:
        buildTemplate {
          name: "test-dir-delete",
          command: [
            "rm",
            "-rf",
            testDir,
          ],

          argoTemplate+: {
            retryStrategy: {
              limit: 3,
            },
          },
        },  // test-dir-delete
      dependencies: ["copy-artifacts"] + if params.deleteNamespace then ["delete-namespace"] else [],
    },
  ];

// Create a DAG representing the set of steps to execute on exit.
local exitDag = {
  name: "exit-handler",
  // Construct tasks from the templates;
  // we will give the steps the same name as the template.
  dag: {
    tasks: util.toArgoTaskList(exitTemplates),
  },
};

// A list of templates for the actual steps.
local stepTemplates = std.map(function(i) i.template.argoTemplate,
                              dagTemplates) +
                      std.map(function(i) i.template.argoTemplate,
                              exitTemplates);

// Define the Argo Workflow.
local workflow = {
  apiVersion: "argoproj.io/v1alpha1",
  kind: "Workflow",
  metadata: {
    name: params.name,
    namespace: env.namespace,
    labels: prowDict + {
      workflow: params.name,
      workflow_template: workflow_template,
    },
  },
  spec: {
    entrypoint: "e2e",
    // Have Argo garbage collect old workflows; otherwise we overload the API server.
    ttlSecondsAfterFinished: 7 * 24 * 60 * 60,
    volumes: [
      {
        name: "github-token",
        secret: {
          secretName: "github-token",
        },
      },
      {
        name: "gcp-credentials",
        secret: {
          secretName: "kubeflow-testing-credentials",
        },
      },
      {
        name: params.dataVolume,
        persistentVolumeClaim: {
          claimName: params.nfsVolumeClaim,
        },
      },
    ],  // volumes

    // onExit specifies the template that should always run when the workflow completes.
    onExit: "exit-handler",

    // The templates will be a combination of the templates
    // defining the dags executed by Argo as well as the templates
    // for the individual steps.
    templates: [dag, exitDag] + stepTemplates,  // templates
  },  // spec
};  // workflow

std.prune(k.core.v1.list.new([workflow]))
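
A note on the prowEnv / prowDict plumbing in the workflow above: params.prow_env is the comma-separated KEY=VALUE string shown in the params hunk earlier, and util.parseEnv / util.listOfDictToMap come from util.libsonnet in kubeflow/testing, so their exact behavior is inferred from how they are used here. In Python terms, the combined transformation looks roughly like this sketch:

def parse_prow_env(prow_env):
    """Turn "BUILD_NUMBER=997a,BUILD_ID=997a,..." into a dict.

    Mirrors (by assumption) util.parseEnv + util.listOfDictToMap: split on
    commas, then on the first '=', and overlay the result on the "notset"
    defaults so every key the workflow references has *some* value.
    """
    defaults = {k: "notset" for k in [
        "BUILD_ID", "BUILD_NUMBER", "REPO_OWNER", "REPO_NAME",
        "JOB_NAME", "JOB_TYPE", "PULL_NUMBER", "PULL_BASE_SHA"]}
    pairs = (item.split("=", 1) for item in prow_env.split(",") if item)
    return {**defaults, **{k: v for k, v in pairs}}

# Example with a value from the params hunk above:
env = parse_prow_env("BUILD_NUMBER=997a,BUILD_ID=997a,JOB_TYPE=presubmit,PULL_NUMBER=374")
assert env["PULL_NUMBER"] == "374" and env["PULL_BASE_SHA"] == "notset"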
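Similarly, util.toArgoTaskList is what turns the dagTemplates and exitTemplates lists into Argo DAG tasks. Its behavior is assumed from the fields used above; conceptually it does something like:

def to_argo_task_list(dag_templates):
    # Assumed behavior of util.toArgoTaskList: one Argo DAG task per entry,
    # named after its step template and carrying the entry's dependencies.
    tasks = []
    for entry in dag_templates:
        name = entry["template"]["name"]
        task = {"name": name, "template": name}
        if entry.get("dependencies"):
            task["dependencies"] = entry["dependencies"]
        tasks.append(task)
    return tasks

# E.g. the "execute-notebook" entry would become:
# {"name": "execute-notebook", "template": "execute-notebook",
#  "dependencies": ["create-namespace"]}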

@@ -44,7 +44,7 @@ build: build-dir
 
 build-gcb: build-dir
 	gcloud builds submit --machine-type=n1-highcpu-32 --project=$(PROJECT) --tag=$(IMG):$(TAG) \
-		--timeout=3600 ./build
+		--timeout=3600 ./.build
 	@echo Built $(IMG):$(TAG)
 
 # Build but don't attach the latest tag. This allows manual testing/inspection of the image

@@ -692,7 +692,7 @@
    ],
    "source": [
     "from fairing.builders import cluster\n",
-    "preprocessor = ConvertNotebookPreprocessorWithFire(\"ModelServe\")\n",
+    "preprocessor = ConvertNotebookPreprocessorWithFire(class_name='ModelServe', notebook_file='build-train-deploy.ipynb')\n",
     "\n",
     "if not preprocessor.input_files:\n",
     "    preprocessor.input_files = set()\n",

@@ -10,3 +10,4 @@ retrying
 seldon-core
 sklearn
 xgboost
+tornado>=6.0.3

@@ -0,0 +1,43 @@
import logging
import os
import subprocess
import tempfile


logger = logging.getLogger(__name__)

def prepare_env():
  subprocess.check_call(["pip3", "install", "-U", "papermill"])
  subprocess.check_call(["pip3", "install", "-r", "../requirements.txt"])


def execute_notebook(notebook_path, parameters=None):
  temp_dir = tempfile.mkdtemp()
  notebook_output_path = os.path.join(temp_dir, "out.ipynb")
  papermill.execute_notebook(notebook_path, notebook_output_path,
                             cwd=os.path.dirname(notebook_path),
                             parameters=parameters,
                             log_output=True)
  return notebook_output_path

def run_notebook_test(notebook_path, expected_messages, parameters=None):
  output_path = execute_notebook(notebook_path, parameters=parameters)
  actual_output = open(output_path, 'r').read()
  for expected_message in expected_messages:
    if not expected_message in actual_output:
      logger.error(actual_output)
      assert False, "Unable to find expected message in output: " + expected_message

if __name__ == "__main__":
  prepare_env()
  # papermill is installed by prepare_env() above, so it can only be imported
  # after that call; the import still binds a module-level name that
  # execute_notebook() can see.
  import papermill #pylint: disable=import-error
  FILE_DIR = os.path.dirname(__file__)
  NOTEBOOK_REL_PATH = "../build-train-deploy.ipynb"
  NOTEBOOK_ABS_PATH = os.path.normpath(os.path.join(FILE_DIR, NOTEBOOK_REL_PATH))
  EXPECTED_MGS = [
    "Finished upload of",
    "Model export success: mockup-model.dat",
    "Pod started running True",
    "Cluster endpoint: http:",
  ]
  run_notebook_test(NOTEBOOK_ABS_PATH, EXPECTED_MGS)
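
The helper above can also be pointed at the notebook for a local debugging run. A minimal sketch, assuming papermill is already installed and paths resolve the same way the __main__ block's do; the parameters key below is hypothetical and only takes effect if the notebook has a cell tagged "parameters":

# Hypothetical local invocation of run_notebook_test from the file above.
run_notebook_test(
    "../build-train-deploy.ipynb",
    expected_messages=["Model export success: mockup-model.dat"],
    parameters={"num_iterations": 2},  # hypothetical parameter name
)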

@@ -0,0 +1,51 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: $(job_name)
spec:
  template:
    metadata:
      labels:
        app: xgboost-synthetics-testing
    spec:
      restartPolicy: Never
      securityContext:
        runAsUser: 0
      initContainers:
      # This init container checks out the source code.
      - command:
        - /usr/local/bin/checkout_repos.sh
        - --repos=kubeflow/examples@$(CHECK_TAG)
        - --src_dir=/src
        - --depth=all
        name: checkout
        image: gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4
        volumeMounts:
        - mountPath: /src
          name: src
        env:
        - name: CHECK_TAG
          valueFrom:
            configMapKeyRef:
              name: xgb-notebooks-tests
              key: checkTag
      containers:
      - name: executing-notebooks
        image: execute-image
        command: ["python3", "execute_notebook.py"]
        workingDir: /src/kubeflow/examples/xgboost_synthetic/testing
        volumeMounts:
        - mountPath: /var/secrets
          name: user-gcp-sa
          readOnly: true
        - mountPath: /src
          name: src
        env:
        - name: GOOGLE_APPLICATION_CREDENTIALS
          value: /var/secrets/user-gcp-sa.json
      volumes:
      - name: user-gcp-sa
        secret:
          secretName: user-gcp-sa
      - name: src
        emptyDir: {}

@@ -0,0 +1,28 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: kubeflow

generatorOptions:
  disableNameSuffixHash: true

resources:
- job.yaml
- role.yaml
- rolebinding.yaml

configurations:
- params.yaml

configMapGenerator:
- name: xgb-notebooks-tests
  literals:

vars:
- fieldref:
    fieldPath: data.name
  name: job_name
  objref:
    apiVersion: v1
    kind: ConfigMap
    name: xgb-notebooks-tests
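Note that the configMapGenerator above deliberately leaves literals empty: the test driver fills in name and checkTag at run time with kustomize edit, and the vars entry then substitutes data.name into the Job's metadata/name (wired up by params.yaml below). A rough sketch of that edit sequence with placeholder values; the real values come from prow environment variables in xgboost_test.py at the end of this commit:

import subprocess

APP_DIR = "xgboost_synthetic/testing"  # directory containing this kustomization.yaml

# Fill the empty configMapGenerator literals, then render the overlay.
for literal in ["name=xgboost-test-997a", "checkTag=HEAD:374"]:  # placeholder values
    subprocess.check_call(
        ["kustomize", "edit", "add", "configmap", "xgb-notebooks-tests",
         "--from-literal=" + literal], cwd=APP_DIR)
subprocess.check_call(["kustomize", "build", ".", "-o", "generated.yaml"], cwd=APP_DIR)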

@@ -0,0 +1,3 @@
varReference:
- path: metadata/name
  kind: Job

@@ -0,0 +1,37 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  labels:
    app: xgboost-synthetics-testing
  name: xgboost-synthetics-testing-role
rules:
- apiGroups:
  - ""
  resources:
  - pods
  - pods/log
  - secrets
  - services
  verbs:
  - '*'
- apiGroups:
  - ""
  - apps
  - extensions
  resources:
  - deployments
  - replicasets
  verbs:
  - '*'
- apiGroups:
  - kubeflow.org
  resources:
  - '*'
  verbs:
  - '*'
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - '*'

@@ -0,0 +1,14 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    app: xgboost-synthetics-testing
  name: xgboost-synthetics-testing-role-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: xgboost-synthetics-testing-role
subjects:
- kind: Group
  name: system:serviceaccounts
  apiGroup: rbac.authorization.k8s.io

@@ -0,0 +1,100 @@
import argparse
import logging
import os
import subprocess
import time

from kubeflow.testing import util

def create_job(args, app_dir): #pylint:disable=redefined-outer-name
  '''Generate the Job and submit it.'''
  util.run(['gcloud', 'auth', 'activate-service-account',
            "--key-file=/secret/gcp-credentials/key.json"], cwd=app_dir)
  util.run(['gcloud', '--project=kubeflow-ci-deployment', 'container',
            "clusters", "get-credentials", "--zone=us-east1-b", args.cluster], cwd=app_dir)

  configmap = 'xgb-notebooks-tests'
  util.run(['kustomize', 'edit', 'add', 'configmap', configmap,
            '--from-literal=name=' + args.name], cwd=app_dir)
  # For presubmit, set the checkout tag to HEAD:$(PULL_NUMBER); otherwise set it to PULL_BASE_SHA.
  if args.jobType == 'presubmit':
    util.run(['kustomize', 'edit', 'add', 'configmap', configmap,
              '--from-literal=checkTag=HEAD:' + args.pullNumber], cwd=app_dir)
  else:
    util.run(['kustomize', 'edit', 'add', 'configmap', configmap,
              '--from-literal=checkTag=' + args.pullBaseSHA], cwd=app_dir)
  util.run(['kustomize', 'edit', 'set', 'namespace', args.namespace], cwd=app_dir)
  util.run(['kustomize', 'edit', 'set', 'image', 'execute-image=' + args.image], cwd=app_dir)
  util.run(['kustomize', 'build', app_dir, '-o', 'generated.yaml'], cwd=app_dir)
  util.run(['kubectl', 'apply', '-f', 'generated.yaml'], cwd=app_dir)
  logging.info("Created job %s in namespace %s", args.name, args.namespace)

def get_pod_logs(name, namespace, app_dir): #pylint:disable=redefined-outer-name
  '''The k8s Python API cannot fetch the logs here, so use the kubectl command instead.'''
  logging.info("Getting pod %s logs...", name)
  util.run(['kubectl', 'logs', name, '-n', namespace], cwd=app_dir)

def check_job_status(namespace, app_dir): #pylint:disable=redefined-outer-name
  '''The k8s Python API cannot fetch the job here, so poll its pod status with kubectl.'''
  is_successed = False
  pod_info, pod_name, pod_status = '', '', ''
  for _ in range(0, 30):
    time.sleep(60)
    subCmd = "kubectl get pod -n " + namespace + " | grep -m1 xgboost-test"
    pod_info = subprocess.run(subCmd,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              shell=True,
                              universal_newlines=True)
    # kubectl's columns are NAME READY STATUS ...; we need at least three
    # fields to read the status safely.
    if len(str(pod_info.stdout).split()) >= 3:
      pod_name = str(pod_info.stdout).split()[0]
      pod_status = str(pod_info.stdout).split()[2]

    if pod_name:
      if pod_status == "Pending":
        logging.info("Pod %s is Pending.", pod_name)
      elif pod_status == "Running":
        logging.info("Pod %s is Running.", pod_name)
      elif pod_status == "Completed":
        logging.info("Pod %s is Completed.", pod_name)
        get_pod_logs(pod_name, namespace, app_dir)
        is_successed = True
        break
      elif pod_status == "Error":
        get_pod_logs(pod_name, namespace, app_dir)
        raise RuntimeError("Failed to execute notebook.")
      else:
        logging.warning("Pod %s status %s.", pod_name, pod_status)
    else:
      logging.warning("Cannot get the pod name; retrying after 60 seconds.")

  if not is_successed:
    raise RuntimeError("Timed out waiting for the notebook-execution pod after 30 minutes.")


if __name__ == "__main__":

  logging.basicConfig(level=logging.INFO)

  parser = argparse.ArgumentParser()
  parser.add_argument(
    "--name", help="The name of the deployed application", type=str, required=True)
  parser.add_argument(
    "--namespace", help="The namespace for the application", type=str, required=True)
  parser.add_argument(
    "--image", help="Image name for the application", type=str, required=True)
  parser.add_argument(
    "--pullNumber", help="The PR number", type=str, required=True)
  parser.add_argument(
    "--pullBaseSHA", help="The pull base SHA", type=str, required=True)
  parser.add_argument(
    "--jobType", help="The job type, such as presubmit or postsubmit", type=str, required=True)
  parser.add_argument(
    "--cluster", help="The cluster the application runs in", type=str, required=True)

  app_dir = os.path.dirname(__file__)
  app_dir = os.path.abspath(app_dir)

  args = parser.parse_args()
  create_job(args, app_dir)
  check_job_status(args.namespace, app_dir)
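
The column-grepping in check_job_status works but is sensitive to kubectl's human-readable output format. A sturdier variant (a sketch, not part of this commit) would ask for the pod phase directly via jsonpath, keyed off the app label that job.yaml sets:

import subprocess
import time

def wait_for_test_pod(namespace, selector="app=xgboost-synthetics-testing",
                      attempts=30, interval=60):
    """Poll the pod phase via jsonpath instead of grepping column output."""
    for _ in range(attempts):
        time.sleep(interval)
        phase = subprocess.run(
            ["kubectl", "get", "pod", "-n", namespace, "-l", selector,
             "-o", "jsonpath={.items[0].status.phase}"],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            universal_newlines=True).stdout
        if phase == "Succeeded":
            return
        if phase == "Failed":
            raise RuntimeError("Notebook-execution pod failed.")
    raise RuntimeError("Timed out waiting for the notebook-execution pod.")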