Upgrade E2E tests for tkn CLI version 0.11 (#246)

* Now including logs for 'finally' tasks
* Add env var for SLEEP_BETWEEN_TEST_PHASES
* Add 'retry.yaml' to ignored tests
* Make node_selector more general
* Use pipelinerun status.conditions[].reason instead of .type
* Treat status "Completed" as "Succeeded"
This commit is contained in:
Christian Kadner 2020-08-04 13:26:22 -07:00 committed by GitHub
parent b28c2cbca6
commit c37c600486
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 92 additions and 50 deletions

View File

@ -19,8 +19,8 @@ VENV ?= .venv
export VIRTUAL_ENV := $(abspath ${VENV})
export PATH := ${VIRTUAL_ENV}/bin:${PATH}
TKN_PIPELINE_VERSION ?= "0.14"
TKN_CLIENT_VERSION ?= "0.10"
TKN_PIPELINE_VERSION ?= "0.14."
TKN_CLIENT_VERSION ?= "0.11."
.PHONY: help
help: ## Display the Make targets
@ -30,6 +30,7 @@ help: ## Display the Make targets
.PHONY: venv
venv: $(VENV)/bin/activate ## Create and activate virtual environment
$(VENV)/bin/activate: sdk/python/setup.py
@echo "VENV=$(VENV)"
@test -d $(VENV) || python3 -m venv $(VENV)
pip install -e sdk/python
@touch $(VENV)/bin/activate
@ -40,15 +41,23 @@ install: venv ## Install the kfp_tekton compiler in a virtual environment
# Target unit_test - run the compiler unit tests (depends on the venv target).
# Before the tests start, a banner is printed listing optional environment
# variables: the sed command prints only those lines of compiler_tests.py that
# contain a comment of the form "# make unit_test ..." ($@ expands to the target
# name), with the "#" comment marker stripped.
# Afterwards sdk/python/tests/run_tests.sh is executed and "unit_test: OK" is
# echoed.
.PHONY: unit_test
unit_test: venv ## Run compiler unit tests
@echo "=================================================================="
@echo "Optional environment variables to configure $@, examples:"
@sed -n -e 's/# *\(make $@ .*\)/ \1/p' sdk/python/tests/compiler/compiler_tests.py
@echo "=================================================================="
@sdk/python/tests/run_tests.sh
@echo "$@: OK"
.PHONY: e2e_test
e2e_test: venv ## Run compiler end-to-end tests (requires kubectl and tkn CLI)
@which kubectl || (echo "Missing kubectl CLI" && exit 1)
@test -z "${KUBECONFIG}" && echo "KUBECONFIG not set" && exit 1 || echo "${KUBECONFIG}"
@echo "=================================================================="
@echo "Optional environment variables to configure $@, examples:"
@sed -n -e 's/# *\(make $@ .*\)/ \1/p' sdk/python/tests/compiler/compiler_tests_e2e.py
@echo "=================================================================="
@which kubectl > /dev/null || (echo "Missing kubectl CLI" && exit 1)
@test -z "${KUBECONFIG}" && echo "KUBECONFIG not set" && exit 1 || echo "KUBECONFIG: ${KUBECONFIG}"
@kubectl version --short || (echo "Failed to access kubernetes cluster" && exit 1)
@which tkn && tkn version || (echo "Missing tkn CLI" && exit 1)
@which tkn > /dev/null || (echo "Missing tkn CLI" && exit 1)
@tkn version | grep "Pipeline version: v$${TKN_PIPELINE_VERSION}" || (echo "Required Tekton Pipeline version: $${TKN_PIPELINE_VERSION}" && exit 1)
@tkn version | grep "Client version: $${TKN_CLIENT_VERSION}" || (echo "Required tkn CLI version: $${TKN_CLIENT_VERSION}" && exit 1)
@sdk/python/tests/run_e2e_tests.sh

View File

@ -51,7 +51,7 @@ SDK provides a `TektonCompiler` and a `TektonClient`:
- Python: `3.5.3` or later
- Tekton: [`0.14.0`](https://github.com/tektoncd/pipeline/releases/tag/v0.14.0)
- Tekton CLI: [`0.10.0`](https://github.com/tektoncd/cli/releases/tag/v0.10.0)
- Tekton CLI: [`0.11.0`](https://github.com/tektoncd/cli/releases/tag/v0.11.0)
- Kubeflow Pipelines: [KFP with Tekton backend](/tekton_kfp_guide.md)
Follow the instructions for [installing project prerequisites](/sdk/python/README.md#development-prerequisites)

View File

@ -31,9 +31,9 @@ from kfp_tekton import compiler
# temporarily set this flag to True in order to (re)generate new "golden" YAML
# files after making code changes that modify the expected YAML output.
# to (re)generate all "golden" YAML files from the command line run:
# GENERATE_GOLDEN_YAML=True sdk/python/tests/run_tests.sh
# GENERATE_GOLDEN_YAML=True sdk/python/tests/run_tests.sh
# or:
# make test GENERATE_GOLDEN_YAML=True
# make unit_test GENERATE_GOLDEN_YAML=True
GENERATE_GOLDEN_YAML = env.get("GENERATE_GOLDEN_YAML", "False") == "True"
if GENERATE_GOLDEN_YAML:

View File

@ -43,11 +43,11 @@ if env.get("TKN_PIPELINE_VERSION"):
logging.warning("The environment variable 'TKN_PIPELINE_VERSION' was set to '{}'"
.format(TKN_PIPELINE_VERSION))
# set or override the Tekton CLI version, default "0.10.x":
# TKN_CLIENT_VERSION=0.10 sdk/python/tests/run_e2e_tests.sh
# set or override the Tekton CLI version, default "0.11.x":
# TKN_CLIENT_VERSION=0.11 sdk/python/tests/run_e2e_tests.sh
# or:
# make e2e_test TKN_CLIENT_VERSION=0.10
TKN_CLIENT_VERSION = env.get("TKN_CLIENT_VERSION", "0.10.")
# make e2e_test TKN_CLIENT_VERSION=0.11
TKN_CLIENT_VERSION = env.get("TKN_CLIENT_VERSION", "0.11.")
# let the user know the expected Tekton CLI version
if env.get("TKN_CLIENT_VERSION"):
@ -115,6 +115,23 @@ if EXCLUDE_TESTS:
# KEEP_FAILED_PIPELINERUNS = env.get("KEEP_FAILED_PIPELINERUNS", "False") == "True"
# Set SLEEP_BETWEEN_TEST_PHASES=<seconds> (default: 5) to increase or decrease
# the sleep time between the test stages of starting a pipelinerun, then first
# attempting to get the pipelinerun status, and lastly to get the pipelinerun
# logs. Increase the sleep for under-powered Kubernetes clusters. The minimal
# recommended configuration for K8s clusters is 4 cores, 2 nodes, 16 GB RAM:
# SLEEP_BETWEEN_TEST_PHASES=10 sdk/python/tests/run_e2e_tests.sh
# or:
# make e2e_test SLEEP_BETWEEN_TEST_PHASES=10
SLEEP_BETWEEN_TEST_PHASES = int(env.get("SLEEP_BETWEEN_TEST_PHASES", "5"))
# let the user know the sleep time between test phases was overridden
if env.get("SLEEP_BETWEEN_TEST_PHASES"):
logging.warning(
"The environment variable 'SLEEP_BETWEEN_TEST_PHASES' was set to '{}'. "
"Default is '5' seconds. Increasing this value should improve the test "
"success rate on a slow Kubernetes cluster.".format(SLEEP_BETWEEN_TEST_PHASES))
# set RERUN_FAILED_TESTS_ONLY=True, to only re-run those E2E tests that failed in
# the previous test run:
# RERUN_FAILED_TESTS_ONLY=True sdk/python/tests/run_e2e_tests.sh
@ -151,6 +168,7 @@ if RERUN_FAILED_TESTS_ONLY:
ignored_yaml_files = [
"big_data_passing.yaml", # does not complete in a reasonable time frame
"katib.yaml", # service account needs Katib permission, takes too long doing 9 trial runs
"retry.yaml", # designed to occasionally fail (randomly) if number of retries exceeded
"timeout.yaml", # random failure (by design) ... would need multiple golden log files to compare to
"tolerations.yaml", # designed to fail, test only shows how to add the toleration to the pod
"volume.yaml", # need to rework the credentials part
@ -259,7 +277,7 @@ class TestCompilerE2E(unittest.TestCase):
run(del_cmd.split(), capture_output=True, timeout=10, check=False)
# TODO: find a better way than to sleep, but some PipelineRuns cannot
# be recreated right after the previous pipelineRun has been deleted
sleep(5)
sleep(SLEEP_BETWEEN_TEST_PHASES)
def _start_pipelinerun(self, yaml_file):
kube_cmd = "kubectl apply -f \"{}\" -n {}".format(yaml_file, namespace)
@ -268,12 +286,13 @@ class TestCompilerE2E(unittest.TestCase):
"Process returned non-zero exit code: {} -> {}".format(
kube_cmd, kube_proc.stderr))
# TODO: find a better way than to sleep, but some PipelineRuns take longer
# to be created and logs may not be available yet even with --follow
sleep(5)
# to be created and logs may not be available yet even with --follow or
# when attempting (and retrying) to get the pipelinerun status
sleep(SLEEP_BETWEEN_TEST_PHASES)
def _get_pipelinerun_status(self, name, retries: int = 10) -> str:
tkn_status_cmd = "tkn pipelinerun describe %s -n %s -o jsonpath=" \
"'{.status.conditions[0].type}'" % (name, namespace)
"'{.status.conditions[0].reason}'" % (name, namespace)
status = "Unknown"
for i in range(0, retries):
try:
@ -281,9 +300,9 @@ class TestCompilerE2E(unittest.TestCase):
timeout=10, check=False)
if tkn_status_proc.returncode == 0:
status = tkn_status_proc.stdout.decode("utf-8").strip("'")
if "Succeeded" in status or "Failed" in status:
if status in ["Succeeded", "Completed", "Failed"]:
return status
logging.warning("tkn pipeline '{}' {} ({}/{})".format(
logging.debug("tkn pipeline '{}' status: {} ({}/{})".format(
name, status, i + 1, retries))
else:
logging.error("Could not get pipelinerun status ({}/{}): {}".format(
@ -291,11 +310,11 @@ class TestCompilerE2E(unittest.TestCase):
except SubprocessError:
logging.exception("Error trying to get pipelinerun status ({}/{})".format(
i + 1, retries))
sleep(3)
sleep(SLEEP_BETWEEN_TEST_PHASES)
return status
def _get_pipelinerun_logs(self, name, timeout: int = 30) -> str:
sleep(10) # if we don't wait, we often only get logs of some pipeline tasks
sleep(SLEEP_BETWEEN_TEST_PHASES * 2) # if we don't wait, we often only get logs of some pipeline tasks
tkn_logs_cmd = "tkn pipelinerun logs {} -n {}".format(name, namespace)
tkn_logs_proc = run(tkn_logs_cmd.split(), capture_output=True, timeout=timeout, check=False)
self.assertEqual(tkn_logs_proc.returncode, 0,
@ -311,9 +330,11 @@ class TestCompilerE2E(unittest.TestCase):
try:
with open(golden_log_file, 'r') as f:
golden_log = f.read()
sanitized_golden_log = self._sanitize_log(golden_log)
sanitized_test_log = self._sanitize_log(test_log)
self.maxDiff = None
self.assertEqual(self._sanitize_log(golden_log),
self._sanitize_log(test_log),
self.assertEqual(sanitized_golden_log,
sanitized_test_log,
msg="PipelineRun '{}' did not produce the expected "
" log output: {}".format(name, golden_log_file))
except FileNotFoundError:
@ -344,24 +365,26 @@ class TestCompilerE2E(unittest.TestCase):
# server process receiving a termination signal
lines_to_remove = [
"Pipeline still running ...",
"Server is listening on",
"Unknown signal terminated",
r"Total: .+, Transferred: .+, Speed: .+",
r"localhost:.*GET / HTTP",
]
# replacements are used on multi-line strings, so use '...\n' as opposed to '...$' to denote end of line
# replacements are used on multi-line strings, so '...\n' will be matched by '...$'
replacements = [
(r"(-[-0-9a-z]{3}-[-0-9a-z]{5})(?=[ -/\]\"]|$)", r"-XXX-XXXXX"),
(r"uid:[0-9a-z]{8}(-[0-9a-z]{4}){3}-[0-9a-z]{12}",
"uid:{}-{}-{}-{}-{}".format("X" * 8, "X" * 4, "X" * 4, "X" * 4, "X" * 12)),
(r"resourceVersion:[0-9]+ ", "resourceVersion:-------- "),
(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z", "DATETIME"),
(r"{}".format("|".join(_MONTHNAMES)), "MONTH"),
(r"{}".format("|".join(_DAYNAMES)), "DAY"),
(r"\d", "-"),
(r" +$", ""),
(r" +\r", r"\n"),
(r"^$\n", ""),
(r"\n^$", ""),
(r" +\n", ""),
(r" +\r", ""),
]
sanitized_log = log
@ -385,7 +408,7 @@ class TestCompilerE2E(unittest.TestCase):
def _run_test__verify_pipelinerun_success(self, name):
status = self._get_pipelinerun_status(name)
self.assertEqual("Succeeded", status)
self.assertIn(status, ["Succeeded", "Completed"])
def _run_test__verify_pipelinerun_logs(self, name, log_file):
test_log = self._get_pipelinerun_logs(name)

View File

@ -37,7 +37,7 @@ def affinity_pipeline(
required_during_scheduling_ignored_during_execution=V1NodeSelector(
node_selector_terms=[V1NodeSelectorTerm(
match_expressions=[V1NodeSelectorRequirement(
key='beta.kubernetes.io/os',
key='kubernetes.io/os',
operator='In',
values=['linux'])])])))
echo_op().add_affinity(affinity)

View File

@ -43,7 +43,7 @@ spec:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: beta.kubernetes.io/os
- key: kubernetes.io/os
operator: In
values:
- linux

View File

@ -1,7 +1,8 @@
[get-frequent : main] flies
[save : main] Copying file:///tmp/results.txt...
[save : main] / [0 files][ 0.0 B/ 7.0 B]
/ [1 files][ 7.0 B/ 7.0 B]
[save : main] / [0 files][ 0.0 B/ 7.0 B] / [1 files][ 7.0 B/ 7.0 B]
[save : main] Operation completed over 1 objects/7.0 B.
[exiting : main] exit!

View File

@ -84,9 +84,7 @@ def save_most_frequent_word():
output_path=output_path_param)
saver.container.set_cpu_limit('0.5')
# saver.container.set_gpu_limit('2')
saver.add_node_selector_constraint(
'failure-domain.beta.kubernetes.io/region',
'us-south')
saver.add_node_selector_constraint('kubernetes.io/os', 'linux')
# saver.apply(gcp.use_tpu(tpu_cores=2, tpu_resource='v2', tf_version='1.12'))

View File

@ -96,4 +96,4 @@ spec:
- pipelineTaskName: save
taskPodTemplate:
nodeSelector:
failure-domain.beta.kubernetes.io/region: us-south
kubernetes.io/os: linux

View File

@ -4,7 +4,7 @@
[download : copy-artifacts] tar: removing leading '/' from member names
[download : copy-artifacts] tekton/results/downloaded
[download : copy-artifacts] `downloaded.tgz` -> `storage/mlpipeline/artifacts/download-and-save-most-frequent/download/downloaded.tgz`
[download : copy-artifacts] Total: 0 B, Transferred: 200 B, Speed: 14.79 KiB/s
[download : copy-artifacts] Total: 0 B, Transferred: 200 B, Speed: 14.17 KiB/s
[get-frequent : main] your
@ -12,5 +12,9 @@
[get-frequent : copy-artifacts] tar: removing leading '/' from member names
[get-frequent : copy-artifacts] tekton/results/word
[get-frequent : copy-artifacts] `word.tgz` -> `storage/mlpipeline/artifacts/download-and-save-most-frequent/get-frequent/word.tgz`
[get-frequent : copy-artifacts] Total: 0 B, Transferred: 116 B, Speed: 9.30 KiB/s
[get-frequent : copy-artifacts] Total: 0 B, Transferred: 117 B, Speed: 2.90 KiB/s
[save : main] Copying file:///tmp/results.txt...
[save : main] / [0 files][ 0.0 B/ 6.0 B] / [1 files][ 6.0 B/ 6.0 B]
[save : main] Operation completed over 1 objects/6.0 B.

View File

@ -4,8 +4,10 @@
[gcs-download : copy-artifacts] tar: removing leading '/' from member names
[gcs-download : copy-artifacts] tekton/results/data
[gcs-download : copy-artifacts] `data.tgz` -> `storage/mlpipeline/artifacts/exit-handler/gcs-download/data.tgz`
[gcs-download : copy-artifacts] Total: 0 B, Transferred: 195 B, Speed: 16.39 KiB/s
[gcs-download : copy-artifacts] Total: 0 B, Transferred: 195 B, Speed: 14.59 KiB/s
[echo-2 : main] With which he yoketh your rebellious necks Razeth your cities and subverts your towns And in a moment makes them desolate
[echo-2 : main]
[echo : main] exit!

View File

@ -32,8 +32,8 @@ def node_selector_pipeline(
):
"""A pipeline with Node Selector"""
echo_op().add_node_selector_constraint(
label_name='beta.kubernetes.io/instance-type',
value='b3c.4x16.encrypted')
label_name='kubernetes.io/os',
value='linux')
if __name__ == '__main__':

View File

@ -39,4 +39,4 @@ spec:
- pipelineTaskName: echo
taskPodTemplate:
nodeSelector:
beta.kubernetes.io/instance-type: b3c.4x16.encrypted
kubernetes.io/os: linux

View File

@ -4,12 +4,14 @@
[download : main] '/tekton/results/downloaded-resultoutput' saved
[download : copy-artifacts] Added `storage` successfully.
[download : copy-artifacts] tar: removing leading '/' from member names
[download : copy-artifacts] tekton/results/downloaded-resultoutput
[download : copy-artifacts] tar: removing leading '/' from member names
[download : copy-artifacts] `downloaded_resultOutput.tgz` -> `storage/mlpipeline/artifacts/pipelineparams/download/downloaded_resultOutput.tgz`
[download : copy-artifacts] Total: 0 B, Transferred: 136 B, Speed: 12.10 KiB/s
[download : copy-artifacts] Total: 0 B, Transferred: 138 B, Speed: 11.44 KiB/s
[download : sidecar-echo] 2020/07/13 10:38:44 Server is listening on :5678
[download : sidecar-echo] 2020/07/13 10:38:56 localhost:5678 127.0.0.1:55050 "GET / HTTP/1.1" 200 14 "Wget" 39.277µs
[download : sidecar-echo] 2020/07/13 10:38:59 [ERR] Unknown signal terminated
[download : sidecar-echo] 2020/07/31 09:29:31 Server is listening on :5678
[download : sidecar-echo] 2020/07/31 09:29:43 localhost:5678 127.0.0.1:54892 "GET / HTTP/1.1" 200 14 "Wget" 15.199µs
[download : sidecar-echo] 2020/07/31 09:29:45 [ERR] Unknown signal terminated
[echo : main] pipelineParams: hello world

View File

@ -21,7 +21,8 @@ def random_failure_op(exit_codes):
name='random_failure',
image='python:alpine3.6',
command=['python', '-c'],
arguments=['import random; import sys; exit_code = random.choice([int(i) for i in sys.argv[1].split(",")]); '
arguments=['import random; import sys; '
'exit_code = random.choice([int(i) for i in sys.argv[1].split(",")]); '
'print(exit_code); sys.exit(exit_code)', exit_codes]
)

View File

@ -7,9 +7,11 @@
[download : copy-artifacts] tar: removing leading '/' from member names
[download : copy-artifacts] tekton/results/downloaded
[download : copy-artifacts] `downloaded.tgz` -> `storage/mlpipeline/artifacts/sidecar/download/downloaded.tgz`
[download : copy-artifacts] Total: 0 B, Transferred: 128 B, Speed: 10.89 KiB/s
[download : copy-artifacts] Total: 0 B, Transferred: 130 B, Speed: 12.63 KiB/s
[download : sidecar-echo] 2020/07/13 10:40:23 Server is listening on :5678
[download : sidecar-echo] 2020/07/13 10:40:35 localhost:5678 127.0.0.1:38196 "GET / HTTP/1.1" 200 14 "Wget" 38.547µs
[download : sidecar-echo] 2020/07/13 10:40:37 [ERR] Unknown signal terminated
[download : sidecar-echo] 2020/07/31 09:31:18 Server is listening on :5678
[download : sidecar-echo] 2020/07/31 09:31:30 localhost:5678 127.0.0.1:38090 "GET / HTTP/1.1" 200 14 "Wget" 41.526µs
[download : sidecar-echo] 2020/07/31 09:31:33 [ERR] Unknown signal terminated
[echo : main] hello world