chore(components): Fix typos in Great Expectations component (#5326)

* Components - Added Great Expectations component for dataset validation

* Apply linter

* Remove unnecessary component from a sample pipeline for the Great Expectations component

* Use smart_open dependency in Great Expectations component

This change allows using GE Expectation Suites located on S3 and in other locations.

* Update docs for Great Expectations component

* Rollback changes for GE component
This commit is contained in:
Anton Kiselev 2021-04-04 05:58:21 +03:00 committed by GitHub
parent d95764b756
commit 43507d244d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 27 deletions

View File

@ -6,16 +6,15 @@ def validate_csv_using_greatexpectations(
expectation_suite_path: InputPath(),
data_doc_path: OutputPath(),
):
"""Validate a CSV dataset against a Great Expectations suite and create
Data Doc (a validation report). This component fails if validation is not
successful.
"""Validate a CSV dataset against a Great Expectations suite and create Data Doc (a validation report).
This component fails if validation is not successful.
Annotations:
authors: Yaroslav Beshta <ybeshta@provectus.com>, Anton Kiselev <akiselev@provectus.com>
Args:
csv_path: Path to the CSV file with the dataset.
expectation_suite_path: Path to Great Expectations expectation suite (in JSON format)
expectation_suite_path: Path to Great Expectations expectation suite (in JSON format).
"""
import json
import os
@ -42,7 +41,7 @@ def validate_csv_using_greatexpectations(
if __name__ == '__main__':
calculate_regression_metrics_from_csv_op = create_component_from_func(
validate_csv_using_greatexpectations_op = create_component_from_func(
validate_csv_using_greatexpectations,
output_component_file='component.yaml',
base_image='python:3.8',

View File

@ -1,17 +1,10 @@
name: Validate csv using greatexpectations
description: |-
Validate a CSV dataset against a Great Expectations suite and create Data Doc (a vaidation report).
This component fails if validation is not successful.
Annotations:
authors: Yaroslav Beshta <ybeshta@provectus.com>, Anton Kiselev <akiselev@provectus.com>
Args:
csv_path: Path to the CSV file with the dataset.
expectation_suite_path: Path to Great Expectations expectation suite (in JSON format)
description: Validate a CSV dataset against a Great Expectations suite and create
Data Doc (a validation report).
inputs:
- {name: csv}
- {name: expectation_suite}
- {name: csv, description: Path to the CSV file with the dataset.}
- {name: expectation_suite, description: Path to Great Expectations expectation suite
(in JSON format).}
outputs:
- {name: data_doc}
implementation:
@ -24,9 +17,12 @@ implementation:
'great-expectations==0.13.11' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m
pip install --quiet --no-warn-script-location 'great-expectations==0.13.11'
--user) && "$0" "$@"
- python3
- -u
- -c
- sh
- -ec
- |
program_path=$(mktemp)
printf "%s" "$0" > "$program_path"
python3 -u "$program_path" "$@"
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
@ -38,7 +34,7 @@ implementation:
expectation_suite_path,
data_doc_path,
):
"""Validate a CSV dataset against a Great Expectations suite and create Data Doc (a vaidation report).
"""Validate a CSV dataset against a Great Expectations suite and create Data Doc (a validation report).
This component fails if validation is not successful.
Annotations:
@ -46,7 +42,7 @@ implementation:
Args:
csv_path: Path to the CSV file with the dataset.
expectation_suite_path: Path to Great Expectations expectation suite (in JSON format)
expectation_suite_path: Path to Great Expectations expectation suite (in JSON format).
"""
import json
import os
@ -58,10 +54,7 @@ implementation:
with open(expectation_suite_path, 'r') as json_file:
expectation_suite = json.load(json_file)
df = ge.read_csv(
csv_path,
expectation_suite=expectation_suite
)
df = ge.read_csv(csv_path, expectation_suite=expectation_suite)
result = df.validate()
document_model = ValidationResultsPageRenderer().render(result)
@ -75,7 +68,7 @@ implementation:
sys.exit(1)
import argparse
_parser = argparse.ArgumentParser(prog='Validate csv using greatexpectations', description='Validate a CSV dataset against a Great Expectations suite and create Data Doc (a vaidation report).\n This component fails if validation is not successful.\n\n Annotations:\n authors: Yaroslav Beshta <ybeshta@provectus.com>, Anton Kiselev <akiselev@provectus.com>\n\n Args:\n csv_path: Path to the CSV file with the dataset.\n expectation_suite_path: Path to Great Expectations expectation suite (in JSON format)')
_parser = argparse.ArgumentParser(prog='Validate csv using greatexpectations', description='Validate a CSV dataset against a Great Expectations suite and create Data Doc (a validation report).')
_parser.add_argument("--csv", dest="csv_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--expectation-suite", dest="expectation_suite_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--data-doc", dest="data_doc_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)