# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Argo Workflow: read a text object with gsutil, find its most frequent
# whitespace-separated word, and copy that word to the given output path.
#
# Workflow parameters:
#   url        - object passed to `gsutil cat` in the download step.
#   outputpath - destination passed to `gsutil cp` in the save step.
#
# Every container runs `sh -c <script> sh <values...>`. Parameter values are
# handed over as positional arguments ("$1", "$2") instead of being spliced
# into the script text, so quotes, `$(...)` or newlines inside a value cannot
# change the command that is executed.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  annotations:
    pipelines.kubeflow.org/pipeline_spec: '{"description": "Download and Get Most Frequent Word and Save to GCS", "inputs": [{"name": "url"}, {"name": "outputpath"}], "name": "Download and Save Most Frequent"}'
  generateName: download-and-save-most-frequent-
spec:
  arguments:
    parameters:
      - name: url
      - name: outputpath
  entrypoint: download-and-save-most-frequent
  serviceAccountName: pipeline-runner
  templates:
    # download: writes the content of `url` to /tmp/results.txt and exposes
    # that file both as an artifact and as an output parameter.
    - name: download
      inputs:
        parameters:
          - name: url
      container:
        image: google/cloud-sdk
        command:
          - sh
          - -c
        args:
          # Redirect first, then echo the file to the log: with `cmd | tee`
          # the step would report tee's exit status and hide a gsutil failure.
          - 'gsutil cat "$1" > /tmp/results.txt && cat /tmp/results.txt'
          # "$0" placeholder, followed by "$1".
          - sh
          - '{{inputs.parameters.url}}'
      outputs:
        artifacts:
          - name: download-downloaded
            path: /tmp/results.txt
        parameters:
          - name: download-downloaded
            valueFrom:
              path: /tmp/results.txt

    # Entry point: download -> get-frequent -> save, chained through each
    # task's output parameter.
    - name: download-and-save-most-frequent
      inputs:
        parameters:
          - name: outputpath
          - name: url
      dag:
        tasks:
          - name: download
            template: download
            arguments:
              parameters:
                - name: url
                  value: '{{inputs.parameters.url}}'
          - name: get-frequent
            template: get-frequent
            dependencies:
              - download
            arguments:
              parameters:
                - name: download-downloaded
                  value: '{{tasks.download.outputs.parameters.download-downloaded}}'
          - name: save
            template: save
            dependencies:
              - get-frequent
            arguments:
              parameters:
                - name: get-frequent-word
                  value: '{{tasks.get-frequent.outputs.parameters.get-frequent-word}}'
                - name: outputpath
                  value: '{{inputs.parameters.outputpath}}'

    # get-frequent: counts whitespace-separated words of the downloaded text
    # and writes the most frequent one to /tmp/message.txt.
    - name: get-frequent
      inputs:
        parameters:
          - name: download-downloaded
      container:
        image: python:3.5-jessie
        command:
          - sh
          - -c
        args:
          # The text reaches Python through sys.argv[1] rather than being
          # pasted into a string literal, so quotes or newlines in it cannot
          # break the program. Empty input makes max() raise, and the `&&`
          # lets that failure surface instead of publishing an empty word.
          - >-
            python -c 'import sys; from collections import Counter;
            words = Counter(sys.argv[1].split());
            print(max(words, key=words.get))' "$1"
            > /tmp/message.txt && cat /tmp/message.txt
          # "$0" placeholder, followed by "$1".
          - sh
          - '{{inputs.parameters.download-downloaded}}'
      outputs:
        artifacts:
          - name: get-frequent-word
            path: /tmp/message.txt
        parameters:
          - name: get-frequent-word
            valueFrom:
              path: /tmp/message.txt

    # save: writes the word to /tmp/results.txt (echoing it to the log) and
    # copies that file to `outputpath`.
    - name: save
      inputs:
        parameters:
          - name: get-frequent-word
          - name: outputpath
      container:
        image: google/cloud-sdk
        command:
          - sh
          - -c
        args:
          # `&&` makes gsutil start only after tee has finished writing the
          # file; piping tee into gsutil ran both at once and could upload a
          # missing or partially written file.
          - >-
            printf '%s\n' "$1" | tee /tmp/results.txt
            && gsutil cp /tmp/results.txt "$2"
          # "$0" placeholder, followed by "$1" and "$2".
          - sh
          - '{{inputs.parameters.get-frequent-word}}'
          - '{{inputs.parameters.outputpath}}'