comparison example.yaml @ 0:9aafb378478e draft

Uploaded
author aafc-mbb
date Thu, 21 Apr 2016 17:14:00 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9aafb378478e
1 #####################################################################################
2 # curate_csv_with_worms.yaml
3 #####################################################################################
4 #
5 # Clean data provided via standard input and print results to standard output:
6 # ka curate_csv_with_worms.yaml < ../data/five_records.csv
7 #
8 # Clean data in a named input file and print results to standard output:
9 # ka curate_csv_with_worms.yaml -p input=../data/five_records.csv
10 #
11 # Clean data in a named input file and save results to named output file:
12 # ka curate_csv_with_worms.yaml -p input=../data/five_records.csv -p output=output.csv
13 #
14 #####################################################################################
15
16 imports:
17
18 - classpath:/org/kurator/akka/types.yaml
19
20 components:
21
# ReadInput: source actor. Parses CSV input with csv.DictReader and emits each
# row as one record. Input comes from the file named by the 'inputFile' option,
# or from standard input when that option is not set.
- id: ReadInput
  type: PythonActor
  properties:
    code: |
      import csv
      import sys
      def on_start(options):
          filePath = options.get('inputFile')
          f = open(filePath, 'r') if filePath is not None else sys.stdin
          try:
              for record in csv.DictReader(f):
                  yield record
          finally:
              # Release the handle once all rows are emitted (or the generator
              # is abandoned). Only close a file opened here; never stdin.
              if f is not sys.stdin:
                  f.close()
33
# CleanRecords: receives the records emitted by ReadInput and hands each one to
# the 'clean_record' handler of kurator_worms.record_curator.RecordCurator.
# The parameters below name the record fields that class is configured with.
- id: CleanRecords
  type: PythonClassActor
  properties:
    pythonClass: kurator_worms.record_curator.RecordCurator
    onData: clean_record
    parameters:
      taxon_name_field: 'TaxonName'
      author_field: 'Author'
      original_taxon_name_field: 'OriginalTaxonName'
      original_author_field: 'OriginalAuthor'
      match_type_field: 'WoRMSMatchType'
      lsid_field: 'LSID'
      # Quoted on purpose in the original: this is the string 'True', not a
      # YAML boolean. NOTE(review): confirm RecordCurator expects a string.
      fuzzy_match_enabled: 'True'
    listensTo:
    - !ref ReadInput
49
# WriteOutput: sink actor. Writes each record received from CleanRecords as a
# CSV row, preceded by a header row. Output goes to the file named by the
# 'outputFile' option, or to standard output when that option is not set.
- id: WriteOutput
  type: PythonClassActor
  properties:
    pythonClass: CsvWriter
    code: |
      import csv
      import os
      import sys
      class CsvWriter(object):
          def on_start(self, options):
              # Open the destination and write the header row once, up front.
              filePath = options.get('outputFile')
              self.f = open(filePath, 'w') if filePath is not None else sys.stdout
              self.dw = csv.DictWriter(self.f,
                                       fieldnames=options['fieldnames'],
                                       quotechar="'",
                                       lineterminator=os.linesep)
              self.dw.writeheader()
          def on_data(self, record):
              self.dw.writerow(record)
              # Flush the stream actually being written to. Flushing
              # sys.stdout here left rows buffered when writing to a file.
              # NOTE(review): the file is never closed explicitly; the
              # per-record flush keeps written rows from being lost.
              self.f.flush()
    parameters:
      # Column order of the output CSV.
      fieldnames: [ID, TaxonName, Author, OriginalTaxonName, OriginalAuthor, WoRMSMatchType, LSID]
    listensTo:
    - !ref CleanRecords
74
# ValidateNamesWithWoRMSWorkflow: the workflow definition. Lists the three
# actors it is composed of and exposes two workflow-level parameters:
#   input  -> the 'inputFile' parameter of ReadInput
#   output -> the 'outputFile' parameter of WriteOutput
- id: ValidateNamesWithWoRMSWorkflow
  type: Workflow
  properties:
    actors:
    - !ref ReadInput
    - !ref CleanRecords
    - !ref WriteOutput
    parameters:
      input:
        actor: !ref ReadInput
        parameter: inputFile
      output:
        actor: !ref WriteOutput
        parameter: outputFile