Mercurial > repos > aafc-mbb > kurator
comparison example.yaml @ 0:9aafb378478e draft
Uploaded
author | aafc-mbb |
---|---|
date | Thu, 21 Apr 2016 17:14:00 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9aafb378478e |
---|---|
1 ##################################################################################### | |
2 # curate_csv_with_worms.yaml | |
3 ##################################################################################### | |
4 # | |
5 # Clean data provided via standard input and print results to standard output: | |
6 # ka curate_csv_with_worms.yaml < ../data/five_records.csv | |
7 # | |
8 # Clean data in a named input file and print results to standard output: | |
9 # ka curate_csv_with_worms.yaml -p input=../data/five_records.csv | |
10 # | |
11 # Clean data in a named input file and save results to named output file: | |
12 # ka curate_csv_with_worms.yaml -p input=../data/five_records.csv -p output=output.csv | |
13 # | |
14 ##################################################################################### | |
15 | |
16 imports: | |
17 | |
18 - classpath:/org/kurator/akka/types.yaml | |
19 | |
20 components: | |
21 | |
22 - id: ReadInput | |
23 type: PythonActor | |
24 properties: | |
25 code: | | |
26 import csv | |
27 import sys | |
def on_start(options):
    """Yield each row of the input CSV as a dict keyed by the header row.

    Rows are read from the file named by ``options['inputFile']``. When that
    option is absent (or None), rows are read from standard input instead.
    """
    filePath = options.get('inputFile')
    if filePath is None:
        # sys.stdin belongs to the interpreter, so it is deliberately not
        # closed here.
        for record in csv.DictReader(sys.stdin):
            yield record
        return
    # The context manager closes the file once all rows have been yielded
    # (or the generator is closed early); previously the handle was leaked.
    with open(filePath, 'r') as f:
        for record in csv.DictReader(f):
            yield record
33 | |
34 - id: CleanRecords | |
35 type: PythonClassActor | |
36 properties: | |
37 pythonClass: kurator_worms.record_curator.RecordCurator | |
38 onData: clean_record | |
39 parameters: | |
40 taxon_name_field : 'TaxonName' | |
41 author_field : 'Author' | |
42 original_taxon_name_field : 'OriginalTaxonName' | |
43 original_author_field : 'OriginalAuthor' | |
44 match_type_field : 'WoRMSMatchType' | |
45 lsid_field : 'LSID' | |
46 fuzzy_match_enabled : 'True' | |
47 listensTo: | |
48 - !ref ReadInput | |
49 | |
50 - id: WriteOutput | |
51 type: PythonClassActor | |
52 properties: | |
53 pythonClass: CsvWriter | |
54 code: | | |
55 import csv | |
56 import os | |
57 import sys | |
class CsvWriter(object):
    """Write incoming records as CSV rows to a file or to standard output."""

    def on_start(self, options):
        """Open the destination and write the CSV header row.

        ``options['outputFile']`` names the destination file; when it is
        absent (or None) rows are written to standard output.
        ``options['fieldnames']`` is required and fixes the column order.
        """
        filePath = options.get('outputFile')
        self.f = open(filePath, 'w') if filePath is not None else sys.stdout
        # Fields are quoted with a single quote rather than the csv default.
        self.dw = csv.DictWriter(self.f,
                                 fieldnames=options['fieldnames'],
                                 quotechar="'",
                                 lineterminator=os.linesep)
        self.dw.writeheader()
        # Push the header out immediately so it is not lost if no records
        # ever arrive.
        self.f.flush()

    def on_data(self, record):
        """Write one record as a CSV row and flush it to the destination."""
        self.dw.writerow(record)
        # Flush the stream actually being written. Flushing sys.stdout
        # unconditionally left rows buffered when writing to a named file.
        self.f.flush()
70 parameters: | |
71 fieldnames: [ID,TaxonName,Author,OriginalTaxonName,OriginalAuthor,WoRMSMatchType,LSID] | |
72 listensTo: | |
73 - !ref CleanRecords | |
74 | |
75 - id: ValidateNamesWithWoRMSWorkflow | |
76 type: Workflow | |
77 properties: | |
78 actors: | |
79 - !ref ReadInput | |
80 - !ref CleanRecords | |
81 - !ref WriteOutput | |
82 parameters: | |
83 input: | |
84 actor: !ref ReadInput | |
85 parameter: inputFile | |
86 output: | |
87 actor: !ref WriteOutput | |
88 parameter: outputFile |