annotate example.yaml @ 4:9bcf8b075993 draft

Uploaded
author aafc-mbb
date Thu, 21 Apr 2016 17:14:37 -0400
parents 9aafb378478e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
1 #####################################################################################
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
2 # curate_csv_with_worms.yaml
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
3 #####################################################################################
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
4 #
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
5 # Clean data provided via standard input and print results to standard output:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
6 # ka curate_csv_with_worms.yaml < ../data/five_records.csv
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
7 #
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
8 # Clean data in a named input file and print results to standard output:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
9 # ka curate_csv_with_worms.yaml -p input=../data/five_records.csv
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
10 #
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
11 # Clean data in a named input file and save results to named output file:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
12 # ka curate_csv_with_worms.yaml -p input=../data/five_records.csv -p output=output.csv
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
13 #
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
14 #####################################################################################
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
15
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
16 imports:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
17
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
18 - classpath:/org/kurator/akka/types.yaml
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
19
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
20 components:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
21
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
22 - id: ReadInput
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
23 type: PythonActor
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
24 properties:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
25 code: |
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
26 import csv
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
27 import sys
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
28 def on_start(options):
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
29 filePath = options.get('inputFile')
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
30 f = open(filePath, 'r') if filePath is not None else sys.stdin
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
31 for record in csv.DictReader(f):
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
32 yield record
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
33
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
34 - id: CleanRecords
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
35 type: PythonClassActor
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
36 properties:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
37 pythonClass: kurator_worms.record_curator.RecordCurator
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
38 onData: clean_record
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
39 parameters:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
40 taxon_name_field : 'TaxonName'
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
41 author_field : 'Author'
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
42 original_taxon_name_field : 'OriginalTaxonName'
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
43 original_author_field : 'OriginalAuthor'
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
44 match_type_field : 'WoRMSMatchType'
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
45 lsid_field : 'LSID'
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
46 fuzzy_match_enabled : 'True'
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
47 listensTo:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
48 - !ref ReadInput
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
49
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
50 - id: WriteOutput
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
51 type: PythonClassActor
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
52 properties:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
53 pythonClass: CsvWriter
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
54 code: |
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
55 import csv
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
56 import os
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
57 import sys
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
58 class CsvWriter(object):
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
59 def on_start(self, options):
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
60 filePath = options.get('outputFile')
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
61 self.f = open(filePath, 'w') if filePath is not None else sys.stdout
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
62 self.dw = csv.DictWriter(self.f,
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
63 fieldnames=options['fieldnames'],
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
64 quotechar="'",
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
65 lineterminator=os.linesep)
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
66 self.dw.writeheader()
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
67 def on_data(self, record):
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
68 self.dw.writerow(record)
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
69 sys.stdout.flush()
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
70 parameters:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
71 fieldnames: [ID,TaxonName,Author,OriginalTaxonName,OriginalAuthor,WoRMSMatchType,LSID]
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
72 listensTo:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
73 - !ref CleanRecords
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
74
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
75 - id: ValidateNamesWithWoRMSWorkflow
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
76 type: Workflow
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
77 properties:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
78 actors:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
79 - !ref ReadInput
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
80 - !ref CleanRecords
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
81 - !ref WriteOutput
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
82 parameters:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
83 input:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
84 actor: !ref ReadInput
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
85 parameter: inputFile
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
86 output:
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
87 actor: !ref WriteOutput
9aafb378478e Uploaded
aafc-mbb
parents:
diff changeset
88 parameter: outputFile