changeset 0:9aafb378478e draft

Uploaded
author aafc-mbb
date Thu, 21 Apr 2016 17:14:00 -0400
parents
children b04af3140b73
files example.yaml
diffstat 1 files changed, 88 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example.yaml	Thu Apr 21 17:14:00 2016 -0400
@@ -0,0 +1,88 @@
+#####################################################################################
+# curate_csv_with_worms.yaml
+#####################################################################################
+#
+# Clean data provided via standard input and print results to standard output:
+# ka curate_csv_with_worms.yaml < ../data/five_records.csv
+#
+# Clean data in a named input file and print results to standard output:
+# ka curate_csv_with_worms.yaml -p input=../data/five_records.csv
+#
+# Clean data in a named input file and save results to named output file:
+# ka curate_csv_with_worms.yaml -p input=../data/five_records.csv -p output=output.csv
+#
+#####################################################################################
+
+imports:
+
+- classpath:/org/kurator/akka/types.yaml
+
+components:
+
+- id: ReadInput
+  type: PythonActor
+  properties:
+    code: |
+        import csv
+        import sys
+        def on_start(options):
+          filePath = options.get('inputFile') 
+          f = open(filePath, 'r') if filePath is not None else sys.stdin
+          for record in csv.DictReader(f):
+             yield record
+
+- id: CleanRecords
+  type: PythonClassActor
+  properties:
+    pythonClass: kurator_worms.record_curator.RecordCurator
+    onData: clean_record
+    parameters:
+        taxon_name_field          : 'TaxonName'
+        author_field              : 'Author'
+        original_taxon_name_field : 'OriginalTaxonName'
+        original_author_field     : 'OriginalAuthor'
+        match_type_field          : 'WoRMSMatchType'
+        lsid_field                : 'LSID'
+        fuzzy_match_enabled       : 'True'
+    listensTo:
+      - !ref ReadInput
+
+- id: WriteOutput
+  type: PythonClassActor
+  properties:
+    pythonClass: CsvWriter
+    code: |
+        import csv
+        import os
+        import sys
+        class CsvWriter(object): 
+          def on_start(self, options):
+            filePath = options.get('outputFile') 
+            self.f = open(filePath, 'w') if filePath is not None else sys.stdout
+            self.dw = csv.DictWriter(self.f,
+                                     fieldnames=options['fieldnames'],
+                                     quotechar="'", 
+                                     lineterminator=os.linesep)
+            self.dw.writeheader()
+          def on_data(self, record):
+            self.dw.writerow(record)
+            sys.stdout.flush()
+    parameters:
+      fieldnames: [ID,TaxonName,Author,OriginalTaxonName,OriginalAuthor,WoRMSMatchType,LSID]
+    listensTo:
+      - !ref CleanRecords
+
+- id: ValidateNamesWithWoRMSWorkflow
+  type: Workflow
+  properties:
+    actors:
+      - !ref ReadInput
+      - !ref CleanRecords
+      - !ref WriteOutput
+    parameters:
+       input:
+         actor: !ref ReadInput
+         parameter: inputFile
+       output:
+         actor: !ref WriteOutput
+         parameter: outputFile