Mercurial > repos > aafc-mbb > kurator
diff example.yaml @ 0:9aafb378478e draft
Uploaded
author | aafc-mbb |
---|---|
date | Thu, 21 Apr 2016 17:14:00 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example.yaml	Thu Apr 21 17:14:00 2016 -0400
@@ -0,0 +1,101 @@
+#####################################################################################
+# curate_csv_with_worms.yaml
+#####################################################################################
+#
+# Clean data provided via standard input and print results to standard output:
+#     ka curate_csv_with_worms.yaml < ../data/five_records.csv
+#
+# Clean data in a named input file and print results to standard output:
+#     ka curate_csv_with_worms.yaml -p input=../data/five_records.csv
+#
+# Clean data in a named input file and save results to named output file:
+#     ka curate_csv_with_worms.yaml -p input=../data/five_records.csv -p output=output.csv
+#
+#####################################################################################
+
+imports:
+
+- classpath:/org/kurator/akka/types.yaml
+
+components:
+
+# Reads CSV records from the 'inputFile' option, or from standard input when it is unset.
+- id: ReadInput
+  type: PythonActor
+  properties:
+    code: |
+      import csv
+      import sys
+      def on_start(options):
+        """Yield each CSV row as a dict keyed by the header row."""
+        filePath = options.get('inputFile')
+        f = open(filePath, 'r') if filePath is not None else sys.stdin
+        try:
+          for record in csv.DictReader(f):
+            yield record
+        finally:
+          # Close only a file opened here; leave sys.stdin alone.
+          if f is not sys.stdin:
+            f.close()
+
+# Hands records from ReadInput to RecordCurator.clean_record (configured via onData);
+# the parameters name the record fields involved. NOTE(review): the exact meaning of
+# each parameter is defined by kurator_worms, not by this file -- confirm there.
+- id: CleanRecords
+  type: PythonClassActor
+  properties:
+    pythonClass: kurator_worms.record_curator.RecordCurator
+    onData: clean_record
+    parameters:
+      taxon_name_field : 'TaxonName'
+      author_field : 'Author'
+      original_taxon_name_field : 'OriginalTaxonName'
+      original_author_field : 'OriginalAuthor'
+      match_type_field : 'WoRMSMatchType'
+      lsid_field : 'LSID'
+      fuzzy_match_enabled : 'True'
+    listensTo:
+      - !ref ReadInput
+
+# Writes records as CSV to the 'outputFile' option, or to standard output when it is unset.
+- id: WriteOutput
+  type: PythonClassActor
+  properties:
+    pythonClass: CsvWriter
+    code: |
+      import csv
+      import os
+      import sys
+      class CsvWriter(object):
+        def on_start(self, options):
+          """Open the destination stream and write the CSV header row."""
+          filePath = options.get('outputFile')
+          self.f = open(filePath, 'w') if filePath is not None else sys.stdout
+          self.dw = csv.DictWriter(self.f,
+                                   fieldnames=options['fieldnames'],
+                                   quotechar="'",
+                                   lineterminator=os.linesep)
+          self.dw.writeheader()
+        def on_data(self, record):
+          """Write one record as a CSV row and flush it to the destination."""
+          self.dw.writerow(record)
+          # Flush the stream actually written to; it may be a file rather than stdout.
+          self.f.flush()
+    parameters:
+      fieldnames: [ID,TaxonName,Author,OriginalTaxonName,OriginalAuthor,WoRMSMatchType,LSID]
+    listensTo:
+      - !ref CleanRecords
+
+# Workflow wiring: the 'input' and 'output' parameters shown in the usage examples above
+# are mapped to ReadInput's 'inputFile' and WriteOutput's 'outputFile' options.
+- id: ValidateNamesWithWoRMSWorkflow
+  type: Workflow
+  properties:
+    actors:
+      - !ref ReadInput
+      - !ref CleanRecords
+      - !ref WriteOutput
+    parameters:
+      input:
+        actor: !ref ReadInput
+        parameter: inputFile
+      output:
+        actor: !ref WriteOutput
+        parameter: outputFile