Mercurial > repos > iuc > data_manager_primer_scheme_bedfiles
changeset 0:ae730ecdc7e7 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_primer_scheme_bedfiles commit f78536f45aa5ce801251a9725e468a47de1be44a"
author | iuc |
---|---|
date | Sun, 19 Apr 2020 09:22:19 -0400 |
parents | |
children | cce5d9327cd6 |
files | data_manager/install_primer_scheme_bedfiles.py data_manager/install_primer_scheme_bedfiles.xml data_manager/test-data/sample1.bed data_manager_conf.xml tool-data/primer_scheme_bedfiles.loc.sample tool_data_table_conf.xml.sample |
diffstat | 6 files changed, 247 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# File: data_manager/install_primer_scheme_bedfiles.py
"""Galaxy data manager script that installs primer scheme BED files.

Primer schemes are either downloaded from the ARTIC SARS-CoV-2 GitHub
repository (``--artic_primers``) or copied from a local BED file
(``--primer_file`` together with ``--primer_name`` and
``--primer_description``).  The resulting entries are appended to the
``primer_scheme_bedfiles`` data table inside the Galaxy data manager JSON
file given as the positional argument, which is rewritten in place.
"""

from __future__ import print_function, division

import argparse
import json
import os
import os.path
import re
import shutil
import sys

DATA_TABLE_NAME = "primer_scheme_bedfiles"

# Known ARTIC primer schemes and the URL each BED file is fetched from.
ARTIC_PRIMER_URLS = {
    "ARTICv1": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed",
    "ARTICv2": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed",
    "ARTICv3": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed",
}

# Seconds to wait for the remote server before giving up on a download.
DOWNLOAD_TIMEOUT = 60


def fetch_artic_primers(output_directory, primers):
    """Download the requested ARTIC primer schemes into *output_directory*.

    :param output_directory: existing directory the BED files are written to
    :param primers: collection of scheme names (keys of ARTIC_PRIMER_URLS)
    :returns: list of data table rows (dicts with value, path, description),
        one per downloaded scheme, ordered by scheme name
    Exits the process with status 1 if a download fails.
    """
    # Imported lazily: only this download path needs the third-party
    # package, so installing a BED file from a history works without it.
    import requests

    unknown = [name for name in primers if name not in ARTIC_PRIMER_URLS]
    if unknown:
        print(
            "Warning: ignoring unknown primer set(s):",
            ", ".join(unknown),
            file=sys.stderr,
        )

    data = []
    # Sorted so the order of the data table rows is deterministic.
    for name, url in sorted(ARTIC_PRIMER_URLS.items()):
        if name not in primers:
            continue
        try:
            response = requests.get(url, timeout=DOWNLOAD_TIMEOUT)
        except requests.exceptions.RequestException as err:
            print("Error: download of", url, "failed:", err, file=sys.stderr)
            sys.exit(1)
        if response.status_code != 200:
            print(
                "Error: download of",
                url,
                "failed with code",
                response.status_code,
                file=sys.stderr,
            )
            # An HTTP status is not a valid exit status (it would be
            # truncated modulo 256), so report a plain failure instead.
            sys.exit(1)
        bed_output_filename = os.path.join(output_directory, name + ".bed")
        with open(bed_output_filename, "w") as bed_file:
            bed_file.write(response.text)
        # "ARTICv1" -> "ARTIC v1 primer set"
        description = name[:-2] + " " + name[-2:] + " primer set"
        data.append(dict(value=name, path=bed_output_filename, description=description))
    return data


def install_primer_file(
    output_directory, input_filename, primer_name, primer_description
):
    """Copy a local BED file into *output_directory* under a sanitised name.

    Spaces in *primer_name* become underscores and every remaining non-word
    character is removed; the result is used as both the data table value
    and the file name stem.

    :returns: single-element list holding the data table row
    :raises ValueError: if nothing is left of *primer_name* after sanitising
    """
    name = re.sub(r"\W", "", str(primer_name).replace(" ", "_"))
    if not name:
        raise ValueError(
            "primer name %r contains no usable (word) characters" % (primer_name,)
        )
    output_filename = os.path.join(output_directory, name + ".bed")
    shutil.copyfile(input_filename, output_filename)
    data = [dict(value=name, description=primer_description, path=output_filename)]
    return data


class SplitArgs(argparse.Action):
    """argparse action that stores a comma separated value as a list."""

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values.split(","))


def _build_parser():
    """Create the command line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Fetch ARTIC SARS-CoV-2 primer files for Galaxy/IRIDA use"
    )
    parser.add_argument(
        "--output_directory", default="tmp", help="Directory to write output to"
    )
    primer_file = parser.add_argument_group()
    primer_file.add_argument(
        "--primer_file", help="BED format file containing primer scheme"
    )
    primer_file.add_argument(
        "--primer_name",
        help="Name of primer scheme (one word). Required if --primer_file is used",
    )
    primer_file.add_argument(
        "--primer_description",
        help="Description of primer scheme. Required if --primer_file is used",
    )
    artic = parser.add_argument_group()
    artic.add_argument(
        "--artic_primers",
        action=SplitArgs,
        help="Comma separated list of primers to fetch",
    )
    parser.add_argument(
        "galaxy_datamanager_filename",
        help="Galaxy JSON format file describing data manager inputs",
    )
    return parser


def _fail(message):
    """Print *message* to stderr and exit with status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def main(argv=None):
    """Run the data manager; *argv* defaults to the process arguments."""
    args = _build_parser().parse_args(argv)

    if args.artic_primers is None and args.primer_file is None:
        _fail(
            "One of --artic_primers or --primer_file + --primer_name + --primer_description is required."
        )
    elif args.primer_file is not None and (
        args.primer_name is None or args.primer_description is None
    ):
        _fail(
            "If --primer_file is used --primer_name and --primer_description is also required"
        )
    elif args.primer_file is not None and args.artic_primers is not None:
        _fail(
            "Only one of --artic_primers or --primer_file + --primer_name + --primer_description can be chosen"
        )

    # Read the Galaxy-provided JSON once; it is overwritten with the result.
    with open(args.galaxy_datamanager_filename) as config_file:
        config = json.load(config_file)

    # Fall back to --output_directory when Galaxy supplied no output path
    # (missing key, empty list, or no extra_files_path entry).
    output_data = config.get("output_data") or [{}]
    output_directory = output_data[0].get("extra_files_path", None)
    if output_directory is None:
        output_directory = args.output_directory

    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    # Only the data tables are written back; existing rows are preserved.
    data_manager_dict = {"data_tables": config.get("data_tables", {})}
    table_rows = data_manager_dict["data_tables"].setdefault(DATA_TABLE_NAME, [])

    if args.artic_primers:
        data = fetch_artic_primers(output_directory, args.artic_primers)
    else:
        try:
            data = install_primer_file(
                output_directory,
                args.primer_file,
                args.primer_name,
                args.primer_description,
            )
        except ValueError as err:
            _fail("Error: " + str(err))

    table_rows.extend(data)
    print(data_manager_dict)
    with open(args.galaxy_datamanager_filename, "w") as config_file:
        json.dump(data_manager_dict, config_file)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/install_primer_scheme_bedfiles.xml Sun Apr 19 09:22:19 2020 -0400 @@ -0,0 +1,64 @@ +<tool id="data_manager_primer_scheme_bedfiles" name="BED-format primer scheme data manager" version="0.0.8" tool_type="manage_data" profile="19.05"> + <requirements> + <requirement type="package">python</requirement> + <requirement type="package" version="2.22.0">requests</requirement> + </requirements> + <!-- fetch all the primers in one go --> + <command detect_errors="exit_code"> + python '$__tool_directory__/install_primer_scheme_bedfiles.py' + '${output_file}' + #if $input.input_type == "ARTIC" + --artic_primers '${input.primers}' + #else + --primer_file '${input.primer_input}' + --primer_name '${input.primer_name}' + --primer_description '${input.primer_description}' + #end if + </command> + <inputs> + <conditional name="input"> + <param name="input_type" label="Choose the source for primer schemes" type="select"> + <option value="ARTIC" selected="true">ARTIC SARS-CoV-2 Github page</option> + <option value="history">History</option> + </param> + <when value="ARTIC"> + <param name="primers" type="select" multiple="true" label="SARS-CoV-2 Primers to fetch"> + <option value="ARTICv1" selected="true">ARTIC v1</option> + <option value="ARTICv2" selected="true">ARTIC v2</option> + <option value="ARTICv3" selected="true">ARTIC v3</option> + </param> + </when> + <when value="history"> + <param name="primer_input" label="Select history item" type="data" format="bed" /> + <param name="primer_name" label="Name for this primer scheme" type="text" + help="Non-word characters will be removed from primer scheme name and spaces replaced with underscore (_)" /> + <param name="primer_description" label="Description for primer scheme" type="text" /> + </when> + </conditional> + </inputs> + <outputs> + <data name="output_file" format="data_manager_json"/> + </outputs> + <tests> + <test> + <param name="input_type" value="ARTIC" /> + 
<param name="primers" value="ARTICv1,ARTICv2,ARTICv3"/> + <output name="output_file"> + <assert_contents> + <has_text text="ARTIC"/> + </assert_contents> + </output> + </test> + <test> + <param name="input_type" value="history" /> + <param name="primer_input" ftype="bed" value="sample1.bed" /> + <param name="primer_name" value="sample_primer" /> + <param name="primer_description" value="sample primer scheme" /> + <output name="output_file"> + <assert_contents> + <has_text text="sample_primer"/> + </assert_contents> + </output> + </test> + </tests> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/test-data/sample1.bed Sun Apr 19 09:22:19 2020 -0400 @@ -0,0 +1,7 @@ +MN908947.3 30 54 nCoV-2019_1_LEFT 60 + +MN908947.3 385 410 nCoV-2019_1_RIGHT 60 - +MN908947.3 320 342 nCoV-2019_2_LEFT 60 + +MN908947.3 704 726 nCoV-2019_2_RIGHT 60 - +MN908947.3 642 664 nCoV-2019_3_LEFT 60 + +MN908947.3 1004 1028 nCoV-2019_3_RIGHT 60 - +MN908947.3 943 965 nCoV-2019_4_LEFT 60 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Sun Apr 19 09:22:19 2020 -0400 @@ -0,0 +1,20 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/install_primer_scheme_bedfiles.xml" id="data_manager_primer_scheme_bedfiles"> + <data_table name="primer_scheme_bedfiles"> + <output> + <column name="value" /> + <column name="description" /> + <column name="path" output_ref="output_file" > + <!-- note: the Python script sanitises the possibly user-supplied scheme name ('value') --> + <move type="file"> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">primer_scheme_bedfiles/#echo str($value)#.bed</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/primer_scheme_bedfiles/#echo str($value)#.bed</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/primer_scheme_bedfiles.loc.sample Sun Apr 19 09:22:19 2020 -0400 @@ -0,0 +1,8 @@ +# this is a tab separated file describing the location of primer schemes used for amplicon +# sequencing (e.g. using the ARTIC SARS-CoV-2 protocol) +# +# the columns are: +# value description path +# +# for example +# ARTICv1 ARTIC v1 primer set /data/galaxy/tool_data/primer_scheme_bedfiles/ARTICv1.bed \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Sun Apr 19 09:22:19 2020 -0400 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of primer scheme BED files --> + <table name="primer_scheme_bedfiles" comment_char="#"> + <columns>value, description, path</columns> + <file path="tool-data/primer_scheme_bedfiles.loc" /> + </table> +</tables>