Mercurial > repos > iuc > data_manager_primer_scheme_bedfiles
changeset 2:b3710e492ee4 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_primer_scheme_bedfiles commit 61b727022bd1b24a8843830a9c5f34ef66835694"
author | iuc |
---|---|
date | Mon, 22 Jun 2020 18:26:10 -0400 |
parents | cce5d9327cd6 |
children | a2953ef09fe1 |
files | data_manager/install_primer_scheme_bedfiles.py data_manager/install_primer_scheme_bedfiles.xml data_manager/test-data/sample1.bed test-data/primer_scheme_bedfiles.loc test-data/sample1.bed tool_data_table_conf.xml.test |
diffstat | 6 files changed, 64 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/install_primer_scheme_bedfiles.py Fri Apr 24 16:10:30 2020 -0400 +++ b/data_manager/install_primer_scheme_bedfiles.py Mon Jun 22 18:26:10 2020 -0400 @@ -1,20 +1,39 @@ #!/usr/bin/env python -from __future__ import print_function, division +from __future__ import division, print_function import argparse +try: + from io import StringIO +except ImportError: + from StringIO import StringIO import json import os import os.path import re import sys -import shutil import requests DATA_TABLE_NAME = "primer_scheme_bedfiles" +def write_good_bed(input_file, bed_output_filename): + with open(bed_output_filename, "w") as bed_output_file: + for line in input_file: + fields = line.split("\t") + if len(fields) < 6: + # too short to encode the strand format + exit("invalid format in BED file: {}".format(line.rstrip())) + try: + # try and parse field 5 as a number + float(fields[4]) + except ValueError: + # ARTIC with broken BED, set field 5 to 60 + fields[4] = "60" + bed_output_file.write("\t".join(fields)) + + def fetch_artic_primers(output_directory, primers): primer_sets = { "SARS-CoV-2-ARTICv1": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed", @@ -37,7 +56,7 @@ ) exit(response.status_code) bed_output_filename = os.path.join(output_directory, name + ".bed") - open(bed_output_filename, "w").write(response.text) + write_good_bed(StringIO(response.text), bed_output_filename) description = name[:-2] + " " + name[-2:] + " primer set" data.append(dict(value=name, path=bed_output_filename, description=description)) return data @@ -48,7 +67,8 @@ ): name = re.sub(r"\W", "", str(primer_name).replace(" ", "_")) output_filename = os.path.join(output_directory, name + ".bed") - shutil.copyfile(input_filename, output_filename) + with open(input_filename) as input_file: + write_good_bed(input_file, output_filename) data = [dict(value=name, description=primer_description, path=output_filename)] return data
--- a/data_manager/install_primer_scheme_bedfiles.xml Fri Apr 24 16:10:30 2020 -0400 +++ b/data_manager/install_primer_scheme_bedfiles.xml Mon Jun 22 18:26:10 2020 -0400 @@ -1,7 +1,6 @@ -<tool id="data_manager_primer_scheme_bedfiles" name="BED-format primer scheme data manager" version="0.0.10" tool_type="manage_data" profile="19.05"> +<tool id="data_manager_primer_scheme_bedfiles" name="BED-format primer scheme data manager" version="0.0.11" tool_type="manage_data" profile="19.05"> <requirements> - <requirement type="package">python</requirement> - <requirement type="package" version="2.22.0">requests</requirement> + <requirement type="package" version="2.24.0">requests</requirement> </requirements> <!-- fetch all the primers in one go --> <command detect_errors="exit_code"> @@ -61,4 +60,16 @@ </output> </test> </tests> + <help><![CDATA[ + Amplicon sequencing for viral pathogens using the `PrimalSeq and iVar`_ relies on + identifying primer locations in a reference sequence using BED format files. This + data manager populates a Galaxy tool data table, either from files provided via + a history or via the ARTIC_ network Github repository. + + .. _PrimalSeq and iVar: https://genomebiology.biomedcentral.com/articles/10.1186/s13059-018-1618-7 + .. _ARTIC: https://artic.network/ + ]]></help> + <citations> + <citation type="doi">10.1186/s13059-018-1618-7</citation> + </citations> </tool>
--- a/data_manager/test-data/sample1.bed Fri Apr 24 16:10:30 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -MN908947.3 30 54 nCoV-2019_1_LEFT 60 + -MN908947.3 385 410 nCoV-2019_1_RIGHT 60 - -MN908947.3 320 342 nCoV-2019_2_LEFT 60 + -MN908947.3 704 726 nCoV-2019_2_RIGHT 60 - -MN908947.3 642 664 nCoV-2019_3_LEFT 60 + -MN908947.3 1004 1028 nCoV-2019_3_RIGHT 60 - -MN908947.3 943 965 nCoV-2019_4_LEFT 60 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/primer_scheme_bedfiles.loc Mon Jun 22 18:26:10 2020 -0400 @@ -0,0 +1,12 @@ +# this is a tab separated file describing the location of primer schemes used for amplicon +# sequencing (e.g. using the ARTIC SARS-CoV-2 protocol) +# +# the columns are: +# value description path +# +# for example +# SARS-CoV-2-ARTICv1 SARS-CoV-2 ARTIC v1 primers /data/galaxy/tool_data/artic_primers/SARS-CoV-2-ARTICv1.bed +SARS-CoV-2-ARTICv3 SARS-CoV-2-ARTIC v3 primer set /tmp/tmpYMFYgd/tmpdHhY2S/tmppwFSVU/database/data_manager_tool-dataI2hi9i/primer_scheme_bedfiles/SARS-CoV-2-ARTICv3.bed +SARS-CoV-2-ARTICv2 SARS-CoV-2-ARTIC v2 primer set /tmp/tmpYMFYgd/tmpdHhY2S/tmppwFSVU/database/data_manager_tool-dataI2hi9i/primer_scheme_bedfiles/SARS-CoV-2-ARTICv2.bed +SARS-CoV-2-ARTICv1 SARS-CoV-2-ARTIC v1 primer set /tmp/tmpYMFYgd/tmpdHhY2S/tmppwFSVU/database/data_manager_tool-dataI2hi9i/primer_scheme_bedfiles/SARS-CoV-2-ARTICv1.bed +sample_primer sample primer scheme /tmp/tmpYMFYgd/tmpdHhY2S/tmppwFSVU/database/data_manager_tool-dataI2hi9i/primer_scheme_bedfiles/sample_primer.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample1.bed Mon Jun 22 18:26:10 2020 -0400 @@ -0,0 +1,7 @@ +MN908947.3 30 54 nCoV-2019_1_LEFT 60 + +MN908947.3 385 410 nCoV-2019_1_RIGHT 60 - +MN908947.3 320 342 nCoV-2019_2_LEFT 60 + +MN908947.3 704 726 nCoV-2019_2_RIGHT 60 - +MN908947.3 642 664 nCoV-2019_3_LEFT 60 + +MN908947.3 1004 1028 nCoV-2019_3_RIGHT 60 - +MN908947.3 943 965 nCoV-2019_4_LEFT 60 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Mon Jun 22 18:26:10 2020 -0400 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of primer scheme BED files --> + <table name="primer_scheme_bedfiles" comment_char="#" allow_duplicate_entries="False"> + <columns>value, description, path</columns> + <file path="${__HERE__}/test-data/primer_scheme_bedfiles.loc" /> + </table> +</tables>