changeset 2:b3710e492ee4 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_primer_scheme_bedfiles commit 61b727022bd1b24a8843830a9c5f34ef66835694"
author iuc
date Mon, 22 Jun 2020 18:26:10 -0400
parents cce5d9327cd6
children a2953ef09fe1
files data_manager/install_primer_scheme_bedfiles.py data_manager/install_primer_scheme_bedfiles.xml data_manager/test-data/sample1.bed test-data/primer_scheme_bedfiles.loc test-data/sample1.bed tool_data_table_conf.xml.test
diffstat 6 files changed, 64 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/install_primer_scheme_bedfiles.py	Fri Apr 24 16:10:30 2020 -0400
+++ b/data_manager/install_primer_scheme_bedfiles.py	Mon Jun 22 18:26:10 2020 -0400
@@ -1,20 +1,39 @@
 #!/usr/bin/env python
 
-from __future__ import print_function, division
+from __future__ import division, print_function
 
 import argparse
+try:
+    from io import StringIO
+except ImportError:
+    from StringIO import StringIO
 import json
 import os
 import os.path
 import re
 import sys
-import shutil
 
 import requests
 
 DATA_TABLE_NAME = "primer_scheme_bedfiles"
 
 
+def write_good_bed(input_file, bed_output_filename):
+    with open(bed_output_filename, "w") as bed_output_file:
+        for line in input_file:
+            fields = line.split("\t")
+            if len(fields) < 6:
+                # too short to encode the strand format
+                exit("invalid format in BED file: {}".format(line.rstrip()))
+            try:
+                # try and parse field 5 as a number
+                float(fields[4])
+            except ValueError:
+                # ARTIC with broken BED, set field 5 to 60
+                fields[4] = "60"
+            bed_output_file.write("\t".join(fields))
+
+
 def fetch_artic_primers(output_directory, primers):
     primer_sets = {
         "SARS-CoV-2-ARTICv1": "https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed",
@@ -37,7 +56,7 @@
             )
             exit(response.status_code)
         bed_output_filename = os.path.join(output_directory, name + ".bed")
-        open(bed_output_filename, "w").write(response.text)
+        write_good_bed(StringIO(response.text), bed_output_filename)
         description = name[:-2] + " " + name[-2:] + " primer set"
         data.append(dict(value=name, path=bed_output_filename, description=description))
     return data
@@ -48,7 +67,8 @@
 ):
     name = re.sub(r"\W", "", str(primer_name).replace(" ", "_"))
     output_filename = os.path.join(output_directory, name + ".bed")
-    shutil.copyfile(input_filename, output_filename)
+    with open(input_filename) as input_file:
+        write_good_bed(input_file, output_filename)
     data = [dict(value=name, description=primer_description, path=output_filename)]
     return data
 
--- a/data_manager/install_primer_scheme_bedfiles.xml	Fri Apr 24 16:10:30 2020 -0400
+++ b/data_manager/install_primer_scheme_bedfiles.xml	Mon Jun 22 18:26:10 2020 -0400
@@ -1,7 +1,6 @@
-<tool id="data_manager_primer_scheme_bedfiles" name="BED-format primer scheme data manager" version="0.0.10" tool_type="manage_data" profile="19.05">
+<tool id="data_manager_primer_scheme_bedfiles" name="BED-format primer scheme data manager" version="0.0.11" tool_type="manage_data" profile="19.05">
     <requirements>
-        <requirement type="package">python</requirement>
-        <requirement type="package" version="2.22.0">requests</requirement>
+        <requirement type="package" version="2.24.0">requests</requirement>
     </requirements>
     <!-- fetch all the primers in one go -->
     <command detect_errors="exit_code">
@@ -61,4 +60,16 @@
             </output>        
         </test>
     </tests>
+    <help><![CDATA[
+        Amplicon sequencing for viral pathogens using the `PrimalSeq and iVar`_ approach relies on
+        identifying primer locations in a reference sequence using BED-format files. This
+        data manager populates a Galaxy tool data table, either from files provided via
+        a history or via the ARTIC_ network GitHub repository.
+
+        .. _PrimalSeq and iVar: https://genomebiology.biomedcentral.com/articles/10.1186/s13059-018-1618-7
+        .. _ARTIC: https://artic.network/
+    ]]></help>
+    <citations>
+      <citation type="doi">10.1186/s13059-018-1618-7</citation>
+    </citations>
 </tool>
--- a/data_manager/test-data/sample1.bed	Fri Apr 24 16:10:30 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-MN908947.3      30      54      nCoV-2019_1_LEFT        60      +
-MN908947.3      385     410     nCoV-2019_1_RIGHT       60      -
-MN908947.3      320     342     nCoV-2019_2_LEFT        60      +
-MN908947.3      704     726     nCoV-2019_2_RIGHT       60      -
-MN908947.3      642     664     nCoV-2019_3_LEFT        60      +
-MN908947.3      1004    1028    nCoV-2019_3_RIGHT       60      -
-MN908947.3      943     965     nCoV-2019_4_LEFT        60      +
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/primer_scheme_bedfiles.loc	Mon Jun 22 18:26:10 2020 -0400
@@ -0,0 +1,12 @@
+# this is a tab separated file describing the location of primer schemes used for amplicon
+# sequencing (e.g. using the ARTIC SARS-CoV-2 protocol)
+#
+# the columns are:
+# value  description path
+#
+# for example
+# SARS-CoV-2-ARTICv1	SARS-CoV-2 ARTIC v1 primers	/data/galaxy/tool_data/artic_primers/SARS-CoV-2-ARTICv1.bed
+SARS-CoV-2-ARTICv3	SARS-CoV-2-ARTIC v3 primer set	/tmp/tmpYMFYgd/tmpdHhY2S/tmppwFSVU/database/data_manager_tool-dataI2hi9i/primer_scheme_bedfiles/SARS-CoV-2-ARTICv3.bed
+SARS-CoV-2-ARTICv2	SARS-CoV-2-ARTIC v2 primer set	/tmp/tmpYMFYgd/tmpdHhY2S/tmppwFSVU/database/data_manager_tool-dataI2hi9i/primer_scheme_bedfiles/SARS-CoV-2-ARTICv2.bed
+SARS-CoV-2-ARTICv1	SARS-CoV-2-ARTIC v1 primer set	/tmp/tmpYMFYgd/tmpdHhY2S/tmppwFSVU/database/data_manager_tool-dataI2hi9i/primer_scheme_bedfiles/SARS-CoV-2-ARTICv1.bed
+sample_primer	sample primer scheme	/tmp/tmpYMFYgd/tmpdHhY2S/tmppwFSVU/database/data_manager_tool-dataI2hi9i/primer_scheme_bedfiles/sample_primer.bed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample1.bed	Mon Jun 22 18:26:10 2020 -0400
@@ -0,0 +1,7 @@
+MN908947.3	30	54	nCoV-2019_1_LEFT	60	+
+MN908947.3	385	410	nCoV-2019_1_RIGHT	60	-
+MN908947.3	320	342	nCoV-2019_2_LEFT	60	+
+MN908947.3	704	726	nCoV-2019_2_RIGHT	60	-
+MN908947.3	642	664	nCoV-2019_3_LEFT	60	+
+MN908947.3	1004	1028	nCoV-2019_3_RIGHT	60	-
+MN908947.3	943	965	nCoV-2019_4_LEFT	60	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Mon Jun 22 18:26:10 2020 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of primer scheme BED files -->
+    <table name="primer_scheme_bedfiles" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, description, path</columns>
+        <file path="${__HERE__}/test-data/primer_scheme_bedfiles.loc" />
+    </table>
+</tables>