Mercurial > repos > iuc > data_manager_primer_scheme_bedfiles

--- a/data_manager/install_primer_scheme_bedfiles.py	Mon Jun 22 18:26:10 2020 -0400
+++ b/data_manager/install_primer_scheme_bedfiles.py	Mon Oct 12 13:47:03 2020 +0000
@@ -18,7 +18,7 @@
 DATA_TABLE_NAME = "primer_scheme_bedfiles"


-def write_good_bed(input_file, bed_output_filename):
+def write_artic_style_bed(input_file, bed_output_filename):
     with open(bed_output_filename, "w") as bed_output_file:
         for line in input_file:
             fields = line.split("\t")
@@ -27,10 +27,16 @@
                 exit("invalid format in BED file: {}".format(line.rstrip()))
             try:
                 # try and parse field 5 as a number
-                float(fields[4])
+                score = float(fields[4])
             except ValueError:
-                # ARTIC with broken BED, set field 5 to 60
-                fields[4] = "60"
+                # Non-numeric score column: this is already an ARTIC-style
+                # BED, which violates the BED spec but is required by the
+                # ARTIC pipeline, so the field is left unchanged.
+                pass
+            else:
+                # This is a regular BED with a numeric score column; prefix it
+                # with "_" (e.g. "60" -> "_60.0") to suit the ARTIC pipeline.
+                fields[4] = '_{0}'.format(score)
             bed_output_file.write("\t".join(fields))


@@ -56,7 +62,7 @@
             )
             exit(response.status_code)
         bed_output_filename = os.path.join(output_directory, name + ".bed")
-        write_good_bed(StringIO(response.text), bed_output_filename)
+        write_artic_style_bed(StringIO(response.text), bed_output_filename)
         description = name[:-2] + " " + name[-2:] + " primer set"
         data.append(dict(value=name, path=bed_output_filename, description=description))
     return data
@@ -68,7 +74,7 @@
     name = re.sub(r"\W", "", str(primer_name).replace(" ", "_"))
     output_filename = os.path.join(output_directory, name + ".bed")
     with open(input_filename) as input_file:
-        write_good_bed(input_file, output_filename)
+        write_artic_style_bed(input_file, output_filename)
     data = [dict(value=name, description=primer_description, path=output_filename)]
     return data
--- a/data_manager/install_primer_scheme_bedfiles.xml	Mon Jun 22 18:26:10 2020 -0400
+++ b/data_manager/install_primer_scheme_bedfiles.xml	Mon Oct 12 13:47:03 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="data_manager_primer_scheme_bedfiles" name="BED-format primer scheme data manager" version="0.0.11" tool_type="manage_data" profile="19.05">
+<tool id="data_manager_primer_scheme_bedfiles" name="BED-format primer scheme data manager" version="0.0.12" tool_type="manage_data" profile="19.05">
     <requirements>
         <requirement type="package" version="2.24.0">requests</requirement>
     </requirements>
@@ -61,7 +61,7 @@
         </test>
     </tests>
     <help><![CDATA[
-        Amplicon sequencing for viral pathogens using the `PrimalSeq and iVar`_ relies on
+        Amplicon sequencing for viral pathogens using the ARTIC_ pipeline or `PrimalSeq and iVar`_ relies on
         identifying primer locations in a reference sequence using BED format files. This
         data manager populates a Galaxy tool data table, either from files provided via
         a history or via the ARTIC_ network Github repository.
--- a/test-data/sample1.bed	Mon Jun 22 18:26:10 2020 -0400
+++ b/test-data/sample1.bed	Mon Oct 12 13:47:03 2020 +0000
@@ -1,7 +1,7 @@
-MN908947.3	30	54	nCoV-2019_1_LEFT	60	+
-MN908947.3	385	410	nCoV-2019_1_RIGHT	60	-
-MN908947.3	320	342	nCoV-2019_2_LEFT	60	+
-MN908947.3	704	726	nCoV-2019_2_RIGHT	60	-
-MN908947.3	642	664	nCoV-2019_3_LEFT	60	+
-MN908947.3	1004	1028	nCoV-2019_3_RIGHT	60	-
-MN908947.3	943	965	nCoV-2019_4_LEFT	60	+
+MN908947.3	30	54	nCoV-2019_1_LEFT	nCoV-2019_1	+
+MN908947.3	385	410	nCoV-2019_1_RIGHT	nCoV-2019_1	-
+MN908947.3	320	342	nCoV-2019_2_LEFT	nCoV-2019_2	+
+MN908947.3	704	726	nCoV-2019_2_RIGHT	nCoV-2019_2	-
+MN908947.3	642	664	nCoV-2019_3_LEFT	nCoV-2019_1	+
+MN908947.3	1004	1028	nCoV-2019_3_RIGHT	nCoV-2019_1	-
+MN908947.3	943	965	nCoV-2019_4_LEFT	nCoV-2019_2	+