changeset 8:397e5f0eb3ef draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
author iuc
date Thu, 05 Aug 2021 12:46:37 +0000
parents 364f4ffec275
children c092052ed673
files ivar_trim.xml write_amplicon_info_file.py
diffstat 2 files changed, 73 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/ivar_trim.xml	Fri Jun 11 15:43:11 2021 +0000
+++ b/ivar_trim.xml	Thu Aug 05 12:46:37 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="ivar_trim" name="ivar trim" version="@VERSION@+galaxy0">
+<tool id="ivar_trim" name="ivar trim" version="@VERSION@+galaxy1">
     <description>Trim reads in aligned BAM</description>
     <macros>
         <import>macros.xml</import>
@@ -14,8 +14,13 @@
             cp '$primer.cached_bed.fields.path' bed.bed &&
         #end if
         python '$__tool_directory__/sanitize_bed.py' bed.bed &&
-        #if $amplicons.filter_by == 'yes'
-            python '$__tool_directory__/prepare_amplicon_info.py' bed.bed '$amplicons.amplicon_info' amplicon_info.tsv &&
+        #if $amplicons.filter_by == 'yes' or $amplicons.filter_by == 'yes_compute'
+            #if $amplicons.filter_by == 'yes_compute':
+                python '$__tool_directory__/write_amplicon_info_file.py' bed.bed amplicon_info_raw.tsv &&
+            #else
+                ln -s '$amplicons.amplicon_info' amplicon_info_raw.tsv &&
+            #end if
+            python '$__tool_directory__/prepare_amplicon_info.py' bed.bed amplicon_info_raw.tsv amplicon_info.tsv &&
         #end if
         ln -s '$input_bam' sorted.bam &&
         ln -s '${input_bam.metadata.bam_index}' sorted.bam.bai &&
@@ -56,10 +61,12 @@
         <conditional name="amplicons">
             <param name="filter_by" type="select"
             label="Filter reads based on amplicon info"
-            help="When you select Yes you will need to provide an additional amplicon info dataset. Reads that are not fully contained in any amplicon will be dropped before primer trimming. This option is currently marked as [Experimental] in ivar, but nevertheless recommended here.">
+            help="When you select Yes reads that are not fully contained in any amplicon will be dropped before primer trimming. Info on amplicons can be computed from the primer BED file or provided by the user. This option is currently marked as [Experimental] in ivar, but nevertheless recommended here.">
                 <option value="">No, allow reads to extend beyond amplicon boundaries</option>
-                <option value="yes">Yes, drop reads that extend beyond amplicon boundaries</option>
+                <option value="yes_compute">Yes, drop reads that extend beyond amplicon boundaries</option>
+                <option value="yes">Yes, drop reads that extend beyond amplicon boundaries and use my amplicon info file</option>
             </param>
+            <when value="yes_compute" />
             <when value="yes">
                 <param name="amplicon_info" argument="-f" type="data" format="tabular" />
             </when>
@@ -116,6 +123,17 @@
             <param name="input_bam" value="sars-cov-2/sars_cov2_untrimmed.bam" ftype="bam" />
             <output name="output_bam" file="sars-cov-2/sars_cov2_trimmed.bam" compare="sim_size" delta="100000"/>
         </test>
+        <test>
+            <conditional name="primer">
+                <param name="source" value="cached" />
+                <param name="cached_bed" value="SARS-CoV-2-ARTICv1" />
+            </conditional>
+            <conditional name="amplicons">
+                <param name="filter_by" value="yes_compute" />
+            </conditional>
+            <param name="input_bam" value="sars-cov-2/sars_cov2_untrimmed.bam" ftype="bam" />
+            <output name="output_bam" file="sars-cov-2/sars_cov2_trimmed.bam" compare="sim_size" delta="100000"/>
+        </test>
     </tests>
     <help><![CDATA[
 iVar uses primer positions supplied in a BED file to soft clip primer
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/write_amplicon_info_file.py	Thu Aug 05 12:46:37 2021 +0000
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+import argparse
+import re
+
+# Pattern for primer names of the form <prefix>_<amplicon number>_<side>.
+# 'num' captures the amplicon number; 'name' captures the primer side
+# (L/LEFT or R/RIGHT), which may be preceded by non-digit characters.
+# It is applied with re.match, so it is anchored at the start of the name only
+# and any text following the side marker is ignored.
+AMPLICON_NAME_RE = r'.*_(?P<num>\d+)_[^0-9]*(?P<name>L(?:EFT)?|R(?:IGHT)?)'
+
+
+def primer_info_to_position(name):
+    """Return a sort key that orders left primers before right primers.
+
+    The key is the amplicon number parsed from ``name``, increased by 1000
+    when the primer side is 'R' or 'RIGHT'.
+
+    Raises ValueError if ``name`` does not match AMPLICON_NAME_RE.
+    """
+    parsed = re.match(AMPLICON_NAME_RE, name)
+    if parsed is None:
+        raise ValueError("{} does not match expected amplicon name format".format(name))
+    # 'num' is a mandatory group, so it is always a digit string after a match.
+    side_offset = 1000 if parsed.group('name') in ('R', 'RIGHT') else 0
+    return side_offset + int(parsed.group('num'))
+
+
+def write_amplicon_info_file(bed_file, amplicon_info_file):
+    """Write one tab-separated line of primer names per amplicon.
+
+    bed_file: iterable of tab-separated primer BED lines; the primer name is
+        read from the fourth column.
+    amplicon_info_file: writable text file object; it is closed before
+        this function returns.
+
+    Primers are grouped by the amplicon number parsed from their name.
+    Amplicons are written in ascending numeric order, and within an amplicon
+    the names are ordered by primer_info_to_position (left before right).
+
+    Raises ValueError if a primer name does not match AMPLICON_NAME_RE.
+    """
+    amplicon_sets = {}
+    for line in bed_file:
+        record = line.strip()
+        # Blank lines (e.g. a trailing empty line) carry no primer record.
+        if not record:
+            continue
+        name = record.split('\t')[3]
+        re_match = re.match(AMPLICON_NAME_RE, name)
+        if re_match is None:
+            raise ValueError("{} does not match expected amplicon name format".format(name))
+        amplicon_id = int(re_match.group('num'))
+        amplicon_sets.setdefault(amplicon_id, []).append(name)
+
+    for amplicon_id in sorted(amplicon_sets):
+        primer_names = sorted(amplicon_sets[amplicon_id], key=primer_info_to_position)
+        amplicon_info_file.write('\t'.join(primer_names) + '\n')
+    amplicon_info_file.close()
+
+
+if __name__ == '__main__':
+    # Command-line entry point: read a primer BED file and write the amplicon info TSV.
+    arg_parser = argparse.ArgumentParser(
+        description='Write an amplicon info file for iVar from a BED file describing primer positions'
+    )
+    arg_parser.add_argument(
+        'bed_file',
+        type=argparse.FileType('r'),
+        help='Primer BED file'
+    )
+    arg_parser.add_argument(
+        'amplicon_info_file',
+        type=argparse.FileType('w'),
+        help='Output file: amplicon info file in TSV format'
+    )
+    cli_args = arg_parser.parse_args()
+
+    write_amplicon_info_file(cli_args.bed_file, cli_args.amplicon_info_file)