changeset 5:cf65217ad61c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit a5ff06c631a2a5a0d5d44edd6cb58a599d50918b"
author iuc
date Wed, 19 May 2021 16:51:27 +0000
parents db536ad45f28
children c3f9b8720d37
files ivar_trim.xml macros.xml prepare_amplicon_info.py
diffstat 3 files changed, 118 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/ivar_trim.xml	Mon Jun 22 07:30:46 2020 -0400
+++ b/ivar_trim.xml	Wed May 19 16:51:27 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="ivar_trim" name="ivar trim" version="@VERSION@+galaxy1">
+<tool id="ivar_trim" name="ivar trim" version="@VERSION@+galaxy0">
     <description>Trim reads in aligned BAM</description>
     <macros>
         <import>macros.xml</import>
@@ -8,23 +8,29 @@
     </expand>
     <expand macro="version_command" />
     <command detect_errors="exit_code"><![CDATA[
-        #if $primer.source == "history"
+        #if $primer.source == 'history'
             cp '$primer.input_bed' bed.bed &&
         #else
             cp '$primer.cached_bed.fields.path' bed.bed &&
         #end if
         python '$__tool_directory__/sanitize_bed.py' bed.bed &&
-
+        #if $amplicons.filter_by == 'yes'
+            python '$__tool_directory__/prepare_amplicon_info.py' bed.bed '$amplicons.amplicon_info' amplicon_info.tsv &&
+        #end if
         ln -s '$input_bam' sorted.bam &&
         ln -s '${input_bam.metadata.bam_index}' sorted.bam.bai &&
 
         ivar trim
         -i sorted.bam
         -b bed.bed
+        #if $amplicons.filter_by == 'yes'
+            -f amplicon_info.tsv
+        #end if
+        -x $primer_pos_wiggle
+        $inc_primers
         -m $min_len
         -q $min_qual
         -s $window_width
-        $inc_primers
         -p trimmed &&
         samtools sort -@ \${GALAXY_SLOTS:-1} -o trimmed.sorted.bam trimmed.bam
     ]]>    </command>
@@ -47,10 +53,25 @@
                 </param>
             </when>
         </conditional>
+        <conditional name="amplicons">
+            <param name="filter_by" type="select"
+            label="Filter reads based on amplicon info"
+            help="When you select Yes you will need to provide an additional amplicon info dataset. Reads that are not fully contained in any amplicon will be dropped before primer trimming. This option is currently marked as [Experimental] in ivar, but nevertheless recommended here.">
+                <option value="">No, allow reads to extend beyond amplicon boundaries</option>
+                <option value="yes">Yes, drop reads that extend beyond amplicon boundaries</option>
+            </param>
+            <when value="yes">
+                <param name="amplicon_info" argument="-f" type="data" format="tabular" />
+            </when>
+            <when value="" />
+        </conditional>
+        <param name="primer_pos_wiggle" argument="-x" type="integer" min="0" value="0"
+        label="Wiggling room for read ends relative to primer binding sites"
+        help="Reads that occur at the specified offset positions relative to primer positions (as annotated in the primer information dataset) will also be trimmed (default: 0)" />
+        <param name="inc_primers" argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="false" label="Include reads not ending in any primer binding sites?"/>
         <param name="min_len" argument="-m" type="integer" min="0" value="30" label="Minimum length of read to retain after trimming"/>
         <param name="min_qual" argument="-q" type="integer" min="0" value="20" label="Minimum quality threshold for sliding window to pass"/>
         <param name="window_width" argument="-s" type="integer" min="0" value="4" label="Width of sliding window"/>
-        <param name="inc_primers" argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="false" label="Include reads with no primers"/>
     </inputs>
     <outputs>
         <data name="output_bam" format="bam" label="${tool.name} on ${on_string} Trimmed bam" from_work_dir="trimmed.sorted.bam"/>
@@ -93,21 +114,42 @@
                 <param name="cached_bed" value="SARS-CoV-2-ARTICv1" />
             </conditional>
             <param name="input_bam" value="sars-cov-2/sars_cov2_untrimmed.bam" ftype="bam" />
-            <output name="output_bam" file="sars-cov-2/sars_cov2_trimmed.bam" />
+            <output name="output_bam" file="sars-cov-2/sars_cov2_trimmed.bam" compare="sim_size" delta="100000"/>
         </test>
     </tests>
     <help><![CDATA[
-        iVar uses primer positions supplied in a BED file to soft clip primer
-        sequences from an aligned and sorted BAM file. Following this, the reads are
-        trimmed based on a quality threshold(Default: 20). To do the quality
-        trimming, iVar uses a sliding window approach(Default: 4). The windows
-        slides from the 5' end to the 3' end and if at any point the average base
-        quality in the window falls below the threshold, the remaining read is soft
-        clipped. If after trimming, the length of the read is greater than the
-        minimum length specified(Default: 30), the read is written to the new
-        trimmed BAM file
-        
-        Documentation can be found at `<https://andersen-lab.github.io/ivar/html/manualpage.html>`_.
-    ]]>    </help>
+iVar uses primer positions supplied in a BED file to soft clip primer
+sequences from an aligned and sorted BAM file. Following this, the reads are
+trimmed further based on a quality threshold.
+
+**Primer and Amplicon info**
+
+The tool requires information about primers and their binding sites in 6-column
+BED format. The information from this file is used to decide whether any mapped
+read in the BAM input ends with a primer sequence and should, thus, be
+soft-clipped.
+
+Optionally, the tool can also discard reads that do not map entirely within any
+amplicon. Such reads are likely to be wet-lab or mapping artefacts, and removing
+them can increase variant calling precision. To calculate the extent of
+expected amplicons the tool requires an additional amplicon info dataset that
+lists the names of primers that together form any given amplicon. Primer names
+(exactly matching those in the primer info dataset) need to be TAB-separated
+with one line per amplicon.
+If the primer scheme has more than two primers contributing to a given amplicon
+(in schemes using alternate primers), you can (in this Galaxy tool only)
+specify all of them on one line and the tool will calculate the maximum extent
+of the amplicon.
+
+**Quality trimming details**
+
+For quality trimming, iVar uses a sliding-window approach. The window
+slides from the 5' end to the 3' end and, if at any point the average base
+quality in the window falls below the threshold, the remainder of the read is
+soft-clipped. If, after trimming, the length of the read is greater than the
+minimum length specified, the read is written to the new trimmed BAM file.
+
+Documentation can be found at `<https://andersen-lab.github.io/ivar/html/manualpage.html>`_.
+    ]]></help>
     <expand macro="citations" />
 </tool>
--- a/macros.xml	Mon Jun 22 07:30:46 2020 -0400
+++ b/macros.xml	Wed May 19 16:51:27 2021 +0000
@@ -1,9 +1,8 @@
 <macros>
-  <token name="@VERSION@">1.2.2</token>
+  <token name="@VERSION@">1.3.1</token>
   <xml name="requirements">
     <requirements>
       <requirement type="package" version="@VERSION@">ivar</requirement>
-      <requirement type="package" version="1.9">samtools</requirement>
       <yield />
     </requirements>
   </xml>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_amplicon_info.py	Wed May 19 16:51:27 2021 +0000
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+# Extends ivar trim's amplicon info parsing: each amplicon info line may list
+# two or more primers (e.g. nested primers) and is reduced to its two outermost
+# primers. Arguments: <primer BED> <amplicon info> <output amplicon info>
+
+import sys
+
+
+# map each primer name (BED column 4) to its strand-dependent start position
+primer_starts = {}
+with open(sys.argv[1]) as i:
+    for line in i:
+        f = line.strip().split('\t')
+        try:
+            if f[5] == '+':
+                primer_starts[f[3]] = int(f[1])
+            elif f[5] == '-':
+                primer_starts[f[3]] = int(f[2]) - 1  # chromEnd - 1 for '-' primers
+            else:
+                raise ValueError()
+        except (IndexError, ValueError):
+            sys.exit(
+                'Primer BED file needs to be TAB-separated with the '
+                'following columns: '
+                'chrom, chromStart, chromEnd, name, score, strand, '
+                'where "chromStart", "chromEnd" need to be integer values '
+                'and "strand" needs to be either "+" or "-".'
+            )
+
+# per amplicon info line, keep the primers with the smallest/largest start
+with open(sys.argv[2]) as i:
+    ret_lines = []
+    for line in i:
+        first = last = None
+        for pname in line.strip().split('\t'):
+            try:
+                primer_start = primer_starts[pname]
+            except KeyError:
+                sys.exit(
+                    'Amplicon info with primer name not found in '
+                    f'primer BED file: "{pname}"'
+                )
+            if first is None or primer_start < primer_starts[first]:
+                first = pname
+            if last is None or primer_start > primer_starts[last]:
+                last = pname
+        if first == last:  # all primers on this line share one start position
+            sys.exit(
+                line
+                + 'is not a proper amplicon info line.'
+            )
+        ret_lines.append(f'{first}\t{last}\n')
+
+# write the reduced two-column (first primer, last primer) amplicon info
+with open(sys.argv[3], 'w') as o:
+    o.writelines(ret_lines)