Repository 'bowtie2'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/bowtie2

Changeset 5:42bb952b4e3c (2015-07-21)
Previous changeset 4:1fc845afa3ac (2015-07-21) Next changeset 6:e23b0cdeeba6 (2015-08-26)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie2 commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
modified:
bowtie2_wrapper.xml
added:
read_group_macros.xml
test-data/bowtie2-test2.bam
b
diff -r 1fc845afa3ac -r 42bb952b4e3c bowtie2_wrapper.xml
--- a/bowtie2_wrapper.xml Tue Jul 21 13:00:51 2015 -0400
+++ b/bowtie2_wrapper.xml Tue Jul 21 13:01:59 2015 -0400
b
@@ -1,6 +1,9 @@
-<tool id="bowtie2" name="Bowtie2" version="0.5">
+<tool id="bowtie2" name="Bowtie2" version="0.6">
     <!-- Wrapper compatible with Bowtie version 2.2.4 -->
     <description>- map reads against reference genome</description>
+    <macros>
+        <import>read_group_macros.xml</import>
+    </macros>
     <version_command>bowtie2 --version</version_command>
     <requirements>
         <requirement type="package" version="2.2.4">bowtie2</requirement>
@@ -70,12 +73,30 @@
             #end if
         #end if
         
-        ## Readgroups
-        #if str( $read_group.read_group_selector ) == "yes":
-            --rg-id "${read_group.rgid}"
-            --rg "SM:${read_group.rgsm}"
-            --rg "LB:${read_group.rglb}"
-            --rg "PL:${read_group.rgpl}"
+        ## Read group information.
+        @define_read_group_helpers@
+        #if str( $library.type ) == "single":
+        #set $rg_auto_name = $read_group_name_default($library.input_1)
+        #elif str( $library.type ) == "paired":
+        #set $rg_auto_name = $read_group_name_default($library.input_1, $library.input_2)
+        #else
+        #set $rg_auto_name = $read_group_name_default($library.input_1)
+        #end if
+        @set_use_rg_var@
+        @set_read_group_vars@
+        #if $use_rg
+          $format_read_group("", $rg_id, '"', arg='--rg-id ')
+          $format_read_group("SM:", $rg_sm, '"', arg='--rg ')
+          $format_read_group("PL:", $rg_pl, '"', arg='--rg ')
+          $format_read_group("LB:", $rg_lb, '"', arg='--rg ')
+          $format_read_group("CN:", $rg_cn, '"', arg='--rg ')
+          $format_read_group("DS:", $rg_ds, '"', arg='--rg ')
+          $format_read_group("DT:", $rg_dt, '"', arg='--rg ')
+          $format_read_group("FO:", $rg_fo, '"', arg='--rg ')
+          $format_read_group("KS:", $rg_ks, '"', arg='--rg ')
+          $format_read_group("PG:", $rg_pg, '"', arg='--rg ')
+          $format_read_group("PI:", $rg_pi, '"', arg='--rg ')
+          $format_read_group("PU:", $rg_pu, '"', arg='--rg ')
         #end if
         
         ## Analysis type
@@ -257,20 +278,7 @@
         </conditional>
         
         <!-- read group settings -->
-        <conditional name="read_group">
-            <param name="read_group_selector" type="select" label="Specify the read group for this file?" help="Specifying readgroup information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details">
-                <option value="yes">Yes</option>
-                <option value="no" selected="True">No</option>
-            </param>
-            <when value="yes">
-                <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG tags of alignment records. Must be unique among all read groups in header section." help="--rg-id; Required if RG specified. Read group IDs may be modified when merging SAM files in order to handle collisions." />
-                <param name="rglb" type="text" size="25" label="Library name (LB)" help="--rg; Required if RG specified" />
-                <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="--rg; Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO" />
-                <param name="rgsm" type="text" size="25" label="Sample (SM)" help="--rg; Required if RG specified. Use pool name where a pool is being sequenced" />
-            </when>
-            <when value="no" />
-        </conditional>
-        
+        <expand macro="read_group_conditional" />
         <conditional name="analysis_type">
             <param name="analysis_type_selector" type="select" label="Select analysis mode">
                 <option value="simple">1: Default setting only</option>
@@ -479,6 +487,22 @@
             <param name="own_file" value="bowtie2-ref.fasta" />
             <output name="output" file="bowtie2-test1.bam" ftype="bam" lines_diff="2"/>
         </test>
+        <test>
+            <!-- basic test on a paired-end default run with read group ID and platform (PL) set -->
+            <param name="type" value="paired"/>
+            <param name="selection" value="no"/>
+            <param name="paired_options_selector" value="no"/>
+            <param name="unaligned_file" value="false"/>
+            <param name="analysis_type_selector" value="simple"/>
+            <param name="rg_selector" value="set"/>
+            <param name="ID" value="rg1"/>
+            <param name="PL" value="CAPILLARY"/>
+            <param name="source" value="history" />
+            <param name="input_1" value="bowtie2-fq1.fq" ftype="fastqsanger"/>
+            <param name="input_2" value="bowtie2-fq2.fq" ftype="fastqsanger"/>
+            <param name="own_file" value="bowtie2-ref.fasta" />
+            <output name="output" file="bowtie2-test2.bam" ftype="bam" lines_diff="2"/>
+        </test>
     </tests>
 
     <help>
b
diff -r 1fc845afa3ac -r 42bb952b4e3c read_group_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_group_macros.xml Tue Jul 21 13:01:59 2015 -0400
[
b'@@ -0,0 +1,294 @@\n+<macros>\n+    <!-- Import this at the top of your command block and then\n+         define rg_auto_name. -->\n+    <token name="@define_read_group_helpers@">\n+#import re\n+#def identifier_or_name($input1)\n+    #if hasattr($input1, \'element_identifier\')\n+        #return $input1.element_identifier\n+    #else\n+        #return $input1.name.rstrip(\'.gz\').rstrip(\'.fastq\').rstrip(\'.fq\')\n+    #end if\n+#end def\n+\n+#def clean(name)\n+    #set $name_clean = re.sub(\'[^\\w\\-_\\.]\', \'_\', $name)\n+    #return $name_clean\n+#end def\n+\n+#def read_group_name_default($input1, $input2=None)\n+    #if $input2 is None\n+        #return $clean($identifier_or_name($input1))\n+    #else\n+        #import itertools\n+        #set $input_name1 = $clean($identifier_or_name($input1))\n+        #set $input_name2 = $clean($identifier_or_name($input2))\n+        #set $common_prefix = \'\'.join([c[0] for c in itertools.takewhile(lambda x: all(x[0] == y for y in x), itertools.izip(*[$input_name1, $input_name2]))])\n+        #if len($common_prefix) > 3\n+            #return $common_prefix\n+        #else\n+            #return $input_name1\n+        #end if\n+    #end if\n+#end def\n+\n+#def format_read_group(prefix, value, quote=\'\', arg=\'\')\n+    #if $value\n+        #return $arg + $quote + $prefix + $value + $quote\n+    #else\n+        #return \'\'\n+    #end if\n+#end def\n+\n+#def rg_param(name)\n+    #if $varExists("rg")\n+        #return $rg.get($name, None)\n+    #else\n+        #return $getVar($name, None)\n+    #end if\n+#end def\n+\n+#set $use_rg = True\n+    </token>\n+    <!-- preconditions use_rg and rg_auto_name have been\n+         defined.\n+    -->\n+    <token name="@set_read_group_vars@">\n+#if $use_rg\n+    #if $rg_param(\'read_group_id_conditional\') is None\n+        #set $rg_id = $rg_auto_name\n+    #elif $rg_param(\'read_group_id_conditional\').do_auto_name\n+        #set $rg_id = $rg_auto_name\n+    #else\n+        #set 
$rg_id = str($rg_param(\'read_group_id_conditional\').ID)\n+    #end if\n+\n+    #if $rg_param(\'read_group_sm_conditional\') is None\n+        #set $rg_sm = \'\'\n+    #elif $rg_param(\'read_group_sm_conditional\').do_auto_name\n+        #set $rg_sm = $rg_auto_name\n+    #else\n+        #set $rg_sm = str($rg_param(\'read_group_sm_conditional\').SM)\n+    #end if\n+\n+    #if $rg_param(\'PL\')\n+        #set $rg_pl = str($rg_param(\'PL\'))\n+    #else\n+        #set $rg_pl = \'\'\n+    #end if\n+\n+    #if $rg_param(\'read_group_lb_conditional\') is None\n+        #set $rg_lb = \'\'\n+    #elif $rg_param(\'read_group_lb_conditional\')do_auto_name\n+        #set $rg_lb = $rg_auto_name\n+    #else\n+        #set $rg_lb = str($rg_param(\'read_group_lb_conditional\').LB)\n+    #end if\n+\n+    #if $rg_param(\'CN\')\n+        #set $rg_cn = str($rg_param(\'CN\'))\n+    #else\n+        #set $rg_cn = \'\'\n+    #end if\n+\n+    #if $rg_param("DS")\n+        #set $rg_ds = str($rg_param("DS"))\n+    #else\n+        #set $rg_ds = \'\'\n+    #end if\n+\n+    #if $rg_param("DT")\n+        #set $rg_dt = str($rg_param("DT"))\n+    #else\n+        #set $rg_dt = \'\'\n+    #end if\n+\n+    #if $rg_param("FO")\n+        #set $rg_fo = str($rg_param("FO"))\n+    #else\n+        #set $rg_fo = \'\'\n+    #end if\n+\n+    #if $rg_param("KS")\n+        #set $rg_ks = str($rg_param("KS"))\n+    #else\n+        #set $rg_ks = \'\'\n+    #end if\n+\n+    #if $rg_param("PG")\n+        #set $rg_pg = str($rg_param("PG"))\n+    #else\n+        #set $rg_pg = \'\'\n+    #end if\n+\n+    #if str($rg_param("PI"))\n+        #set $rg_pi = str($rg_param("PI"))\n+    #else\n+        #set $rg_pi = \'\'\n+    #end if\n+\n+    #if $rg_param("PU")\n+        #set $rg_pu = str($rg_param("PU"))\n+    #else\n+        #set $rg_pu = \'\'\n+    #end if\n+#end if\n+    </token>\n+    <token name="@set_use_rg_var@">\n+#set $use_rg = str($rg.rg_selector) != "do_not_set"\n+    </token>\n+    <xml 
name="read_group_auto_name_conditional">\n+        <param name="do_auto_name" type="boolean" label="Auto-assign" help="Use dataset name or collection information to automatically assign this value" checked="no" />\n+        <when value="true">'..b'd_group_dt_param">\n+        <param name="DT" type="text" size="25" label="Date that run was produced (DT)" help="ISO8601 format date or date/time, like YYYY-MM-DD" />\n+    </xml>\n+    <xml name="read_group_fo_param">\n+        <param name="FO" type="text" size="25" optional="true" label="Flow order (FO)" help="The array of nucleotide bases that correspond to the nucleotides used for each flow of each read. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by various other characters. Format: /\\*|[ACMGRSVTWYHKDBN]+/">\n+          <validator type="regex" message="Invalid flow order">\\*|[ACMGRSVTWYHKDBN]+$</validator>\n+        </param>\n+    </xml>\n+    <xml name="read_group_ks_param">\n+        <param name="KS" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" />\n+    </xml>\n+    <xml name="read_group_pg_param">\n+        <param name="PG" type="text" size="25" label="Programs used for processing the read group (PG)" />\n+    </xml>\n+    <xml name="read_group_pi_param">\n+        <param name="PI" type="integer" optional="true" label="Predicted median insert size (PI)" />\n+    </xml>\n+    <xml name="read_group_pu_param">\n+        <param name="PU" type="text" size="25" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="True" />\n+    </xml>\n+    <xml name="read_group_pu_required_param">\n+        <param name="PU" type="text" size="25" label="Platform unit (PU)" help="Unique identifier (e.g. 
flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="False" />\n+    </xml>\n+    <!-- Only ID is required - all groups available -->\n+    <xml name="read_group_inputs_spec">\n+        <expand macro="read_group_id_conditional" />\n+        <expand macro="read_group_sm_conditional" />\n+        <expand macro="read_group_pl_param" />\n+        <expand macro="read_group_lb_conditional" />\n+        <expand macro="read_group_cn_param" />\n+        <expand macro="read_group_ds_param" />\n+        <expand macro="read_group_dt_param" />\n+        <expand macro="read_group_fo_param" />\n+        <expand macro="read_group_ks_param" />\n+        <expand macro="read_group_pg_param" />\n+        <expand macro="read_group_pi_param" />\n+        <expand macro="read_group_pu_param" />\n+    </xml>\n+    <!-- ID, SM, LB, PU, PL all required - not ks, pg, or fo params. -->\n+    <xml name="read_group_inputs_picard">\n+        <expand macro="read_group_id_conditional" />\n+        <expand macro="read_group_sm_required_conditional" />\n+        <expand macro="read_group_lb_required_conditional" />\n+        <expand macro="read_group_pl_param" />\n+        <expand macro="read_group_pu_required_param" />\n+        <expand macro="read_group_cn_param" />\n+        <expand macro="read_group_ds_param" />\n+        <expand macro="read_group_pi_param" />\n+        <expand macro="read_group_dt_param" />\n+    </xml>\n+    <xml name="read_group_conditional">\n+        <conditional name="rg">\n+            <param name="rg_selector" type="select" label="Set read groups information?" 
help="Specifying read group information can greatly simplify your downstream analyses by allowing combining multiple datasets.">\n+                <option value="set">Set read groups (SAM/BAM specification)</option>\n+                <option value="set_picard">Set read groups (Picard style)</option>\n+                <option value="set_id_auto">Automatically assign ID</option>\n+                <option value="do_not_set" selected="True">Do not set</option>\n+            </param>\n+            <when value="set_picard">\n+                <expand macro="read_group_inputs_picard" />\n+            </when>\n+            <when value="set">\n+                <expand macro="read_group_inputs_spec" />\n+            </when>\n+            <when value="set_id_auto">\n+            </when>\n+            <when value="do_not_set">\n+            </when>\n+        </conditional>\n+    </xml>\n+</macros>\n'
b
diff -r 1fc845afa3ac -r 42bb952b4e3c test-data/bowtie2-test2.bam
b
Binary file test-data/bowtie2-test2.bam has changed