Repository 'bwa'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/bwa

Changeset 5:fbf460831036 (2015-07-21)
Previous changeset 4:ac30bfd3e2a8 (2015-06-18) Next changeset 6:09a7281d24c5 (2015-07-21)
Commit message:
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
modified:
bwa-mem.xml
bwa.xml
bwa_macros.xml
test-data/bwa-aln-test3.bam
test-data/bwa-mem-test2.bam
tool_dependencies.xml
added:
read_group_macros.xml
b
diff -r ac30bfd3e2a8 -r fbf460831036 bwa-mem.xml
--- a/bwa-mem.xml Thu Jun 18 17:35:40 2015 -0400
+++ b/bwa-mem.xml Tue Jul 21 13:51:02 2015 -0400
b
@@ -1,7 +1,8 @@
 <?xml version="1.0"?>
-<tool id="bwa_mem" name="Map with BWA-MEM" version="0.2.2">
+<tool id="bwa_mem" name="Map with BWA-MEM" version="0.3">
   <description>- map medium and long reads (&gt; 100 bp) against reference genome</description>
   <macros>
+    <import>read_group_macros.xml</import>
     <import>bwa_macros.xml</import>
   </macros>
   <requirements>
@@ -103,7 +104,16 @@
 
     #end if
 
-    #if str( $rg.rg_selector ) == "set":
+    ## Handle read group options...
+    @define_read_group_helpers@
+    #if str( $fastq_input.fastq_input_selector ) == "paired":
+      #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1, $fastq_input.fastq_input2)
+    #else:
+      #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1)
+    #end if
+    @set_use_rg_var@
+    @set_read_group_vars@
+    #if $use_rg
       @set_rg_string@
       -R '$rg_string'
     #end if
@@ -189,7 +199,7 @@
       </when>
     </conditional>
 
-    <expand macro="readgroup_params" />
+    <expand macro="read_group_conditional" />
 
     <conditional name="analysis_type">
       <param name="analysis_type_selector" type="select" label="Select analysis mode">
b
diff -r ac30bfd3e2a8 -r fbf460831036 bwa.xml
--- a/bwa.xml Thu Jun 18 17:35:40 2015 -0400
+++ b/bwa.xml Tue Jul 21 13:51:02 2015 -0400
[
@@ -1,7 +1,8 @@
 <?xml version="1.0"?>
-<tool id="bwa" name="Map with BWA" version="0.2.3">
+<tool id="bwa" name="Map with BWA" version="0.3.0">
   <description>- map short reads (&lt; 100 bp) against reference genome</description>
   <macros>
+    <import>read_group_macros.xml</import>
     <import>bwa_macros.xml</import>
     <token name="@command_options@">
     #if str( $analysis_type.analysis_type_selector ) == "full":
@@ -29,7 +30,7 @@
     #end if
     </token>
     <token name="@read_group_options@">
-      #if str( $rg.rg_selector ) == "set":
+      #if $use_rg:
         @set_rg_string@
         -r '$rg_string'
       #end if
@@ -109,6 +110,18 @@
         #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
     #end if
 
+    ## setup vars for rg handling...
+    @define_read_group_helpers@
+    #if str( $input_type.input_type_selector ) == "paired":
+      #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1, $input_type.fastq_input2)
+    #elif str( $input_type.input_type_selector ) in ["single_bam", "paired_bam"]:
+      #set $rg_auto_name = $read_group_name_default($input_type.bam_input)
+    #else
+      #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1)
+    #end if
+    @set_use_rg_var@
+    @set_read_group_vars@
+
     ## Begin bwa command line
 
 ####### Fastq paired
@@ -327,7 +340,7 @@
 
     </conditional>
 
-    <expand macro="readgroup_params" />
+    <expand macro="read_group_conditional" />
 
     <conditional name="analysis_type">
       <param name="analysis_type_selector" type="select" label="Select analysis mode">
b
diff -r ac30bfd3e2a8 -r fbf460831036 bwa_macros.xml
--- a/bwa_macros.xml Thu Jun 18 17:35:40 2015 -0400
+++ b/bwa_macros.xml Tue Jul 21 13:51:02 2015 -0400
[
@@ -1,34 +1,18 @@
 <macros>
-
+  <import>read_group_macros.xml</import>
   <token name="@set_rg_string@">
-      #set $rg_string = "@RG\tID:" + str($rg.ID) + "\tSM:" + str($rg.SM) + "\tPL:" + str($rg.PL)
-      #if $rg.LB
-        #set $rg_string += "\tLB:" + str($rg.LB)
-      #end if
-      #if $rg.CN
-        #set $rg_string += "\tCN:" + str($rg.CN)
-      #end if
-      #if $rg.DS
-        #set $rg_string += "\tDS:" + str($rg.DS)
-      #end if
-      #if $rg.DT
-        #set $rg_string += "\tDT:" + str($rg.DT)
-      #end if
-      #if $rg.FO
-        #set $rg_string += "\tFO:" + str($rg.FO)
-      #end if
-      #if $rg.KS
-        #set $rg_string += "\tKS:" + str($rg.KS)
-      #end if
-      #if $rg.PG
-        #set $rg_string += "\tPG:" + str($rg.PG)
-      #end if
-      #if str($rg.PI)
-        #set $rg_string += "\tPI:" + str($rg.PI)
-      #end if
-      #if $rg.PU
-        #set $rg_string += "\tPU:" + str($rg.PU)
-      #end if
+      #set $rg_string = "@RG\tID:" + str($rg_id)
+      #set $rg_string += $format_read_group("\tSM:", $rg_sm)
+      #set $rg_string += $format_read_group("\tPL:", $rg_pl)
+      #set $rg_string += $format_read_group("\tLB:", $rg_lb)
+      #set $rg_string += $format_read_group("\tCN:", $rg_cn)
+      #set $rg_string += $format_read_group("\tDS:", $rg_ds)
+      #set $rg_string += $format_read_group("\tDT:", $rg_dt)
+      #set $rg_string += $format_read_group("\tFO:", $rg_fo)
+      #set $rg_string += $format_read_group("\tKS:", $rg_ks)
+      #set $rg_string += $format_read_group("\tPG:", $rg_pg)
+      #set $rg_string += $format_read_group("\tPI:", $rg_pi)
+      #set $rg_string += $format_read_group("\tPU:", $rg_pu)
   </token>
     
   <token name="@RG@">
@@ -108,42 +92,5 @@
 
 
   </token>
-  <xml name="readgroup_params">
-    <conditional name="rg">
-      <param name="rg_selector" type="select" label="Set read groups information?" help="(-R in bwa mem; -r in bwa aln); Specifying read group information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details">
-        <option value="set">Set</option>
-        <option value="do_not_set" selected="True">Do not set</option>
-      </param>
-      <when value="set">
-        <param name="ID" type="text" value="" size="20" label="Read group identifier (ID)" help="This value must be unique among multiple samples in your experiment">
-          <validator type="empty_field" />
-        </param>
-        <param name="SM" type="text" value="" size="20" label="Read group sample name (SM)" help="This value should be descriptive. Use pool name where a pool is being sequenced" />
-        <param name="PL" type="select" label="Platform/technology used to produce the reads (PL)">
-          <option value="CAPILLARY">CAPILLARY</option>
-          <option value="LS454">LS454</option>
-          <option selected="True" value="ILLUMINA">ILLUMINA</option>
-          <option value="SOLID">SOLID</option>
-          <option value="HELICOS">HELICOS</option>
-          <option value="IONTORRENT">IONTORRENT</option>
-          <option value="PACBIO">PACBIO</option>
-        </param>
-        <param name="LB" type="text" size="25" label="Library name (LB)" />
-        <param name="CN" type="text" size="25" label="Sequencing center that produced the read (CN)" />
-        <param name="DS" type="text" size="25" label="Description (DS)" />
-        <param name="DT" type="text" size="25" label="Date that run was produced (DT)" help="ISO8601 format date or date/time, like YYYY-MM-DD" />
-        <param name="FO" type="text" size="25" optional="true" label="Flow order (FO)" help="The array of nucleotide bases that correspond to the nucleotides used for each flow of each read. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by various other characters. Format: /\*|[ACMGRSVTWYHKDBN]+/">
-          <validator type="regex" message="Invalid flow order">\*|[ACMGRSVTWYHKDBN]+$</validator>
-        </param>
-        <param name="KS" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" />
-        <param name="PG" type="text" size="25" label="Programs used for processing the read group (PG)" />
-        <param name="PI" type="integer" optional="true" label="Predicted median insert size (PI)" />
-        <param name="PU" type="text" size="25" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" />
-      </when>
-      <when value="do_not_set">
-        <!-- do nothing -->
-      </when>
-    </conditional>
-  </xml>
 
 </macros>
b
diff -r ac30bfd3e2a8 -r fbf460831036 read_group_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_group_macros.xml Tue Jul 21 13:51:02 2015 -0400
[
b'@@ -0,0 +1,294 @@\n+<macros>\n+    <!-- Import this at the top of your command block and then\n+         define rg_auto_name. -->\n+    <token name="@define_read_group_helpers@">\n+#import re\n+#def identifier_or_name($input1)\n+    #if hasattr($input1, \'element_identifier\')\n+        #return $input1.element_identifier\n+    #else\n+        #return $input1.name.rstrip(\'.gz\').rstrip(\'.fastq\').rstrip(\'.fq\')\n+    #end if\n+#end def\n+\n+#def clean(name)\n+    #set $name_clean = re.sub(\'[^\\w\\-_\\.]\', \'_\', $name)\n+    #return $name_clean\n+#end def\n+\n+#def read_group_name_default($input1, $input2=None)\n+    #if $input2 is None\n+        #return $clean($identifier_or_name($input1))\n+    #else\n+        #import itertools\n+        #set $input_name1 = $clean($identifier_or_name($input1))\n+        #set $input_name2 = $clean($identifier_or_name($input2))\n+        #set $common_prefix = \'\'.join([c[0] for c in itertools.takewhile(lambda x: all(x[0] == y for y in x), itertools.izip(*[$input_name1, $input_name2]))])\n+        #if len($common_prefix) > 3\n+            #return $common_prefix\n+        #else\n+            #return $input_name1\n+        #end if\n+    #end if\n+#end def\n+\n+#def format_read_group(prefix, value, quote=\'\', arg=\'\')\n+    #if $value\n+        #return $arg + $quote + $prefix + $value + $quote\n+    #else\n+        #return \'\'\n+    #end if\n+#end def\n+\n+#def rg_param(name)\n+    #if $varExists("rg")\n+        #return $rg.get($name, None)\n+    #else\n+        #return $getVar($name, None)\n+    #end if\n+#end def\n+\n+#set $use_rg = True\n+    </token>\n+    <!-- preconditions use_rg and rg_auto_name have been\n+         defined.\n+    -->\n+    <token name="@set_read_group_vars@">\n+#if $use_rg\n+    #if $rg_param(\'read_group_id_conditional\') is None\n+        #set $rg_id = $rg_auto_name\n+    #elif $rg_param(\'read_group_id_conditional\').do_auto_name\n+        #set $rg_id = $rg_auto_name\n+    #else\n+        #set 
$rg_id = str($rg_param(\'read_group_id_conditional\').ID)\n+    #end if\n+\n+    #if $rg_param(\'read_group_sm_conditional\') is None\n+        #set $rg_sm = \'\'\n+    #elif $rg_param(\'read_group_sm_conditional\').do_auto_name\n+        #set $rg_sm = $rg_auto_name\n+    #else\n+        #set $rg_sm = str($rg_param(\'read_group_sm_conditional\').SM)\n+    #end if\n+\n+    #if $rg_param(\'PL\')\n+        #set $rg_pl = str($rg_param(\'PL\'))\n+    #else\n+        #set $rg_pl = \'\'\n+    #end if\n+\n+    #if $rg_param(\'read_group_lb_conditional\') is None\n+        #set $rg_lb = \'\'\n+    #elif $rg_param(\'read_group_lb_conditional\').do_auto_name\n+        #set $rg_lb = $rg_auto_name\n+    #else\n+        #set $rg_lb = str($rg_param(\'read_group_lb_conditional\').LB)\n+    #end if\n+\n+    #if $rg_param(\'CN\')\n+        #set $rg_cn = str($rg_param(\'CN\'))\n+    #else\n+        #set $rg_cn = \'\'\n+    #end if\n+\n+    #if $rg_param("DS")\n+        #set $rg_ds = str($rg_param("DS"))\n+    #else\n+        #set $rg_ds = \'\'\n+    #end if\n+\n+    #if $rg_param("DT")\n+        #set $rg_dt = str($rg_param("DT"))\n+    #else\n+        #set $rg_dt = \'\'\n+    #end if\n+\n+    #if $rg_param("FO")\n+        #set $rg_fo = str($rg_param("FO"))\n+    #else\n+        #set $rg_fo = \'\'\n+    #end if\n+\n+    #if $rg_param("KS")\n+        #set $rg_ks = str($rg_param("KS"))\n+    #else\n+        #set $rg_ks = \'\'\n+    #end if\n+\n+    #if $rg_param("PG")\n+        #set $rg_pg = str($rg_param("PG"))\n+    #else\n+        #set $rg_pg = \'\'\n+    #end if\n+\n+    #if str($rg_param("PI"))\n+        #set $rg_pi = str($rg_param("PI"))\n+    #else\n+        #set $rg_pi = \'\'\n+    #end if\n+\n+    #if $rg_param("PU")\n+        #set $rg_pu = str($rg_param("PU"))\n+    #else\n+        #set $rg_pu = \'\'\n+    #end if\n+#end if\n+    </token>\n+    <token name="@set_use_rg_var@">\n+#set $use_rg = str($rg.rg_selector) != "do_not_set"\n+    </token>\n+    <xml 
name="read_group_auto_name_conditional">\n+        <param name="do_auto_name" type="boolean" label="Auto-assign" help="Use dataset name or collection information to automatically assign this value" checked="no" />\n+        <when value="true">'..b'd_group_dt_param">\n+        <param name="DT" type="text" size="25" label="Date that run was produced (DT)" help="ISO8601 format date or date/time, like YYYY-MM-DD" />\n+    </xml>\n+    <xml name="read_group_fo_param">\n+        <param name="FO" type="text" size="25" optional="true" label="Flow order (FO)" help="The array of nucleotide bases that correspond to the nucleotides used for each flow of each read. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by various other characters. Format: /\\*|[ACMGRSVTWYHKDBN]+/">\n+          <validator type="regex" message="Invalid flow order">\\*|[ACMGRSVTWYHKDBN]+$</validator>\n+        </param>\n+    </xml>\n+    <xml name="read_group_ks_param">\n+        <param name="KS" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" />\n+    </xml>\n+    <xml name="read_group_pg_param">\n+        <param name="PG" type="text" size="25" label="Programs used for processing the read group (PG)" />\n+    </xml>\n+    <xml name="read_group_pi_param">\n+        <param name="PI" type="integer" optional="true" label="Predicted median insert size (PI)" />\n+    </xml>\n+    <xml name="read_group_pu_param">\n+        <param name="PU" type="text" size="25" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="True" />\n+    </xml>\n+    <xml name="read_group_pu_required_param">\n+        <param name="PU" type="text" size="25" label="Platform unit (PU)" help="Unique identifier (e.g. 
flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="False" />\n+    </xml>\n+    <!-- Only ID is required - all groups available -->\n+    <xml name="read_group_inputs_spec">\n+        <expand macro="read_group_id_conditional" />\n+        <expand macro="read_group_sm_conditional" />\n+        <expand macro="read_group_pl_param" />\n+        <expand macro="read_group_lb_conditional" />\n+        <expand macro="read_group_cn_param" />\n+        <expand macro="read_group_ds_param" />\n+        <expand macro="read_group_dt_param" />\n+        <expand macro="read_group_fo_param" />\n+        <expand macro="read_group_ks_param" />\n+        <expand macro="read_group_pg_param" />\n+        <expand macro="read_group_pi_param" />\n+        <expand macro="read_group_pu_param" />\n+    </xml>\n+    <!-- ID, SM, LB, PU, PL all required - not ks, pg, or fo params. -->\n+    <xml name="read_group_inputs_picard">\n+        <expand macro="read_group_id_conditional" />\n+        <expand macro="read_group_sm_required_conditional" />\n+        <expand macro="read_group_lb_required_conditional" />\n+        <expand macro="read_group_pl_param" />\n+        <expand macro="read_group_pu_required_param" />\n+        <expand macro="read_group_cn_param" />\n+        <expand macro="read_group_ds_param" />\n+        <expand macro="read_group_pi_param" />\n+        <expand macro="read_group_dt_param" />\n+    </xml>\n+    <xml name="read_group_conditional">\n+        <conditional name="rg">\n+            <param name="rg_selector" type="select" label="Set read groups information?" 
help="Specifying read group information can greatly simplify your downstream analyses by allowing combining multiple datasets.">\n+                <option value="set">Set read groups (SAM/BAM specification)</option>\n+                <option value="set_picard">Set read groups (Picard style)</option>\n+                <option value="set_id_auto">Automatically assign ID</option>\n+                <option value="do_not_set" selected="True">Do not set</option>\n+            </param>\n+            <when value="set_picard">\n+                <expand macro="read_group_inputs_picard" />\n+            </when>\n+            <when value="set">\n+                <expand macro="read_group_inputs_spec" />\n+            </when>\n+            <when value="set_id_auto">\n+            </when>\n+            <when value="do_not_set">\n+            </when>\n+        </conditional>\n+    </xml>\n+</macros>\n'
b
diff -r ac30bfd3e2a8 -r fbf460831036 test-data/bwa-aln-test3.bam
b
Binary file test-data/bwa-aln-test3.bam has changed
b
diff -r ac30bfd3e2a8 -r fbf460831036 test-data/bwa-mem-test2.bam
b
Binary file test-data/bwa-mem-test2.bam has changed
b
diff -r ac30bfd3e2a8 -r fbf460831036 tool_dependencies.xml
--- a/tool_dependencies.xml Thu Jun 18 17:35:40 2015 -0400
+++ b/tool_dependencies.xml Tue Jul 21 13:51:02 2015 -0400
b
@@ -4,6 +4,6 @@
         <repository changeset_revision="5b9aca1e1c07" name="package_bwa_0_7_10_039ea20639" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="samtools" version="1.1">
-        <repository changeset_revision="43f2fbec5d52" name="package_samtools_1_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="f0c7bc0159e9" name="package_samtools_1_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
      </package>
 </tool_dependency>