changeset 4:ac30bfd3e2a8 draft

planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
author devteam
date Thu, 18 Jun 2015 17:35:40 -0400
parents 607ca4b95837
children fbf460831036
files README.md README.rst bwa-mem.xml bwa.xml bwa_macros.xml shed_upload.tar.gz test-data/bwa-aln-test1.bam test-data/bwa-aln-test2.bam test-data/bwa-aln-test3.bam test-data/bwa-mem-test1.bam test-data/bwa-mem-test2.bam
diffstat 11 files changed, 68 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/README.md	Fri Mar 20 12:21:16 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-bwa-mem
-=======
-
-A collection of Galaxy wrapper for bwa mem, aln, samse, sampe, pemerge, and bwasw 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Thu Jun 18 17:35:40 2015 -0400
@@ -0,0 +1,4 @@
+bwa-mem
+=======
+
+A collection of Galaxy wrappers for bwa mem, aln, samse, sampe, pemerge, and bwasw 
--- a/bwa-mem.xml	Fri Mar 20 12:21:16 2015 -0400
+++ b/bwa-mem.xml	Thu Jun 18 17:35:40 2015 -0400
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="bwa_mem" name="Map with BWA-MEM" version="0.2.1">
+<tool id="bwa_mem" name="Map with BWA-MEM" version="0.2.2">
   <description>- map medium and long reads (&gt; 100 bp) against reference genome</description>
   <macros>
     <import>bwa_macros.xml</import>
@@ -135,9 +135,9 @@
   <inputs>
 
     <conditional name="reference_source">
-      <param name="reference_source_selector" type="select" label="Load reference genome from">
-        <option value="cached">Local cache</option>
-        <option value="history">History</option>
+      <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below">
+        <option value="cached">Use a built-in genome index</option>
+        <option value="history">Use a genome from history and build index</option>
       </param>
       <when value="cached">
         <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
@@ -162,7 +162,7 @@
       <when value="paired">
         <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/>
         <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/>
-        <param name="iset_stats" type="text" optional="True" size="10" label="Enter mean, standerd deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
+        <param name="iset_stats" type="text" optional="True" size="10" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
           <sanitizer invalid_char="">
             <valid initial="string.digits"><add value=","/> </valid>
           </sanitizer>
@@ -173,7 +173,7 @@
       </when>
       <when value="paired_collection">
         <param name="fastq_input1" format="fastqsanger" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
-        <param name="iset_stats" type="text" optional="True" size="10" label="Enter mean, standerd deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
+        <param name="iset_stats" type="text" optional="True" size="10" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
           <sanitizer invalid_char="">
             <valid initial="string.digits"><add value=","/> </valid>
           </sanitizer>
@@ -181,7 +181,7 @@
       </when>
       <when value="paired_iv">
         <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with interleaved reads"/>
-        <param name="iset_stats" type="text" optional="True" size="10" label="Enter mean, standerd deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
+        <param name="iset_stats" type="text" optional="True" size="10" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
           <sanitizer invalid_char="">
             <valid initial="string.digits"><add value=","/> </valid>
           </sanitizer>
@@ -193,9 +193,9 @@
 
     <conditional name="analysis_type">
       <param name="analysis_type_selector" type="select" label="Select analysis mode">
-        <option value="illumina">Simple Illumina mode</option>
-        <option value="pacbio">PacBio mode (-x pacbio)</option>
-        <option value="full">Full list of options</option>
+        <option value="illumina">1.Simple Illumina mode</option>
+        <option value="pacbio">2.PacBio mode (-x pacbio)</option>
+        <option value="full">3.Full list of options</option>
       </param>
       <when value="illumina">
         <!-- do nothing -->
@@ -302,6 +302,8 @@
       <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
       <param name="rg_selector" value="set"/>
       <param name="ID" value="rg1"/>
+      <param name="PL" value="CAPILLARY"/>
+      <param name="LB" value="AARDVARK-1" />
       <param name="analysis_type_selector" value="illumina"/>
       <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="2" />
     </test>
@@ -322,6 +324,20 @@
 
 -----
 
+**Indices: Selecting reference genomes for BWA**
+
+The Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes using the **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options:
+
+  1. **Use a built-in genome index** - when selected (this is the default), Galaxy provides the user with the **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with the bwa index utility and are ready to be mapped against.  
+  2. **Use a genome from history and build index** - when selected, Galaxy provides the user with the **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using the `bwa index` command, and then run mapping with `bwa mem`.
+    
+If your genome of interest is not listed here you have two choices:
+
+  1. Contact the Galaxy team using the **Help->Support** link at the top of the interface and let us know that an index needs to be added.
+  2. Upload your genome of interest as a FASTA file to your Galaxy history and select the **Use a genome from history and build index** option.
+
+-----
+
 **Galaxy-specific option**
 
 Galaxy allows four levels of control over bwa-mem options provided by **Select analysis mode** menu option. These are:
--- a/bwa.xml	Fri Mar 20 12:21:16 2015 -0400
+++ b/bwa.xml	Thu Jun 18 17:35:40 2015 -0400
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="bwa" name="Map with BWA" version="0.2.1">
+<tool id="bwa" name="Map with BWA" version="0.2.3">
   <description>- map short reads (&lt; 100 bp) against reference genome</description>
   <macros>
     <import>bwa_macros.xml</import>
@@ -24,7 +24,7 @@
       #end if
 
       #if str( $analysis_type.L ):
-        -B ${analysis_type.L}
+        -L ${analysis_type.L}
       #end if
     #end if
     </token>
@@ -250,11 +250,11 @@
   </command>
 
   <inputs>
-
+    
     <conditional name="reference_source">
-      <param name="reference_source_selector" type="select" label="Load reference genome from">
-        <option value="cached">Local cache</option>
-        <option value="history">History</option>
+      <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below">
+        <option value="cached">Use a built-in genome index</option>
+        <option value="history">Use a genome from history and build index</option>
       </param>
       <when value="cached">
         <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
@@ -387,6 +387,7 @@
       <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
       <param name="rg_selector" value="set"/>
       <param name="ID" value="rg1"/>
+      <param name="PL" value="CAPILLARY"/>
       <param name="analysis_type_selector" value="illumina"/>
       <output name="bam_output" ftype="bam" file="bwa-aln-test3.bam" lines_diff="2" />
     </test>
@@ -394,18 +395,32 @@
   <help>
 **What is does**
 
-BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use BWA-MEM algorithm distributed as separate Galaxy tool.
+BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use BWA-MEM algorithm distributed as a separate Galaxy tool.
 
 This Galaxy tool wraps bwa-aln, bwa-samse and -sampe modules of bwa read mapping tool:
 
-  - bwa aln - actual mapper placing reads onto the reference sequence
-  - bwa samse - post-processor converting suffix array coordinates into genome coordinates in SAM format for single reads
-  - bam sampe - post-processor for paired reads
+  - **bwa aln** - actual mapper placing reads onto the reference sequence
+  - **bwa samse** - post-processor converting suffix array coordinates into genome coordinates in SAM format for single reads
+  - **bwa sampe** - post-processor for paired reads
 
 Galaxy implementation takes fastq or BAM (unaligned BAM) datasets as input and produces output in BAM (not SAM; in reality SAM produced by the bwa is converted to BAM on the fly by samtools view command) format, which can be further processed using various BAM utilities exiting in Galaxy (BAMTools, SAMTools, Picard).
 
 -----
 
+**Indices: Selecting reference genomes for BWA**
+
+The Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes using the **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options:
+
+  1. **Use a built-in genome index** - when selected (this is the default), Galaxy provides the user with the **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with the bwa index utility and are ready to be mapped against.  
+  2. **Use a genome from history and build index** - when selected, Galaxy provides the user with the **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using the `bwa index` command, and then run mapping with `bwa aln`.
+    
+If your genome of interest is not listed here you have two choices:
+
+  1. Contact the Galaxy team using the **Help->Support** link at the top of the interface and let us know that an index needs to be added.
+  2. Upload your genome of interest as a FASTA file to your Galaxy history and select the **Use a genome from history and build index** option.
+
+-----
+
 **Galaxy-specific option**
 
 Galaxy allows three levels of control over bwa-mem options provided by **Select analysis mode** menu option. These are:
--- a/bwa_macros.xml	Fri Mar 20 12:21:16 2015 -0400
+++ b/bwa_macros.xml	Thu Jun 18 17:35:40 2015 -0400
@@ -3,31 +3,31 @@
   <token name="@set_rg_string@">
       #set $rg_string = "@RG\tID:" + str($rg.ID) + "\tSM:" + str($rg.SM) + "\tPL:" + str($rg.PL)
       #if $rg.LB
-        #set $rg_string += "\tLB:$rg.LB"
+        #set $rg_string += "\tLB:" + str($rg.LB)
       #end if
       #if $rg.CN
-        #set $rg_string += "\tCN:$rg.CN"
+        #set $rg_string += "\tCN:" + str($rg.CN)
       #end if
       #if $rg.DS
-        #set $rg_string += "\tDS:$rg.DS"
+        #set $rg_string += "\tDS:" + str($rg.DS)
       #end if
       #if $rg.DT
-        #set $rg_string += "\tDT:$rg.DT"
+        #set $rg_string += "\tDT:" + str($rg.DT)
       #end if
       #if $rg.FO
-        #set $rg_string += "\tFO:$rg.FO"
+        #set $rg_string += "\tFO:" + str($rg.FO)
       #end if
       #if $rg.KS
-        #set $rg_string += "\tKS:$rg.KS"
+        #set $rg_string += "\tKS:" + str($rg.KS)
       #end if
       #if $rg.PG
-        #set $rg_string += "\tPG:$rg.PG"
+        #set $rg_string += "\tPG:" + str($rg.PG)
       #end if
       #if str($rg.PI)
-        #set $rg_string += "\tPI:$rg.PI"
+        #set $rg_string += "\tPI:" + str($rg.PI)
       #end if
       #if $rg.PU
-        #set $rg_string += "\tPU:$rg.PU"
+        #set $rg_string += "\tPU:" + str($rg.PU)
       #end if
   </token>
     
@@ -38,7 +38,7 @@
 
 **Read Groups are Important!**
 
-One of the recommended best practices in NGS analysis is adding read group information to BAM files. You can do thid directly in BWA interface using the
+One of the recommended best practices in NGS analysis is adding read group information to BAM files. You can do this directly in BWA interface using the
 **Specify read group information?** widget. If you are not familiar with read groups you shold know that this is effectively a way to tag reads with an additional ID.
 This allows you to combine BAM files from, for example, multiple BWA runs into a single dataset. This significantly simplifies downstream processing as
 instead of dealing with multiple datasets you only have to handle only one. This is possible because the read group information allows you to identify
@@ -104,13 +104,13 @@
 
 **Dataset collections - processing large numbers of datasets at once**
 
-This will be added shortly
+Dataset collections are in beta-testing. Extensive documentation will be added later this Spring.
 
 
   </token>
   <xml name="readgroup_params">
     <conditional name="rg">
-      <param name="rg_selector" type="select" label="Set read groups information?" help="-R; Specifying read group information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details">
+      <param name="rg_selector" type="select" label="Set read groups information?" help="(-R in bwa mem; -r in bwa aln); Specifying read group information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details">
         <option value="set">Set</option>
         <option value="do_not_set" selected="True">Do not set</option>
       </param>
@@ -122,7 +122,7 @@
         <param name="PL" type="select" label="Platform/technology used to produce the reads (PL)">
           <option value="CAPILLARY">CAPILLARY</option>
           <option value="LS454">LS454</option>
-          <option value="ILLUMINA">ILLUMINA</option>
+          <option selected="True" value="ILLUMINA">ILLUMINA</option>
           <option value="SOLID">SOLID</option>
           <option value="HELICOS">HELICOS</option>
           <option value="IONTORRENT">IONTORRENT</option>
Binary file shed_upload.tar.gz has changed
Binary file test-data/bwa-aln-test1.bam has changed
Binary file test-data/bwa-aln-test2.bam has changed
Binary file test-data/bwa-aln-test3.bam has changed
Binary file test-data/bwa-mem-test1.bam has changed
Binary file test-data/bwa-mem-test2.bam has changed