diff tophat2_wrapper.xml @ 2:da1f39fe14bc draft

Uploaded
author devteam
date Thu, 18 Dec 2014 13:56:31 -0500
parents ae06af1118dc
children 81f97e12e573
line wrap: on
line diff
--- a/tophat2_wrapper.xml	Mon Nov 17 11:23:41 2014 -0500
+++ b/tophat2_wrapper.xml	Thu Dec 18 13:56:31 2014 -0500
@@ -1,4 +1,4 @@
-<tool id="tophat2" name="Tophat2" version="0.6">
+<tool id="tophat2" name="Tophat2" version="0.7">
     <!-- Wrapper compatible with Tophat version 2.0.0+ -->
     <description>Gapped-read mapper for RNA-seq data</description>
     <version_command>tophat2 --version</version_command>
@@ -111,7 +111,7 @@
         #end if
 
         ## Set index path, inputs and parameters specific to paired data.
-        #if $singlePaired.sPaired == "paired"
+        #if $singlePaired.sPaired != "single"
             -r $singlePaired.mate_inner_distance
             --mate-std-dev=$singlePaired.mate_std_dev
             
@@ -119,9 +119,13 @@
                 --no-discordant
             #end if
 
-            ${index_path} $singlePaired.input1 $singlePaired.input2
+            #if $singlePaired.sPaired == "paired"
+              ${index_path} "$singlePaired.input1" "$singlePaired.input2"
+            #else
+              ${index_path} "$singlePaired.input.forward" "$singlePaired.input.reverse"
+            #end if
         #else
-            ${index_path} $singlePaired.input1
+            ${index_path} "$singlePaired.input1"
         #end if
     </command>
     
@@ -129,7 +133,8 @@
         <conditional name="singlePaired">
             <param name="sPaired" type="select" label="Is this library mate-paired?">
               <option value="single">Single-end</option>
-              <option value="paired">Paired-end</option>
+              <option value="paired">Paired-end (as individual datasets)</option>
+              <option value="paired_collection">Paired-end (as collection)</option>
             </param>
             <when value="single">
                 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
@@ -137,13 +142,11 @@
             <when value="paired">
                 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
                 <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
-                <param name="mate_inner_distance" type="integer" value="300" label="Mean Inner Distance between Mate Pairs" />
-                <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs"  help="The standard deviation for the distribution on inner distances between mate pairs."/>
-                <!-- Discordant pairs. -->
-                <param name="report_discordant_pairs" type="select" label="Report discordant pair alignments?">
-                    <option value="No">No</option>
-                    <option selected="True" value="Yes">Yes</option>
-                </param>
+                <expand macro="paired_parameters" />
+            </when>
+            <when value="paired_collection">
+                <param format="fastqsanger" name="input" type="data_collection" collection_type="paired" label="RNA-Seq FASTQ paired reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
+                <expand macro="paired_parameters" />
             </when>
         </conditional>
         <expand macro="refGenomeSourceConditional">
@@ -293,6 +296,15 @@
 
     <macros>
       <import>tophat_macros.xml</import>
+      <xml name="paired_parameters">
+        <param name="mate_inner_distance" type="integer" value="300" label="Mean Inner Distance between Mate Pairs" />
+        <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs"  help="The standard deviation for the distribution on inner distances between mate pairs."/>
+        <!-- Discordant pairs. -->
+        <param name="report_discordant_pairs" type="select" label="Report discordant pair alignments?">
+            <option value="No">No</option>
+            <option selected="True" value="Yes">Yes</option>
+        </param>
+      </xml>
       <macro name="dbKeyActions">
         <actions>
           <conditional name="refGenomeSource.genomeSource">
@@ -348,6 +360,23 @@
             <output name="junctions" file="tophat2_out2j.bed" />
             <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
         </test>
+        <test>
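+            <!-- Same paired-end test as above, but the forward and reverse reads are supplied as a single paired collection instead of two individual datasets. -->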
+            <param name="sPaired" value="paired_collection" />
+            <param name="input">
+              <collection type="paired">
+                <element name="forward" value="tophat_in2.fastqsanger" ftype="fastqsanger" />
+                <element name="reverse" value="tophat_in3.fastqsanger" ftype="fastqsanger" />
+              </collection>
+            </param>
+            <param name="genomeSource" value="history" />
+            <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" />
+            <param name="mate_inner_distance" value="20" />
+            <param name="settingsType" value="preSet" />
+            <param name="specReadGroup" value="No" />
+            <output name="junctions" file="tophat2_out2j.bed" />
+            <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
+        </test>
         <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters -->
         <test>
             <!-- Tophat commands:
@@ -356,44 +385,66 @@
             Replace the + with double-dash
             Rename the files in tmp_dir appropriately
             -->
-            <param name="sPaired" value="single"/>
-            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
+            <conditional name="singlePaired">
+              <param name="sPaired" value="single"/>
+              <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
+            </conditional>
             <param name="genomeSource" value="history"/>
             <param name="ownFile" value="tophat_in1.fasta"/>
-            <param name="settingsType" value="full"/>
-            <param name="library_type" value="FR Unstranded"/>
-            <param name="read_mismatches" value="2"/>
-            <param name="bowtie_n" value="No"/>
-            <param name="anchor_length" value="8"/>
-            <param name="splice_mismatches" value="0"/>
-            <param name="min_intron_length" value="70"/>
-            <param name="max_intron_length" value="500000"/>
-            <param name="max_multihits" value="40"/>
-            <param name="min_segment_intron" value="50" />
-            <param name="max_segment_intron" value="500000" />
-            <param name="seg_mismatches" value="2"/>
-            <param name="seg_length" value="25"/>
-            <param name="allow_indel_search" value="Yes"/>
-            <param name="max_insertion_length" value="3"/>
-            <param name="max_deletion_length" value="3"/>
-            <param name="use_junctions" value="Yes" />
-            <param name="use_annotations" value="No" />
-            <param name="use_juncs" value="No" />
-            <param name="no_novel_juncs" value="No" />
-            <param name="use_search" value="Yes" />
-            <param name="min_coverage_intron" value="50" />
-            <param name="max_coverage_intron" value="20000" />
-            <param name="microexon_search" value="Yes" />
-            <param name="b2_settings" value="No" />
-            <!-- Fusion search params -->
-            <param name="do_search" value="Yes" />            
-            <param name="anchor_len" value="21" />
-            <param name="min_dist" value="10000021" />
-            <param name="read_mismatches" value="3" />
-            <param name="multireads" value="4" />
-            <param name="multipairs" value="5" />
-            <param name="ignore_chromosomes" value="chrM"/>
-            <param name="specReadGroup" value="No" />
+            <conditional name="params">
+              <param name="settingsType" value="full"/>
+              <param name="library_type" value="FR Unstranded"/>
+              <param name="read_mismatches" value="2"/>
+              <param name="bowtie_n" value="No"/>
+              <param name="anchor_length" value="8"/>
+              <param name="splice_mismatches" value="0"/>
+              <param name="min_intron_length" value="70"/>
+              <param name="max_intron_length" value="500000"/>
+              <param name="max_multihits" value="40"/>
+              <param name="min_segment_intron" value="50" />
+              <param name="max_segment_intron" value="500000" />
+              <param name="seg_mismatches" value="2"/>
+              <param name="seg_length" value="25"/>
+              <conditional name="indel_search">
+                <param name="allow_indel_search" value="Yes"/>
+                <param name="max_insertion_length" value="3"/>
+                <param name="max_deletion_length" value="3"/>
+              </conditional>
+              <conditional name="own_junctions">
+                <param name="use_junctions" value="Yes" />
+                <conditional name="gene_model_ann">
+                  <param name="use_annotations" value="No" />
+                </conditional>
+                <conditional name="raw_juncs">
+                  <param name="use_juncs" value="No" />
+                </conditional>
+                <conditional name="no_novel_juncs">
+                  <param name="no_novel_juncs" value="No" />
+                </conditional>
+              </conditional>
+              <conditional name="coverage_search">
+                <param name="use_search" value="Yes" />
+                <param name="min_coverage_intron" value="50" />
+                <param name="max_coverage_intron" value="20000" />
+              </conditional>
+              <param name="microexon_search" value="Yes" />
+              <conditional name="bowtie2_settings">
+                <param name="b2_settings" value="No" />
+              </conditional>
+              <!-- Fusion search params -->
+              <conditional name="fusion_search">
+                <param name="do_search" value="Yes" />
+                <param name="anchor_len" value="21" />
+                <param name="min_dist" value="10000021" />
+                <param name="read_mismatches" value="3" />
+                <param name="multireads" value="4" />
+                <param name="multipairs" value="5" />
+                <param name="ignore_chromosomes" value="chrM"/>
+              </conditional>
+            </conditional>
+            <conditional name="readGroup">
+              <param name="specReadGroup" value="No" />
+            </conditional>
             <output name="insertions" file="tophat_out3i.bed" />
             <output name="deletions" file="tophat_out3d.bed" />
             <output name="junctions" file="tophat2_out3j.bed" />
@@ -406,49 +457,72 @@
             Replace the + with double-dash
             Rename the files in tmp_dir appropriately
             -->
-            <param name="sPaired" value="paired"/>
-            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
-            <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/>
+            <conditional name="singlePaired">            
+              <param name="sPaired" value="paired"/>
+              <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
+              <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/>
+              <param name="mate_inner_distance" value="20"/>
+              <param name="mate_std_dev" value="20"/>
+              <param name="report_discordant_pairs" value="Yes" />
+            </conditional>
             <param name="genomeSource" value="indexed"/>
             <param name="index" value="tophat_test"/>
-            <param name="mate_inner_distance" value="20"/>
-            <param name="settingsType" value="full"/>
-            <param name="library_type" value="FR Unstranded"/>
-            <param name="read_mismatches" value="5"/>
-            <param name="bowtie_n" value="Yes"/>
-            <param name="mate_std_dev" value="20"/>
-            <param name="anchor_length" value="8"/>
-            <param name="splice_mismatches" value="0"/>
-            <param name="min_intron_length" value="70"/>
-            <param name="max_intron_length" value="500000"/>
-            <param name="max_multihits" value="40"/>
-            <param name="min_segment_intron" value="50" />
-            <param name="max_segment_intron" value="500000" />
-            <param name="seg_mismatches" value="2"/>
-            <param name="seg_length" value="25"/>
-            <param name="allow_indel_search" value="No"/>
-            <param name="use_junctions" value="Yes" />
-            <param name="use_annotations" value="No" />
-            <param name="use_juncs" value="No" />
-            <param name="no_novel_juncs" value="No" />
-            <param name="report_discordant_pairs" value="Yes" />
-            <param name="use_search" value="No" />
-            <param name="microexon_search" value="Yes" />
-            <param name="b2_settings" value="No" />
-            <!-- Fusion search params -->
-            <param name="do_search" value="Yes" />            
-            <param name="anchor_len" value="21" />
-            <param name="min_dist" value="10000021" />
-            <param name="read_mismatches" value="3" />
-            <param name="multireads" value="4" />
-            <param name="multipairs" value="5" />
-            <param name="ignore_chromosomes" value="chrM"/>
-            <param name="specReadGroup" value="No" />
+            <conditional name="params">
+              <param name="settingsType" value="full"/>
+              <param name="library_type" value="FR Unstranded"/>
+              <param name="read_mismatches" value="5"/>
+              <!-- read_edit_dist is raised to 5 to match read_mismatches; otherwise tophat2 reports "Error: the read mismatches (5) and the read gap length (2) should be less than or equal to the read edit dist (2)" -->
+              <param name="read_edit_dist" value="5" />
+              <param name="bowtie_n" value="Yes"/>
+              <param name="anchor_length" value="8"/>
+              <param name="splice_mismatches" value="0"/>
+              <param name="min_intron_length" value="70"/>
+              <param name="max_intron_length" value="500000"/>
+              <param name="max_multihits" value="40"/>
+              <param name="min_segment_intron" value="50" />
+              <param name="max_segment_intron" value="500000" />
+              <param name="seg_mismatches" value="2"/>
+              <param name="seg_length" value="25"/>
+              <conditional name="indel_search">
+                <param name="allow_indel_search" value="No"/>
+              </conditional>
+              <conditional name="own_junctions">
+                <param name="use_junctions" value="Yes" />
+                <conditional name="gene_model_ann">
+                  <param name="use_annotations" value="No" />
+                </conditional>
+                <conditional name="raw_juncs">
+                  <param name="use_juncs" value="No" />
+                </conditional>
+                <conditional name="no_novel_juncs">
+                  <param name="no_novel_juncs" value="No" />
+                </conditional>
+              </conditional>
+              <conditional name="coverage_search">
+                <param name="use_search" value="No" />
+              </conditional>
+              <param name="microexon_search" value="Yes" />
+              <conditional name="bowtie2_settings">
+                <param name="b2_settings" value="No" />
+              </conditional>
+              <!-- Fusion search params -->
+              <conditional name="fusion_search">
+                <param name="do_search" value="Yes" />            
+                <param name="anchor_len" value="21" />
+                <param name="min_dist" value="10000021" />
+                <param name="read_mismatches" value="3" />
+                <param name="multireads" value="4" />
+                <param name="multipairs" value="5" />
+                <param name="ignore_chromosomes" value="chrM"/>
+              </conditional>
+            </conditional>
+            <conditional name="readGroup">
+              <param name="specReadGroup" value="No" />
+            </conditional>
             <output name="junctions" file="tophat2_out4j.bed" />
             <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" />
         </test>
     </tests>
-
     <help>
 **Tophat Overview**
 
@@ -524,4 +598,7 @@
   --min-segment-intron              The minimum intron length that may be found during split-segment search. The default is 50.
   --max-segment-intron              The maximum intron length that may be found during split-segment search. The default is 500000.
     </help>
+    <citations>
+        <citation type="doi">10.1186/gb-2013-14-4-r36</citation>
+    </citations>
 </tool>