changeset 2:da1f39fe14bc draft

Uploaded
author devteam
date Thu, 18 Dec 2014 13:56:31 -0500
parents ae06af1118dc
children 81f97e12e573
files test-data/bowtie2/tophat_test.1.bt2 test-data/bowtie2/tophat_test.2.bt2 test-data/bowtie2/tophat_test.3.bt2 test-data/bowtie2/tophat_test.4.bt2 test-data/bowtie2/tophat_test.fa test-data/bowtie2/tophat_test.rev.1.bt2 test-data/bowtie2/tophat_test.rev.2.bt2 test-data/bowtie2_indices.loc test-data/tophat2_out3j.bed test-data/tophat2_out4j.bed tool_data_table_conf.xml.test tool_dependencies.xml tophat2_wrapper.xml
diffstat 13 files changed, 190 insertions(+), 92 deletions(-) [+]
line wrap: on
line diff
Binary file test-data/bowtie2/tophat_test.1.bt2 has changed
Binary file test-data/bowtie2/tophat_test.2.bt2 has changed
Binary file test-data/bowtie2/tophat_test.3.bt2 has changed
Binary file test-data/bowtie2/tophat_test.4.bt2 has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bowtie2/tophat_test.fa	Thu Dec 18 13:56:31 2014 -0500
@@ -0,0 +1,14 @@
+>test_chromosome
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+ACTACTATCTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCC
+ACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGC
+AGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCT
+ACGTATTTGGCGCGCGGCGCTACGGCTGAGCGTCGAGCTTGCGATACGCC
+GTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG
+ACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACT
+GTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG
+TTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACTTGC
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
Binary file test-data/bowtie2/tophat_test.rev.1.bt2 has changed
Binary file test-data/bowtie2/tophat_test.rev.2.bt2 has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bowtie2_indices.loc	Thu Dec 18 13:56:31 2014 -0500
@@ -0,0 +1,1 @@
+tophat_test	tophat_test	tophat_test	${__HERE__}/bowtie2/tophat_test
--- a/test-data/tophat2_out3j.bed	Mon Nov 17 11:23:41 2014 -0500
+++ b/test-data/tophat2_out3j.bed	Thu Dec 18 13:56:31 2014 -0500
@@ -1,3 +1,3 @@
 track name=junctions description="TopHat junctions"
-test_chromosome	177	400	JUNC00000001	27	+	177	400	255,0,0	2	73,50	0,173
-test_chromosome	350	550	JUNC00000002	26	+	350	550	255,0,0	2	50,50	0,150
+test_chromosome	180	400	JUNC00000001	19	+	180	400	255,0,0	2	70,50	0,170
+test_chromosome	350	550	JUNC00000002	23	+	350	550	255,0,0	2	50,50	0,150
--- a/test-data/tophat2_out4j.bed	Mon Nov 17 11:23:41 2014 -0500
+++ b/test-data/tophat2_out4j.bed	Thu Dec 18 13:56:31 2014 -0500
@@ -1,3 +1,3 @@
 track name=junctions description="TopHat junctions"
-test_chromosome	177	400	JUNC00000001	51	+	177	400	255,0,0	2	73,50	0,173
-test_chromosome	350	550	JUNC00000002	43	+	350	550	255,0,0	2	50,50	0,150
+test_chromosome	177	400	JUNC00000001	44	+	177	400	255,0,0	2	73,50	0,173
+test_chromosome	350	550	JUNC00000002	42	+	350	550	255,0,0	2	50,50	0,150
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Thu Dec 18 13:56:31 2014 -0500
@@ -0,0 +1,6 @@
+<tables>
+    <table name="tophat2_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/bowtie2_indices.loc" />
+    </table>
+</tables>
--- a/tool_dependencies.xml	Mon Nov 17 11:23:41 2014 -0500
+++ b/tool_dependencies.xml	Thu Dec 18 13:56:31 2014 -0500
@@ -1,12 +1,12 @@
 <?xml version="1.0"?>
 <tool_dependency>
   <package name="bowtie2" version="2.1.0">
-      <repository changeset_revision="017a00c265f1" name="package_bowtie2_2_1_0" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+      <repository changeset_revision="017a00c265f1" name="package_bowtie2_2_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="samtools" version="0.1.18">
-      <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+      <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="tophat2" version="2.0.9">
-      <repository changeset_revision="8549fd545473" name="package_tophat2_2_0_9" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+      <repository changeset_revision="8549fd545473" name="package_tophat2_2_0_9" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>
--- a/tophat2_wrapper.xml	Mon Nov 17 11:23:41 2014 -0500
+++ b/tophat2_wrapper.xml	Thu Dec 18 13:56:31 2014 -0500
@@ -1,4 +1,4 @@
-<tool id="tophat2" name="Tophat2" version="0.6">
+<tool id="tophat2" name="Tophat2" version="0.7">
     <!-- Wrapper compatible with Tophat version 2.0.0+ -->
     <description>Gapped-read mapper for RNA-seq data</description>
     <version_command>tophat2 --version</version_command>
@@ -111,7 +111,7 @@
         #end if
 
         ## Set index path, inputs and parameters specific to paired data.
-        #if $singlePaired.sPaired == "paired"
+        #if $singlePaired.sPaired != "single"
             -r $singlePaired.mate_inner_distance
             --mate-std-dev=$singlePaired.mate_std_dev
             
@@ -119,9 +119,13 @@
                 --no-discordant
             #end if
 
-            ${index_path} $singlePaired.input1 $singlePaired.input2
+            #if $singlePaired.sPaired == "paired"
+              ${index_path} "$singlePaired.input1" "$singlePaired.input2"
+            #else
+              ${index_path} "$singlePaired.input.forward" "$singlePaired.input.reverse"
+            #end if
         #else
-            ${index_path} $singlePaired.input1
+            ${index_path} "$singlePaired.input1"
         #end if
     </command>
     
@@ -129,7 +133,8 @@
         <conditional name="singlePaired">
             <param name="sPaired" type="select" label="Is this library mate-paired?">
               <option value="single">Single-end</option>
-              <option value="paired">Paired-end</option>
+              <option value="paired">Paired-end (as individual datasets)</option>
+              <option value="paired_collection">Paired-end (as collection)</option>
             </param>
             <when value="single">
                 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
@@ -137,13 +142,11 @@
             <when value="paired">
                 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
                 <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
-                <param name="mate_inner_distance" type="integer" value="300" label="Mean Inner Distance between Mate Pairs" />
-                <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs"  help="The standard deviation for the distribution on inner distances between mate pairs."/>
-                <!-- Discordant pairs. -->
-                <param name="report_discordant_pairs" type="select" label="Report discordant pair alignments?">
-                    <option value="No">No</option>
-                    <option selected="True" value="Yes">Yes</option>
-                </param>
+                <expand macro="paired_parameters" />
+            </when>
+            <when value="paired_collection">
+                <param format="fastqsanger" name="input" type="data_collection" collection_type="paired" label="RNA-Seq FASTQ paired reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
+                <expand macro="paired_parameters" />
             </when>
         </conditional>
         <expand macro="refGenomeSourceConditional">
@@ -293,6 +296,15 @@
 
     <macros>
       <import>tophat_macros.xml</import>
+      <xml name="paired_parameters">
+        <param name="mate_inner_distance" type="integer" value="300" label="Mean Inner Distance between Mate Pairs" />
+        <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs"  help="The standard deviation for the distribution on inner distances between mate pairs."/>
+        <!-- Discordant pairs. -->
+        <param name="report_discordant_pairs" type="select" label="Report discordant pair alignments?">
+            <option value="No">No</option>
+            <option selected="True" value="Yes">Yes</option>
+        </param>
+      </xml>
       <macro name="dbKeyActions">
         <actions>
           <conditional name="refGenomeSource.genomeSource">
@@ -348,6 +360,23 @@
             <output name="junctions" file="tophat2_out2j.bed" />
             <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
         </test>
+        <test>
+            <!-- Same test as above, but with the paired reads supplied as a paired dataset collection. -->
+            <param name="sPaired" value="paired_collection" />
+            <param name="input">
+              <collection type="paired">
+                <element name="forward" value="tophat_in2.fastqsanger" ftype="fastqsanger" />
+                <element name="reverse" value="tophat_in3.fastqsanger" ftype="fastqsanger" />
+              </collection>
+            </param>
+            <param name="genomeSource" value="history" />
+            <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" />
+            <param name="mate_inner_distance" value="20" />
+            <param name="settingsType" value="preSet" />
+            <param name="specReadGroup" value="No" />
+            <output name="junctions" file="tophat2_out2j.bed" />
+            <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
+        </test>
         <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters -->
         <test>
             <!-- Tophat commands:
@@ -356,44 +385,66 @@
             Replace the + with double-dash
             Rename the files in tmp_dir appropriately
             -->
-            <param name="sPaired" value="single"/>
-            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
+            <conditional name="singlePaired">
+              <param name="sPaired" value="single"/>
+              <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
+            </conditional>
             <param name="genomeSource" value="history"/>
             <param name="ownFile" value="tophat_in1.fasta"/>
-            <param name="settingsType" value="full"/>
-            <param name="library_type" value="FR Unstranded"/>
-            <param name="read_mismatches" value="2"/>
-            <param name="bowtie_n" value="No"/>
-            <param name="anchor_length" value="8"/>
-            <param name="splice_mismatches" value="0"/>
-            <param name="min_intron_length" value="70"/>
-            <param name="max_intron_length" value="500000"/>
-            <param name="max_multihits" value="40"/>
-            <param name="min_segment_intron" value="50" />
-            <param name="max_segment_intron" value="500000" />
-            <param name="seg_mismatches" value="2"/>
-            <param name="seg_length" value="25"/>
-            <param name="allow_indel_search" value="Yes"/>
-            <param name="max_insertion_length" value="3"/>
-            <param name="max_deletion_length" value="3"/>
-            <param name="use_junctions" value="Yes" />
-            <param name="use_annotations" value="No" />
-            <param name="use_juncs" value="No" />
-            <param name="no_novel_juncs" value="No" />
-            <param name="use_search" value="Yes" />
-            <param name="min_coverage_intron" value="50" />
-            <param name="max_coverage_intron" value="20000" />
-            <param name="microexon_search" value="Yes" />
-            <param name="b2_settings" value="No" />
-            <!-- Fusion search params -->
-            <param name="do_search" value="Yes" />            
-            <param name="anchor_len" value="21" />
-            <param name="min_dist" value="10000021" />
-            <param name="read_mismatches" value="3" />
-            <param name="multireads" value="4" />
-            <param name="multipairs" value="5" />
-            <param name="ignore_chromosomes" value="chrM"/>
-            <param name="specReadGroup" value="No" />
+            <conditional name="params">
+              <param name="settingsType" value="full"/>
+              <param name="library_type" value="FR Unstranded"/>
+              <param name="read_mismatches" value="2"/>
+              <param name="bowtie_n" value="No"/>
+              <param name="anchor_length" value="8"/>
+              <param name="splice_mismatches" value="0"/>
+              <param name="min_intron_length" value="70"/>
+              <param name="max_intron_length" value="500000"/>
+              <param name="max_multihits" value="40"/>
+              <param name="min_segment_intron" value="50" />
+              <param name="max_segment_intron" value="500000" />
+              <param name="seg_mismatches" value="2"/>
+              <param name="seg_length" value="25"/>
+              <conditional name="indel_search">
+                <param name="allow_indel_search" value="Yes"/>
+                <param name="max_insertion_length" value="3"/>
+                <param name="max_deletion_length" value="3"/>
+              </conditional>
+              <conditional name="own_junctions">
+                <param name="use_junctions" value="Yes" />
+                <conditional name="gene_model_ann">
+                  <param name="use_annotations" value="No" />
+                </conditional>
+                <conditional name="raw_juncs">
+                  <param name="use_juncs" value="No" />
+                </conditional>
+                <conditional name="no_novel_juncs">
+                  <param name="no_novel_juncs" value="No" />
+                </conditional>
+              </conditional>
+              <conditional name="coverage_search">
+                <param name="use_search" value="Yes" />
+                <param name="min_coverage_intron" value="50" />
+                <param name="max_coverage_intron" value="20000" />
+              </conditional>
+              <param name="microexon_search" value="Yes" />
+              <conditional name="bowtie2_settings">
+                <param name="b2_settings" value="No" />
+              </conditional>
+              <!-- Fusion search params -->
+              <conditional name="fusion_search">
+                <param name="do_search" value="Yes" />
+                <param name="anchor_len" value="21" />
+                <param name="min_dist" value="10000021" />
+                <param name="read_mismatches" value="3" />
+                <param name="multireads" value="4" />
+                <param name="multipairs" value="5" />
+                <param name="ignore_chromosomes" value="chrM"/>
+              </conditional>
+            </conditional>
+            <conditional name="readGroup">
+              <param name="specReadGroup" value="No" />
+            </conditional>
             <output name="insertions" file="tophat_out3i.bed" />
             <output name="deletions" file="tophat_out3d.bed" />
             <output name="junctions" file="tophat2_out3j.bed" />
@@ -406,49 +457,72 @@
             Replace the + with double-dash
             Rename the files in tmp_dir appropriately
             -->
-            <param name="sPaired" value="paired"/>
-            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
-            <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/>
+            <conditional name="singlePaired">            
+              <param name="sPaired" value="paired"/>
+              <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
+              <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/>
+              <param name="mate_inner_distance" value="20"/>
+              <param name="report_discordant_pairs" value="Yes" />
+            </conditional>
             <param name="genomeSource" value="indexed"/>
             <param name="index" value="tophat_test"/>
-            <param name="mate_inner_distance" value="20"/>
-            <param name="settingsType" value="full"/>
-            <param name="library_type" value="FR Unstranded"/>
-            <param name="read_mismatches" value="5"/>
-            <param name="bowtie_n" value="Yes"/>
-            <param name="mate_std_dev" value="20"/>
-            <param name="anchor_length" value="8"/>
-            <param name="splice_mismatches" value="0"/>
-            <param name="min_intron_length" value="70"/>
-            <param name="max_intron_length" value="500000"/>
-            <param name="max_multihits" value="40"/>
-            <param name="min_segment_intron" value="50" />
-            <param name="max_segment_intron" value="500000" />
-            <param name="seg_mismatches" value="2"/>
-            <param name="seg_length" value="25"/>
-            <param name="allow_indel_search" value="No"/>
-            <param name="use_junctions" value="Yes" />
-            <param name="use_annotations" value="No" />
-            <param name="use_juncs" value="No" />
-            <param name="no_novel_juncs" value="No" />
-            <param name="report_discordant_pairs" value="Yes" />
-            <param name="use_search" value="No" />
-            <param name="microexon_search" value="Yes" />
-            <param name="b2_settings" value="No" />
-            <!-- Fusion search params -->
-            <param name="do_search" value="Yes" />            
-            <param name="anchor_len" value="21" />
-            <param name="min_dist" value="10000021" />
-            <param name="read_mismatches" value="3" />
-            <param name="multireads" value="4" />
-            <param name="multipairs" value="5" />
-            <param name="ignore_chromosomes" value="chrM"/>
-            <param name="specReadGroup" value="No" />
+            <conditional name="params">
+              <param name="settingsType" value="full"/>
+              <param name="library_type" value="FR Unstranded"/>
+              <param name="read_mismatches" value="5"/>
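+              <!-- read_edit_dist is raised to 5 to match read_mismatches; with the default of 2, TopHat aborts with "Error: the read mismatches (5) and the read gap length (2) should be less than or equal to the read edit dist (2)" -->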
+              <param name="read_edit_dist" value="5" />
+              <param name="bowtie_n" value="Yes"/>
+              <param name="mate_std_dev" value="20"/>
+              <param name="anchor_length" value="8"/>
+              <param name="splice_mismatches" value="0"/>
+              <param name="min_intron_length" value="70"/>
+              <param name="max_intron_length" value="500000"/>
+              <param name="max_multihits" value="40"/>
+              <param name="min_segment_intron" value="50" />
+              <param name="max_segment_intron" value="500000" />
+              <param name="seg_mismatches" value="2"/>
+              <param name="seg_length" value="25"/>
+              <conditional name="indel_search">
+                <param name="allow_indel_search" value="No"/>
+              </conditional>
+              <conditional name="own_junctions">
+                <param name="use_junctions" value="Yes" />
+                <conditional name="gene_model_ann">
+                  <param name="use_annotations" value="No" />
+                </conditional>
+                <conditional name="raw_juncs">
+                  <param name="use_juncs" value="No" />
+                </conditional>
+                <conditional name="no_novel_juncs">
+                  <param name="no_novel_juncs" value="No" />
+                </conditional>
+              </conditional>
+              <conditional name="coverage_search">
+                <param name="use_search" value="No" />
+              </conditional>
+              <param name="microexon_search" value="Yes" />
+              <conditional name="bowtie2_settings">
+                <param name="b2_settings" value="No" />
+              </conditional>
+              <!-- Fusion search params -->
+              <conditional name="fusion_search">
+                <param name="do_search" value="Yes" />            
+                <param name="anchor_len" value="21" />
+                <param name="min_dist" value="10000021" />
+                <param name="read_mismatches" value="3" />
+                <param name="multireads" value="4" />
+                <param name="multipairs" value="5" />
+                <param name="ignore_chromosomes" value="chrM"/>
+              </conditional>
+            </conditional>
+            <conditional name="readGroup">
+              <param name="specReadGroup" value="No" />
+            </conditional>
             <output name="junctions" file="tophat2_out4j.bed" />
             <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" />
         </test>
     </tests>
-
     <help>
 **Tophat Overview**
 
@@ -524,4 +598,7 @@
   --min-segment-intron              The minimum intron length that may be found during split-segment search. The default is 50.
   --max-segment-intron              The maximum intron length that may be found during split-segment search. The default is 500000.
     </help>
+    <citations>
+        <citation type="doi">10.1186/gb-2013-14-4-r36</citation>
+    </citations>
 </tool>