changeset 10:b6e3849293b1 draft

Uploaded
author devteam
date Fri, 19 Dec 2014 11:59:06 -0500
parents 424d49834830
children 8160c8ea4eb9
files cuff_macros.xml cuffmerge_wrapper.py cuffmerge_wrapper.xml tool_dependencies.xml
diffstat 4 files changed, 122 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cuff_macros.xml	Fri Dec 19 11:59:06 2014 -0500
@@ -0,0 +1,91 @@
+<macros>
+  <token name="@VERSION@">2.2.1</token>
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package" version="2.2.1">cufflinks</requirement>
+      <yield />
+    </requirements>
+  </xml>
+  <xml name="stdio">
+    <stdio>
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+        <regex match="Error:" />
+        <regex match="Exception:" />
+    </stdio>
+  </xml>
+  <xml name="condition_inputs">
+    <!-- DEFAULT : use BAM/SAM files -->
+    <conditional name="in_type">
+        <param name="set_in_type" type="select" label="Input data type"
+            help="CuffNorm supports either CXB (from cuffquant) or SAM/BAM input files. Mixing is not supported. Default: SAM/BAM">
+            <option value="BAM">SAM/BAM</option>
+            <option value="CXB">Cuffquant (CXB)</option>
+            <option value="CONDITION_LIST">List of single replicate conditions</option>
+            <option value="CONDITION_REPLICATE_LIST">List of multiple replicate conditions</option>
+        </param>
+        <when value="BAM">
+            <repeat name="conditions" title="Condition" min="2">
+                <param name="name" title="Condition name" type="text" label="Name"/>
+                <param name="samples" label="Replicates" type="data" format="sam,bam" multiple="true"/>
+            </repeat>
+        </when>
+        <when value="CXB">
+            <repeat name="conditions" title="Condition" min="2">
+                <param name="name" title="Condition name" type="text" label="Name"/>
+                <param name="samples" label="Replicates" type="data" format="cxb" multiple="true"/>
+            </repeat>
+        </when>
+        <when value="CONDITION_LIST">
+            <param name="conditions" title="List of Conditions" type="data_collection" collection_type="list" />
+        </when>
+        <when value="CONDITION_REPLICATE_LIST">
+            <param name="conditions" title="List of Conditions" type="data_collection" collection_type="list:list" />
+        </when>
+    </conditional>
+  </xml>
+  <token name="@CONDITION_SAMPLES@">
+            #if $in_type.set_in_type in ['BAM', 'CXB']
+                #for $condition in $in_type.conditions:
+                    #set samples = ','.join( [ str( $sample ) for $sample in $condition.samples ] )
+                    $samples
+                #end for
+            #elif $in_type.set_in_type == 'CONDITION_LIST'
+                #for $sample in $in_type.conditions:
+                    $sample
+                #end for
+            #elif $in_type.set_in_type == 'CONDITION_REPLICATE_LIST'
+                #for $condition_list in $in_type.conditions:
+                    #set samples = ','.join( [ str( $sample ) for $sample in $condition_list ] )
+                    $samples
+                #end for
+            #end if
+  </token>
+  <token name="@CONDITION_LABELS@">
+            #import re
+            #if $in_type.set_in_type in ['BAM', 'CXB']
+                #set labels = '\'' + '\',\''.join( [ str( $condition.name ) for $condition in $in_type.conditions ] ) + '\''
+            #elif $in_type.set_in_type in ['CONDITION_LIST', 'CONDITION_REPLICATE_LIST']
+                #set labels = '\'' + '\',\''.join( map(lambda x: re.sub('[^\w\-_]', '_', x), $in_type.conditions.keys() ) ) + '\''
+            #end if
+            --labels $labels
+  </token>
+  <xml name="cufflinks_gtf_inputs">
+    <param format="gtf" name="inputs" type="data" label="GTF file(s) produced by Cufflinks" help="" multiple="true" />
+    <repeat name="additional_inputs" title="Additional GTF Inputs (Lists)">
+      <param format="gtf" name="additional_inputs" type="data_collection" label="GTF file(s) produced by Cufflinks" help="" />
+    </repeat>
+  </xml>
+  <token name="@CUFFLINKS_GTF_INPUTS@">
+            ## Inputs.
+            #for $input_file in $inputs:
+                "${input_file}"
+            #end for
+            #for $additional_input in $additional_inputs:
+                #for $input_file in $additional_input.additional_inputs:
+                  "${input_file}"
+                #end for
+            #end for
+  </token>
+  <token name="@HAS_MULTIPLE_INPUTS@">getattr(inputs, "__len__", [].__len__)() >= 2</token>
+</macros>
\ No newline at end of file
--- a/cuffmerge_wrapper.py	Mon Jan 20 11:09:02 2014 -0500
+++ b/cuffmerge_wrapper.py	Fri Dec 19 11:59:06 2014 -0500
@@ -1,7 +1,5 @@
 #!/usr/bin/env python
 
-# Supports Cuffmerge versions 1.3 and newer.
-
 import optparse, os, shutil, subprocess, sys, tempfile
 
 def stop_err( msg ):
@@ -15,13 +13,13 @@
     parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffmerge to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.')
     parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
     
-    
     # Wrapper / Galaxy options.
     parser.add_option( '', '--index', dest='index', help='The path of the reference genome' )
     parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
     
     # Outputs.
     parser.add_option( '', '--merged-transcripts', dest='merged_transcripts' )
+    parser.add_option( '--min-isoform-fraction', dest='min_isoform_fraction' )
     
     (options, args) = parser.parse_args()
     
@@ -68,7 +66,8 @@
         cmd += " -g %s " % options.ref_annotation
     if options.use_seq_data:
         cmd += " -s %s " % seq_path
-        
+    if options.min_isoform_fraction:
+        cmd += " --min-isoform-fraction %s " % (options.min_isoform_fraction)
     # Add input files to a file.
     inputs_file_name = tempfile.NamedTemporaryFile( dir="." ).name
     inputs_file = open( inputs_file_name, 'w' )
--- a/cuffmerge_wrapper.xml	Mon Jan 20 11:09:02 2014 -0500
+++ b/cuffmerge_wrapper.xml	Fri Dec 19 11:59:06 2014 -0500
@@ -1,12 +1,12 @@
-<tool id="cuffmerge" name="Cuffmerge" version="0.0.6">
-    <!-- Wrapper supports Cuffmerge versions 1.3 and newer -->
+<tool id="cuffmerge" name="Cuffmerge" version="@VERSION@.0">
     <description>merge together several Cufflinks assemblies</description>
-    <requirements>
-        <requirement type="package" version="2.1.1">cufflinks</requirement>
-    </requirements>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <macros>
+      <import>cuff_macros.xml</import>
+    </macros>
     <command interpreter="python">
         cuffmerge_wrapper.py
-        
             --num-threads="\${GALAXY_SLOTS:-4}"
             
             ## Use annotation reference?
@@ -16,29 +16,23 @@
             
             ## Use sequence data?
             #if $seq_data.use_seq_data == "Yes":
-	            -s
+                -s
                 #if $seq_data.seq_source.index_source == "history":
                     --ref_file="${seq_data.seq_source.ref_file}"
                 #else:
                     --index="${seq_data.seq_source.index.fields.path}"
                 #end if
             #end if
-            
+
+            --min-isoform-fraction="${min_isoform_fraction}"
+
             ## Outputs.
             --merged-transcripts="${merged_transcripts}"
-                        
-            ## Inputs.
-            "${first_input}"
-            #for $input_file in $input_files:
-                "${input_file.additional_input}"
-            #end for
-            
+
+            @CUFFLINKS_GTF_INPUTS@
     </command>
     <inputs>
-        <param format="gtf" name="first_input" type="data" label="GTF file produced by Cufflinks" help=""/>
-        <repeat name="input_files" title="Additional GTF Input Files">
-            <param format="gtf" name="additional_input" type="data" label="GTF file produced by Cufflinks" help=""/>
-        </repeat>
+        <expand macro="cufflinks_gtf_inputs" />
         <conditional name="annotation">
             <param name="use_ref_annotation" type="select" label="Use Reference Annotation">
                 <option value="No">No</option>
@@ -65,7 +59,7 @@
                   <when value="cached">
                     <param name="index" type="select" label="Using reference genome">
                       <options from_data_table="fasta_indexes">
-                        <filter type="data_meta" ref="first_input" key="dbkey" column="1" />
+                        <filter type="data_meta" ref="inputs" key="dbkey" column="1" />
                         <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
                       </options>
                     </param>
@@ -76,6 +70,7 @@
                 </conditional>
             </when>
         </conditional>
+        <param name="min_isoform_fraction" type="float" min="0" max="1" value="0.05" label="Minimum isoform fraction" help="Discard isoforms with abundance below this value" />
     </inputs>
 
     <outputs>
@@ -87,12 +82,12 @@
             cuffmerge -g cuffcompare_in3.gtf cuffcompare_in1.gtf cuffcompare_in2.gtf
         -->
         <test>
-            <param name="first_input" value="cuffcompare_in1.gtf" ftype="gtf"/>
-            <param name="additional_input" value="cuffcompare_in2.gtf" ftype="gtf"/>
+            <param name="inputs" value="cuffcompare_in1.gtf,cuffcompare_in2.gtf" ftype="gtf"/>
             <param name="use_ref_annotation" value="Yes"/>
             <param name="reference_annotation" value="cuffcompare_in3.gtf" ftype="gtf"/>
+            <param name="min_isoform_fraction" value="0.08" />
             <param name="use_seq_data" value="No"/>
-			<!-- oId assignment differ/are non-deterministic -->
+            <!-- oId assignments differ/are non-deterministic -->
             <output name="merged_transcripts" file="cuffmerge_out1.gtf" lines_diff="50"/>
         </test>
     </tests>
@@ -102,8 +97,8 @@
 
 Cuffmerge is part of Cufflinks_. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
 
-.. _Cufflinks: http://cufflinks.cbcb.umd.edu/
-        
+.. _Cufflinks: http://cole-trapnell-lab.github.io/cufflinks/
+
 ------
 
 **Know what you are doing**
@@ -112,7 +107,7 @@
 
 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
 
-.. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffmerge
+.. __: http://cole-trapnell-lab.github.io/cufflinks/cuffmerge/
 
 ------
 
@@ -130,5 +125,9 @@
 
 Merged transcripts file:
 
-Cuffmerge produces a GTF file that contains an assembly that merges together the input assemblies.    </help>
+Cuffmerge produces a GTF file that contains an assembly that merges together the input assemblies.
+    </help>
+    <citations>
+        <citation type="doi">10.1038/nbt.1621</citation>
+    </citations>
 </tool>
--- a/tool_dependencies.xml	Mon Jan 20 11:09:02 2014 -0500
+++ b/tool_dependencies.xml	Fri Dec 19 11:59:06 2014 -0500
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="cufflinks" version="2.1.1">
-        <repository changeset_revision="394b13717223" name="package_cufflinks_2_1_1" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
+    <package name="cufflinks" version="2.2.1">
+        <repository changeset_revision="899067a260d1" name="package_cufflinks_2_2_1" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>