Galaxy |

Changeset 3:f109453ecfa2 (2013-11-15)

Previous changeset 2:fdf01b3c1841 (2013-11-08) Next changeset 4:57c13cd32044 (2013-11-15)

Commit message:
readCufflinks() parses the feature file based on its file extension (.gtf/.gff3). Also added gtfFile and genome inputs to cummerbund_wrapper.xml.

modified:
README
cuffdiff_wrapper.xml
cummerbund_wrapper.xml

removed:
cuffdiff_wrapper.py

diff -r fdf01b3c1841 -r f109453ecfa2 README
--- a/README Fri Nov 08 14:54:01 2013 -0600
+++ b/README Fri Nov 15 13:39:14 2013 -0600

@@ -18,3 +18,5 @@

+
+

diff -r fdf01b3c1841 -r f109453ecfa2 cuffdiff_wrapper.py
--- a/cuffdiff_wrapper.py Fri Nov 08 14:54:01 2013 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,325 +0,0 @@\n-#!/usr/bin/env python\n-\n-# Wrapper supports Cuffdiff versions v1.3.0-v2.0\n-\n-import optparse, os, shutil, subprocess, sys, tempfile\n-\n-def group_callback( option, op_str, value, parser ):\n- groups = []\n- flist = []\n- for arg in parser.rargs:\n- arg = arg.strip()\n- if arg[0] is "-":\n- break\n- elif arg[0] is ",":\n- groups.append(flist)\n- flist = []\n- else:\n- flist.append(arg)\n- groups.append(flist)\n-\n- setattr(parser.values, option.dest, groups)\n- \n-def label_callback( option, op_str, value, parser ):\n- labels = []\n- for arg in parser.rargs:\n- arg = arg.strip()\n- if arg[0] is "-":\n- break\n- else:\n- labels.append(arg)\n-\n- setattr(parser.values, option.dest, labels)\n-\n-def stop_err( msg ):\n- sys.stderr.write( "%s\\n" % msg )\n- sys.exit(1)\n- \n-# Copied from sam_to_bam.py:\n-def check_seq_file( dbkey, cached_seqs_pointer_file ):\n- seq_path = \'\'\n- for line in open( cached_seqs_pointer_file ):\n- line = line.rstrip( \'\\r\\n\' )\n- if line and not line.startswith( \'#\' ) and line.startswith( \'index\' ):\n- fields = line.split( \'\\t\' )\n- if len( fields ) < 3:\n- continue\n- if fields[1] == dbkey:\n- seq_path = fields[2].strip()\n- break\n- return seq_path\n-\n-def __main__():\n- #Parse Command Line\n- parser = optparse.OptionParser()\n- \n- # Cuffdiff options.\n- parser.add_option( \'-s\', \'--inner-dist-std-dev\', dest=\'inner_dist_std_dev\', help=\'The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.\' )\n- parser.add_option( \'-p\', \'--num-threads\', dest=\'num_threads\', help=\'Use this many threads to align reads. The default is 1.\' )\n- parser.add_option( \'-m\', \'--inner-mean-dist\', dest=\'inner_mean_dist\', help=\'This is the expected (mean) inner distance between mate pairs. \\\n- For, example, for paired end runs with fragments selected at 300bp, \\\n- where each end is 50bp, you should set -r to be 200. 
The default is 45bp.\')\n- parser.add_option( \'-c\', \'--min-alignment-count\', dest=\'min_alignment_count\', help=\'The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not signficant, and the locus\\\' observed changes don\\\'t contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).\' )\n- parser.add_option( \'--FDR\', dest=\'FDR\', help=\'The allowed false discovery rate. The default is 0.05.\' )\n- parser.add_option( \'-u\', \'--multi-read-correct\', dest=\'multi_read_correct\', action="store_true", help=\'Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome\')\n- parser.add_option( \'--library-norm-method\', dest=\'library_norm_method\' )\n- parser.add_option( \'--dispersion-method\', dest=\'dispersion_method\' )\n-\n- # Advanced Options:\t\n- parser.add_option( \'--num-importance-samples\', dest=\'num_importance_samples\', help=\'Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000\' )\n- parser.add_option( \'--max-mle-iterations\', dest=\'max_mle_iterations\', help=\'Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000\' )\n- \n- # Wrapper / Galaxy options.\n- parser.add_option( \'-f\', \'--files\', dest=\'groups\', action="callback", callback=group_callback, help="Groups to be processed, groups are separated by spaces, replicates in a group comma separated. 
group1_rep1,group1_rep2 group2_rep1,group2_rep2, ..., groupN_rep1, grou'..b'ir, "tss_group_exp.diff" ), options.tss_groups_exp_output )\n-\n- if options.isoforms_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.count_tracking" )):\n- shutil.copyfile( os.path.join( cuffdatadir, "isoforms.count_tracking" ), options.isoforms_count_tracking_output )\n- if options.genes_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.count_tracking" )):\n- shutil.copyfile( os.path.join( cuffdatadir, "genes.count_tracking" ), options.genes_count_tracking_output )\n- if options.cds_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.count_tracking" )):\n- shutil.copyfile( os.path.join( cuffdatadir, "cds.count_tracking" ), options.cds_count_tracking_output )\n- if options.tss_groups_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.count_tracking" )):\n- shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.count_tracking" ), options.tss_groups_count_tracking_output )\n-\n- if options.cds_diff_output and os.path.exists(os.path.join( cuffdatadir, "cds.diff" )):\n- shutil.copyfile( os.path.join( cuffdatadir, "cds.diff" ), options.cds_diff_output )\n-\n- if options.splicing_diff_output and os.path.exists(os.path.join( cuffdatadir, "splicing.diff" )):\n- shutil.copyfile( os.path.join( cuffdatadir, "splicing.diff" ), options.splicing_diff_output )\n- if options.promoters_diff_output and os.path.exists(os.path.join( cuffdatadir, "promoters.diff" )):\n- shutil.copyfile( os.path.join( cuffdatadir, "promoters.diff" ), options.promoters_diff_output ) \n-\n- if options.run_info_output and os.path.exists(os.path.join( cuffdatadir, "run.info" )):\n- shutil.copyfile( os.path.join( cuffdatadir, "run.info" ), options.run_info_output ) \n- if options.read_groups_info_output and os.path.exists(os.path.join( cuffdatadir, "read_groups.info" )):\n- shutil.copyfile( os.path.join( cuffdatadir, 
"read_groups.info" ), options.read_groups_info_output ) \n-\n- except Exception, e:\n- stop_err( \'Error in cuffdiff:\\n\' + str( e ) ) \n- if options.cummeRbund_db_output:\n- try:\n- dbFile = \'cuffData.db\'\n- rscript = tempfile.NamedTemporaryFile( dir=tmp_output_dir,suffix=\'.r\' ).name\n- rscript_fh = open( rscript, \'wb\' )\n- rscript_fh.write(\'library(cummeRbund)\\n\')\n- if options.inputA and options.ref_file:\n- rscript_fh.write(\'cuff<-readCufflinks(dir = "%s", dbFile = "%s", gtfFile = "%s", genome = "%s", rebuild = T)\\n\' % (cuffdatadir,dbFile,options.inputA,options.ref_file))\n- else:\n- rscript_fh.write(\'cuff<-readCufflinks(dir = "%s", dbFile = "%s", rebuild = T)\\n\' % (cuffdatadir,dbFile))\n- rscript_fh.close()\n- cmd = ( "Rscript --vanilla %s" % rscript )\n- tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name\n- tmp_stderr = open( tmp_name, \'wb\' )\n- proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )\n- #proc = subprocess.Popen( args=cmd, shell=True)\n- returncode = proc.wait()\n- tmp_stderr.close()\n- if os.path.exists(os.path.join( cuffdatadir, dbFile )):\n- shutil.copyfile( os.path.join( cuffdatadir, dbFile ), options.cummeRbund_db_output ) \n- shutil.rmtree(os.path.join( cuffdatadir, dbFile ))\n- except Exception, e:\n- stop_err( \'Error generating cummeRbund cuffData.db:\\n\' + str( e ) ) \n- finally:\n- # Clean up temp dirs\n- if os.path.exists( tmp_output_dir ):\n- shutil.rmtree( tmp_output_dir )\n-\n-if __name__=="__main__": __main__()\n'

diff -r fdf01b3c1841 -r f109453ecfa2 cuffdiff_wrapper.xml
--- a/cuffdiff_wrapper.xml Fri Nov 08 14:54:01 2013 -0600
+++ b/cuffdiff_wrapper.xml Fri Nov 15 13:39:14 2013 -0600

[

@@ -58,13 +58,15 @@
             #if $build_cummerbund_db:
                 && echo 'library(cummeRbund)' > cuffData.r
                 #if $bias_correction.do_bias_correction == "Yes":
+                    ## cummeRbund relies on the file extension to determine gff format
+                    #set $gtf_link = '.'.join(['cuff',$gtf_input.extension])
+                    && ln -s $gtf_input $gtf_link
                     #if $bias_correction.seq_source.index_source == "history":
                         ## Custom genome from history.
-                        && echo 'cuff<-readCufflinks( dbFile = "cuffdata.db", gtfFile = "$gtf_input", genome = "$bias_correction.seq_source.ref_file", rebuild = T)' >> cuffData.r
+                        && echo 'cuff<-readCufflinks( dbFile = "cuffdata.db", gtfFile = "$gtf_link", genome = "$bias_correction.seq_source.ref_file", rebuild = T)' >> cuffData.r
                     #else:
                         ## Built-in genome.
-                        ${__get_data_table_entry__('sam_fa_indexes', 'value', $gtf_input.dbkey, 'path')}
-                        && echo 'cuff<-readCufflinks( dbFile = "cuffdata.db", gtfFile = "$gtf_input", genome = "${__get_data_table_entry__('sam_fa_indexes', 'value', $gtf_input.dbkey, 'path')}", rebuild = T)' >> cuffData.r
+                        && echo 'cuff<-readCufflinks( dbFile = "cuffdata.db", gtfFile = "$gtf_link", genome = "${__get_data_table_entry__('sam_fa_indexes', 'value', $gtf_input.dbkey, 'path')}", rebuild = T)' >> cuffData.r
                     #end if
                 #else
                     && echo 'cuff<-readCufflinks( dbFile = "cuffdata.db", rebuild = T)' >> cuffData.r
@@ -134,7 +136,9 @@
             <option value="No" selected="true">No</option>
             <option value="Yes">Yes</option>
         </param>
-        <param name="build_cummerbund_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Build cummeRbund database"/>
+        <param name="build_cummerbund_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Build cummeRbund database">
+          <help>"Perform Bias Correction" must be selected to include gtf and genome reference in the cummeRbund database.</help>
+        </param>
         <conditional name="additional">
             <param name="sAdditional" type="select" label="Set Additional Parameters? (not recommended for paired-end reads)">
                 <option value="No">No</option>

diff -r fdf01b3c1841 -r f109453ecfa2 cummerbund_wrapper.xml
--- a/cummerbund_wrapper.xml Fri Nov 08 14:54:01 2013 -0600
+++ b/cummerbund_wrapper.xml Fri Nov 15 13:39:14 2013 -0600

[

@@ -1,6 +1,10 @@
<tool id="cummerbund" name="cummeRbund" version="0.0.7">
     <description>R package designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output</description>
-
+    
     <command interpreter="python">
         cummerbund_wrapper.py
             --r-script ${script_file}
@@ -26,6 +30,30 @@
                 <param format="tabular" name="promoters_diff" type="data" label="Promoters differential expression testing"/>
                 <param format="tabular" name="splicing_diff" type="data" label="Splicing differential expression testing"/>
                 <param name="rebuild" type="hidden" value="TRUE"/>
+                <conditional name="reference">
+                    <param name="include" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Include genome reference and Feature file"/>`
+                    <when value="yes">
+                        <param name="gtf_file" type="data" format="gtf,gff3" label="Transcripts" help="A transcript GFF3 or GTF file produced by cufflinks, cuffcompare, or other source."/>
+                        <conditional name="genome">
+                            <param name="source" type="select" label="Reference sequence data">
+                                <option value="cached">Locally cached</option>
+                                <option value="history">History</option>
+                            </param>
+                            <when value="cached">
+                                <param name="ref_fasta" type="select" label="Select the reference genome that was used for cuffdiff">
+                                    <options from_data_table="all_fasta">
+                                        <filter type="sort_by" column="2" />
+                                        <validator type="no_options" message="No reference fasta files are available" />
+                                    </options>
+                                </param>
+                            </when>
+                            <when value="history">
+                                <param name="ref_file" type="data" format="fasta" label="Using reference file" />
+                            </when>
+                        </conditional>
+                    </when>
+                    <when value="no"/>
+                </conditional>
             </when>
             <when value="history">
                 <param name="input_database" type="data" format="cuffdatadb" label="Select backend database (sqlite)"/>
@@ -227,8 +255,19 @@
library("cummeRbund")

## Initialize cuff object
+#if $backend_database_source.backend_database_selector == "cuffdiff_output":
+  ## Check if gtfFile and genome are included
+  #set $gtf_link = None
+  #if $backend_database_source.reference.include:
+    #set $gtf_link = '.'.join(['cuff',$backend_database_source.reference.gtf_file.extension])
+system('ln -s $backend_database_source.reference.gtf_file $gtf_link')
+    #if $backend_database_source.reference.genome.source == 'cached':
+      #set $ref_file = $backend_database_source.reference.genome.ref_fasta
+    #else
+      #set $ref_file = $backend_database_source.reference.genome.ref_file
+    #end if
+  #end if
cuff <- readCufflinks(dir = "",
-#if $backend_database_source.backend_database_selector == "cuffdiff_output":
                          dbFile = "${output_database}",
                          geneFPKM = "${genes_fpkm_tracking}",
                          geneDiff = "${genes_exp}",
@@ -241,8 +280,13 @@
                          CDSDiff = "${cds_diff}",
                          promoterFile = "${promoters_diff}",
                          splicingFile = "${splicing_diff}",
+#if $gtf_link:
+                         gtfFile = "${gtf_link}",
+                         genome = "${ref_file}",
+#end if
                          rebuild = T)
#else:
+cuff <- readCufflinks(dir = "",
                          dbFile = "${backend_database_source.input_database}",
                          rebuild = F)
#end if