Mercurial > repos > jjohnson > cummerbund
changeset 0:da7241f92ecf
Uploaded
author | jjohnson |
---|---|
date | Mon, 04 Feb 2013 19:50:25 -0500 |
parents | |
children | ebb9a992508d |
files | ._cuffdiff_mds_plot.pl README cuffdata.py cuffdata_cummerbund.xml cuffdata_datasets.xml cuffdiff_mds_plot.pl cuffdiff_mds_plot.xml cuffdiff_wrapper.py cuffdiff_wrapper.xml cummerbund_wrapper.py cummerbund_wrapper.xml datatypes_conf.xml |
diffstat | 12 files changed, 1617 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,20 @@ +CummeRbund is an R package that is designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output. +( http://compbio.mit.edu/cummeRbund/ ) + + +Prerequisites for installing cumme=Rbund: +The linux package: libxml2-dev +In ubuntu: sudo apt-get install libxml2-dev + +R package: ggplot2 + install.packages("ggplot2",dependencies = TRUE) + +To install cummeRbund on your R server, follow the directions in: +( http://www.bioconductor.org/packages/release/bioc/html/cummeRbund.html ) + source("http://bioconductor.org/biocLite.R") + biocLite("cummeRbund") + +This galaxy tool package includes a replacement variation of the cuffdiff wrapper that will generate an output that can be used directly in cummeRbund. + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cuffdata.py Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,115 @@ +""" +CuffData +""" +import logging +import os,os.path,re +import galaxy.datatypes.data +from galaxy.datatypes.images import Html +from galaxy.datatypes.binary import Binary +from galaxy import util +from galaxy.datatypes.metadata import MetadataElement + +log = logging.getLogger(__name__) + +class CuffDiffData( Html ): + """ + CuffDiff output files: + run.info + read_groups.info + cds.count_tracking + cds.diff + cds.fpkm_tracking + cds.read_group_tracking + cds_exp.diff + gene_exp.diff + genes.count_tracking + genes.fpkm_tracking + genes.read_group_tracking + isoform_exp.diff + isoforms.count_tracking + isoforms.fpkm_tracking + isoforms.read_group_tracking + promoters.diff + splicing.diff + tss_group_exp.diff + tss_groups.count_tracking + tss_groups.fpkm_tracking + tss_groups.read_group_tracking + """ + file_ext = 'cuffdata' + is_binary = False + composite_type = 'auto_primary_file' + allow_datatype_change = False + def __init__( self, **kwd ): + Html.__init__( self, **kwd ) + self.add_composite_file('run.info', description = 'run.info', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('read_groups.info', description = 'read_groups.info', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('cds.count_tracking', description = 'cds.count_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('cds.diff', description = 'cds.diff', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('cds.fpkm_tracking', description = 'cds.fpkm_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('cds.read_group_tracking', description = 'cds.read_group_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('cds_exp.diff', description = 'cds_exp.diff', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('gene_exp.diff', description = 'gene_exp.diff', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('genes.count_tracking', description = 'genes.count_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('genes.fpkm_tracking', description = 'genes.fpkm_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('genes.read_group_tracking', description = 'genes.read_group_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('isoform_exp.diff', description = 'isoform_exp.diff', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('isoforms.count_tracking', description = 'isoforms.count_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('isoforms.fpkm_tracking', description = 'isoforms.fpkm_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('isoforms.read_group_tracking', description = 'isoforms.read_group_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('promoters.diff', description = 'promoters.diff', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('splicing.diff', description = 'splicing.diff', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('tss_group_exp.diff', description = 'tss_group_exp.diff', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('tss_groups.count_tracking', description = 'tss_groups.count_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('tss_groups.fpkm_tracking', description = 'tss_groups.fpkm_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + self.add_composite_file('tss_groups.read_group_tracking', description = 'tss_groups.read_group_tracking', mimetype = 'text/html', optional = True, is_binary = False ) + + def generate_primary_file( self, dataset = None ): + """ + This is called only at upload to write the html file + cannot rename the datasets here - they come with the default unfortunately + """ + rval = ['<html><head><title>CuffDiff Output</title></head>'] + rval.append('<body>') + rval.append('<p/>CuffDiff Outputs:<p/><ul>') + for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems(): + fn = composite_name + log.debug( "Velvet log info %s %s %s" % ('JJ generate_primary_file',fn,composite_file)) + opt_text = '' + if composite_file.optional: + opt_text = ' (optional)' + if composite_file.get('description'): + rval.append( '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % ( fn, fn, composite_file.get('description'), opt_text ) ) + else: + rval.append( '<li><a href="%s" type="text/plain">%s</a>%s</li>' % ( fn, fn, opt_text ) ) + rval.append( '</ul></body></html>' ) + return "\n".join( rval ) + + def regenerate_primary_file(self,dataset): + """ + cannot do this until we are setting metadata + """ + flist = os.listdir(dataset.extra_files_path) + rval = ['<html><head><title>CuffDiff Output</title></head>'] + rval.append('<body>') + rval.append('<p/>CuffDiff Outputs:<p/><ul>') + for i,fname in enumerate(flist): + sfname = os.path.split(fname)[-1] + rval.append( '<li><a href="%s" type="text/html">%s</a>' % ( sfname, sfname ) ) + rval.append( '</ul></body></html>' ) + f = file(dataset.file_name,'w') + f.write("\n".join( rval )) + f.write('\n') + f.close() + + def set_meta( self, dataset, **kwd ): + Html.set_meta( self, dataset, **kwd ) + self.regenerate_primary_file(dataset) + + def sniff( self, filename ): + return False + +class CuffDataDB( Binary ): + file_ext = 'cuffdata' + is_binary = True + allow_datatype_change = False
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cuffdata_cummerbund.xml Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,36 @@ +<tool id="cuffdata_cummerbund" name="Cuffdata cummeRbund DB" version="0.0.5"> + <!-- Wrapper supports Cuffdiff versions v1.3.0-v2.0 --> + <description>Create CummeRbund database</description> + <command> + ## symlink all files to job working dir + for i in ${cuffdata.extra_files_path}/*; #raw do ln -s $i ${i##*/}; done; #end raw + ## run rscript + Rscript --vanilla --quiet -e 'library(cummeRbund); cuff<-readCufflinks(dbFile="cuffData.db"); samples(cuff@genes); cuff' 2> r.stderr | sed 's/\[1\]/samples:/' + </command> + <inputs> + <param name="cuffdata" type="data" format="cuffdata" label="Cuffidif cuffdata" help=""/> + </inputs> + + <outputs> + <data format="cuffdatadb" name="cummeRbund_db" label="${tool.name} on ${on_string}: cummeRbund sqlite Database" from_work_dir="cuffData.db"/> + </outputs> + <stdio> + <exit_code range="1:" level="fatal" description="Cufflinks Err" /> + </stdio> + <tests> + </tests> + <help> +**Cuffdiff cuffdata to CummeRBund database** + +Create a CummeRBund_ SQLite database from the cuffdata output from Cuffdiff_. + +Cuffdiff_ is part of Cufflinks_. Cuffdiff find significant changes in transcript expression, splicing, and promoter use. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621 + +CummeRbund_ is a collaborative effort between the Computational Biology group led by Manolis Kellis at MIT's Computer Science and Artificial Intelligence Laboratory, and the Rinn Lab at the Harvard University department of Stem Cells and Regenerative Medicine + +.. _CummeRBund: http://compbio.mit.edu/cummeRbund/ +.. _Cufflinks: http://cufflinks.cbcb.umd.edu/ +.. _Cuffdiff: http://cufflinks.cbcb.umd.edu/manual.html#cuffdiff + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cuffdata_datasets.xml Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,189 @@ +<tool id="cuffdata_datasets" name="Cuffdata datasets" version="0.0.6"> + <!-- Wrapper supports Cuffdiff versions v1.3.0-v2.0 --> + <description>history datasets from Cuffdiff output</description> + <requirements> + <requirement type="package">cufflinks</requirement> + </requirements> + <command> + #set sel_outputs = $output_sel.__str__.split(',') + #if 'run_info' in $sel_outputs: + cat ${cuffdata.extra_files_path}/run.info > $run_info; + #end if + #if 'read_groups_info' in $sel_outputs: + cat ${cuffdata.extra_files_path}/read_groups.info > $read_groups_info; + #end if + + #if 'splicing_diff' in $sel_outputs: + cat ${cuffdata.extra_files_path}/splicing.diff > $splicing_diff; + #end if + #if 'promoters_diff' in $sel_outputs: + cat ${cuffdata.extra_files_path}/promoters.diff > $promoters_diff; + #end if + + #if 'cds_diff' in $sel_outputs: + cat ${cuffdata.extra_files_path}/cds.diff > $cds_diff; + #end if + #if 'cds_exp_diff' in $sel_outputs: + cat ${cuffdata.extra_files_path}/cds_exp.diff > $cds_exp_diff; + #end if + #if 'cds_fpkm_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/cds.fpkm_tracking > $cds_fpkm_tracking; + #end if + #if 'cds_count_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/cds.count_tracking > $cds_count_tracking; + #end if + #if 'cds_read_group_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/cds.read_group_tracking > $cds_read_group_tracking; + #end if + + #if 'tss_groups_exp_diff' in $sel_outputs: + cat ${cuffdata.extra_files_path}/tss_groups_exp.diff > $tss_groups_exp_diff; + #end if + #if 'tss_groups_fpkm_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/tss_groups.fpkm_tracking > $tss_groups_fpkm_tracking; + #end if + #if 'tss_groups_count_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/tss_groups.count_tracking > $tss_groups_count_tracking; + #end if + #if 'tss_groups_read_group_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/tss_groups.read_group_tracking > $tss_groups_read_group_tracking; + #end if + + #if 'genes_exp_diff' in $sel_outputs: + cat ${cuffdata.extra_files_path}/genes_exp.diff > $genes_exp_diff; + #end if + #if 'genes_fpkm_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/genes.fpkm_tracking > $genes_fpkm_tracking; + #end if + #if 'genes_count_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/genes.count_tracking > $genes_count_tracking; + #end if + #if 'genes_read_group_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/genes.read_group_tracking > $genes_read_group_tracking; + #end if + + #if 'isoforms_exp_diff' in $sel_outputs: + cat ${cuffdata.extra_files_path}/isoforms_exp.diff > $isoforms_exp_diff; + #end if + #if 'isoforms_fpkm_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/isoforms.fpkm_tracking > $isoforms_fpkm_tracking; + #end if + #if 'isoforms_count_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/isoforms.count_tracking > $isoforms_count_tracking; + #end if + #if 'isoforms_read_group_tracking' in $sel_outputs: + cat ${cuffdata.extra_files_path}/isoforms.read_group_tracking > $isoforms_read_group_tracking; + #end if + + </command> + <inputs> + <param name="cuffdata" type="data" format="cuffdata" label="Cuffidif cuffdata" help=""/> + <param name="output_sel" type="select" multiple="true" display="checkboxes" force_select="true" label="Select outputs for history datasets"> + <option value="run_info">run.info</option> + <option value="read_groups_info">read_groups.info</option> + <option value="splicing_diff">splicing.diff</option> + <option value="promoters_diff">promoters.diff</option> + <option value="genes_exp_diff">genes_exp.diff</option> + <option value="genes_fpkm_tracking">genes.fpkm_tracking</option> + <option value="genes_count_tracking">genes.count_tracking</option> + <option value="genes_read_group_tracking">genes.read_group_tracking</option> + <option value="isoforms_exp_diff">isoforms.exp_diff</option> + <option value="isoforms_fpkm_tracking">isoforms.fpkm_tracking</option> + <option value="isoforms_count_tracking">isoforms.count_tracking</option> + <option value="isoforms_read_group_tracking">isoforms.read_group_tracking</option> + <option value="cds_diff">cds.diff</option> + <option value="cds_exp_diff">cds_exp.diff</option> + <option value="cds_fpkm_tracking">cds.fpkm_tracking</option> + <option value="cds_count_tracking">cds.count_tracking</option> + <option value="cds_read_group_tracking">cds.read_group_tracking</option> + <option value="tss_groups_exp_diff">tss_groups_exp.diff</option> + <option value="tss_groups_fpkm_tracking">tss_groups.fpkm_tracking</option> + <option value="tss_groups_count_tracking">tss_groups.count_tracking</option> + <option value="tss_groups_read_group_tracking">tss_groups.read_group_tracking</option> + </param> + </inputs> + + <outputs> + <data format="text" name="run_info" label="${tool.name} on ${on_string}: run.info"> + <filter>'run_info' in output_sel</filter> + </data> + <data format="tabular" name="read_groups_info" label="${tool.name} on ${on_string}: read_groups.info"> + <filter>'read_groups_info' in output_sel</filter> + </data> + <data format="tabular" name="splicing_diff" label="${tool.name} on ${on_string}: splicing differential expression testing"> + <filter>'splicing_diff' in output_sel</filter> + </data> + <data format="tabular" name="promoters_diff" label="${tool.name} on ${on_string}: promoters differential expression testing"> + <filter>'promoters_diff' in output_sel</filter> + </data> + <data format="tabular" name="cds_diff" label="${tool.name} on ${on_string}: CDS overloading diffential expression testing"> + <filter>'cds_diff' in output_sel</filter> + </data> + <data format="tabular" name="cds_exp_diff" label="${tool.name} on ${on_string}: CDS differential expression testing"> + <filter>'cds_exp_diff' in output_sel</filter> + </data> + <data format="tabular" name="cds_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM tracking"> + <filter>'cds_fpkm_tracking' in output_sel</filter> + </data> + <data format="tabular" name="cds_count_tracking" label="${tool.name} on ${on_string}: CDS counts"> + <filter>'cds_count_tracking' in output_sel</filter> + </data> + <data format="tabular" name="cds_read_group_tracking" label="${tool.name} on ${on_string}: CDS Read Group tracking"> + <filter>'cds_read_group_tracking' in output_sel</filter> + </data> + <data format="tabular" name="tss_groups_exp_diff" label="${tool.name} on ${on_string}: TSS groups differential expression testing"> + <filter>'tss_groups_exp_diff' in output_sel</filter> + </data> + <data format="tabular" name="tss_groups_fpkm_tracking" label="${tool.name} on ${on_string}: TSS groups FPKM tracking"> + <filter>'tss_groups_fpkm_tracking' in output_sel</filter> + </data> + <data format="tabular" name="tss_groups_count_tracking" label="${tool.name} on ${on_string}: TSS groups counts"> + <filter>'tss_groups_count_tracking' in output_sel</filter> + </data> + <data format="tabular" name="tss_groups_read_group_tracking" label="${tool.name} on ${on_string}: TSS groups Read Group tracking"> + <filter>'tss_groups_read_group_tracking' in output_sel</filter> + </data> + <data format="tabular" name="isoforms_exp_diff" label="${tool.name} on ${on_string}: transcript differential expression testing"> + <filter>'isoforms_exp_diff' in output_sel</filter> + </data> + <data format="tabular" name="isoforms_fpkm_tracking" label="${tool.name} on ${on_string}: transcript FPKM tracking"> + <filter>'isoforms_fpkm_tracking' in output_sel</filter> + </data> + <data format="tabular" name="isoforms_count_tracking" label="${tool.name} on ${on_string}: transcript counts"> + <filter>'isoforms_count_tracking' in output_sel</filter> + </data> + <data format="tabular" name="isoforms_read_group_tracking" label="${tool.name} on ${on_string}: transcript Read Group tracking"> + <filter>'isoforms_read_group_tracking' in output_sel</filter> + </data> + <data format="tabular" name="genes_exp_diff" label="${tool.name} on ${on_string}: gene differential expression testing"> + <filter>'genes_exp_diff' in output_sel</filter> + </data> + <data format="tabular" name="genes_fpkm_tracking" label="${tool.name} on ${on_string}: gene FPKM tracking"> + <filter>'genes_fpkm_tracking' in output_sel</filter> + </data> + <data format="tabular" name="genes_count_tracking" label="${tool.name} on ${on_string}: gene counts"> + <filter>'genes_count_tracking' in output_sel</filter> + </data> + <data format="tabular" name="genes_read_group_tracking" label="${tool.name} on ${on_string}: gene Read Group tracking"> + <filter>'genes_read_group_tracking' in output_sel</filter> + </data> + </outputs> + <stdio> + <exit_code range="1:" level="fatal" description="Cufflinks Err" /> + </stdio> + + + <tests> + </tests> + + <help> +**Cuffdata to history datasets** + +Copy Cuffdiff output files from a cuffdata html page to datasets in your history. + +Cuffdiff is part of Cufflinks_. Cuffdiff find significant changes in transcript expression, splicing, and promoter use. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621 + +.. _Cufflinks: http://cufflinks.cbcb.umd.edu/ + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cuffdiff_mds_plot.pl Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,83 @@ +#!/usr/bin/perl -w + +############################################################### +# cuffdiff_mds_plot.pl +# John Garbe +# Septmeber 2012 +# +# Given a sample tracking file from cuffdiff2, convert it to +# the proper format for loading into R and generating an MDS plot +# +################################################################ + +# check to make sure having correct files +my $usage = "usage: cuffdiff_mds_plot.pl [TABULAR.in] [TABULAR.out] [PLOT.png]\n"; +die $usage unless @ARGV == 3; + +#get the input arguments +my $inputFile = $ARGV[0]; +my $outputFile = $ARGV[1]; +my $plotFile = $ARGV[2]; + +#Open files +open (INPUT, "<", $inputFile) || die("Could not open file $inputFile \n"); +open (OUTPUT, ">", $outputFile) || die("Could not open file $outputFile \n"); +open (PLOT, ">", $plotFile) || die("Could not open file $plotFile \n"); + +# header looks like this: +# tracking_id condition replicate raw_frags internal_scaled_frags external_scaled_frags FPKM effective_length status +my $header = <INPUT>; + +# read in the sample tracking file +while (<INPUT>) { + chomp; + @line = split /\t/; + $tracking_id{$line[0]} = 1; + $sample = $line[1] . "-" . $line[2]; + $fpkm{$sample}{$line[0]} = $line[6]; +} +close(INPUT); + +@sorted_tracking_id = sort( keys(%tracking_id)); + +# print out header +foreach $tracking_id (@sorted_tracking_id) { + print OUTPUT "\t$tracking_id"; +} +print OUTPUT "\n"; + +# print out data +foreach $sample (keys(%fpkm)) { + print OUTPUT "$sample"; + + foreach $tracking_id (@sorted_tracking_id) { + print OUTPUT "\t$fpkm{$sample}{$tracking_id}"; + } + + print OUTPUT "\n"; +} +close(OUTPUT); + +#variables to store the name of the R script file +my $r_script = "cuffinks2mdf.r"; + +open(Rcmd,">", $r_script) or die "Cannot open $r_script \n\n"; +print Rcmd " + datat <- read.table(\"$outputFile\"); + cmd <- cmdscale(dist(datat)); + png(filename=\"$plotFile\"); + plot(cmd[,1], cmd[,2], type=\"n\", ann=FALSE); + text(cmd[,1], cmd[,2], labels = rownames(datat)); + title(main=\"Multidimensional Scaling Plot\"); + title(xlab= \"Dimension 1\"); + title(ylab= \"Dimension 2\"); + dev.off(); + #eof" . "\n"; + +close Rcmd; + + +system("R --no-restore --no-save --no-readline < $r_script > $r_script.out"); +#close the input and output files +close(PLOT); +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cuffdiff_mds_plot.xml Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,20 @@ +<tool id="cuffdiff_mds_plot" name="Cuffdiff MDS Plot" version="1.0"> + <description>MultiDimensional Scaling (MDS) plot from cuffdiff read_group_tracking</description> + <command interpreter="perl"> + cuffdiff_mds_plot.pl $input data.tmp $plot + </command> + <inputs> + <param format="tabular" name="input" type="data" label="A cuffdiff2 tracking file" + help="any of the *.read_group_tracking files produced by cuffdiff2"/> + </inputs> + <outputs> + <data format="png" name="plot" label="${tool.name} on ${on_string}"/> + </outputs> + <tests> + <test> + </test> + </tests> + <help> + Generates a multidimensional scaling (MDS) plot of a read_group_tracking output file from cuffdiff ver2. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cuffdiff_wrapper.py Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,321 @@ +#!/usr/bin/env python + +# Wrapper supports Cuffdiff versions v1.3.0-v2.0 + +import optparse, os, shutil, subprocess, sys, tempfile + +def group_callback( option, op_str, value, parser ): + groups = [] + flist = [] + for arg in parser.rargs: + arg = arg.strip() + if arg[0] is "-": + break + elif arg[0] is ",": + groups.append(flist) + flist = [] + else: + flist.append(arg) + groups.append(flist) + + setattr(parser.values, option.dest, groups) + +def label_callback( option, op_str, value, parser ): + labels = [] + for arg in parser.rargs: + arg = arg.strip() + if arg[0] is "-": + break + else: + labels.append(arg) + + setattr(parser.values, option.dest, labels) + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +# Copied from sam_to_bam.py: +def check_seq_file( dbkey, cached_seqs_pointer_file ): + seq_path = '' + for line in open( cached_seqs_pointer_file ): + line = line.rstrip( '\r\n' ) + if line and not line.startswith( '#' ) and line.startswith( 'index' ): + fields = line.split( '\t' ) + if len( fields ) < 3: + continue + if fields[1] == dbkey: + seq_path = fields[2].strip() + break + return seq_path + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + + # Cuffdiff options. + parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' ) + parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' ) + parser.add_option( '-m', '--inner-mean-dist', dest='inner_mean_dist', help='This is the expected (mean) inner distance between mate pairs. \ + For, example, for paired end runs with fragments selected at 300bp, \ + where each end is 50bp, you should set -r to be 200. The default is 45bp.') + parser.add_option( '-c', '--min-alignment-count', dest='min_alignment_count', help='The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not signficant, and the locus\' observed changes don\'t contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).' ) + parser.add_option( '--FDR', dest='FDR', help='The allowed false discovery rate. The default is 0.05.' ) + parser.add_option( '-u', '--multi-read-correct', dest='multi_read_correct', action="store_true", help='Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome') + + # Advanced Options: + parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' ) + parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' ) + + # Wrapper / Galaxy options. + parser.add_option( '-f', '--files', dest='groups', action="callback", callback=group_callback, help="Groups to be processed, groups are separated by spaces, replicates in a group comma separated. group1_rep1,group1_rep2 group2_rep1,group2_rep2, ..., groupN_rep1, groupN_rep2" ) + parser.add_option( '-A', '--inputA', dest='inputA', help='A transcript GTF file produced by cufflinks, cuffcompare, or other source.') + parser.add_option( '-1', '--input1', dest='input1', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' ) + parser.add_option( '-2', '--input2', dest='input2', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' ) + + # Label options + parser.add_option('-L', '--labels', dest='labels', action="callback", callback=label_callback, help="Labels for the groups the replicates are in.") + + # Normalization options. + parser.add_option( "-N", "--quartile-normalization", dest="do_normalization", action="store_true" ) + + # Bias correction options. + parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.') + parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) + parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) + + # Outputs. + parser.add_option( "--isoforms_fpkm_tracking_output", dest="isoforms_fpkm_tracking_output" ) + parser.add_option( "--genes_fpkm_tracking_output", dest="genes_fpkm_tracking_output" ) + parser.add_option( "--cds_fpkm_tracking_output", dest="cds_fpkm_tracking_output" ) + parser.add_option( "--tss_groups_fpkm_tracking_output", dest="tss_groups_fpkm_tracking_output" ) + parser.add_option( "--isoforms_read_group_tracking_output", dest="isoforms_read_group_tracking_output", default=None) + parser.add_option( "--genes_read_group_tracking_output", dest="genes_read_group_tracking_output", default=None ) + parser.add_option( "--cds_read_group_tracking_output", dest="cds_read_group_tracking_output", default=None ) + parser.add_option( "--tss_groups_read_group_tracking_output", dest="tss_groups_read_group_tracking_output", default=None ) + parser.add_option( "--isoforms_exp_output", dest="isoforms_exp_output" ) + parser.add_option( "--genes_exp_output", dest="genes_exp_output" ) + parser.add_option( "--tss_groups_exp_output", dest="tss_groups_exp_output" ) + parser.add_option( "--cds_exp_fpkm_tracking_output", dest="cds_exp_fpkm_tracking_output" ) + parser.add_option( "--cds_diff_output", dest="cds_diff_output" ) + parser.add_option( "--isoforms_count_tracking_output", dest="isoforms_count_tracking_output" ) + parser.add_option( "--genes_count_tracking_output", dest="genes_count_tracking_output" ) + parser.add_option( "--cds_count_tracking_output", dest="cds_count_tracking_output" ) + parser.add_option( "--tss_groups_count_tracking_output", dest="tss_groups_count_tracking_output" ) + parser.add_option( "--splicing_diff_output", dest="splicing_diff_output" ) + parser.add_option( "--cds_diff_output", dest="cds_diff_output" ) + parser.add_option( "--promoters_diff_output", dest="promoters_diff_output" ) + parser.add_option( "--run_info_output", dest="run_info_output" ) + parser.add_option( "--read_groups_info_output", dest="read_groups_info_output" ) + parser.add_option( "--cuffdatadir", dest="cuffdatadir", default=None) + parser.add_option( "--cummeRbund_db_output", dest="cummeRbund_db_output", default=None) + + (options, args) = parser.parse_args() + + # output version # of tool + try: + tmp = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp, 'wb' ) + proc = subprocess.Popen( args='cuffdiff --no-update-check 2>&1', shell=True, stdout=tmp_stdout ) + tmp_stdout.close() + returncode = proc.wait() + stdout = None + for line in open( tmp_stdout.name, 'rb' ): + if line.lower().find( 'cuffdiff v' ) >= 0: + stdout = line.strip() + break + if stdout: + sys.stdout.write( '%s\n' % stdout ) + else: + raise Exception + except: + sys.stdout.write( 'Could not determine Cuffdiff version\n' ) + + # Make temp directory for output. + tmp_output_dir = tempfile.mkdtemp() + cuffdatadir = options.cuffdatadir if options.cuffdatadir else tmp_output_dir + if not os.path.exists( cuffdatadir ): + os.makedirs( cuffdatadir ) + + + # If doing bias correction, set/link to sequence file. + if options.do_bias_correction: + if options.ref_file != 'None': + # Sequence data from history. + # Create symbolic link to ref_file so that index will be created in working directory. + seq_path = os.path.join( cuffdatadir, "ref.fa" ) + os.symlink( options.ref_file, seq_path ) + else: + # Sequence data from loc file. + cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) + if not os.path.exists( cached_seqs_pointer_file ): + stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) + # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, + # and the equCab2.fa file will contain fasta sequences. + seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) + if seq_path == '': + stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey ) + + # Build command. + + # Base; always use quiet mode to avoid problems with storing log output. + cmd = "cuffdiff --no-update-check -q" + + # Add options. + if options.inner_dist_std_dev: + cmd += ( " -s %i" % int ( options.inner_dist_std_dev ) ) + if options.num_threads: + cmd += ( " -p %i" % int ( options.num_threads ) ) + if options.inner_mean_dist: + cmd += ( " -m %i" % int ( options.inner_mean_dist ) ) + if options.min_alignment_count: + cmd += ( " -c %i" % int ( options.min_alignment_count ) ) + if options.FDR: + cmd += ( " --FDR %f" % float( options.FDR ) ) + if options.multi_read_correct: + cmd += ( " -u" ) + if options.num_importance_samples: + cmd += ( " --num-importance-samples %i" % int ( options.num_importance_samples ) ) + if options.max_mle_iterations: + cmd += ( " --max-mle-iterations %i" % int ( options.max_mle_iterations ) ) + if options.do_normalization: + cmd += ( " -N" ) + if options.do_bias_correction: + cmd += ( " -b %s" % seq_path ) + + # Add inputs. + # For replicate analysis: group1_rep1,group1_rep2 groupN_rep1,groupN_rep2 + if options.groups: + cmd += " --labels " + for label in options.labels: + cmd += label + "," + cmd = cmd[:-1] + + cmd += " " + options.inputA + " " + + for group in options.groups: + for filename in group: + cmd += filename + "," + cmd = cmd[:-1] + " " + else: + cmd += " " + options.inputA + " " + options.input1 + " " + options.input2 + + # Debugging. + print cmd + + # Run command. + try: + tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name + tmp_stderr = open( tmp_name, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=cuffdatadir, stderr=tmp_stderr.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + + # Get stderr, allowing for case where it's very large. + tmp_stderr = open( tmp_name, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + + # Error checking. + if returncode != 0: + raise Exception, stderr + + # check that there are results in the output file + if len( open( os.path.join( cuffdatadir, "isoforms.fpkm_tracking" ), 'rb' ).read().strip() ) == 0: + raise Exception, 'The main output file is empty, there may be an error with your input file or settings.' + except Exception, e: + stop_err( 'Error running cuffdiff. ' + str( e ) ) + + + # Copy output files from tmp directory to specified files. + try: + try: + if options.isoforms_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.fpkm_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "isoforms.fpkm_tracking" ), options.isoforms_fpkm_tracking_output ) + if options.genes_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.fpkm_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "genes.fpkm_tracking" ), options.genes_fpkm_tracking_output ) + if options.cds_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.fpkm_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "cds.fpkm_tracking" ), options.cds_fpkm_tracking_output ) + if options.tss_groups_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.fpkm_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.fpkm_tracking" ), options.tss_groups_fpkm_tracking_output ) + + if options.isoforms_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.read_group_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "isoforms.read_group_tracking" ), options.isoforms_read_group_tracking_output ) + if options.genes_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.read_group_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "genes.read_group_tracking" ), options.genes_read_group_tracking_output ) + if options.cds_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.read_group_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "cds.read_group_tracking" ), options.cds_read_group_tracking_output ) + if options.tss_groups_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.read_group_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.read_group_tracking" ), options.tss_groups_read_group_tracking_output ) + + if options.isoforms_exp_output and os.path.exists(os.path.join( cuffdatadir, "isoform_exp.diff" )): + shutil.copyfile( os.path.join( cuffdatadir, "isoform_exp.diff" ), options.isoforms_exp_output ) + if options.genes_exp_output and os.path.exists(os.path.join( cuffdatadir, "gene_exp.diff" )): + shutil.copyfile( os.path.join( cuffdatadir, "gene_exp.diff" ), options.genes_exp_output ) + if options.cds_exp_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds_exp.diff" )): + shutil.copyfile( os.path.join( cuffdatadir, "cds_exp.diff" ), options.cds_exp_fpkm_tracking_output ) + if options.tss_groups_exp_output and os.path.exists(os.path.join( cuffdatadir, "tss_group_exp.diff" )): + shutil.copyfile( os.path.join( cuffdatadir, "tss_group_exp.diff" ), options.tss_groups_exp_output ) + + if options.isoforms_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.count_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "isoforms.count_tracking" ), options.isoforms_count_tracking_output ) + if options.genes_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.count_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "genes.count_tracking" ), options.genes_count_tracking_output ) + if options.cds_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.count_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "cds.count_tracking" ), options.cds_count_tracking_output ) + if options.tss_groups_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.count_tracking" )): + shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.count_tracking" ), options.tss_groups_count_tracking_output ) + + if options.cds_diff_output and os.path.exists(os.path.join( cuffdatadir, "cds.diff" )): + shutil.copyfile( os.path.join( cuffdatadir, "cds.diff" ), options.cds_diff_output ) + + if options.splicing_diff_output and os.path.exists(os.path.join( cuffdatadir, "splicing.diff" )): + shutil.copyfile( os.path.join( cuffdatadir, "splicing.diff" ), options.splicing_diff_output ) + if options.promoters_diff_output and os.path.exists(os.path.join( cuffdatadir, "promoters.diff" )): + shutil.copyfile( os.path.join( cuffdatadir, "promoters.diff" ), options.promoters_diff_output ) + + if options.run_info_output and os.path.exists(os.path.join( cuffdatadir, "run.info" )): + shutil.copyfile( os.path.join( cuffdatadir, "run.info" ), options.run_info_output ) + if options.read_groups_info_output and os.path.exists(os.path.join( cuffdatadir, "read_groups.info" )): + shutil.copyfile( os.path.join( cuffdatadir, "read_groups.info" ), options.read_groups_info_output ) + + except Exception, e: + stop_err( 'Error in cuffdiff:\n' + str( e ) ) + if options.cummeRbund_db_output: + try: + dbFile = 'cuffData.db' + rscript = tempfile.NamedTemporaryFile( dir=tmp_output_dir,suffix='.r' ).name + rscript_fh = open( rscript, 'wb' ) + rscript_fh.write('library(cummeRbund)\n') + if options.inputA and options.ref_file: + rscript_fh.write('cuff<-readCufflinks(dir = "%s", dbFile = "%s", gtfFile = "%s", genome = "%s", rebuild = T)\n' % (cuffdatadir,dbFile,options.inputA,options.ref_file)) + else: + rscript_fh.write('cuff<-readCufflinks(dir = "%s", dbFile = "%s", rebuild = T)\n' % (cuffdatadir,dbFile)) + rscript_fh.close() + cmd = ( "Rscript --vanilla %s" % rscript ) + tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name + tmp_stderr = open( tmp_name, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() ) + #proc = subprocess.Popen( args=cmd, shell=True) + returncode = proc.wait() + tmp_stderr.close() + if os.path.exists(os.path.join( cuffdatadir, dbFile )): + shutil.copyfile( os.path.join( cuffdatadir, dbFile ), options.cummeRbund_db_output ) + shutil.rmtree(os.path.join( cuffdatadir, dbFile )) + except Exception, e: + stop_err( 'Error generating cummeRbund cuffData.db:\n' + str( e ) ) + finally: + # Clean up temp dirs + if os.path.exists( tmp_output_dir ): + shutil.rmtree( tmp_output_dir ) + +if __name__=="__main__": __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cuffdiff_wrapper.xml Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,383 @@ +<tool id="cuffdiff_cummerbund" name="Cuffdiff" version="0.0.6"> + <!-- Wrapper supports Cuffdiff versions v1.3.0-v2.0 --> + <description>find significant changes in transcript expression, splicing, and promoter use</description> + <requirements> + <requirement type="package">cufflinks</requirement> + </requirements> + <command interpreter="python"> + #set sel_outputs = $output_sel.__str__.split(',') + cuffdiff_wrapper.py + --FDR=$fdr + --num-threads="4" + --min-alignment-count=$min_alignment_count + + #if 'cuffdata' in $sel_outputs or not $output_sel: + --cuffdatadir=$cuffdata.extra_files_path + #end if + #if 'cummeRbund_db' in $sel_outputs: + --cummeRbund_db=$cummeRbund_db + #end if + + #if 'isoforms_fpkm_tracking' in $sel_outputs: + --isoforms_fpkm_tracking_output=$isoforms_fpkm_tracking + #end if + #if 'genes_fpkm_tracking' in $sel_outputs: + --genes_fpkm_tracking_output=$genes_fpkm_tracking + #end if + #if 'cds_fpkm_tracking' in $sel_outputs: + --cds_fpkm_tracking_output=$cds_fpkm_tracking + #end if + #if 'tss_groups_fpkm_tracking' in $sel_outputs: + --tss_groups_fpkm_tracking_output=$tss_groups_fpkm_tracking + #end if + #if 'isoforms_exp_diff' in $sel_outputs: + --isoforms_exp_output=$isoforms_exp_diff + #end if + #if 'genes_exp_diff' in $sel_outputs: + --genes_exp_output=$genes_exp_diff + #end if + #if 'tss_groups_exp_diff' in $sel_outputs: + --tss_groups_exp_output=$tss_groups_exp_diff + #end if + #if 'cds_exp_fpkm_tracking' in $sel_outputs: + --cds_exp_fpkm_tracking_output=$cds_exp_fpkm_tracking + #end if + #if 'splicing_diff' in $sel_outputs: + --splicing_diff_output=$splicing_diff + #end if + #if 'cds_diff' in $sel_outputs: + --cds_diff_output=$cds_diff + #end if + #if 'promoters_diff' in $sel_outputs: + --promoters_diff_output=$promoters_diff + #end if + #if 'cds_read_group_tracking' in $sel_outputs: + --cds_read_group_tracking=$cds_read_group_tracking + #end if + #if 'tss_groups_read_group_tracking' in $sel_outputs: + --tss_groups_read_group_tracking=$tss_groups_read_group_tracking + #end if + #if 'genes_read_group_tracking' in $sel_outputs: + --genes_read_group_tracking=$genes_read_group_tracking + #end if + #if 'isoforms_read_group_tracking' in $sel_outputs: + --isoforms_read_group_tracking=$isoforms_read_group_tracking + #end if + + ## Set advanced data parameters? + #if $additional.sAdditional == "Yes": + -m $additional.frag_mean_len + -s $additional.frag_len_std_dev + #end if + + ## Normalization? + #if str($do_normalization) == "Yes": + -N + #end if + + ## Multi-read correct? + #if str($multiread_correct) == "Yes": + -u + #end if + + ## Bias correction? + #if $bias_correction.do_bias_correction == "Yes": + -b + #if $bias_correction.seq_source.index_source == "history": + --ref_file=$bias_correction.seq_source.ref_file + #else: + --ref_file="None" + #end if + --dbkey=${gtf_input.metadata.dbkey} + --index_dir=${GALAXY_DATA_INDEX_DIR} + #end if + + ## Inputs. + --inputA=$gtf_input + #if $group_analysis.do_groups == "No": + --input1=$aligned_reads1 + --input2=$aligned_reads2 + #else: + ## Replicates. + --labels + #for $group in $group_analysis.groups + ${group.group} + #end for + --files + #for $group in $group_analysis.groups + #for $file in $group.files: + ${file.file} + #end for + , + #end for + #end if + + </command> + <inputs> + <param format="gtf,gff3" name="gtf_input" type="data" label="Transcripts" help="A transcript GFF3 or GTF file produced by cufflinks, cuffcompare, or other source."/> + <conditional name="group_analysis"> + <param name="do_groups" type="select" label="Perform replicate analysis" help="Perform cuffdiff with replicates in each group."> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <repeat name="groups" title="Group"> + <param name="group" title="Group name" type="text" label="Group name (no spaces or commas)"/> + <repeat name="files" title="Replicate"> + <param name="file" label="Add file" type="data" format="sam,bam"/> + </repeat> + </repeat> + </when> + <when value="No"> + <param format="sam,bam" name="aligned_reads1" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/> + <param format="sam,bam" name="aligned_reads2" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/> + </when> + </conditional> + + <param name="fdr" type="float" value="0.05" label="False Discovery Rate" help="The allowed false discovery rate."/> + + <param name="min_alignment_count" type="integer" value="10" label="Min Alignment Count" help="The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples."/> + + <param name="do_normalization" type="select" label="Perform quartile normalization" help="Removes top 25% of genes from FPKM denominator to improve accuracy of differential expression calls for low abundance transcripts."> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + + <param name="multiread_correct" type="select" label="Use multi-read correct" help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome."> + <option value="No" selected="true">No</option> + <option value="Yes">Yes</option> + </param> + + <conditional name="bias_correction"> + <param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates."> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <conditional name="seq_source"> + <param name="index_source" type="select" label="Reference sequence data"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + </param> + <when value="cached"></when> + <when value="history"> + <param name="ref_file" type="data" format="fasta" label="Using reference file" /> + </when> + </conditional> + </when> + <when value="No"></when> + </conditional> + + <conditional name="additional"> + <param name="sAdditional" type="select" label="Set Additional Parameters? (not recommended)"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No"></when> + <when value="Yes"> + <param name="frag_mean_len" type="integer" value="200" label="Average Fragment Length"/> + <param name="frag_len_std_dev" type="integer" value="80" label="Fragment Length Standard Deviation"/> + </when> + </conditional> + + <param name="output_sel" type="select" multiple="true" display="checkboxes" force_select="true" label="Select outputs for history datasets"> + <option value="cuffdata">cuffdata - html page with links to cuffdiff outputs</option> + <option value="cummeRbund_db">cummeRbund database</option> + <option value="run_info">run.info</option> + <option value="read_groups_info">read_groups.info</option> + <option value="splicing_diff">splicing.diff</option> + <option value="promoters_diff">promoters.diff</option> + <option value="genes_exp_diff">genes_exp.diff</option> + <option value="genes_fpkm_tracking">genes.fpkm_tracking</option> + <option value="genes_count_tracking">genes.count_tracking</option> + <option value="genes_read_group_tracking">genes.read_group_tracking</option> + <option value="isoforms_exp_diff">isoforms.exp_diff</option> + <option value="isoforms_fpkm_tracking">isoforms.fpkm_tracking</option> + <option value="isoforms_count_tracking">isoforms.count_tracking</option> + <option value="isoforms_read_group_tracking">isoforms.read_group_tracking</option> + <option value="cds_diff">cds.diff</option> + <option value="cds_exp_diff">cds_exp.diff</option> + <option value="cds_fpkm_tracking">cds.fpkm_tracking</option> + <option value="cds_count_tracking">cds.count_tracking</option> + <option value="cds_read_group_tracking">cds.read_group_tracking</option> + <option value="tss_groups_exp_diff">tss_groups_exp.diff</option> + <option value="tss_groups_fpkm_tracking">tss_groups.fpkm_tracking</option> + <option value="tss_groups_count_tracking">tss_groups.count_tracking</option> + <option value="tss_groups_read_group_tracking">tss_groups.read_group_tracking</option> + </param> + + </inputs> + + <outputs> + <data format="text" name="run_info" label="${tool.name} on ${on_string}: run.info"> + <filter>output_sel and 'run_info' in output_sel</filter> + </data> + <data format="tabular" name="read_groups_info" label="${tool.name} on ${on_string}: read_groups.info"> + <filter>output_sel and 'read_groups_info' in output_sel</filter> + </data> + <data format="tabular" name="splicing_diff" label="${tool.name} on ${on_string}: splicing differential expression testing"> + <filter>output_sel and 'splicing_diff' in output_sel</filter> + </data> + <data format="tabular" name="promoters_diff" label="${tool.name} on ${on_string}: promoters differential expression testing"> + <filter>output_sel and 'promoters_diff' in output_sel</filter> + </data> + <data format="tabular" name="cds_diff" label="${tool.name} on ${on_string}: CDS overloading diffential expression testing"> + <filter>output_sel and 'cds_diff' in output_sel</filter> + </data> + <data format="tabular" name="cds_exp_diff" label="${tool.name} on ${on_string}: CDS differential expression testing"> + <filter>output_sel and 'cds_exp_diff' in output_sel</filter> + </data> + <data format="tabular" name="cds_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM tracking"> + <filter>output_sel and 'cds_fpkm_tracking' in output_sel</filter> + </data> + <data format="tabular" name="cds_count_tracking" label="${tool.name} on ${on_string}: CDS counts"> + <filter>output_sel and 'cds_count_tracking' in output_sel</filter> + </data> + <data format="tabular" name="cds_read_group_tracking" label="${tool.name} on ${on_string}: CDS Read Group tracking"> + <filter>output_sel and 'cds_read_group_tracking' in output_sel</filter> + </data> + <data format="tabular" name="tss_groups_exp_diff" label="${tool.name} on ${on_string}: TSS groups differential expression testing"> + <filter>output_sel and 'tss_groups_exp_diff' in output_sel</filter> + </data> + <data format="tabular" name="tss_groups_fpkm_tracking" label="${tool.name} on ${on_string}: TSS groups FPKM tracking"> + <filter>output_sel and 'tss_groups_fpkm_tracking' in output_sel</filter> + </data> + <data format="tabular" name="tss_groups_count_tracking" label="${tool.name} on ${on_string}: TSS groups counts"> + <filter>output_sel and 'tss_groups_count_tracking' in output_sel</filter> + </data> + <data format="tabular" name="tss_groups_read_group_tracking" label="${tool.name} on ${on_string}: TSS groups Read Group tracking"> + <filter>output_sel and 'tss_groups_read_group_tracking' in output_sel</filter> + </data> + <data format="tabular" name="isoforms_exp_diff" label="${tool.name} on ${on_string}: transcript differential expression testing"> + <filter>output_sel and 'isoforms_exp_diff' in output_sel</filter> + </data> + <data format="tabular" name="isoforms_fpkm_tracking" label="${tool.name} on ${on_string}: transcript FPKM tracking"> + <filter>output_sel and 'isoforms_fpkm_tracking' in output_sel</filter> + </data> + <data format="tabular" name="isoforms_count_tracking" label="${tool.name} on ${on_string}: transcript counts"> + <filter>output_sel and 'isoforms_count_tracking' in output_sel</filter> + </data> + <data format="tabular" name="isoforms_read_group_tracking" label="${tool.name} on ${on_string}: transcript Read Group tracking"> + <filter>output_sel and 'isoforms_read_group_tracking' in output_sel</filter> + </data> + <data format="tabular" name="genes_exp_diff" label="${tool.name} on ${on_string}: gene differential expression testing"> + <filter>output_sel and 'genes_exp_diff' in output_sel</filter> + </data> + <data format="tabular" name="genes_fpkm_tracking" label="${tool.name} on ${on_string}: gene FPKM tracking"> + <filter>output_sel and 'genes_fpkm_tracking' in output_sel</filter> + </data> + <data format="tabular" name="genes_count_tracking" label="${tool.name} on ${on_string}: gene counts"> + <filter>output_sel and 'genes_count_tracking' in output_sel</filter> + </data> + <data format="tabular" name="genes_read_group_tracking" label="${tool.name} on ${on_string}: gene Read Group tracking"> + <filter>output_sel and 'genes_read_group_tracking' in output_sel</filter> + </data> + <data format="cuffdata" name="cuffdata" label="${tool.name} on ${on_string}: cuffdata" > + <filter>not output_sel or output_sel and 'cuffdata' in output_sel</filter> + </data> + <data format="cuffdatadb" name="cummeRbund_db" label="${tool.name} on ${on_string}: cummeRbund sqlite Database" > + <filter>output_sel and 'cummeRbund_db' in output_sel</filter> + </data> + </outputs> + <stdio> + <exit_code range="1:" level="fatal" description="Cufflinks Err" /> + </stdio> + + + <tests> + <test> + <!-- + cuffdiff cuffcompare_out5.gtf cuffdiff_in1.sam cuffdiff_in2.sam + --> + <param name="gtf_input" value="cuffcompare_out5.gtf" ftype="gtf" /> + <param name="do_groups" value="No" /> + <param name="aligned_reads1" value="cuffdiff_in1.sam" ftype="sam" /> + <param name="aligned_reads2" value="cuffdiff_in2.sam" ftype="sam" /> + <!-- Defaults. --> + <param name="fdr" value="0.05" /> + <param name="min_alignment_count" value="0" /> + <param name="do_bias_correction" value="No" /> + <param name="do_normalization" value="No" /> + <param name="multiread_correct" value="No"/> + <param name="sAdditional" value="No"/> + <!-- + Line diffs are needed because cuffdiff does not produce deterministic output. + TODO: can we find datasets that lead to deterministic behavior? + --> + <output name="splicing_diff" file="cuffdiff_out9.txt"/> + <output name="promoters_diff" file="cuffdiff_out10.txt"/> + <output name="cds_diff" file="cuffdiff_out11.txt"/> + <output name="cds_exp_fpkm_tracking" file="cuffdiff_out4.txt"/> + <output name="cds_fpkm_tracking" file="cuffdiff_out8.txt"/> + <output name="tss_groups_exp" file="cuffdiff_out3.txt" lines_diff="200"/> + <output name="tss_groups_fpkm_tracking" file="cuffdiff_out7.txt"/> + <output name="genes_exp" file="cuffdiff_out2.txt" lines_diff="200"/> + <output name="genes_fpkm_tracking" file="cuffdiff_out6.txt" lines_diff="200"/> + <output name="isoforms_exp" file="cuffdiff_out1.txt" lines_diff="200"/> + <output name="isoforms_fpkm_tracking" file="cuffdiff_out5.txt" lines_diff="200"/> + </test> + </tests> + + <help> +**Cuffdiff Overview** + +Cuffdiff is part of Cufflinks_. Cuffdiff find significant changes in transcript expression, splicing, and promoter use. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621 + +.. _Cufflinks: http://cufflinks.cbcb.umd.edu/ + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + +.. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffdiff + +------ + +**Input format** + +Cuffdiff takes Cufflinks or Cuffcompare GTF files as input along with two SAM files containing the fragment alignments for two or more samples. + +------ + +**Outputs** + +Cuffdiff produces many output files: + +1. Transcript FPKM expression tracking. +2. Gene FPKM expression tracking; tracks the summed FPKM of transcripts sharing each gene_id +3. Primary transcript FPKM tracking; tracks the summed FPKM of transcripts sharing each tss_id +4. Coding sequence FPKM tracking; tracks the summed FPKM of transcripts sharing each p_id, independent of tss_id +5. Transcript differential FPKM. +6. Gene differential FPKM. Tests difference sin the summed FPKM of transcripts sharing each gene_id +7. Primary transcript differential FPKM. Tests difference sin the summed FPKM of transcripts sharing each tss_id +8. Coding sequence differential FPKM. Tests difference sin the summed FPKM of transcripts sharing each p_id independent of tss_id +9. Differential splicing tests: this tab delimited file lists, for each primary transcript, the amount of overloading detected among its isoforms, i.e. how much differential splicing exists between isoforms processed from a single primary transcript. Only primary transcripts from which two or more isoforms are spliced are listed in this file. +10. Differential promoter tests: this tab delimited file lists, for each gene, the amount of overloading detected among its primary transcripts, i.e. how much differential promoter use exists between samples. Only genes producing two or more distinct primary transcripts (i.e. multi-promoter genes) are listed here. +11. Differential CDS tests: this tab delimited file lists, for each gene, the amount of overloading detected among its coding sequences, i.e. how much differential CDS output exists between samples. Only genes producing two or more distinct CDS (i.e. multi-protein genes) are listed here. + +------- + +**Settings** + +All of the options have a default value. You can change any of them. Most of the options in Cuffdiff have been implemented here. + +------ + +**Cuffdiff parameter list** + +This is a list of implemented Cuffdiff options:: + + -m INT Average fragement length; default 200 + -s INT Fragment legnth standard deviation; default 80 + -c INT The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not significant, and the locus' observed changes don't contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads). + --FDR FLOAT The allowed false discovery rate. The default is 0.05. + --num-importance-samples INT Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000 + --max-mle-iterations INT Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000 + -N With this option, Cufflinks excludes the contribution of the top 25 percent most highly expressed genes from the number of mapped fragments used in the FPKM denominator. This can improve robustness of differential expression calls for less abundant genes and transcripts. + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cummerbund_wrapper.py Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +### Runs "r_script" and generates a HTML report +### Inspired on cuffdiff_wrapper.py and gatk_wrapper.py +### Carlos Borroto <carlos.borroto@gmail.com> + +import optparse, os, shutil, subprocess, sys, tempfile + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit(1) + +def html_report_from_directory( html_out, dir ): + html_out.write( '<html>\n<head>\n<title>Galaxy - cummeRbund Output</title>\n</head>\n<body>\n<p/>\n<ul>\n' ) + for fname in sorted( os.listdir( dir ) ): + # html_out.write( '<li><a href="%s">%s</a></li>\n' % ( fname, fname ) ) + html_out.write( '<li><a href="%s"><img src="%s" alt="" height="80" width="80">%s</a></li>\n' % ( fname, fname , fname ) ) + html_out.write( '</ul>\n</body>\n</html>\n' ) + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + + # wrapper options + parser.add_option('', '--r-script', dest='r_script', help='R script') + parser.add_option('', '--html-report-from-directory', dest='html_report_from_directory', type="string", nargs=2, help='"Target HTML File" "Directory"') + + (options, args) = parser.parse_args() + + (html_filename, html_dir) = options.html_report_from_directory + + # Make html report directory for output. + os.mkdir( html_dir ) + + # Make a tmp dir + tmp_dir = tempfile.mkdtemp( prefix='tmp-cummeRbund-' ) + + # Build command. + cmd = ( "Rscript --vanilla %s" % options.r_script ) + + # Debugging. + # print cmd + +#liubo added, for test, look at the generated R script +# shutil.copy2(options.r_script, '/nfs/software/galaxy/r_script') + + + # Run command. + try: + tmp_name = tempfile.NamedTemporaryFile( dir=tmp_dir ).name + tmp_stderr = open( tmp_name, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=html_dir, stderr=tmp_stderr.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + + # Get stderr, allowing for case where it's very large. + tmp_stderr = open( tmp_name, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + + # Error checking. + if returncode != 0: + raise Exception, stderr + except Exception, e: + stop_err( 'Error running R script. ' + str( e ) ) + + # write the html report + html_report_from_directory( open( html_filename, 'wb' ), html_dir ) + + # Clean up temp dirs + if os.path.exists( tmp_dir ): + shutil.rmtree( tmp_dir ) + +if __name__=="__main__": __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cummerbund_wrapper.xml Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,355 @@ +<tool id="cummerbund" name="cummeRbund" version="0.0.6"> + + <description>R package designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output</description> + + <command interpreter="python"> + cummerbund_wrapper.py + --r-script ${script_file} + --html-report-from-directory "${output_html}" "${output_html.files_path}" + </command> + + <inputs> + <conditional name="backend_database_source"> + <param name="backend_database_selector" type="select" label="Will you select a backend database file from the history or do you want to build a new one using cuffdiff output?"> + <option value="history" selected="true">Use backend database from the history</option> + <option value="cuffdiff_output">Build backend database using cuffdiff output</option> + </param> + <when value="cuffdiff_output"> + <param format="tabular" name="isoforms_fpkm_tracking" type="data" label="Transcript FPKM tracking"/> + <param format="tabular" name="isoforms_exp" type="data" label="Transcript differential expression testing"/> + <param format="tabular" name="genes_fpkm_tracking" type="data" label="Gene FPKM tracking"/> + <param format="tabular" name="genes_exp" type="data" label="Gene differential expression testing"/> + <param format="tabular" name="tss_groups_fpkm_tracking" type="data" label="TSS groups FPKM tracking"/> + <param format="tabular" name="tss_groups_exp" type="data" label="TSS groups differential expression testing"/> + <param format="tabular" name="cds_fpkm_tracking" type="data" label="CDS FPKM tracking"/> + <param format="tabular" name="cds_exp_diff" type="data" label="CDS FPKM differential expression testing"/> + <param format="tabular" name="cds_diff" type="data" label="CDS overloading diffential expression testing"/> + <param format="tabular" name="promoters_diff" type="data" label="Promoters differential expression testing"/> + <param format="tabular" name="splicing_diff" type="data" label="Splicing differential expression testing"/> + <param name="rebuild" type="hidden" value="TRUE"/> + </when> + <when value="history"> + <param name="input_database" type="data" format="cuffdatadb" label="Select backend database (sqlite)"/> + </when> + </conditional> + <repeat name="plots" title="Plots"> + <param name="width" type="text" value="1280" label="The width of the image"/> + <param name="height" type="text" value="960" label="The height of the image"/> + <conditional name="plot"> + <param name="type" type="select" label="Plot type"> + <option value="density" selected="true">Density</option> + <option value="dispersion">Dispersion</option> + <option value="fpkmSCV">Squared Coefficient of Variation</option> + <option value="scatterMatrix">Scatter Matrix</option> + <option value="boxplot">Boxplot</option> + <option value="scatter">Scatter</option> + <option value="volcano">Volcano</option> + <option value="heatmap">Heatmap</option> + <option value="cluster">Cluster</option> + <option value="expressionplot">Expression Plot</option> + <option value="expressionbarplot">Expression Bar Plot</option> + <option value="mds">MultiDimentional Scaling (MDS) Plot</option> + <option value="pca">Principal Component Analysis (PCA) Plot</option> + <option value="maplot">Intensity vs Fold-change (MvaA) Plot</option> + <option value="dendrogram">Dendrogram</option> + </param> + <when value="density"> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Replicates?"/> + </when> + <when value="mds"> + <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/> + </when> + <when value="pca"> + <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/> + </when> + <when value="maplot"> + <param name="x" type="text" label="Sample name 1"/> + <param name="y" type="text" label="Sample name 2"/> + <param name="use_count" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Use Count?"/> + </when> + <when value="dendrogram"> + <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/> + </when> + <when value="dispersion"> + </when> + <when value="fpkmSCV"> + </when> + <when value="scatterMatrix"> + </when> + <when value="boxplot"> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + </when> + <when value="scatter"> + <param name="x" type="text" label="Sample name for x axis"/> + <param name="y" type="text" label="Sample name for y axis"/> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + <param name="smooth" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Add a smooth-fit regression line"/> + <conditional name="multiple_genes"> + <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/> + <when value="T"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + </when> + <when value="F"/> + </conditional> + </when> + <when value="volcano"> + <param name="x" type="text" label="First sample name for comparison"/> + <param name="y" type="text" label="Second sample name for comparison"/> + <conditional name="multiple_genes"> + <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/> + <when value="T"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + </when> + <when value="F"/> + </conditional> + </when> + <when value="heatmap"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + <param name="clustering" type="select" label="Cluster by"> + <option value="row">Row</option> + <option value="column">Column</option> + <option value="both" selected="true">Both</option> + <option value="none">None</option> + </param> + <param name="labcol" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/> + <param name="labrow" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + <param name="border" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw border around plot?"/> + </when> + <when value="cluster"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + <param name="k" type="text" label="Number of pre-defined clusters to attempt to find."/> + <param name="iter_max" type="text" value="100" label="Max iterations"/> + </when> + <when value="expressionplot"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <param name="gene_id" type="text" label="Gene ID"/> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + <param name="draw_summary" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw a 'summary' line with mean FPKM + values for each condition?"/> + <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/> + </when> + <when value="expressionbarplot"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <param name="gene_id" type="text" label="Gene ID"/> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/> + </when> + </conditional> + </repeat> + </inputs> + <stdio> + <exit_code range="1:" level="fatal" description="CummerBund Error" /> + </stdio> + <outputs> + <data format="data" name="output_database" label="${tool.name} on ${on_string}: Database File (sqlite)"> + <filter>backend_database_source['backend_database_selector'] == "cuffdiff_output"</filter> + </data> + <data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)"/> + </outputs> + + <requirements> + <requirement type="binary">R</requirement> + </requirements> + +<!--> + <tests> + <test> + <param name="" value=""/> + <output name="" file=""/> + </test> + </tests> +--> + <configfiles> + <configfile name="script_file"> + +## Feature Selection ## +get_features <- function(myGenes, f="gene") { + if (f == "isoforms") + return(isoforms(myGenes)) + else if (f == "tss") + return(TSS(myGenes)) + else if (f == "cds") + return(CDS(myGenes)) + else + return(myGenes) +} + +## Main Function ## + +## Load cummeRbund library +library("cummeRbund") + +## Initialize cuff object +cuff <- readCufflinks(dir = "", +#if $backend_database_source.backend_database_selector == "cuffdiff_output": + dbFile = "${output_database}", + geneFPKM = "${genes_fpkm_tracking}", + geneDiff = "${genes_exp}", + isoformFPKM = "${isoforms_fpkm_tracking}", + isoformDiff = "${isoforms_exp}", + TSSFPKM = "${tss_groups_fpkm_tracking}", + TSSDiff = "${tss_groups_exp}", + CDSFPKM = "${cds_fpkm_tracking}", + CDSExpDiff = "${cds_exp_diff}", + CDSDiff = "${cds_diff}", + promoterFile = "${promoters_diff}", + splicingFile = "${splicing_diff}", + rebuild = T) +#else: + dbFile = "${backend_database_source.input_database}", + rebuild = F) +#end if + +#for $i, $p in enumerate($plots, start=1): + #set $filename = "plot%02d-%s.png" % ($i, $p.plot['type']) +png(filename = "${filename}", width = ${p.width}, height = ${p.height}) +tryCatch({ + ## Density plot ## + #if $p.plot['type'] == "density": +csDensity(genes(cuff),replicates=$p.plot.replicates) + + ## Dispersion plot ## + #elif $p.plot['type'] == "dispersion": +dispersionPlot(genes(cuff)) + + ## Squared Coefficient of Variation plot ## + #elif $p.plot['type'] == "fpkmSCV": +fpkmSCVPlot(genes(cuff)) + + ## Scatter Matrix ## + #elif $p.plot['type'] == "scatterMatrix": +csScatterMatrix(genes(cuff)) + + ## Boxplot ## + #elif $p.plot['type'] == "boxplot": +csBoxplot(genes(cuff)) + + ## Scatter ## + #elif $p.plot['type'] == "scatter": + #if $p.plot.multiple_genes['multiple_genes_selector']: +myGeneIds <- c() + #for $g in $p.plot.multiple_genes['genes']: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csScatter(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth}) + #else +csScatter(genes(cuff), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth}) + #end if + + ## Volcano ## + #elif $p.plot['type'] == "volcano": + #if $p.plot.multiple_genes['multiple_genes_selector']: +myGeneIds <- c() + #for $g in $p.plot.multiple_genes['genes']: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csVolcano(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}") + #else +csVolcano(genes(cuff), "${p.plot.x}", "${p.plot.y}") + #end if + + ## Heatmap ## + #elif $p.plot['type'] == "heatmap": +myGeneIds <- c() + #for $g in $p.plot.genes: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csHeatmap(get_features(myGenes, "${p.plot.features}"), clustering="${p.plot.clustering}", labCol="${p.plot.labcol}", labRow="${p.plot.labrow}", border="${p.plot.border}") + + ## Cluster ## + #elif $p.plot['type'] == "cluster": +myGeneIds <- c() + #for $g in $p.plot.genes: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csCluster(get_features(myGenes, "${p.plot.features}"), k=${p.plot.k}, iter.max="${p.plot.iter_max}") + + ## Expression Plot ## + #elif $p.plot['type'] == "expressionplot": +myGeneId <- "$p.plot.gene_id" +myGenes <- getGenes(cuff, myGeneId) +expressionPlot(get_features(myGenes, "${p.plot.features}"), drawSummary=${p.plot.draw_summary}, iter.max="${p.plot.show_error_bars}") + + ## Expression Bar Plot ## + #elif $p.plot['type'] == "expressionbarplot": +myGeneId <- "$p.plot.gene_id" +myGenes <- getGenes(cuff, myGeneId) +expressionBarplot(get_features(myGenes, "${p.plot.features}"), iter.max="${p.plot.show_error_bars}") + + ## MDS plot ## + #elif $p.plot['type'] == "mds": +MDSplot(genes(cuff),replicates=$p.plot.replicates) + + ## PCA plot ## + #elif $p.plot['type'] == "pca": +PCAplot(genes(cuff),"PC1","PC2",replicates=$p.plot.replicates) + + ## MAplot plot ## + #elif $p.plot['type'] == "maplot": +MAplot(genes(cuff), "${p.plot.x}", "${p.plot.y}", useCount=${p.plot.use_count}) + + ## Dendogram plot ## + #elif $p.plot['type'] == "dendrogram": +csDendro(genes(cuff),replicates=$p.plot.replicates) + #end if + +},error = function(e) {paste("$p.plot['type'] failed: ", e)}) +devname = dev.off() + +#end for + </configfile> + </configfiles> + + <help> +This tool allows for persistent storage, access, exploration, and manipulation of Cufflinks high-throughput sequencing data. In addition, provides numerous plotting functions for commonly used visualizations. + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<datatypes> + <datatype_files> + <datatype_file name="cuffdata.py"/> + </datatype_files> + <registration> + <!-- composite dataset with cuffdiff outputs in the extra files path --> + <datatype extension="cuffdata" type="galaxy.datatypes.cuffdata:CuffDiffData"/> + <!-- SQLite database --> + <datatype extension="cuffdatadb" type="galaxy.datatypes.cuffdata:CuffDataDB"/> + </registration> +</datatypes> +