Mercurial > repos > jjohnson > cummerbund
diff cummerbund_wrapper.xml @ 0:da7241f92ecf
Uploaded
author | jjohnson |
---|---|
date | Mon, 04 Feb 2013 19:50:25 -0500 |
parents | |
children | fdf01b3c1841 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cummerbund_wrapper.xml Mon Feb 04 19:50:25 2013 -0500 @@ -0,0 +1,355 @@ +<tool id="cummerbund" name="cummeRbund" version="0.0.6"> + + <description>R package designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output</description> + + <command interpreter="python"> + cummerbund_wrapper.py + --r-script ${script_file} + --html-report-from-directory "${output_html}" "${output_html.files_path}" + </command> + + <inputs> + <conditional name="backend_database_source"> + <param name="backend_database_selector" type="select" label="Will you select a backend database file from the history or do you want to build a new one using cuffdiff output?"> + <option value="history" selected="true">Use backend database from the history</option> + <option value="cuffdiff_output">Build backend database using cuffdiff output</option> + </param> + <when value="cuffdiff_output"> + <param format="tabular" name="isoforms_fpkm_tracking" type="data" label="Transcript FPKM tracking"/> + <param format="tabular" name="isoforms_exp" type="data" label="Transcript differential expression testing"/> + <param format="tabular" name="genes_fpkm_tracking" type="data" label="Gene FPKM tracking"/> + <param format="tabular" name="genes_exp" type="data" label="Gene differential expression testing"/> + <param format="tabular" name="tss_groups_fpkm_tracking" type="data" label="TSS groups FPKM tracking"/> + <param format="tabular" name="tss_groups_exp" type="data" label="TSS groups differential expression testing"/> + <param format="tabular" name="cds_fpkm_tracking" type="data" label="CDS FPKM tracking"/> + <param format="tabular" name="cds_exp_diff" type="data" label="CDS FPKM differential expression testing"/> + <param format="tabular" name="cds_diff" type="data" label="CDS overloading diffential expression testing"/> + <param format="tabular" name="promoters_diff" type="data" label="Promoters differential expression testing"/> + <param format="tabular" name="splicing_diff" type="data" label="Splicing differential expression testing"/> + <param name="rebuild" type="hidden" value="TRUE"/> + </when> + <when value="history"> + <param name="input_database" type="data" format="cuffdatadb" label="Select backend database (sqlite)"/> + </when> + </conditional> + <repeat name="plots" title="Plots"> + <param name="width" type="text" value="1280" label="The width of the image"/> + <param name="height" type="text" value="960" label="The height of the image"/> + <conditional name="plot"> + <param name="type" type="select" label="Plot type"> + <option value="density" selected="true">Density</option> + <option value="dispersion">Dispersion</option> + <option value="fpkmSCV">Squared Coefficient of Variation</option> + <option value="scatterMatrix">Scatter Matrix</option> + <option value="boxplot">Boxplot</option> + <option value="scatter">Scatter</option> + <option value="volcano">Volcano</option> + <option value="heatmap">Heatmap</option> + <option value="cluster">Cluster</option> + <option value="expressionplot">Expression Plot</option> + <option value="expressionbarplot">Expression Bar Plot</option> + <option value="mds">MultiDimentional Scaling (MDS) Plot</option> + <option value="pca">Principal Component Analysis (PCA) Plot</option> + <option value="maplot">Intensity vs Fold-change (MvaA) Plot</option> + <option value="dendrogram">Dendrogram</option> + </param> + <when value="density"> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Replicates?"/> + </when> + <when value="mds"> + <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/> + </when> + <when value="pca"> + <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/> + </when> + <when value="maplot"> + <param name="x" type="text" label="Sample name 1"/> + <param name="y" type="text" label="Sample name 2"/> + <param name="use_count" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Use Count?"/> + </when> + <when value="dendrogram"> + <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/> + </when> + <when value="dispersion"> + </when> + <when value="fpkmSCV"> + </when> + <when value="scatterMatrix"> + </when> + <when value="boxplot"> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + </when> + <when value="scatter"> + <param name="x" type="text" label="Sample name for x axis"/> + <param name="y" type="text" label="Sample name for y axis"/> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + <param name="smooth" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Add a smooth-fit regression line"/> + <conditional name="multiple_genes"> + <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/> + <when value="T"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + </when> + <when value="F"/> + </conditional> + </when> + <when value="volcano"> + <param name="x" type="text" label="First sample name for comparison"/> + <param name="y" type="text" label="Second sample name for comparison"/> + <conditional name="multiple_genes"> + <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/> + <when value="T"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + </when> + <when value="F"/> + </conditional> + </when> + <when value="heatmap"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + <param name="clustering" type="select" label="Cluster by"> + <option value="row">Row</option> + <option value="column">Column</option> + <option value="both" selected="true">Both</option> + <option value="none">None</option> + </param> + <param name="labcol" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/> + <param name="labrow" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + <param name="border" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw border around plot?"/> + </when> + <when value="cluster"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <repeat name="genes" title="Genes"> + <param name="gene_id" type="text" label="Gene ID"/> + </repeat> + <param name="k" type="text" label="Number of pre-defined clusters to attempt to find."/> + <param name="iter_max" type="text" value="100" label="Max iterations"/> + </when> + <when value="expressionplot"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <param name="gene_id" type="text" label="Gene ID"/> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + <param name="draw_summary" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw a 'summary' line with mean FPKM + values for each condition?"/> + <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/> + </when> + <when value="expressionbarplot"> + <param name="features" type="select" label="Expression levels to plot?"> + <option value="gene" selected="true">Genes</option> + <option value="isoforms">Isoforms</option> + <option value="tss">TSS</option> + <option value="cds">CDS</option> + </param> + <param name="gene_id" type="text" label="Gene ID"/> + <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/> + <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/> + </when> + </conditional> + </repeat> + </inputs> + <stdio> + <exit_code range="1:" level="fatal" description="CummerBund Error" /> + </stdio> + <outputs> + <data format="data" name="output_database" label="${tool.name} on ${on_string}: Database File (sqlite)"> + <filter>backend_database_source['backend_database_selector'] == "cuffdiff_output"</filter> + </data> + <data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)"/> + </outputs> + + <requirements> + <requirement type="binary">R</requirement> + </requirements> + +<!--> + <tests> + <test> + <param name="" value=""/> + <output name="" file=""/> + </test> + </tests> +--> + <configfiles> + <configfile name="script_file"> + +## Feature Selection ## +get_features <- function(myGenes, f="gene") { + if (f == "isoforms") + return(isoforms(myGenes)) + else if (f == "tss") + return(TSS(myGenes)) + else if (f == "cds") + return(CDS(myGenes)) + else + return(myGenes) +} + +## Main Function ## + +## Load cummeRbund library +library("cummeRbund") + +## Initialize cuff object +cuff <- readCufflinks(dir = "", +#if $backend_database_source.backend_database_selector == "cuffdiff_output": + dbFile = "${output_database}", + geneFPKM = "${genes_fpkm_tracking}", + geneDiff = "${genes_exp}", + isoformFPKM = "${isoforms_fpkm_tracking}", + isoformDiff = "${isoforms_exp}", + TSSFPKM = "${tss_groups_fpkm_tracking}", + TSSDiff = "${tss_groups_exp}", + CDSFPKM = "${cds_fpkm_tracking}", + CDSExpDiff = "${cds_exp_diff}", + CDSDiff = "${cds_diff}", + promoterFile = "${promoters_diff}", + splicingFile = "${splicing_diff}", + rebuild = T) +#else: + dbFile = "${backend_database_source.input_database}", + rebuild = F) +#end if + +#for $i, $p in enumerate($plots, start=1): + #set $filename = "plot%02d-%s.png" % ($i, $p.plot['type']) +png(filename = "${filename}", width = ${p.width}, height = ${p.height}) +tryCatch({ + ## Density plot ## + #if $p.plot['type'] == "density": +csDensity(genes(cuff),replicates=$p.plot.replicates) + + ## Dispersion plot ## + #elif $p.plot['type'] == "dispersion": +dispersionPlot(genes(cuff)) + + ## Squared Coefficient of Variation plot ## + #elif $p.plot['type'] == "fpkmSCV": +fpkmSCVPlot(genes(cuff)) + + ## Scatter Matrix ## + #elif $p.plot['type'] == "scatterMatrix": +csScatterMatrix(genes(cuff)) + + ## Boxplot ## + #elif $p.plot['type'] == "boxplot": +csBoxplot(genes(cuff)) + + ## Scatter ## + #elif $p.plot['type'] == "scatter": + #if $p.plot.multiple_genes['multiple_genes_selector']: +myGeneIds <- c() + #for $g in $p.plot.multiple_genes['genes']: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csScatter(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth}) + #else +csScatter(genes(cuff), "${p.plot.x}", "${p.plot.y}", smooth=${p.plot.smooth}) + #end if + + ## Volcano ## + #elif $p.plot['type'] == "volcano": + #if $p.plot.multiple_genes['multiple_genes_selector']: +myGeneIds <- c() + #for $g in $p.plot.multiple_genes['genes']: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csVolcano(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}") + #else +csVolcano(genes(cuff), "${p.plot.x}", "${p.plot.y}") + #end if + + ## Heatmap ## + #elif $p.plot['type'] == "heatmap": +myGeneIds <- c() + #for $g in $p.plot.genes: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csHeatmap(get_features(myGenes, "${p.plot.features}"), clustering="${p.plot.clustering}", labCol="${p.plot.labcol}", labRow="${p.plot.labrow}", border="${p.plot.border}") + + ## Cluster ## + #elif $p.plot['type'] == "cluster": +myGeneIds <- c() + #for $g in $p.plot.genes: +myGeneIds <- c(myGeneIds, "$g['gene_id']") + #end for +myGenes <- getGenes(cuff, myGeneIds) +csCluster(get_features(myGenes, "${p.plot.features}"), k=${p.plot.k}, iter.max="${p.plot.iter_max}") + + ## Expression Plot ## + #elif $p.plot['type'] == "expressionplot": +myGeneId <- "$p.plot.gene_id" +myGenes <- getGenes(cuff, myGeneId) +expressionPlot(get_features(myGenes, "${p.plot.features}"), drawSummary=${p.plot.draw_summary}, iter.max="${p.plot.show_error_bars}") + + ## Expression Bar Plot ## + #elif $p.plot['type'] == "expressionbarplot": +myGeneId <- "$p.plot.gene_id" +myGenes <- getGenes(cuff, myGeneId) +expressionBarplot(get_features(myGenes, "${p.plot.features}"), iter.max="${p.plot.show_error_bars}") + + ## MDS plot ## + #elif $p.plot['type'] == "mds": +MDSplot(genes(cuff),replicates=$p.plot.replicates) + + ## PCA plot ## + #elif $p.plot['type'] == "pca": +PCAplot(genes(cuff),"PC1","PC2",replicates=$p.plot.replicates) + + ## MAplot plot ## + #elif $p.plot['type'] == "maplot": +MAplot(genes(cuff), "${p.plot.x}", "${p.plot.y}", useCount=${p.plot.use_count}) + + ## Dendogram plot ## + #elif $p.plot['type'] == "dendrogram": +csDendro(genes(cuff),replicates=$p.plot.replicates) + #end if + +},error = function(e) {paste("$p.plot['type'] failed: ", e)}) +devname = dev.off() + +#end for + </configfile> + </configfiles> + + <help> +This tool allows for persistent storage, access, exploration, and manipulation of Cufflinks high-throughput sequencing data. In addition, provides numerous plotting functions for commonly used visualizations. + </help> + +</tool>