Previous changeset 2:674c75219f15 (2012-09-12) Next changeset 4:ebd59bc6855c (2012-09-12) |
Commit message:
Uploaded |
added:
edgeR.xml |
b |
diff -r 674c75219f15 -r 6965066838fc edgeR.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/edgeR.xml Wed Sep 12 23:45:02 2012 -0400 |
[ |
b'@@ -0,0 +1,210 @@\n+<tool id="edgeR" name="Empirical analysis of digital gene expression data" version="0.0.1">\n+ \n+ <command interpreter="perl">\n+ \tedgeR.pl -a $analysis_type.analysis -e $html_file.files_path -f $fdr -h $html_file -o $output\n+ \t## Pairwise comparisons\n+ \t#if $analysis_type.analysis == "pw":\n+ \t\t-r $analysis_type.rowsumfilter\n+ \t\t#if $analysis_type.tagwise_disp.twd == "TRUE":\n+ \t\t\t-p $analysis_type.tagwise_disp.twd_prop\n+ \t\t\t-u $analysis_type.tagwise_disp.twd_trend\n+ \t\t\t-t\n+ \t\t#end if\n+ \t## GLM\n+ \t#else if $analysis_type.analysis == "glm":\n+\t\t#if $analysis_type.exp.export_norm == "true":\n+\t\t\t-n $norm_exp\n+\t\t#end if\n+ \t\t-d $analysis_type.disp\n+\t\t$analysis_type.cont_pw\n+ \t\t#for $fct in $analysis_type.factors:\n+ \t\t\tfactor::${$fct.fact_name}::${$fct.fact}\n+ \t\t#end for\n+ \t\t#for $c in $analysis_type.cont_pred:\n+\t\t\tcp::${c.cp_name}::${c.cp}\n+\t\t#end for\n+\t\t#for $cnt in $analysis_type.contrasts:\n+\t\t\t"cnt::${cnt.add_cont}"\n+\t\t#end for\n+\t## LIMMA\n+\t#else\n+\t\t#if $analysis_type.exp.export_norm == "true":\n+\t\t\t-n $norm_exp $analysis_type.exp.log\n+\t\t#end if\n+\t\t$analysis_type.cont_pw\n+ \t\t#for $fct in $analysis_type.factors:\n+ \t\t\tfactor::${$fct.fact_name}::${$fct.fact}\n+ \t\t#end for\n+ \t\t#for $c in $analysis_type.cont_pred:\n+\t\t\tcp::${c.cp_name}::${c.cp}\n+\t\t#end for\n+\t\t#for $cnt in $analysis_type.contrasts:\n+\t\t\t"cnt::${cnt.add_cont}"\n+\t\t#end for\n+\t#end if\n+\t$matrix\n+\t\t\t\t\n+ </command>\n+\n+ <inputs>\n+ \t<param name="matrix" type="data" format="tabular" label="Digital Expression Matrix"/>\n+ \t<conditional name="analysis_type">\n+\t\t<param name="analysis" type="select" label="Type Of Analysis">\n+\t\t\t<option value="pw">Pairwise comparisons (1 Factor Analysis)</option>\n+\t\t\t<option value="glm" selected="true">Generalized Linear Models (Multiple Factor Analysis using GLM)</option>\n+\t\t\t<option value="limma">Linear Models for 
RNA-Seq (Multiple Factor Analysis using LIMMA)</option>\n+\t\t</param>\n+\t\t<when value="pw">\n+\t\t\t<param name="rowsumfilter" type="integer" value="5" label="Common Dispersion Rowsum Filter" help="Numeric scalar giving a value for the filtering out of low abundance tags in the estimation of the common dispersion. Only tags with total sum of counts above this value are used in the estimation of the common dispersion. Low abundance tags can adversely affect the estimation of the common dispersion, so this argument allows the user to select an appropriate filter threshold for the tag abundance."/>\n+\t\t\t<conditional name="tagwise_disp">\n+\t\t\t\t<param name="twd" type="select" label="Maximize the Negative Binomial Weighted Conditional Likelihood" help="Calculate and use an estimate of the dispersion parameter for each tag">\n+\t\t\t\t\t<option value="TRUE" selected="true">True</option>\n+\t\t\t\t\t<option value="FALSE">False</option>\n+\t\t\t\t</param>\n+\t\t\t\t<when value="TRUE">\n+\t\t\t\t\t<param name="twd_trend" type="select" label="Method for allowing the prior distribution for the dispersion to be abundance-dependent">\n+\t\t\t\t\t\t<option value="movingave" selected="true">Movingave</option>\n+\t\t\t\t\t\t<option value="tricube">Tricube</option>\n+\t\t\t\t\t\t<option value="none">None</option>\n+\t\t\t\t\t</param>\n+\t\t\t\t\t<param name="twd_prop" type="float" value="0.3" label="The proportion of all tags/genes to be used for the locally weighted estimation of the tagwise dispersion, allowing the dispersion estimates to vary with abundance (expression level)"/>\n+\t\t\t\t</when>\n+\t\t\t</conditional>\n+\t\t</when>\n+\t\t<when value="glm">\n+\t\t\t<param name="disp" type="select" label="Select The Dispersion Estimate To Use:">\n+\t\t\t\t<option value="common">Common Dispersion</option>\n+\t\t\t\t<option value="trend">Trended Dispersion</option>\n+\t\t\t\t<option value="tag" selected="true">Tagwise Dispersion</option>\n+\t\t\t</param>\n+\t\t\t<repeat 
name="factors" title="Factor">\n+\t\t\t\t<param name="fact_name" title="Factor Name" type="text" label="Name Of Factor (no spaces or commas)"/>\n+\t\t\t\t<param name="fact" title="Factor" type="text" size="100" label="The Level Of Each Sample Seperated By A Colon (no space'..b'sed Digital Gene Expression Matrix."/>\n+\t\t\t\t</when>\n+\t\t\t</conditional>\n+\t\t</when>\n+\t</conditional>\n+\t<param name="fdr" type="select" label="False discovery rate adjustment method">\n+\t\t<option value="BH">Benjamini and Hochberg (1995)</option>\n+\t\t<option value="holm">Holm (1979)</option>\n+\t\t<option value="hochberg">Hochberg (1988)</option>\n+\t\t<option value="hommel">Hommel (1988)</option>\n+\t\t<option value="BY">Benjamini and Yekutieli (2001)</option>\n+\t\t<option value="none">None</option>\n+\t</param>\n+ </inputs>\n+ \n+ <outputs>\n+ \t<data format="tabular" name="output" label="EdgeR analysis on ${matrix.name}"/>\n+ \t<data name="html_file" format="html" label="EdgeR analysis plots for ${matrix.name}"/>\n+ \t<data name="norm_exp" format="tabular" label="EdgeR Norm Expr Matrix for ${matrix.name}">\n+ \t\t<filter>analysis_type[ "analysis" ] != "pw" and analysis_type[ "exp" ][ "export_norm" ] == "true"</filter>\n+ \t</data>\n+ </outputs>\n+ \t\n+\t<help>\n+\n+.. class:: infomark\n+ \n+**What it does**\n+\n+Estimates differential gene expression for short read sequence count using methods appropriate for count data.\n+If you have paired data you may also want to consider Tophat/Cufflinks. \n+Input must be raw count data for each sequence arranged in a rectangular matrix as a tabular file.\n+Note - no scaling - please make sure you have untransformed raw counts of reads for each sequence.\n+ \n+Performs digital differential gene expression analysis between groups (eg a treatment and control).\n+Biological replicates provide information about experimental variability required for reliable inference.\n+\n+**What it does not do**\n+edgeR_ requires biological replicates. 
\n+Without replicates you can\'t account for known important experimental sources of variability that the approach implemented here requires.\n+\n+**Input**\n+A count matrix containing sequence names as rows and sample specific counts of reads from this sequence as columns.\n+The matrix must have 2 header rows, the first indicating the group assignment and the second uniquely identifying the samples. It must also contain a unique set of (eg Feature) names in the first column. \n+\n+Example::\n+\n+\t#\tG1:Mut\tG1:Mut\tG1:Mut\tG2:WT\tG2:WT\tG2:WT\n+\t#Feature\tSpl1\tSpl2\tSpl3\tSpl4\tSpl5\tSpl6\n+\tNM_001001130\t97\t43\t61\t34\t73\t26\n+\tNM_001001144\t25\t8\t9\t3\t5\t5\n+\tNM_001001152\t72\t45\t29\t20\t31\t13\n+\tNM_001001160\t0\t1\t1\t1\t0\t0\n+\tNM_001001177\t0\t1\t0\t4\t3\t3\n+\tNM_001001178\t0\t2\t1\t0\t4\t0\n+\tNM_001001179\t0\t0\t0\t0\t0\t2\n+\tNM_001001180\t0\t0\t0\t0\t0\t2\n+\tNM_001001181\t415\t319\t462\t185\t391\t155\n+\tNM_001001182\t1293\t945\t987\t297\t938\t496\n+\tNM_001001183\t5\t4\t11\t7\t11\t2\n+\tNM_001001184\t135\t198\t178\t110\t205\t64\n+\tNM_001001185\t186\t1\t0\t1\t1\t0\n+\tNM_001001186\t75\t90\t91\t34\t63\t54\n+\tNM_001001187\t267\t236\t170\t165\t202\t51\n+\tNM_001001295\t5\t2\t6\t1\t7\t0\n+\tNM_001001309\t1\t0\t0\t1\t2\t1\n+\t...\n+\t\n+\n+Please use the "Count reads in features with htseq-count" tool to generate the count matrix.\n+\n+**Output**\n+\n+A tabular file containing relative expression levels, statistical estimates of differential expression probability, R scripts, log, and some helpful diagnostic plots.\n+\n+.. class:: infomark\n+\n+**Attribution**\n+This tool wraps the edgeR_ Bioconductor package so all calculations and plots are controlled by that code. See edgeR_ for all documentation and appropriate attribution. \n+Recommended reference is Mark D. Robinson, Davis J. McCarthy, Gordon K. Smyth, PMCID: PMC2796818\n+\n+.. 
class:: infomark\n+\n+**Attribution**\n+When applying the LIMMA (Linear models for RNA-Seq) analysis the tool also makes use of the limma_ Bioconductor package.\n+Recommended reference is Smyth, G. K. (2005). Limma: linear models for microarray data. In: \'Bioinformatics and Computational Biology Solutions using R and Bioconductor\'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry, W. Huber (eds), Springer, New York, pages 397--420.\n+\n+ .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html\n+ .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html\n+\n+\t</help>\n+ \n+</tool>\n' |