limma_voom: limma_voom.xml comparison

comparison limma_voom.xml @ 0:bdebdea5f6a7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 2f34a215c35f08c3666f314a87d235437baa1d21

author	iuc
date	Mon, 12 Jun 2017 07:41:02 -0400
parents
children	76d01fe0ec36

comparison

equal deleted inserted replaced

--1:000000000000
+:bdebdea5f6a7
+<tool id="limma_voom" name="limma-voom" version="1.1.1">
+<description>
+Differential expression with optional sample weights
+</description>
+<requirements> <!-- Pinned package dependencies: edgeR, limma, statmod and scales. -->
+<requirement type="package" version="3.16.5">bioconductor-edger</requirement>
+<requirement type="package" version="3.30.13">bioconductor-limma</requirement>
+<requirement type="package" version="1.4.29">r-statmod</requirement>
+<requirement type="package" version="0.4.1">r-scales</requirement>
+</requirements>
+<!-- Echoes one line: the R version string, then the limma and edgeR package versions. stderr of the R calls is discarded and lines containing "WARNING: " are filtered out. --><version_command>
+<![CDATA[
+echo $(R --version | grep version | grep -v GNU)", limma version" $(R --vanilla --slave -e "library(limma); cat(sessionInfo()\$otherPkgs\$limma\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+]]>
+</version_command>
+<!-- Runs limma_voom.R with a fixed sequence of arguments (presumably read positionally by the script; keep the order), substituting None / 0 / "BH" defaults when an optional section is disabled. Afterwards the *.tsv tables are moved from the report's files directory into output_dir, where the voom_results collection is discovered. --><command detect_errors="exit_code">
+<![CDATA[
+Rscript '$__tool_directory__/limma_voom.R'
+'$counts'
+#if $anno.annoOpt=='yes':
+'$geneanno'
+#else:
+None
+#end if
+'$outReport'
+'$outReport.files_path'
+$rdaOption
+$normalisationOption
+$weightOption
+'$contrast'
+#if $filterCPM.filterLowCPM=='yes':
+'$filterCPM.cpmReq'
+'$filterCPM.sampleReq'
+#else:
+0
+0
+#end if
+#if $testOpt.wantOpt=='yes':
+'$testOpt.pAdjust'
+'$testOpt.pVal'
+'$testOpt.lfc'
+#else:
+"BH"
+0.05
+0
+#end if
+'$factName::$factLevel'
+&&
+mkdir ./output_dir
+&&
+mv '$outReport.files_path'/*.tsv output_dir/
+]]>
+</command>
+<inputs>
+<!-- User-facing parameters. The names below are referenced by the Cheetah template in <command>. -->
+<!-- Count matrix: genes in rows, samples in columns (format described in the help section). -->
+<param name="counts" type="data" format="tabular" label="Counts Data"/>
+<!-- Optional gene annotation table. When "no" is selected the command passes the literal None instead of a file path. -->
+<conditional name="anno">
+<param name="annoOpt" type="select"
+label="Use Gene Annotations?"
+help="If an annotation file is provided, annotations will be added to the table of differential expression results to provide descriptions for each gene.">
+<option value="no">No</option>
+<option value="yes">Yes</option>
+</param>
+<when value="yes">
+<param name="geneanno" type="data" format="tabular" label="Gene Annotations"/>
+</when>
+<when value="no" />
+</conditional>
+<!--*Code commented until solution for multiple factors is found*
+<repeat name="factors" title="Factors" min="1" max="5" default="1">
+<param name="factName" type="text" label="Factor Name (No spaces)"
+help="Eg. Genotype"/>
+<param name="factLevel" type="text" size="100"
+label="Factor Levels (No spaces)"
+help="Eg. WT,WT,Mut,Mut,WT"/>
+</repeat>
+-->
+<!-- Single factor of interest. Name and values are joined as name::values on the command line. -->
+<param name="factName" type="text" label="Factor Name" help="Eg. Genotype."/>
+<!-- The help text is kept on one line: XML attribute-value normalization replaces a literal line break with a space, so a multi-line value would render as one run-on sentence. -->
+<param name="factLevel" type="text" label="Factor Values"
+help="Eg. WT,WT,WT,Mut,Mut,Mut. NOTE: Please ensure that the same levels are typed identically with cases matching."/>
+<param name="contrast" type="text" label="Contrasts of interest" help="Eg. Mut-WT,KD-Control"/>
+<!-- Low-expression filter. When disabled the command passes 0 for both thresholds. -->
+<conditional name="filterCPM">
+<param name="filterLowCPM" type="select" label="Filter Low CPM?"
+help="Treat genes with very low expression as unexpressed and filter out to speed up computation.">
+<option value="yes" selected="True">Yes</option>
+<option value="no">No</option>
+</param>
+<when value="yes">
+<param name="cpmReq" type="float" value="0.5" min="0" label="Minimum CPM"/>
+<param name="sampleReq" type="integer" value="1" min="0" label="Minimum Samples"
+help="Filter out all the genes that do not meet the minimum CPM in at least this many samples."/>
+</when>
+<when value="no"/>
+</conditional>
+<!-- Boolean flags are passed to the script as the strings yes / no. -->
+<param name="weightOption" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Apply sample weights?"
+help="Apply weights if outliers are present.">
+</param>
+<param name="normalisationOption" type="select" label="Normalisation Method">
+<option value="TMM">TMM</option>
+<option value="RLE">RLE</option>
+<option value="upperquartile">Upperquartile</option>
+<option value="none">None (Don't normalise)</option>
+</param>
+<param name="rdaOption" type="boolean" truevalue="yes" falsevalue="no" checked="false"
+label="Output RData?"
+help="Output all the data used by R to construct the plots and tables, can be loaded into R. A link to the RData file will be provided in the HTML report.">
+</param>
+<!-- Advanced testing options. When disabled the command falls back to "BH", 0.05 and 0. -->
+<conditional name="testOpt">
+<param name="wantOpt" type="select" label="Use Advanced Testing Options?"
+help="Enable choices for p-value adjustment method, p-value threshold and log2-fold-change threshold.">
+<option value="no" selected="True">No</option>
+<option value="yes">Yes</option>
+</param>
+<when value="yes">
+<param name="pAdjust" type="select" label="P-Value Adjustment Method">
+<option value="BH">Benjamini and Hochberg (1995)</option>
+<option value="BY">Benjamini and Yekutieli (2001)</option>
+<option value="holm">Holm (1979)</option>
+<option value="none">None</option>
+</param>
+<param name="pVal" type="float" value="0.05" min="0" max="1"
+label="Adjusted Threshold"
+help="Genes below this threshold are considered significant and highlighted in the MA plot. If either BH(1995) or BY(2001) were selected then this value is a false-discovery-rate control. If Holm(1979) was selected then this is an adjusted p-value for family-wise error rate."/>
+<param name="lfc" type="float" value="0" min="0"
+label="Minimum log2-fold-change Required"
+help="Genes above this threshold and below the p-value threshold are considered significant and highlighted in the MA plot."/>
+</when>
+<when value="no"/>
+</conditional>
+</inputs>
+<outputs> <!-- An HTML report plus a list collection built from every *.tsv file found in output_dir; each element is named after its file without the .tsv extension. -->
+<data format="html" name="outReport" label="${tool.name} on ${on_string}: Report" />
+<collection name="voom_results" type="list" label="${tool.name} on ${on_string}: DE genes">
+<discover_datasets pattern="(?P&lt;name&gt;.+)\.tsv$" format="tabular" directory="output_dir" visible="false" />
+</collection>
+</outputs>
+<tests> <!-- Three scenarios on the same count matrix: defaults with two contrasts, gene annotations enabled, and RData output enabled. -->
+<test> <!-- Two contrasts, no annotation: expects two result tables and a report that does not contain the text RData. -->
+<param name="counts" value="matrix.txt" />
+<param name="factName" value="Genotype" />
+<param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" />
+<param name="contrast" value="Mut-WT,WT-Mut" />
+<param name="normalisationOption" value="TMM" />
+<output_collection name="voom_results" count="2">
+<element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT.tsv" />
+<element name="limma-voom_WT-Mut" ftype="tabular" file="limma-voom_WT-Mut.tsv" />
+</output_collection>
+<output name="outReport" >
+<assert_contents>
+<has_text text="Limma-voom Analysis Output" />
+<not_has_text text="RData" />
+</assert_contents>
+</output>
+</test>
+<test> <!-- Gene annotations supplied: the Mut-WT table is compared against the annotated expected file. -->
+<param name="annoOpt" value="yes" />
+<param name="geneanno" value="anno.txt" />
+<param name="counts" value="matrix.txt" />
+<param name="factName" value="Genotype" />
+<param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" />
+<param name="contrast" value="Mut-WT" />
+<param name="normalisationOption" value="TMM" />
+<output_collection name="voom_results" >
+<element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WTanno.tsv" />
+</output_collection>
+</test>
+<test> <!-- RData output enabled: the report must contain the text RData. -->
+<param name="rdaOption" value="yes" />
+<param name="counts" value="matrix.txt" />
+<param name="factName" value="Genotype" />
+<param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" />
+<param name="contrast" value="Mut-WT" />
+<param name="normalisationOption" value="TMM" />
+<output name="outReport" >
+<assert_contents>
+<has_text text="RData" />
+</assert_contents>
+</output>
+</test>
+</tests>
+<help>
+<![CDATA[
+.. class:: infomark
+**What it does**
+Given a matrix of counts (e.g. from featureCounts) and optional information about the genes, this tool
+produces plots and tables useful in the analysis of differential gene
+expression.
+-----
+**Inputs**
+**Counts Data:**
+A matrix of counts, with rows corresponding to genes
+and columns corresponding to counts for the samples.
+Values must be tab separated, with the first row containing the sample/column
+labels and the first column containing the row/gene labels.
+Example:
+========== ======= ======= ======= ======== ======== ========
+**GeneID** **WT1** **WT2** **WT3** **Mut1** **Mut2** **Mut3**
+---------- ------- ------- ------- -------- -------- --------
+11287      1699    1528    1601    1463     1441     1495
+11298      1905    1744    1834    1345     1291     1346
+11302      6       8       7       5        6        5
+11303      2099    1974    2100    1574     1519     1654
+11304      356     312     337     361      397      346
+11305      2528    2438    2493    1762     1942     2027
+========== ======= ======= ======= ======== ======== ========
+**Gene Annotations:**
+Optional input for gene annotations, this can contain more
+information about the genes than just an ID number. The annotations will
+be available in the differential expression results table.
+Example:
+==========  ==========  ===================================================
+**GeneID**  **Symbol**  **GeneName**
+----------  ----------  ---------------------------------------------------
+11287       Pzp         pregnancy zone protein
+11298       Aanat       arylalkylamine N-acetyltransferase
+11302       Aatk        apoptosis-associated tyrosine kinase
+11303       Abca1       ATP-binding cassette, sub-family A (ABC1), member 1
+11304       Abca4       ATP-binding cassette, sub-family A (ABC1), member 4
+11305       Abca2       ATP-binding cassette, sub-family A (ABC1), member 2
+==========  ==========  ===================================================
+**Factor Name:**
+The name of the factor being investigated. This tool currently assumes
+that only one factor is of interest.
+**Factor Values:**
+The levels of the factor of interest, this must be entered in the same
+order as the samples to which the levels correspond as listed in the
+columns of the counts matrix.
+The values should be separated by commas, and spaces must not be used.
+**Contrasts of Interest:**
+The contrasts you wish to make between levels.
+A common contrast would be a simple difference between two levels: "Mut-WT"
+represents the difference between the mutant and wild type genotypes.
+The values should be separated by commas and spaces must not be used.
+**Filter Low CPM:**
+Option to ignore the genes that do not show significant levels of
+expression, this filtering is dependent on two criteria:
+* **Minimum CPM:** This is the counts per million that a gene must have in at
+least some specified number of samples.
+* **Minimum Samples:** This is the number of samples in which the CPM
+requirement must be met in order for that gene to be acknowledged.
+Only genes that exhibit a CPM greater than the required amount in at least the
+number of samples specified will be used for analysis. Care should be taken to
+ensure that the sample requirement is appropriate. In the case of an experiment
+with two experimental groups each with two members, if there is a change from
+insignificant cpm to significant cpm but the sample requirement is set to 3,
+then this will cause that gene to fail the criteria. When in doubt simply do not
+filter.
+**Normalisation Method:**
+Option for using different methods to rescale the raw library
+size. For more information, see the calcNormFactors section in the edgeR_ user's
+manual.
+**Apply Sample Weights:**
+Option to downweight outlier samples such that their information is still
+used in the statistical analysis but their impact is reduced. Use this
+whenever significant outliers are present. The MDS plotting tool in this package
+is useful for identifying outliers. For more information on this option see Liu et al. (2015).
+**Use Advanced Testing Options?:**
+By default error rate for multiple testing is controlled using Benjamini and
+Hochberg's false discovery rate control at a threshold value of 0.05. However
+there are options to change this to custom values.
+* **P-Value Adjustment Method:**
+Change the multiple testing control method, the options are BH(1995) and
+BY(2001) which are both false discovery rate controls. There is also
+Holm(1979) which is a method for family-wise error rate control.
+* **Adjusted Threshold:**
+Set the threshold for the resulting value of the multiple testing control
+method. Only observations whose statistic falls below this value are
+considered significant, thus highlighted in the MA plot.
+* **Minimum log2-fold-change Required:**
+In addition to meeting the requirement for the adjusted statistic for
+multiple testing, the observation must have an absolute log2-fold-change
+greater than this threshold to be considered significant, thus highlighted
+in the MA plot.
+-----
+**Citations:**
+.. class:: infomark
+limma
+Please cite the paper below for the limma software itself.  Please also try
+to cite the appropriate methodology articles that describe the statistical
+methods implemented in limma, depending on which limma functions you are
+using.  The methodology articles are listed in Section 2.1 of the limma
+User's Guide.
+* Smyth GK (2005). Limma: linear models for microarray data. In:
+'Bioinformatics and Computational Biology Solutions using R and
+Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry,
+W. Huber (eds), Springer, New York, pages 397-420.
+* Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:
+precision weights unlock linear model analysis tools for
+RNA-seq read counts. Genome Biology 15, R29.
+* Liu R, Holik AZ, Su S, Jansz N, Chen K, Leong HS, Blewitt ME, Asselin-Labat ML, Smyth GK, Ritchie ME (2015). Why weight? Modelling sample and observational level variability improves power in RNA-seq analyses. Nucleic Acids Research, 43(15), e97.
+* Ritchie, M. E., Diyagama, D., Neilson, J., van Laar, R., Dobrovic,
+A., Holloway, A., and Smyth, G. K. (2006). Empirical array quality weights
+for microarray data. BMC Bioinformatics 7, Article 261.
+.. class:: infomark
+edgeR
+Please cite the first paper for the software itself and the other papers for
+the various original statistical methods implemented in edgeR.  See
+Section 1.2 in the User's Guide for more detail.
+* Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor
+package for differential expression analysis of digital gene expression
+data. Bioinformatics 26, 139-140
+* Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing
+differences in tag abundance. Bioinformatics 23, 2881-2887
+* Robinson MD and Smyth GK (2008). Small-sample estimation of negative
+binomial dispersion, with applications to SAGE data.
+Biostatistics, 9, 321-332
+* McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis
+of multifactor RNA-Seq experiments with respect to biological variation.
+Nucleic Acids Research 40, 4288-4297
+Please report problems or suggestions to: su.s@wehi.edu.au
+.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
+.. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
+]]>
+</help>
+<citations>
+<!-- voom method paper (Law et al. 2014, Genome Biology 15:R29), which the help text asks users to cite. -->
+<citation type="doi">10.1186/gb-2014-15-2-r29</citation>
+<!-- Sample weights paper (Liu et al. 2015, Nucleic Acids Research 43(15):e97). -->
+<citation type="doi">10.1093/nar/gkv412</citation>
+</citations>
+</tool>

Mercurial > repos > iuc > limma_voom

comparison limma_voom.xml @ 0:bdebdea5f6a7 draft