Mercurial > repos > iuc > deseq2
changeset 29:cd9874cb9019 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit cbeb1c4c436be04323bd9a809a6393d00b168d07"
author | iuc |
---|---|
date | Mon, 29 Nov 2021 18:16:48 +0000 |
parents | 7ff33c2d525b |
children | 8fe98f7094de |
files | deseq2.R deseq2.xml deseq2_macros.xml |
diffstat | 3 files changed, 96 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/deseq2.R Fri Nov 19 21:03:55 2021 +0000
+++ b/deseq2.R Mon Nov 29 18:16:48 2021 +0000
@@ -52,6 +52,7 @@
   "batch_factors", "w", 1, "character",
   "outfile", "o", 1, "character",
   "countsfile", "n", 1, "character",
+  "sizefactorsfile", "F", 1, "character",
   "rlogfile", "r", 1, "character",
   "vstfile", "v", 1, "character",
   "header", "H", 0, "logical",
@@ -217,6 +218,30 @@
 if (!is.null(opt$esf)) {
   dds <- estimateSizeFactors(dds, type = opt$esf)
 }
+
+# estimate per-sample size factors and write them to opt$sizefactorsfile (-F)
+# - https://support.bioconductor.org/p/97676/
+if (!is.null(opt$sizefactorsfile)) {
+  nm <- assays(dds)[["avgTxLength"]]
+  if (!is.null(nm)) {
+    ## Recommended: takes into account tximport data
+    cat("\nsize factors for samples: taking tximport data into account\n")
+    size_factors <- estimateSizeFactorsForMatrix(counts(dds) / nm)
+  } else {
+    norm_factors <- normalizationFactors(dds)
+    if (!is.null(norm_factors)) {
+      ## In practice, gives same results as above.
+      cat("\nsize factors for samples: no tximport data, using derived normalization factors\n")
+      size_factors <- estimateSizeFactorsForMatrix(norm_factors)
+    } else {
+      ## If we have no other information, estimate from raw.
+      cat("\nsize factors for samples: no tximport data, no normalization factors, estimating from raw data\n")
+      size_factors <- estimateSizeFactorsForMatrix(counts(dds))
+    }
+  }
+  write.table(size_factors, file = opt$sizefactorsfile, sep = "\t", col.names = FALSE, quote = FALSE)
+}
+
 apply_batch_factors <- function(dds, batch_factors) {
   rownames(batch_factors) <- batch_factors$identifier
   batch_factors <- subset(batch_factors, select = -c(identifier, condition))
--- a/deseq2.xml Fri Nov 19 21:03:55 2021 +0000
+++ b/deseq2.xml Mon Nov 29 18:16:48 2021 +0000
@@ -47,6 +47,9 @@
         #if 'normCounts' in $output_options.output_selector:
             -n '$counts_out'
         #end if
+        #if 'sizefactors' in $output_options.output_selector:
+            -F '$sizefactors_out'
+        #end if
         #if 'normRLog' in $output_options.output_selector:
             -r '$rlog_out'
         #end if
@@ -195,6 +198,7 @@
         <section name="output_options" title="Output options">
             <param name="output_selector" type="select" multiple="True" optional="true" display="checkboxes" label="Output selector">
                 <option value="pdf" selected="True">Generate plots for visualizing the analysis results</option>
+                <option value="sizefactors" >Output sample size factors</option>
                 <option value="normCounts">Output normalised counts</option>
                 <option value="normVST">Output VST normalized table</option>
                 <option value="normRLog">Output rLog normalized table</option>
@@ -217,6 +221,9 @@
         <data name="plots" format="pdf" label="DESeq2 plots on ${on_string}">
             <filter>output_options['output_selector'] and 'pdf' in output_options['output_selector']</filter>
         </data>
+        <data name="sizefactors_out" format="tabular" label="Size Factors on ${on_string}">
+            <filter>output_options['output_selector'] and 'sizefactors' in output_options['output_selector']</filter>
+        </data>
         <data name="counts_out" format="tabular" label="Normalized counts file on ${on_string}">
             <filter>output_options['output_selector'] and 'normCounts' in output_options['output_selector']</filter>
         </data>
@@ -513,6 +520,65 @@
                 </assert_contents>
             </output>
         </test>
+        <!-- Same inputs as the alpha_ma test above, but selecting only the size factors output option -->
+        <test expect_num_outputs="2">
+            <repeat name="rep_factorName">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Treated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
+                </repeat>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
+                </repeat>
+            </repeat>
+            <section name="output_options">
+                <param name="output_selector" value="sizefactors"/>
+                <param name="alpha_ma" value="0.05"/>
+            </section>
+            <param name="tximport_selector" value="tximport"/>
+            <param name="txtype" value="sailfish"/>
+            <param name="mapping_format_selector" value="gtf"/>
+            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
+            <output name="sizefactors_out">
+                <assert_contents>
+                    <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+" />
+                    <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Same as the size factors test above, but with a non-default size factor estimator (esf = poscounts) -->
+        <test expect_num_outputs="2">
+            <repeat name="rep_factorName">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Treated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
+                </repeat>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
+                </repeat>
+            </repeat>
+            <section name="advanced_options">
+                <param name="esf" value="poscounts" />
+            </section>
+            <section name="output_options">
+                <param name="output_selector" value="sizefactors"/>
+                <param name="alpha_ma" value="0.05"/>
+            </section>
+            <param name="tximport_selector" value="tximport"/>
+            <param name="txtype" value="sailfish"/>
+            <param name="mapping_format_selector" value="gtf"/>
+            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
+            <output name="sizefactors_out" >
+                <assert_contents>
+                    <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+" />
+                    <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 .. class:: infomark
@@ -608,6 +674,10 @@
        which controls false discovery rate (FDR)
 ====== ==========================================================
 
+By selecting ``Output sample size factors`` in the "Output options"
+selection box, the size factors used to normalize the samples can also
+be output as a tabular file.
+
 .. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
 .. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html
 ]]></help>
--- a/deseq2_macros.xml Fri Nov 19 21:03:55 2021 +0000
+++ b/deseq2_macros.xml Mon Nov 29 18:16:48 2021 +0000
@@ -33,7 +33,7 @@
         </requirements>
     </xml>
     <token name="@TOOL_VERSION@">2.11.40.7</token>
-    <token name="@SUFFIX_VERSION@">0</token>
+    <token name="@SUFFIX_VERSION@">1</token>
     <xml name="edam_ontology">
         <edam_topics>
             <edam_topic>topic_3308</edam_topic>