Mercurial > repos > iuc > deseq2
changeset 18:3bf1b3ec1ddf draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 448dccb0c02aba00d8301247b0f0f406ab3d4fa2
author | iuc |
---|---|
date | Fri, 16 Nov 2018 14:47:19 -0500 |
parents | d9e5cadc7f0b |
children | c56e0689e46e |
files | deseq2.R deseq2.xml |
diffstat | 2 files changed, 84 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/deseq2.R Wed Sep 05 15:54:03 2018 -0400 +++ b/deseq2.R Fri Nov 16 14:47:19 2018 -0500 @@ -49,6 +49,8 @@ "batch_factors", "", 1, "character", "outfile", "o", 1, "character", "countsfile", "n", 1, "character", + "rlogfile", "r", 1, "character", + "vstfile", "v", 1, "character", "header", "H", 0, "logical", "factors", "f", 1, "character", "files_to_labels", "l", 1, "character", @@ -56,6 +58,7 @@ "tximport", "i", 0, "logical", "txtype", "y", 1, "character", "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) + "esf", "e", 1, "character", "fit_type", "t", 1, "integer", "many_contrasts", "m", 0, "logical", "outlier_replace_off" , "a", 0, "logical", @@ -188,7 +191,10 @@ } dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula=designFormula, tximport=opt$tximport, txtype=opt$txtype, tx2gene=opt$tx2gene) - +# estimate size factors for the chosen method +if(!is.null(opt$esf)){ + dds <- estimateSizeFactors(dds, type=opt$esf) +} apply_batch_factors <- function (dds, batch_factors) { rownames(batch_factors) <- batch_factors$identifier batch_factors <- subset(batch_factors, select = -c(identifier, condition)) @@ -284,6 +290,19 @@ write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE) } +if (!is.null(opt$rlogfile)) { + rLogNormalized <-rlogTransformation(dds) + rLogNormalizedMat <- assay(rLogNormalized) + write.table(rLogNormalizedMat, file=opt$rlogfile, sep="\t", col.names=NA, quote=FALSE) +} + +if (!is.null(opt$vstfile)) { + vstNormalized<-varianceStabilizingTransformation(dds) + vstNormalizedMat <- assay(vstNormalized) + write.table(vstNormalizedMat, file=opt$vstfile, sep="\t", col.names=NA, quote=FALSE) +} + + if (is.null(opt$many_contrasts)) { # only contrast the first and second level of the primary factor ref <- allLevels[1]
--- a/deseq2.xml Wed Sep 05 15:54:03 2018 -0400 +++ b/deseq2.xml Fri Nov 16 14:47:19 2018 -0500 @@ -1,4 +1,4 @@ -<tool id="deseq2" name="DESeq2" version="2.11.40.2"> +<tool id="deseq2" name="DESeq2" version="2.11.40.3"> <description>Determines differentially expressed features from count tables</description> <requirements> <requirement type="package" version="1.18.1">bioconductor-deseq2</requirement> @@ -43,6 +43,12 @@ #if $normCounts: -n '$counts_out' #end if + #if $normRLog: + -r '$rlog_out' + #end if + #if $normVST: + -v '$vst_out' + #end if #set $filename_to_element_identifiers = {} #set $temp_factor_names = list() #for $factor in $rep_factorName: @@ -63,8 +69,11 @@ -f '#echo json.dumps(temp_factor_names)#' -l '#echo json.dumps(filename_to_element_identifiers)#' + #if $esf: + -e $esf + #end if -t $fit_type - #if $batch_factors + #if $batch_factors: --batch_factors '$batch_factors' #end if #if $outlier_replace_off: @@ -142,9 +151,26 @@ help="output an additional PDF files" /> <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output normalized counts table" /> + <param name="normRLog" type="boolean" truevalue="1" falsevalue="0" checked="false" + label="Output rLog normalized table" /> + <param name="normVST" type="boolean" truevalue="1" falsevalue="0" checked="false" + label="Output VST normalized table" /> <param name="many_contrasts" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)" help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> + <param name="esf" type="select" label="(Optional) Method for estimateSizeFactors" + help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq. + The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples. + 'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem + for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with + some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of + use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion + with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model."> + <option value="" selected="true">No Selection (use default)</option> + <option value="ratio">ratio</option> + <option value="poscounts">poscounts</option> + <option value="iterate">iterate</option> + </param> <param name="fit_type" type="select" label="Fit type"> <option value="1" selected="true">parametric</option> <option value="2">local</option> @@ -180,10 +206,16 @@ <data format="tabular" name="counts_out" label="Normalized counts file on ${on_string}"> <filter>normCounts == True</filter> </data> + <data format="tabular" name="rlog_out" label="rLog-Normalized counts file on ${on_string}"> + <filter>normRLog == True</filter> + </data> + <data format="tabular" name="vst_out" label="VST-Normalized counts file on ${on_string}"> + <filter>normVST == True</filter> + </data> </outputs> <tests> <!--Ensure counts files with header works --> - <test expect_num_outputs="2"> + <test expect_num_outputs="4"> <repeat name="rep_factorName"> <param name="factorName" value="Treatment"/> <repeat name="rep_factorLevel"> @@ -197,12 +229,26 @@ </repeat> <param name="pdf" value="False"/> <param name="normCounts" value="True"/> + <param name="normRLog" value="True"/> + <param name="normVST" value="True"/> <output name="counts_out"> <assert_contents> <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" /> <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" /> </assert_contents> </output> + <output name="rlog_out"> + <assert_contents> + <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" /> + <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" /> + </assert_contents> + </output> + <output name="vst_out"> + <assert_contents> + <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" /> + <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*" /> + </assert_contents> + </output> <output name="deseq_out" > <assert_contents> <has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" /> @@ -232,7 +278,7 @@ </output> </test> <!--Ensure counts files without header works --> - <test expect_num_outputs="2"> + <test expect_num_outputs="4"> <repeat name="rep_factorName"> <param name="factorName" value="Treatment"/> <repeat name="rep_factorLevel"> @@ -247,12 +293,26 @@ <param name="header" value="False"/> <param name="pdf" value="False"/> <param name="normCounts" value="True"/> + <param name="normRLog" value="True"/> + <param name="normVST" value="True"/> <output name="counts_out"> <assert_contents> <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" /> <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" /> </assert_contents> </output> + <output name="rlog_out"> + <assert_contents> + <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" /> + <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" /> + </assert_contents> + </output> + <output name="vst_out"> + <assert_contents> + <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" /> + <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*" /> + </assert_contents> + </output> <output name="deseq_out" > <assert_contents> <has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" />