Mercurial > repos > iuc > dexseq
changeset 7:62adf13b86ea draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dexseq commit 06f2c57b523aab7c997d82e1345fd23f178de598"
author | iuc |
---|---|
date | Fri, 19 Mar 2021 09:45:03 +0000 |
parents | 9fd8b69e6e68 |
children | 2872c633f07e |
files | dexseq.R dexseq_helper.py plotdexseq.R |
diffstat | 3 files changed, 100 insertions(+), 96 deletions(-) [+] |
line wrap: on
line diff
--- a/dexseq.R Tue Feb 26 10:50:15 2019 -0500 +++ b/dexseq.R Fri Mar 19 09:45:03 2021 +0000 @@ -1,12 +1,14 @@ ## Setup R error handling to go to stderr -options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +options(show.error.messages = F, error = function() { + cat(geterrmessage(), file = stderr()); q("no", 1, F) +}) # we need that to not crash galaxy with an UTF8 error on German LC settings. Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") suppressPackageStartupMessages({ library("DEXSeq") - library('getopt') - library('rjson') + library("getopt") + library("rjson") }) @@ -15,107 +17,107 @@ #get options, using the spec as defined by the enclosed list. #we read the options from the default: commandArgs(TRUE). -spec = matrix(c( - 'verbose', 'v', 2, "integer", - 'help', 'h', 0, "logical", - 'gtf', 'a', 1, "character", - 'outfile', 'o', 1, "character", - 'reportdir', 'r', 1, "character", - 'rds', 'd', 1, "character", - 'factors', 'f', 1, "character", - 'threads', 'p', 1, "integer", - 'fdr', 'c', 1, "double" -), byrow=TRUE, ncol=4); -opt = getopt(spec); +spec <- matrix(c( + "verbose", "v", 2, "integer", + "help", "h", 0, "logical", + "gtf", "a", 1, "character", + "outfile", "o", 1, "character", + "reportdir", "r", 1, "character", + "rds", "d", 1, "character", + "factors", "f", 1, "character", + "threads", "p", 1, "integer", + "fdr", "c", 1, "double" +), byrow = TRUE, ncol = 4); +opt <- getopt(spec); # if help was asked for print a friendly message # and exit with a non-zero error code -if ( !is.null(opt$help) ) { - cat(getopt(spec, usage=TRUE)); - q(status=1); +if (!is.null(opt$help)) { + cat(getopt(spec, usage = TRUE)); + q(status = 1); } -trim <- function (x) gsub("^\\s+|\\s+$", "", x) +trim <- function(x) gsub("^\\s+|\\s+$", "", x) opt$samples <- trim(opt$samples) opt$factors <- trim(opt$factors) parser <- newJSONParser() -parser$addData( opt$factors ) -factorsList <- parser$getObject() +parser$addData(opt$factors) +factors_list <- parser$getObject() -sampleTable<-data.frame() -countFiles<-c() -factorNames<-c() -primaryFactor<-"" -for(factor in factorsList){ - factorName<-factor[[1]] - factorNames<-append(factorNames, paste(factorName,"exon",sep=":")) - factorValuesMapList<-factor[[2]] - c = length(factorValuesMapList) - for (factorValuesMap in factorValuesMapList){ - for(files in factorValuesMap){ - for(file in files){ - if(primaryFactor == "") { - countFiles<-append(countFiles,file) +sample_table <- data.frame() +count_files <- c() +factor_names <- c() +primary_factor <- "" +for (factor in factors_list) { + factor_name <- factor[[1]] + factor_names <- append(factor_names, paste(factor_name, "exon", sep = ":")) + factor_values_map_list <- factor[[2]] + c <- length(factor_values_map_list) + for (factorValuesMap in factor_values_map_list) { + for (files in factorValuesMap) { + for (file in files) { + if (primary_factor == "") { + count_files <- append(count_files, file) } - sampleTable[basename(file),factorName]<-paste(c,names(factorValuesMap),sep="_") + sample_table[basename(file), factor_name] <- paste(c, names(factorValuesMap), sep = "_") } } - c = c-1 + c <- c - 1 } - if(primaryFactor == ""){ - primaryFactor <- factorName + if (primary_factor == "") { + primary_factor <- factor_name } } -factorNames<-append(factorNames,"exon") -factorNames<-append(factorNames,"sample") -factorNames<-rev(factorNames) -formulaFullModel <- as.formula(paste("", paste(factorNames, collapse=" + "), sep=" ~ ")) -factorNames <- head(factorNames,-1) -formulaReducedModel <- as.formula(paste("", paste(factorNames, collapse=" + "), sep=" ~ ")) +factor_names <- append(factor_names, "exon") +factor_names <- append(factor_names, "sample") +factor_names <- rev(factor_names) +formula_full_model <- as.formula(paste("", paste(factor_names, collapse = " + "), sep = " ~ ")) +factor_names <- head(factor_names, -1) +formula_reduced_model <- as.formula(paste("", paste(factor_names, collapse = " + "), sep = " ~ ")) -sampleTable -formulaFullModel -formulaReducedModel -primaryFactor -countFiles +sample_table +formula_full_model +formula_reduced_model +primary_factor +count_files opt$reportdir opt$threads getwd() -dxd = DEXSeqDataSetFromHTSeq(countFiles, sampleData=sampleTable, design= formulaFullModel, flattenedfile=opt$gtf) +dxd <- DEXSeqDataSetFromHTSeq(count_files, sampleData = sample_table, design = formula_full_model, flattenedfile = opt$gtf) colData(dxd) dxd <- estimateSizeFactors(dxd) print("Estimated size factors") sizeFactors(dxd) -BPPARAM=MulticoreParam(workers=opt$threads) -dxd <- estimateDispersions(dxd, formula=formulaFullModel, BPPARAM=BPPARAM) +bpparam <- MulticoreParam(workers = opt$threads) +dxd <- estimateDispersions(dxd, formula = formula_full_model, BPPARAM = bpparam) print("Estimated dispersions") -dxd <- testForDEU(dxd, reducedModel=formulaReducedModel, fullModel=formulaFullModel, BPPARAM=BPPARAM) +dxd <- testForDEU(dxd, reducedModel = formula_reduced_model, fullModel = formula_full_model, BPPARAM = bpparam) print("tested for DEU") -dxd <- estimateExonFoldChanges(dxd, fitExpToVar=primaryFactor, BPPARAM=BPPARAM) +dxd <- estimateExonFoldChanges(dxd, fitExpToVar = primary_factor, BPPARAM = bpparam) print("Estimated fold changes") res <- DEXSeqResults(dxd) print("Results") table(res$padj <= opt$fdr) -resSorted <- res[order(res$padj),] -head(resSorted) +res_sorted <- res[order(res$padj), ] +head(res_sorted) -export_table <- as.data.frame(resSorted) +export_table <- as.data.frame(res_sorted) last_column <- ncol(export_table) -for(i in 1:nrow(export_table)) { - export_table[i,last_column] <- paste(export_table[i,last_column][[1]],collapse=", ") +for (i in seq_len(nrow(export_table))) { + export_table[i, last_column] <- paste(export_table[i, last_column][[1]], collapse = ", ") } -write.table(export_table, file = opt$outfile, sep="\t", quote = FALSE, col.names = FALSE) +write.table(export_table, file = opt$outfile, sep = "\t", quote = FALSE, col.names = FALSE) print("Written Results") -if ( !is.null(opt$rds) ) { - saveRDS(res, file="DEXSeqResults.rds") +if (!is.null(opt$rds)) { + saveRDS(res, file = "DEXSeqResults.rds") } -if ( !is.null(opt$reportdir) ) { - DEXSeqHTML(res, fitExpToVar=primaryFactor, path=opt$reportdir, FDR=opt$fdr, color=c("#B7FEA0", "#FF8F43", "#637EE9", "#FF0000", "#F1E7A1", "#C3EEE7","#CEAEFF", "#EDC3C5", "#AAA8AA")) +if (!is.null(opt$reportdir)) { + DEXSeqHTML(res, fitExpToVar = primary_factor, path = opt$reportdir, FDR = opt$fdr, color = c("#B7FEA0", "#FF8F43", "#637EE9", "#FF0000", "#F1E7A1", "#C3EEE7", "#CEAEFF", "#EDC3C5", "#AAA8AA")) } sessionInfo()
--- a/dexseq_helper.py Tue Feb 26 10:50:15 2019 -0500 +++ b/dexseq_helper.py Fri Mar 19 09:45:03 2021 +0000 @@ -1,4 +1,4 @@ -def validate_input( trans, error_map, param_values, page_param_map ): +def validate_input(trans, error_map, param_values, page_param_map): """ Validates the user input, before execution. """ @@ -13,7 +13,7 @@ if fn in factor_name_list: factor_duplication = True break - factor_name_list.append( fn ) + factor_name_list.append(fn) level_name_list = list() @@ -22,15 +22,15 @@ fl = factor[level] if fl in level_name_list: level_duplication = True - level_name_list.append( fl ) + level_name_list.append(fl) if level_duplication: - error_map['rep_factorName'] = [ dict() for t in factors ] - for i in range( len( factors ) ): - error_map['rep_factorName'][i]['FactorLevel1'] = [ {'factorLevel': 'Factor levels for each factor need to be unique'} for t in [factor['factorLevel1'], factor['factorLevel2']] ] + error_map['rep_factorName'] = [dict() for t in factors] + for i in range(len(factors)): + error_map['rep_factorName'][i]['FactorLevel1'] = [{'factorLevel': 'Factor levels for each factor need to be unique'} for t in [factor['factorLevel1'], factor['factorLevel2']]] break if factor_duplication: - error_map['rep_factorName'] = [ dict() for t in factors ] - for i in range( len( factors ) ): + error_map['rep_factorName'] = [dict() for t in factors] + for i in range(len(factors)): error_map['rep_factorName'][i]['factorName'] = 'Factor names need to be unique.'
--- a/plotdexseq.R Tue Feb 26 10:50:15 2019 -0500 +++ b/plotdexseq.R Fri Mar 19 09:45:03 2021 +0000 @@ -1,11 +1,13 @@ ## Setup R error handling to go to stderr -options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +options(show.error.messages = F, error = function() { + cat(geterrmessage(), file = stderr()); q("no", 1, F) +}) # we need that to not crash galaxy with an UTF8 error on German LC settings. Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") suppressPackageStartupMessages({ library("DEXSeq") - library('getopt') + library("getopt") }) options(stringAsfactors = FALSE, useFancyQuotes = FALSE) @@ -13,35 +15,35 @@ #get options, using the spec as defined by the enclosed list. #we read the options from the default: commandArgs(TRUE). -spec = matrix(c( - 'rdata', 'r', 1, "character", - 'primaryfactor', 'p', 1, "character", - 'geneid', 'g', 1, "character", - 'genefile', 'f', 1, "character", - 'fdr', 'c', 1, "double", - 'transcripts', 't', 1, "logical", - 'names', 'a', 1, "logical", - 'normcounts', 'n', 1, "logical", - 'splicing', 's', 1, "logical" -), byrow=TRUE, ncol=4); -opt = getopt(spec); +spec <- matrix(c( + "rdata", "r", 1, "character", + "primaryfactor", "p", 1, "character", + "geneid", "g", 1, "character", + "genefile", "f", 1, "character", + "fdr", "c", 1, "double", + "transcripts", "t", 1, "logical", + "names", "a", 1, "logical", + "normcounts", "n", 1, "logical", + "splicing", "s", 1, "logical" +), byrow = TRUE, ncol = 4); +opt <- getopt(spec); res <- readRDS(opt$rdata) if (!is.null(opt$genefile)) { - genes <- read.delim(opt$genefile, header=FALSE) - genes <- genes[, 1] + genes <- read.delim(opt$genefile, header = FALSE) + genes <- genes[, 1] } else { - genes <- opt$geneid + genes <- opt$geneid } pdf("plot.pdf") -for (i in genes){ - plotDEXSeq(res, i, FDR=opt$fdr, fitExpToVar=opt$primaryfactor, - norCounts=opt$normcounts, expression=TRUE, splicing=opt$splicing, - displayTranscripts=opt$transcripts, names=opt$names, legend=TRUE, - color=NULL, color.samples=NULL, transcriptDb=NULL) +for (i in genes) { + plotDEXSeq(res, i, FDR = opt$fdr, fitExpToVar = opt$primaryfactor, + norCounts = opt$normcounts, expression = TRUE, splicing = opt$splicing, + displayTranscripts = opt$transcripts, names = opt$names, legend = TRUE, + color = NULL, color.samples = NULL, transcriptDb = NULL) } dev.off() -sessionInfo() \ No newline at end of file +sessionInfo()