# HG changeset patch # User sblanck # Date 1530017685 14400 # Node ID 3ce32282f6a404f741a43c79317e546ce24f2b94 # Parent ca46ad51fe5a76add555db41606d04adda8b8500 planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 228bd64b01f184d5d8ebc9f65fe0add2d45ed4fe diff -r ca46ad51fe5a -r 3ce32282f6a4 ImportDataFromMatrix.R --- a/ImportDataFromMatrix.R Thu Mar 22 11:16:24 2018 -0400 +++ b/ImportDataFromMatrix.R Tue Jun 26 08:54:45 2018 -0400 @@ -10,7 +10,6 @@ ##### Read options option_list=list( make_option("--input",type="character",default="NULL",help="rdata object containing eset object"), - make_option("--conditions",type="character",default=NULL,help="Text file summarizing conditions of the experiment (required)"), make_option("--normalization",type="character",default=NULL,help="log2 transformation"), make_option("--annotations",type="character",default="NULL",help="rdata object containing eset object"), make_option("--rdataoutput",type="character",default="NULL",help="output rdata object containing eset object"), @@ -30,18 +29,10 @@ stop("input required.", call.=FALSE) } -if(is.null(opt$conditions)){ - print_help(opt_parser) - stop("conditions input required.", call.=FALSE) -} #loading libraries -suppressPackageStartupMessages(require(GEOquery)) - suppressPackageStartupMessages(require(Biobase)) -suppressPackageStartupMessages(require(GEOquery)) -suppressPackageStartupMessages(require(GEOmetadb)) suppressPackageStartupMessages(require(limma)) suppressPackageStartupMessages(require(jsonlite)) suppressPackageStartupMessages(require(affy)) @@ -60,19 +51,16 @@ dir.create(result.path, showWarnings = TRUE, recursive = FALSE) data=as.matrix(read.table(file = dataFile,row.names=1,header=TRUE)) -conditions=read.table(file=conditionsFile,sep = "\t",row.names=1) +#conditions=read.table(file=conditionsFile,sep = "\t",row.names=1) htmlfile=readChar(result.template, file.info(result.template)$size) -colnames(conditions)=c("source_name_ch1","description") -phenodata<-new("AnnotatedDataFrame",data=conditions) +#colnames(conditions)=c("source_name_ch1","description") +#phenodata<-new("AnnotatedDataFrame",data=conditions) -head(data) -conditions - -eset=ExpressionSet(assayData=data,phenoData=phenodata,annotation=annotation) +eset=ExpressionSet(assayData=data,annotation=annotation) if (normalization == "quantile") { - eset <- normalize.ExpressionSet.quantiles(eset, transfn="log2") + eset <- normalize.ExpressionSet.quantiles(eset, transfn="log") } else if (normalization == "log2") { exprs(eset) = log2(exprs(eset)) } @@ -111,4 +99,4 @@ #writeLines(c("

Venn diagram

"),file.conn) #writeLines(c("

"),file.conn) #writeLines(c(""),file.conn) -#close(file.conn) \ No newline at end of file +#close(file.conn) diff -r ca46ad51fe5a -r 3ce32282f6a4 ImportDataFromMatrix.xml --- a/ImportDataFromMatrix.xml Thu Mar 22 11:16:24 2018 -0400 +++ b/ImportDataFromMatrix.xml Tue Jun 26 08:54:45 2018 -0400 @@ -4,10 +4,7 @@ bioconductor-biobase - bioconductor-geoquery - bioconductor-geometadb bioconductor-limma - bioconductor-biobase bioconductor-affy bioconductor-affyplm r-jsonlite @@ -26,7 +23,6 @@ ${__tool_directory__}/ImportDataFromMatrix.R --input $input --normalization $normalization - --conditions $conditions --annotations $annotations --rdataoutput $result_export_eset --htmloutput $result_html @@ -42,7 +38,6 @@ - diff -r ca46ad51fe5a -r 3ce32282f6a4 MetaMA.R --- a/MetaMA.R Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaMA.R Tue Jun 26 08:54:45 2018 -0400 @@ -35,6 +35,7 @@ suppressPackageStartupMessages(require(annaffy)) suppressPackageStartupMessages(require(VennDiagram)) suppressPackageStartupMessages(require(GEOquery)) +suppressPackageStartupMessages(require(UpSetR)) listInput <- trimws( unlist( strsplit(trimws(opt$input), ",") ) ) species=opt$species @@ -56,23 +57,23 @@ result.path<-opt$htmloutputpath result.template<-opt$htmltemplate -showVenn<-function(res,file) -{ - venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)), - filename = NULL, col = "black", - fill = c(1:(length(res)-2)), - margin=0.05, alpha = 0.6) - jpeg(file) - grid.draw(venn.plot) - dev.off() -} +#showVenn<-function(res,file) +#{ +# venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)), +# filename = NULL, col = "black", +# fill = c(1:(length(res)-2)), +# margin=0.05, alpha = 0.6) +# jpeg(file) +# grid.draw(venn.plot) +# dev.off() +#} if(!species %in% installed.packages()[,"Package"]) { source("https://bioconductor.org/biocLite.R") biocLite(species) } -library("org.Hs.eg.db") +library(species,character.only = TRUE) x <- org.Hs.egUNIGENE mapped_genes <- mappedkeys(x) link <- as.list(x[mapped_genes]) @@ -159,7 +160,7 @@ #write(file="~/galaxy-modal/classes.txt",length(classes)) res=pvalcombination(esets=esets,classes=classes,moderated="limma") resIDDIRR=IDDIRR(res$Meta,res$AllIndStudies) -length(res$Meta) +#length(res$Meta) Hs.Meta=rownames(esets[[1]])[res$Meta] origId.Meta=lapply(conv_unigene,FUN=function(vec) as.vector(unlist(vec[Hs.Meta]))) @@ -182,12 +183,6 @@ vennFilename="venn.png" vennFile=file.path(result.path,vennFilename) -htmlfile=readChar(result.template, file.info(result.template)$size) -htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE) -htmlfile=gsub(x=htmlfile,pattern = "###SUMMARYJSON###",replacement = summaryjson, fixed = TRUE) -htmlfile=gsub(x=htmlfile,pattern = "###VENN###",replacement = vennFilename, fixed = TRUE) -write(htmlfile,result.html) - library(VennDiagram) flog.threshold(ERROR) @@ -212,8 +207,28 @@ l[[paste("study",i,sep="")]]<-res[[i]] } l[["Meta"]]=res[[length(res)-1]] -showVenn(l,vennFile) + +if (length(l)<5) { + title="Venn diagram" + width=500 + showVenn(l,vennFile) +} else { + title="Upsetr diagram" + width=1000 + png(vennFile,width=width,height=500) + upset(fromList(as.list(l)), order.by = "freq") + dev.off() +} + file.copy(vennFilename,result.path) + +htmlfile=readChar(result.template, file.info(result.template)$size) +htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###SUMMARYJSON###",replacement = summaryjson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###VENN###",replacement = vennFilename, fixed = TRUE) +tmlfile=gsub(x=htmlfile,pattern = "###WIDTH###",replacement = as.character(width), fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###TITLE###",replacement = title, fixed = TRUE) +write(htmlfile,result.html) #l=list() #for(i in 1:length(esets)) #{ @@ -224,4 +239,4 @@ #writeLines(c("

Venn diagram

"),file.conn) #writeLines(c("

"),file.conn) #writeLines(c(""),file.conn) -#close(file.conn) \ No newline at end of file +#close(file.conn) diff -r ca46ad51fe5a -r 3ce32282f6a4 MetaMA.xml --- a/MetaMA.xml Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaMA.xml Tue Jun 26 08:54:45 2018 -0400 @@ -10,6 +10,7 @@ r-venndiagram r-metama r-optparse + r-upsetr
@@ -57,7 +58,7 @@ **Results** -- Venn Diagram summarizing the results of the meta-analysis +- Venn Diagram or upsetR diagram (when the number of studies is greater than 2) summarizing the results of the meta-analysis - A list of indicators to evaluate the quality of the performance of the meta-analysis - DE : Number of differentially expressed genes diff -r ca46ad51fe5a -r 3ce32282f6a4 MetaMa_tpl.html --- a/MetaMa_tpl.html Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaMa_tpl.html Tue Jun 26 08:54:45 2018 -0400 @@ -167,8 +167,8 @@
-

Venn diagram

-
+

###TITLE###

+
diff -r ca46ad51fe5a -r 3ce32282f6a4 MetaRNASeq.R --- a/MetaRNASeq.R Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaRNASeq.R Tue Jun 26 08:54:45 2018 -0400 @@ -10,6 +10,9 @@ ##### Read options option_list=list( make_option("--input",type="character",default="NULL",help="list of rdata objects containing eset objects"), + make_option("--inputName",type="character",default=NULL,help="filenames of the Rddata objects"), + make_option("--nbreplicates",type="character",default=NULL,help="number of replicate per study"), + make_option("--fdr",type="character",default=NULL,help="Adjusted p-value threshold to be declared differentially expressed"), make_option("--result",type="character",default=NULL,help="text file containing result of the meta-analysis"), make_option("--htmloutput",type="character",default=NULL,help="Output html report"), make_option("--htmloutputpath",type="character",default="NULL",help="Path of output html report"), @@ -31,26 +34,25 @@ suppressPackageStartupMessages(require(VennDiagram)) suppressPackageStartupMessages(require(GEOquery)) -listInput <- trimws( unlist( strsplit(trimws(opt$input), ",") ) ) - -listfiles=vector() -listfilenames=vector() +listfiles <- trimws( unlist( strsplit(trimws(opt$input), ";") ) ) +listfilenames <- trimws( unlist( strsplit(trimws(opt$inputName), ";") ) ) +nbreplicates <- as.numeric(trimws( unlist( strsplit(trimws(opt$nbreplicates), ";") ) )) -for (i in 1:length(listInput)) -{ - inputFileInfo <- unlist( strsplit( listInput[i], ';' ) ) - listfiles=c(listfiles,inputFileInfo[1]) - listfilenames=c(listfilenames,inputFileInfo[2]) -} +#for (i in 1:length(listInput)) +#{ +# inputFileInfo <- unlist( strsplit( listInput[i], ';' ) ) +# listfiles=c(listfiles,inputFileInfo[1]) +# listfilenames=c(listfilenames,inputFileInfo[2]) +# nbreplicates[i]=as.numeric(inputFileInfo[3]) +#} + outputfile <- opt$result result.html = opt$htmloutput html.files.path=opt$htmloutputpath result.template=opt$htmltemplate -alpha=0.05 - -#print(comparison) +alpha=as.numeric(opt$fdr) listData=lapply(listfiles,read.table) orderData=lapply(listData, function(x) x[order(x[1]), ]) @@ -67,95 +69,13 @@ colnames(DE)=listfilenames FC=as.data.frame(FC) colnames(FC)=listfilenames -# the comparison must only have two values and the conds must -# be a vector from those values, at least one of each. - -#if (length(comparison) != 2) { -# stop("Comparison type must be a tuple: ", cargs[length(cargs) - 8]) -#} - -sink("/dev/null") -dir.create(html.files.path, recursive=TRUE) -#library(DESeq) -#library(HTSFilter) - -#DE=list() -#FC=list() -#i=1 - -# Open the html output file -#file.conn = file(diag.html, open="w") - -#writeLines( c(""), file.conn) - -# Perform deseq analysis on each study -#for(i in 1:length(listfiles)) -#{ -# f=listfiles[i] -# fname=listfilenames[i] -# study_name=unlist(strsplit(fname,"[.]"))[1] -# print(paste0("study.name ",study_name)) -# d <- read.table(f, sep=" ", header=TRUE, row.names=1) -# conds<-sapply(strsplit(colnames(d),"[.]"),FUN=function(x) x[1]) -# if (length(unique(conds)) != 2) { -# warning(as.data.frame(strsplit(colnames(d),"[.]"))) -# stop("You can only have two columns types: ", paste(conds,collapse=" ")) -# } -# if (!identical(sort(comparison), sort(unique(conds)))) { -# stop("Column types must use the two names from Comparison type, and vice versa. Must have at least one of each in the Column types.\nColumn types: ", cargs[2], "\n", "Comparison type: ", cargs[3]) -# } -# if (length(d) != length(conds)) { -# stop("Number of total sample columns in counts file must correspond to the columns types field. E.g. if column types is 'kidney,kidney,liver,liver' then number of sample columns in counts file must be 4 as well.") -# } -# -# cds <- newCountDataSet(d, conds) -# cds <- estimateSizeFactors(cds) -# -# cdsBlind <- estimateDispersions( cds, method="blind" ) -# -# if (length(conds) != 2) { -# cds <- estimateDispersions( cds ) -# norep = FALSE -# } -# -# if (length(conds) == 2) { -# cds <- estimateDispersions( cds, method=method, sharingMode=mod, fitType="parametric" ) -# norep = TRUE -# } -# -# filter<-HTSFilter(cds, plot=FALSE) -# cds.filter<-filter$filteredData -# on.index<-which(filter$on==1) -# -# res<-as.data.frame(matrix(NA,nrow=nrow(cds),ncol=ncol(cds))) -# nbT <- nbinomTest(cds.filter, comparison[1], comparison[2]) -# colnames(res)<-colnames(nbT) -# res[on.index,]<-nbT -# #write.table(res[order(res$padj), ], file=outputfile, quote=FALSE, row.names=FALSE, sep="\t") -# -# -# temp.pval.plot = file.path( html.files.path, paste("PvalHist",i,".png",sep="")) -# png( temp.pval.plot, width=500, height=500 ) -# hist(res$pval, breaks=100, col="skyblue", border="slateblue", main="") -# dev.off() -# -# writeLines( c("

P-value histogram for ",study_name,"

"), file.conn) -# writeLines( c("

"), file.conn) -# -# #on enregistre la p-value -# rawpval[[study_name]]<-res$pval -# DE[[study_name]]<-ifelse(res$padj<=alpha,1,0) -# FC[[study_name]]<-res$log2FoldChange -# -# i=i+1 -#} - # combinations library(metaRNASeq) +library(UpSetR) fishcomb<-fishercomb(rawpval, BHth=alpha) warning(length(rawpval)) -invnormcomb<-invnorm(rawpval, nrep=c(8,8), BHth=alpha) +invnormcomb<-invnorm(rawpval, nrep=nbreplicates, BHth=alpha) #DE[["fishercomb"]]<-ifelse(fishcomb$adjpval<=alpha,1,0) #DE[["invnormcomb"]]<-ifelse(invnormcomb$adjpval<=alpha,1,0) @@ -180,17 +100,30 @@ IDDIRRfishcomb=IDD.IRR(fishcomb_de,indstudy_de) IDDIRRinvnorm=IDD.IRR(invnorm_de,indstudy_de) -#conflits<-data.frame(ID=listData[[1]][rownames(DEresults),1],Fishercomb=DEresults[["DE.fishercomb"]],Invnormcomb=DEresults[["DE.invnorm"]],sign=commonsgnFC) conflits<-data.frame(ID=listData[[1]][rownames(DEresults),1],DE=DEresults,FC=FC,signFC=commonsgnFC) #write DE outputfile write.table(conflits, outputfile,sep="\t",,row.names=FALSE) library(VennDiagram) -DE_num=apply(DEresults, 2, FUN=function(x) which(x==1)) -venn.plot<-venn.diagram(x=as.list(DE_num),filename=NULL, col="black", fill=1:length(DE_num)+1,alpha=0.6) +DE_num=apply(keepDE[,1:(length(listfiles)+2)], 2, FUN=function(x) which(x==1)) +#DE_num=apply(DEresults, 2, FUN=function(x) which(x==1)) +dir.create(html.files.path, showWarnings = TRUE, recursive = FALSE) temp.venn.plot = file.path( html.files.path, paste("venn.png")) -png(temp.venn.plot,width=500,height=500) -grid.draw(venn.plot) -dev.off() +if (length(listfiles)<=2) { + title="VENN DIAGRAM" + width=500 + venn.plot<-venn.diagram(x=as.list(DE_num),filename=NULL, col="black", fill=1:length(DE_num)+1,alpha=0.6) + png(temp.venn.plot,width=width,height=500) + grid.draw(venn.plot) + dev.off() +} else { + title="UPSETR DIAGRAM" + width=1000 + png(temp.venn.plot,width=width,height=500) + upset(fromList(as.list(DE_num)), order.by = "freq") + dev.off() + +} + library(jsonlite) matrixConflits=as.matrix(conflits) @@ -198,11 +131,6 @@ summaryFishcombjson=toJSON(as.matrix(t(IDDIRRfishcomb)),pretty = TRUE) summaryinvnormjson=toJSON(as.matrix(t(IDDIRRinvnorm)),pretty = TRUE) - -#vennsplit=strsplit(result.venn,split="/")[[1]] -#venn=paste0("./",vennsplit[length(vennsplit)]) - - vennFilename="venn.png" vennFile=file.path(html.files.path,vennFilename) htmlfile=readChar(result.template, file.info(result.template)$size) @@ -210,39 +138,7 @@ htmlfile=gsub(x=htmlfile,pattern = "###FISHSUMMARYJSON###",replacement = summaryFishcombjson, fixed = TRUE) htmlfile=gsub(x=htmlfile,pattern = "###INVSUMMARYJSON###",replacement = summaryinvnormjson, fixed = TRUE) htmlfile=gsub(x=htmlfile,pattern = "###VENN###",replacement = vennFilename, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###WIDTH###",replacement = as.character(width), fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###TITLE###",replacement = title, fixed = TRUE) write(htmlfile,result.html) -#library(VennDiagram) -#flog.threshold(ERROR) -# -##venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)),filename = v, col = "black", fill = c(1:(length(res)-2)), margin=0.05, alpha = 0.6,imagetype = "png") -#dir.create(result.path, showWarnings = TRUE, recursive = FALSE) -# -#showVenn<-function(liste,file) -#{ -# venn.plot<-venn.diagram(x = liste, -# filename = vennFilename, col = "black", -# fill = 1:length(liste)+1, -# margin=0.05, alpha = 0.6,imagetype = "png") -## png(file); -## grid.draw(venn.plot); -## dev.off(); -# -#} -# -#l=list() -#for(i in 1:length(esets)) -#{ -# l[[paste("study",i,sep="")]]<-res[[i]] -#} -#l[["Meta"]]=res[[length(res)-1]] -#showVenn(l,vennFile) -#file.copy(vennFilename,result.path) - - -#writeLines( c("

Venn Plot

"), file.conn) -#writeLines( c("

"), file.conn) -#writeLines( c(""), file.conn) -#close(file.conn) -#print("passe6") -#sink(NULL) diff -r ca46ad51fe5a -r 3ce32282f6a4 MetaRNASeq_tpl.html --- a/MetaRNASeq_tpl.html Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaRNASeq_tpl.html Tue Jun 26 08:54:45 2018 -0400 @@ -139,8 +139,8 @@
-

Venn diagram

-
+

###TITLE###

+
diff -r ca46ad51fe5a -r 3ce32282f6a4 MetaRNAseq.xml --- a/MetaRNAseq.xml Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaRNAseq.xml Tue Jun 26 08:54:45 2018 -0400 @@ -1,4 +1,4 @@ - + Performs meta-analysis thanks to metaRNAseq @@ -10,6 +10,7 @@ r-jsonlite r-metarnaseq r-optparse + r-upsetr @@ -19,21 +20,40 @@ - + --htmltemplate ${__tool_directory__}/MetaRNASeq_tpl.html + ]]> + - + + + + + + @@ -44,7 +64,31 @@ + diff -r ca46ad51fe5a -r 3ce32282f6a4 Recount.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Recount.R Tue Jun 26 08:54:45 2018 -0400 @@ -0,0 +1,47 @@ +#!/usr/bin/env Rscript +# setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +library("optparse") + +##### Read options +option_list=list( + make_option("--id",type="character",default=NULL,help="GSE ID from GEO databse (required)"), + make_option("--report",type="character",default=NULL,help="Text file summarizing conditions of the experiment") + +); + +opt_parser = OptionParser(option_list=option_list); +opt = parse_args(opt_parser); + +if(is.null(opt$id)){ + print_help(opt_parser) + stop("Recount id required.", call.=FALSE) +} + +#loading libraries +suppressPackageStartupMessages(require(recount)) + +studyID=opt$id +reportFile=opt$report + +dir.create("./split", showWarnings = TRUE, recursive = FALSE) + +url <- download_study(studyID) +load(file.path(studyID, 'rse_gene.Rdata')) +rse <- scale_counts(rse_gene) +counts=assay(rse) +conditions=rse$title + +for (i in 1:ncol(counts)) +{ + currentCount=as.data.frame(counts[,i]) + sampleID=colnames(counts)[i] + #colnames(currentCount)=sampleID + write.table(x=currentCount,file=paste0("./split/",sampleID,"_",conditions[i],'.tabular'),sep="\t",row.names = T, col.names = F) +} + +write.table(as.data.frame(cbind(sampleID=colnames(counts),title=conditions)),quote = FALSE,col.names =TRUE, row.names=FALSE,file=reportFile,sep="\t") diff -r ca46ad51fe5a -r 3ce32282f6a4 Recount.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Recount.xml Tue Jun 26 08:54:45 2018 -0400 @@ -0,0 +1,52 @@ + + + Get rna-seq count data with R recount Package + + + bioconductor-recount + r-optparse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r ca46ad51fe5a -r 3ce32282f6a4 repository_dependencies.xml --- a/repository_dependencies.xml Thu Mar 22 11:16:24 2018 -0400 +++ b/repository_dependencies.xml Tue Jun 26 08:54:45 2018 -0400 @@ -1,4 +1,4 @@ - +