Mercurial > repos > sblanck > smagexp
changeset 6:3ce32282f6a4 draft
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 228bd64b01f184d5d8ebc9f65fe0add2d45ed4fe
author | sblanck |
---|---|
date | Tue, 26 Jun 2018 08:54:45 -0400 |
parents | ca46ad51fe5a |
children | aedbf50acb4a |
files | ImportDataFromMatrix.R ImportDataFromMatrix.xml MetaMA.R MetaMA.xml MetaMa_tpl.html MetaRNASeq.R MetaRNASeq_tpl.html MetaRNAseq.xml Recount.R Recount.xml repository_dependencies.xml |
diffstat | 11 files changed, 235 insertions(+), 197 deletions(-) [+] |
line wrap: on
line diff
--- a/ImportDataFromMatrix.R Thu Mar 22 11:16:24 2018 -0400 +++ b/ImportDataFromMatrix.R Tue Jun 26 08:54:45 2018 -0400 @@ -10,7 +10,6 @@ ##### Read options option_list=list( make_option("--input",type="character",default="NULL",help="rdata object containing eset object"), - make_option("--conditions",type="character",default=NULL,help="Text file summarizing conditions of the experiment (required)"), make_option("--normalization",type="character",default=NULL,help="log2 transformation"), make_option("--annotations",type="character",default="NULL",help="rdata object containing eset object"), make_option("--rdataoutput",type="character",default="NULL",help="output rdata object containing eset object"), @@ -30,18 +29,10 @@ stop("input required.", call.=FALSE) } -if(is.null(opt$conditions)){ - print_help(opt_parser) - stop("conditions input required.", call.=FALSE) -} #loading libraries -suppressPackageStartupMessages(require(GEOquery)) - suppressPackageStartupMessages(require(Biobase)) -suppressPackageStartupMessages(require(GEOquery)) -suppressPackageStartupMessages(require(GEOmetadb)) suppressPackageStartupMessages(require(limma)) suppressPackageStartupMessages(require(jsonlite)) suppressPackageStartupMessages(require(affy)) @@ -60,19 +51,16 @@ dir.create(result.path, showWarnings = TRUE, recursive = FALSE) data=as.matrix(read.table(file = dataFile,row.names=1,header=TRUE)) -conditions=read.table(file=conditionsFile,sep = "\t",row.names=1) +#conditions=read.table(file=conditionsFile,sep = "\t",row.names=1) htmlfile=readChar(result.template, file.info(result.template)$size) -colnames(conditions)=c("source_name_ch1","description") -phenodata<-new("AnnotatedDataFrame",data=conditions) +#colnames(conditions)=c("source_name_ch1","description") +#phenodata<-new("AnnotatedDataFrame",data=conditions) -head(data) -conditions - -eset=ExpressionSet(assayData=data,phenoData=phenodata,annotation=annotation) +eset=ExpressionSet(assayData=data,annotation=annotation) if (normalization == "quantile") { - eset <- normalize.ExpressionSet.quantiles(eset, transfn="log2") + eset <- normalize.ExpressionSet.quantiles(eset, transfn="log") } else if (normalization == "log2") { exprs(eset) = log2(exprs(eset)) } @@ -111,4 +99,4 @@ #writeLines(c("<h2>Venn diagram</h2>"),file.conn) #writeLines(c("<img src='venn.png'><br/><br/>"),file.conn) #writeLines(c("</body></html>"),file.conn) -#close(file.conn) \ No newline at end of file +#close(file.conn)
--- a/ImportDataFromMatrix.xml Thu Mar 22 11:16:24 2018 -0400 +++ b/ImportDataFromMatrix.xml Tue Jun 26 08:54:45 2018 -0400 @@ -4,10 +4,7 @@ <requirements> <requirement type="package">bioconductor-biobase</requirement> - <requirement type="package">bioconductor-geoquery</requirement> - <requirement type="package">bioconductor-geometadb</requirement> <requirement type="package">bioconductor-limma</requirement> - <requirement type="package">bioconductor-biobase</requirement> <requirement type="package">bioconductor-affy</requirement> <requirement type="package">bioconductor-affyplm</requirement> <requirement type="package">r-jsonlite</requirement> @@ -26,7 +23,6 @@ ${__tool_directory__}/ImportDataFromMatrix.R --input $input --normalization $normalization - --conditions $conditions --annotations $annotations --rdataoutput $result_export_eset --htmloutput $result_html @@ -42,7 +38,6 @@ <option value="log2">log2 only</option> <option value="none">none</option> </param> - <param name="conditions" type="data" format="cond" label="Conditions" help="conditions associated with the input file"/> <param name="annotations" type="text" label="Annotation GPL code" help="GPL code for annotations"/> </inputs>
--- a/MetaMA.R Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaMA.R Tue Jun 26 08:54:45 2018 -0400 @@ -35,6 +35,7 @@ suppressPackageStartupMessages(require(annaffy)) suppressPackageStartupMessages(require(VennDiagram)) suppressPackageStartupMessages(require(GEOquery)) +suppressPackageStartupMessages(require(UpSetR)) listInput <- trimws( unlist( strsplit(trimws(opt$input), ",") ) ) species=opt$species @@ -56,23 +57,23 @@ result.path<-opt$htmloutputpath result.template<-opt$htmltemplate -showVenn<-function(res,file) -{ - venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)), - filename = NULL, col = "black", - fill = c(1:(length(res)-2)), - margin=0.05, alpha = 0.6) - jpeg(file) - grid.draw(venn.plot) - dev.off() -} +#showVenn<-function(res,file) +#{ +# venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)), +# filename = NULL, col = "black", +# fill = c(1:(length(res)-2)), +# margin=0.05, alpha = 0.6) +# jpeg(file) +# grid.draw(venn.plot) +# dev.off() +#} if(!species %in% installed.packages()[,"Package"]) { source("https://bioconductor.org/biocLite.R") biocLite(species) } -library("org.Hs.eg.db") +library(species,character.only = TRUE) x <- org.Hs.egUNIGENE mapped_genes <- mappedkeys(x) link <- as.list(x[mapped_genes]) @@ -159,7 +160,7 @@ #write(file="~/galaxy-modal/classes.txt",length(classes)) res=pvalcombination(esets=esets,classes=classes,moderated="limma") resIDDIRR=IDDIRR(res$Meta,res$AllIndStudies) -length(res$Meta) +#length(res$Meta) Hs.Meta=rownames(esets[[1]])[res$Meta] origId.Meta=lapply(conv_unigene,FUN=function(vec) as.vector(unlist(vec[Hs.Meta]))) @@ -182,12 +183,6 @@ vennFilename="venn.png" vennFile=file.path(result.path,vennFilename) -htmlfile=readChar(result.template, file.info(result.template)$size) -htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE) -htmlfile=gsub(x=htmlfile,pattern = "###SUMMARYJSON###",replacement = summaryjson, fixed = TRUE) -htmlfile=gsub(x=htmlfile,pattern = "###VENN###",replacement = vennFilename, fixed = TRUE) -write(htmlfile,result.html) - library(VennDiagram) flog.threshold(ERROR) @@ -212,8 +207,28 @@ l[[paste("study",i,sep="")]]<-res[[i]] } l[["Meta"]]=res[[length(res)-1]] -showVenn(l,vennFile) + +if (length(l)<5) { + title="Venn diagram" + width=500 + showVenn(l,vennFile) +} else { + title="Upsetr diagram" + width=1000 + png(vennFile,width=width,height=500) + upset(fromList(as.list(l)), order.by = "freq") + dev.off() +} + file.copy(vennFilename,result.path) + +htmlfile=readChar(result.template, file.info(result.template)$size) +htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###SUMMARYJSON###",replacement = summaryjson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###VENN###",replacement = vennFilename, fixed = TRUE) +tmlfile=gsub(x=htmlfile,pattern = "###WIDTH###",replacement = as.character(width), fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###TITLE###",replacement = title, fixed = TRUE) +write(htmlfile,result.html) #l=list() #for(i in 1:length(esets)) #{ @@ -224,4 +239,4 @@ #writeLines(c("<h2>Venn diagram</h2>"),file.conn) #writeLines(c("<img src='venn.png'><br/><br/>"),file.conn) #writeLines(c("</body></html>"),file.conn) -#close(file.conn) \ No newline at end of file +#close(file.conn)
--- a/MetaMA.xml Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaMA.xml Tue Jun 26 08:54:45 2018 -0400 @@ -10,6 +10,7 @@ <requirement type="package">r-venndiagram</requirement> <requirement type="package">r-metama</requirement> <requirement type="package">r-optparse</requirement> + <requirement type="package" version="1.3.3">r-upsetr</requirement> </requirements> <stdio> @@ -57,7 +58,7 @@ **Results** -- Venn Diagram summarizing the results of the meta-analysis +- Venn Diagram or upsetR diagram (when the number of studies is greater than 2) summarizing the results of the meta-analysis - A list of indicators to evaluate the quality of the performance of the meta-analysis - DE : Number of differentially expressed genes
--- a/MetaMa_tpl.html Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaMa_tpl.html Tue Jun 26 08:54:45 2018 -0400 @@ -167,8 +167,8 @@ <body> <table><tr><td> -<h2>Venn diagram</h2> -<img src='###VENN###' height="400" width="400"><br/> +<h2>###TITLE###</h2> +<img src='###VENN###' height="500" width=###WIDTH###><br/> </td><td> </td></tr></table>
--- a/MetaRNASeq.R Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaRNASeq.R Tue Jun 26 08:54:45 2018 -0400 @@ -10,6 +10,9 @@ ##### Read options option_list=list( make_option("--input",type="character",default="NULL",help="list of rdata objects containing eset objects"), + make_option("--inputName",type="character",default=NULL,help="filenames of the Rddata objects"), + make_option("--nbreplicates",type="character",default=NULL,help="number of replicate per study"), + make_option("--fdr",type="character",default=NULL,help="Adjusted p-value threshold to be declared differentially expressed"), make_option("--result",type="character",default=NULL,help="text file containing result of the meta-analysis"), make_option("--htmloutput",type="character",default=NULL,help="Output html report"), make_option("--htmloutputpath",type="character",default="NULL",help="Path of output html report"), @@ -31,26 +34,25 @@ suppressPackageStartupMessages(require(VennDiagram)) suppressPackageStartupMessages(require(GEOquery)) -listInput <- trimws( unlist( strsplit(trimws(opt$input), ",") ) ) - -listfiles=vector() -listfilenames=vector() +listfiles <- trimws( unlist( strsplit(trimws(opt$input), ";") ) ) +listfilenames <- trimws( unlist( strsplit(trimws(opt$inputName), ";") ) ) +nbreplicates <- as.numeric(trimws( unlist( strsplit(trimws(opt$nbreplicates), ";") ) )) -for (i in 1:length(listInput)) -{ - inputFileInfo <- unlist( strsplit( listInput[i], ';' ) ) - listfiles=c(listfiles,inputFileInfo[1]) - listfilenames=c(listfilenames,inputFileInfo[2]) -} +#for (i in 1:length(listInput)) +#{ +# inputFileInfo <- unlist( strsplit( listInput[i], ';' ) ) +# listfiles=c(listfiles,inputFileInfo[1]) +# listfilenames=c(listfilenames,inputFileInfo[2]) +# nbreplicates[i]=as.numeric(inputFileInfo[3]) +#} + outputfile <- opt$result result.html = opt$htmloutput html.files.path=opt$htmloutputpath result.template=opt$htmltemplate -alpha=0.05 - -#print(comparison) +alpha=as.numeric(opt$fdr) listData=lapply(listfiles,read.table) orderData=lapply(listData, function(x) x[order(x[1]), ]) @@ -67,95 +69,13 @@ colnames(DE)=listfilenames FC=as.data.frame(FC) colnames(FC)=listfilenames -# the comparison must only have two values and the conds must -# be a vector from those values, at least one of each. - -#if (length(comparison) != 2) { -# stop("Comparison type must be a tuple: ", cargs[length(cargs) - 8]) -#} - -sink("/dev/null") -dir.create(html.files.path, recursive=TRUE) -#library(DESeq) -#library(HTSFilter) - -#DE=list() -#FC=list() -#i=1 - -# Open the html output file -#file.conn = file(diag.html, open="w") - -#writeLines( c("<html><body>"), file.conn) - -# Perform deseq analysis on each study -#for(i in 1:length(listfiles)) -#{ -# f=listfiles[i] -# fname=listfilenames[i] -# study_name=unlist(strsplit(fname,"[.]"))[1] -# print(paste0("study.name ",study_name)) -# d <- read.table(f, sep=" ", header=TRUE, row.names=1) -# conds<-sapply(strsplit(colnames(d),"[.]"),FUN=function(x) x[1]) -# if (length(unique(conds)) != 2) { -# warning(as.data.frame(strsplit(colnames(d),"[.]"))) -# stop("You can only have two columns types: ", paste(conds,collapse=" ")) -# } -# if (!identical(sort(comparison), sort(unique(conds)))) { -# stop("Column types must use the two names from Comparison type, and vice versa. Must have at least one of each in the Column types.\nColumn types: ", cargs[2], "\n", "Comparison type: ", cargs[3]) -# } -# if (length(d) != length(conds)) { -# stop("Number of total sample columns in counts file must correspond to the columns types field. E.g. if column types is 'kidney,kidney,liver,liver' then number of sample columns in counts file must be 4 as well.") -# } -# -# cds <- newCountDataSet(d, conds) -# cds <- estimateSizeFactors(cds) -# -# cdsBlind <- estimateDispersions( cds, method="blind" ) -# -# if (length(conds) != 2) { -# cds <- estimateDispersions( cds ) -# norep = FALSE -# } -# -# if (length(conds) == 2) { -# cds <- estimateDispersions( cds, method=method, sharingMode=mod, fitType="parametric" ) -# norep = TRUE -# } -# -# filter<-HTSFilter(cds, plot=FALSE) -# cds.filter<-filter$filteredData -# on.index<-which(filter$on==1) -# -# res<-as.data.frame(matrix(NA,nrow=nrow(cds),ncol=ncol(cds))) -# nbT <- nbinomTest(cds.filter, comparison[1], comparison[2]) -# colnames(res)<-colnames(nbT) -# res[on.index,]<-nbT -# #write.table(res[order(res$padj), ], file=outputfile, quote=FALSE, row.names=FALSE, sep="\t") -# -# -# temp.pval.plot = file.path( html.files.path, paste("PvalHist",i,".png",sep="")) -# png( temp.pval.plot, width=500, height=500 ) -# hist(res$pval, breaks=100, col="skyblue", border="slateblue", main="") -# dev.off() -# -# writeLines( c("<h2>P-value histogram for ",study_name,"</h2>"), file.conn) -# writeLines( c("<img src='PvalHist",i,".png'><br/><br/>"), file.conn) -# -# #on enregistre la p-value -# rawpval[[study_name]]<-res$pval -# DE[[study_name]]<-ifelse(res$padj<=alpha,1,0) -# FC[[study_name]]<-res$log2FoldChange -# -# i=i+1 -#} - # combinations library(metaRNASeq) +library(UpSetR) fishcomb<-fishercomb(rawpval, BHth=alpha) warning(length(rawpval)) -invnormcomb<-invnorm(rawpval, nrep=c(8,8), BHth=alpha) +invnormcomb<-invnorm(rawpval, nrep=nbreplicates, BHth=alpha) #DE[["fishercomb"]]<-ifelse(fishcomb$adjpval<=alpha,1,0) #DE[["invnormcomb"]]<-ifelse(invnormcomb$adjpval<=alpha,1,0) @@ -180,17 +100,30 @@ IDDIRRfishcomb=IDD.IRR(fishcomb_de,indstudy_de) IDDIRRinvnorm=IDD.IRR(invnorm_de,indstudy_de) -#conflits<-data.frame(ID=listData[[1]][rownames(DEresults),1],Fishercomb=DEresults[["DE.fishercomb"]],Invnormcomb=DEresults[["DE.invnorm"]],sign=commonsgnFC) conflits<-data.frame(ID=listData[[1]][rownames(DEresults),1],DE=DEresults,FC=FC,signFC=commonsgnFC) #write DE outputfile write.table(conflits, outputfile,sep="\t",,row.names=FALSE) library(VennDiagram) -DE_num=apply(DEresults, 2, FUN=function(x) which(x==1)) -venn.plot<-venn.diagram(x=as.list(DE_num),filename=NULL, col="black", fill=1:length(DE_num)+1,alpha=0.6) +DE_num=apply(keepDE[,1:(length(listfiles)+2)], 2, FUN=function(x) which(x==1)) +#DE_num=apply(DEresults, 2, FUN=function(x) which(x==1)) +dir.create(html.files.path, showWarnings = TRUE, recursive = FALSE) temp.venn.plot = file.path( html.files.path, paste("venn.png")) -png(temp.venn.plot,width=500,height=500) -grid.draw(venn.plot) -dev.off() +if (length(listfiles)<=2) { + title="VENN DIAGRAM" + width=500 + venn.plot<-venn.diagram(x=as.list(DE_num),filename=NULL, col="black", fill=1:length(DE_num)+1,alpha=0.6) + png(temp.venn.plot,width=width,height=500) + grid.draw(venn.plot) + dev.off() +} else { + title="UPSETR DIAGRAM" + width=1000 + png(temp.venn.plot,width=width,height=500) + upset(fromList(as.list(DE_num)), order.by = "freq") + dev.off() + +} + library(jsonlite) matrixConflits=as.matrix(conflits) @@ -198,11 +131,6 @@ summaryFishcombjson=toJSON(as.matrix(t(IDDIRRfishcomb)),pretty = TRUE) summaryinvnormjson=toJSON(as.matrix(t(IDDIRRinvnorm)),pretty = TRUE) - -#vennsplit=strsplit(result.venn,split="/")[[1]] -#venn=paste0("./",vennsplit[length(vennsplit)]) - - vennFilename="venn.png" vennFile=file.path(html.files.path,vennFilename) htmlfile=readChar(result.template, file.info(result.template)$size) @@ -210,39 +138,7 @@ htmlfile=gsub(x=htmlfile,pattern = "###FISHSUMMARYJSON###",replacement = summaryFishcombjson, fixed = TRUE) htmlfile=gsub(x=htmlfile,pattern = "###INVSUMMARYJSON###",replacement = summaryinvnormjson, fixed = TRUE) htmlfile=gsub(x=htmlfile,pattern = "###VENN###",replacement = vennFilename, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###WIDTH###",replacement = as.character(width), fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###TITLE###",replacement = title, fixed = TRUE) write(htmlfile,result.html) -#library(VennDiagram) -#flog.threshold(ERROR) -# -##venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)),filename = v, col = "black", fill = c(1:(length(res)-2)), margin=0.05, alpha = 0.6,imagetype = "png") -#dir.create(result.path, showWarnings = TRUE, recursive = FALSE) -# -#showVenn<-function(liste,file) -#{ -# venn.plot<-venn.diagram(x = liste, -# filename = vennFilename, col = "black", -# fill = 1:length(liste)+1, -# margin=0.05, alpha = 0.6,imagetype = "png") -## png(file); -## grid.draw(venn.plot); -## dev.off(); -# -#} -# -#l=list() -#for(i in 1:length(esets)) -#{ -# l[[paste("study",i,sep="")]]<-res[[i]] -#} -#l[["Meta"]]=res[[length(res)-1]] -#showVenn(l,vennFile) -#file.copy(vennFilename,result.path) - - -#writeLines( c("<h2>Venn Plot</h2>"), file.conn) -#writeLines( c("<img src='venn.png'><br/><br/>"), file.conn) -#writeLines( c("</body></html>"), file.conn) -#close(file.conn) -#print("passe6") -#sink(NULL)
--- a/MetaRNASeq_tpl.html Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaRNASeq_tpl.html Tue Jun 26 08:54:45 2018 -0400 @@ -139,8 +139,8 @@ <body> <table><tr><td> -<h2>Venn diagram</h2> -<img src='###VENN###' height="400" width="400"><br/> +<h2>###TITLE###</h2> +<img src='###VENN###' height="500" width=###WIDTH###><br/> </td><td> </td></tr></table>
--- a/MetaRNAseq.xml Thu Mar 22 11:16:24 2018 -0400 +++ b/MetaRNAseq.xml Tue Jun 26 08:54:45 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="metarnaseq" name="RNA-seq data meta-analysis" version="1.0.0"> +<tool id="metarnaseq" name="RNA-seq data meta-analysis" version="1.1.0"> <description>Performs meta-analysis thanks to metaRNAseq</description> @@ -10,6 +10,7 @@ <requirement type="package">r-jsonlite</requirement> <requirement type="package">r-metarnaseq</requirement> <requirement type="package">r-optparse</requirement> + <requirement type="package" version="1.3.3">r-upsetr</requirement> </requirements> <stdio> @@ -19,21 +20,40 @@ <command> - <![CDATA[ +<![CDATA[ Rscript ${__tool_directory__}/MetaRNASeq.R - --input "#for $input in $inputs# $input;$input.name, #end for#" + --input "#for $s in $studies: + ${s.input}; + #end for +" + --inputName "#for $s in $studies: + ${s.input.name}; + #end for +" + --nbreplicates "#for $s in $studies: + ${s.nbreplicates}; + #end for +" + + --fdr $fdr --result $top_table --htmloutput $result_html --htmloutputpath $result_html.extra_files_path - --htmltemplate ${__tool_directory__}/MetaRNASeq_tpl.html - ]]> + --htmltemplate ${__tool_directory__}/MetaRNASeq_tpl.html + ]]> + </command> <inputs> - <param format="tabular" name="inputs" multiple="true" type="data" optional="false" label="Counts file" help="Must have the same number of row in each study. The experimental conditions must be specified in the header of each file"/> + <repeat name="studies" title="Study results" min="2" default="2" help="DESeq2 Result file and number of replicate of the study"> + <param format="tabular" name="input" multiple="false" type="data" optional="false" label="DESeq2 result file" help="Must have the same number of row in each study"/> + <param name="nbreplicates" type="integer" value="10" label="Number of replicates" help="Number of replicates of the study"/> + </repeat> + <param name="fdr" type="float" value="0.05" label="FDR" min="0" max="1" help="Adjusted p-value threshold to be declared differentially expressed"/> </inputs> + <outputs> <data format="tabular" name="top_table" label="Summary of meta-analysis and single studie analysis from ${tool.name} on ${on_string}"/> <data format="html" name="result_html" label="Charts for ${tool.name} on ${on_string}"/> @@ -44,7 +64,31 @@ </tests> <help> +<![CDATA[ +**What it does** + +Given several DESeq2 results this tool runs a meta-analysis using the metaRNAseq R package. + +**Inputs** +- At least 2 studies, and for each study + - Results of DESeq2 study + - Number of replicates of the study +- A FDR Threshold for a gene to be declared differentially expressed + +**Results** + +- Venn Diagram or upsetR diagram (when the number of studies is greater than 2) summarizing the results of the meta-analysis +- A list of indicators to evaluate the quality of the performance of the meta-analysis + + - DE : Number of differentially expressed genes + - IDD (Integration Driven discoveries) : number of genes that are declared differentially expressed in the meta-analysis that were not identified in any of the single studies alone + - Loss : Number of genes that are identified differentially expressed in single studies but not in meta-analysis + - DR (Integration-driven Discovery Rate) : corresponding proportion of IDD + - IRR (Integration-driven Revision) : corresponding proportion of Loss + +It also generates a text file containing summarization of the results of each single analysis and meta-analysis. Potential conflicts between single analyses are indicated by zero values in the "signFC" column. +]]> </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Recount.R Tue Jun 26 08:54:45 2018 -0400 @@ -0,0 +1,47 @@ +#!/usr/bin/env Rscript +# setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +library("optparse") + +##### Read options +option_list=list( + make_option("--id",type="character",default=NULL,help="GSE ID from GEO databse (required)"), + make_option("--report",type="character",default=NULL,help="Text file summarizing conditions of the experiment") + +); + +opt_parser = OptionParser(option_list=option_list); +opt = parse_args(opt_parser); + +if(is.null(opt$id)){ + print_help(opt_parser) + stop("Recount id required.", call.=FALSE) +} + +#loading libraries +suppressPackageStartupMessages(require(recount)) + +studyID=opt$id +reportFile=opt$report + +dir.create("./split", showWarnings = TRUE, recursive = FALSE) + +url <- download_study(studyID) +load(file.path(studyID, 'rse_gene.Rdata')) +rse <- scale_counts(rse_gene) +counts=assay(rse) +conditions=rse$title + +for (i in 1:ncol(counts)) +{ + currentCount=as.data.frame(counts[,i]) + sampleID=colnames(counts)[i] + #colnames(currentCount)=sampleID + write.table(x=currentCount,file=paste0("./split/",sampleID,"_",conditions[i],'.tabular'),sep="\t",row.names = T, col.names = F) +} + +write.table(as.data.frame(cbind(sampleID=colnames(counts),title=conditions)),quote = FALSE,col.names =TRUE, row.names=FALSE,file=reportFile,sep="\t")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Recount.xml Tue Jun 26 08:54:45 2018 -0400 @@ -0,0 +1,52 @@ +<tool id="Recount" name="Recount" version="1.0.0"> + + <description>Get rna-seq count data with R recount Package</description> + + <requirements> + <requirement type="package">bioconductor-recount</requirement> + <requirement type="package">r-optparse</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + <regex match="Warning" source="both" level="warning"/> + </stdio> + + <command> + <![CDATA[ + Rscript --vanilla + ${__tool_directory__}/Recount.R + --id ${recountID} + --report ${report} + ]]> + </command> + + <inputs> + <param name="recountID" type="text" size="12" optional="false" label="Recount ID" help=""> + <validator type="empty_field"/> + </param> + </inputs> + + <outputs> + <data format="txt" name="report"> + <discover_datasets pattern="__designation_and_ext__" format="tabular" directory="split" visible="true" /> + </data> +</outputs> + + <tests> + </tests> + + <help> +<![CDATA[ +**What it does** + +This tool fetches RNA-seq count data directly from Recount project based on the recount bioconductor R package. Given an ID, it returns a small report +containing the metadata of the experiment. Il also generates one count file per sample. + +**Results** + +- Tabular file containing samples ID and samples conditions +- One count file per sample +]]> + </help> + +</tool>
--- a/repository_dependencies.xml Thu Mar 22 11:16:24 2018 -0400 +++ b/repository_dependencies.xml Tue Jun 26 08:54:45 2018 -0400 @@ -1,4 +1,4 @@ <?xml version="1.0"?> <repositories description="smagexp datatypes"> - <repository changeset_revision="f174dc3d2641" name="smagexp_datatypes" owner="sblanck" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="f174dc3d2641" name="smagexp_datatypes" owner="sblanck" toolshed="toolshed.g2.bx.psu.edu" /> </repositories>