diff GEOQuery.R @ 0:1024245abc70 draft

planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
author sblanck
date Thu, 22 Feb 2018 08:38:22 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GEOQuery.R	Thu Feb 22 08:38:22 2018 -0500
@@ -0,0 +1,79 @@
+#!/usr/bin/env Rscript
+# setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+library("optparse")
+
+##### Read options
+option_list=list(
+		make_option("--id",type="character",default=NULL,help="GSE ID from GEO databse (required)"),
+		make_option("--transformation",type="character",default=NULL,help="log2 transformation (required)"),
+		make_option("--data",type="character",default=NULL,help="A table containing the expression data"),
+		make_option("--rdata",type="character",default="NULL",help="rdata object containing eset object"),
+		make_option("--conditions",type="character",default=NULL,help="Text file summarizing conditions of the experiment")
+		
+);
+
+opt_parser = OptionParser(option_list=option_list);
+opt = parse_args(opt_parser);
+
+if(is.null(opt$id)){
+	print_help(opt_parser)
+	stop("GEOdata id required.", call.=FALSE)
+}
+
+#loading libraries
+suppressPackageStartupMessages(require(GEOquery))
+
+GEOQueryID=opt$id
+GEOQueryData=opt$data
+GEOQueryRData=opt$rdata
+conditionFile=opt$conditions
+transformation=opt$transformation
+
+data1=getGEO(GEOQueryID)
+eset=data1[[1]]
+
+#check if datas are in log2 space
+normalization<-function(data){
+	ex <- exprs(data)
+	qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))
+	LogC <- (qx[5] > 100) ||
+			(qx[6]-qx[1] > 50 && qx[2] > 0) ||
+			(qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
+	if (LogC) { ex[which(ex <= 0)] <- NaN
+		return (log2(ex)) } else {
+		return (ex)
+	}
+}
+
+if (transformation=="auto"){
+	exprs(eset)=normalization(eset)
+} else if (transformation=="yes"){
+	exprs(eset)=log2(exprs(eset))			
+}
+
+matrixData=exprs(eset)
+write.table(matrixData,col.names=NA,row.names=TRUE,sep="\t",file=GEOQueryData)
+
+#Construcion of condition file
+#if there is data in "source_name_ch1" field, we keep this data as a condition
+#else we keep the "description" field data.
+if (length(unique(tolower(pData(data1[[1]])["source_name_ch1"][,1])))>1)
+{
+	conditions=pData(data1[[1]])["source_name_ch1"]
+	description=paste0(as.vector(pData(data1[[1]])["geo_accession"][,1]), " ",as.vector(pData(data1[[1]])["title"][,1]), " ", as.vector(conditions[,1]))
+}	else
+{
+	conditions=pData(data1[[1]])["description"]
+	description=paste0(as.vector(pData(data1[[1]])["geo_accession"][,1]), " ",as.vector(pData(data1[[1]])["title"][,1]), " ", as.vector(conditions[,1]))
+}
+
+conditions[,1]=tolower(conditions[,1])
+pData(eset)["source_name_ch1"]=conditions
+
+write.table(cbind(conditions,description),quote = FALSE,col.names = FALSE, row.names=TRUE,file=conditionFile,sep="\t")
+save(eset,conditions,file=GEOQueryRData)
\ No newline at end of file