annotate GEOQuery.R @ 8:e4e6e583b8d9 draft default tip

planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit ad301f84e16455ca3886e9b56e908180dfd66d4f
author sblanck
date Thu, 26 Jul 2018 08:17:53 -0400
parents 1024245abc70
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1024245abc70 planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff changeset
#!/usr/bin/env Rscript
# Command-line entry section: installs the error handler, parses the options,
# loads GEOquery and fetches the requested GEO record. It leaves `data1`,
# `eset` and the option values (`GEOQueryID`, `GEOQueryData`, `GEOQueryRData`,
# `conditionFile`, `transformation`) in the global environment for the rest of
# the script.

# setup R error handling to go to stderr
# (the handler prints the last error message and quits with exit status 1)
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q(save = "no", status = 1, runLast = FALSE)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

library("optparse")

##### Read options
option_list <- list(
  make_option("--id", type = "character", default = NULL,
              help = "GSE ID from GEO databse (required)"),
  make_option("--transformation", type = "character", default = NULL,
              help = "log2 transformation (required)"),
  make_option("--data", type = "character", default = NULL,
              help = "A table containing the expression data"),
  # NOTE(review): this default is the string "NULL", not the NULL value, so an
  # omitted --rdata makes the final save() write a file literally named "NULL".
  # Kept as-is because the save() call further down relies on a usable path.
  make_option("--rdata", type = "character", default = "NULL",
              help = "rdata object containing eset object"),
  make_option("--conditions", type = "character", default = NULL,
              help = "Text file summarizing conditions of the experiment")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

if (is.null(opt$id)) {
  print_help(opt_parser)
  stop("GEOdata id required.", call. = FALSE)
}

# --transformation is documented as required and is later compared with
# "auto"/"yes"; a missing value would only fail there, after the download,
# with an obscure "argument is of length zero" error. Fail early instead.
if (is.null(opt$transformation)) {
  print_help(opt_parser)
  stop("transformation option required.", call. = FALSE)
}

# loading libraries
# library() (unlike require()) stops immediately when the package is missing.
suppressPackageStartupMessages(library(GEOquery))

GEOQueryID <- opt$id
GEOQueryData <- opt$data
GEOQueryRData <- opt$rdata
conditionFile <- opt$conditions
transformation <- opt$transformation

# Fetch the record from GEO; only the first element of the result is used.
data1 <- getGEO(GEOQueryID)
eset <- data1[[1]]
1024245abc70 planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff changeset
# Check whether the data are already in log2 space and log2-transform them
# when they are not.
#
# data: object whose expression matrix is obtained with exprs().
#
# Returns the expression matrix. When the quantile heuristic below flags the
# values as raw-scale, non-positive entries are set to NaN and the matrix is
# log2-transformed; otherwise the matrix is returned unchanged.
normalization <- function(data) {
  ex <- exprs(data)
  # Minimum, quartiles, 99th percentile and maximum of all values.
  qx <- as.numeric(quantile(ex, c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE))

  # isTRUE(): the quantiles are NA when the matrix has no non-missing value,
  # which would otherwise make the `if` below fail with
  # "missing value where TRUE/FALSE needed". Such data are left untouched.
  needs_log2 <- isTRUE(
    (qx[5] > 100) ||
      (qx[6] - qx[1] > 50 && qx[2] > 0) ||
      (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
  )

  if (!needs_log2) {
    return(ex)
  }

  # log2 is undefined for values <= 0; flag them as NaN before transforming.
  ex[which(ex <= 0)] <- NaN
  log2(ex)
}
1024245abc70 planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff changeset
52
1024245abc70 planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff changeset
# Apply the requested transformation: "auto" delegates the decision to
# normalization(), "yes" always applies log2, any other value leaves the
# expression values as they are.
if (transformation == "auto") {
  exprs(eset) <- normalization(eset)
} else if (transformation == "yes") {
  exprs(eset) <- log2(exprs(eset))
}

# Write the expression matrix as a tab-separated table with row names.
matrixData <- exprs(eset)
write.table(
  matrixData,
  file = GEOQueryData,
  sep = "\t",
  row.names = TRUE,
  col.names = NA
)

# Construction of condition file
# If the "source_name_ch1" field holds more than one distinct value
# (ignoring case) it is used as the condition; otherwise the "description"
# field is used.
pheno <- pData(data1[[1]])
source_names <- pheno[, "source_name_ch1"]
condition_field <- if (length(unique(tolower(source_names))) > 1) {
  "source_name_ch1"
} else {
  "description"
}
conditions <- pheno[condition_field]

# Free-text label per sample: accession, title and the condition value as it
# was before lower-casing.
description <- paste0(
  as.vector(pheno[, "geo_accession"]), " ",
  as.vector(pheno[, "title"]), " ",
  as.vector(conditions[, 1])
)

# Conditions are stored lower-cased, both in the eset and in the output file.
conditions[, 1] <- tolower(conditions[, 1])
pData(eset)["source_name_ch1"] <- conditions

write.table(
  cbind(conditions, description),
  file = conditionFile,
  sep = "\t",
  quote = FALSE,
  row.names = TRUE,
  col.names = FALSE
)
save(eset, conditions, file = GEOQueryRData)