Mercurial > repos > sblanck > smagexp
annotate GEOQuery.R @ 0:1024245abc70 draft
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
author | sblanck |
---|---|
date | Thu, 22 Feb 2018 08:38:22 -0500 |
parents | |
children |
rev | line source |
---|---|
0
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
1 #!/usr/bin/env Rscript |
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
# Send R error messages to stderr and exit with status 1, without saving the
# workspace. TRUE/FALSE are spelled out because T/F are ordinary variables
# that can be reassigned.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q(save = "no", status = 1, runLast = FALSE)
  }
)

# We need this to avoid crashing Galaxy with a UTF-8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
7 |
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
8 library("optparse") |
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
9 |
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
##### Read options
# Command-line interface of the script: the GEO series id, the log2 handling
# mode, and the paths given to write.table()/save() further down.
option_list <- list(
  make_option("--id", type = "character", default = NULL,
              help = "GSE ID from GEO database (required)"),
  make_option("--transformation", type = "character", default = NULL,
              help = "log2 transformation (required)"),
  make_option("--data", type = "character", default = NULL,
              help = "A table containing the expression data"),
  # NOTE(review): this default is the string "NULL", not NULL, so omitting
  # --rdata makes the final save() write a file literally named "NULL".
  # Left unchanged to keep current behaviour; confirm whether it is intended.
  make_option("--rdata", type = "character", default = "NULL",
              help = "rdata object containing eset object"),
  make_option("--conditions", type = "character", default = NULL,
              help = "Text file summarizing conditions of the experiment")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

if (is.null(opt$id)) {
  print_help(opt_parser)
  stop("GEOdata id required.", call. = FALSE)
}

# Fail fast: without this check a missing --transformation is only detected
# after the GEO download, as an opaque "argument is of length zero" error in
# the comparison against "auto".
if (is.null(opt$transformation)) {
  print_help(opt_parser)
  stop("transformation mode required.", call. = FALSE)
}
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
27 |
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
# Loading libraries.
# library() instead of require(): require() only returns FALSE when the
# package is missing, so the failure would surface later at the getGEO() call.
suppressPackageStartupMessages(library(GEOquery))

# Command-line values used by the rest of the script.
GEOQueryID <- opt$id
GEOQueryData <- opt$data
GEOQueryRData <- opt$rdata
conditionFile <- opt$conditions
transformation <- opt$transformation

# Fetch the record from GEO.
data1 <- getGEO(GEOQueryID)
if (length(data1) == 0) {
  stop("getGEO returned no data for ", GEOQueryID, call. = FALSE)
}

# NOTE(review): only the first element of the result is used; presumably a
# series spanning several platforms returns more than one - confirm whether
# ignoring the others is intended.
eset <- data1[[1]]
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
39 |
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
# Check whether the data are already in log2 space.

#' Return expression values in log2 space, transforming only when needed.
#'
#' The decision is a heuristic on the distribution of the values. The data are
#' treated as not yet log-transformed when any of these holds:
#' the 99th percentile exceeds 100; the range exceeds 50 and the first
#' quartile is positive; the first quartile lies in (0, 1) and the third
#' quartile lies in (1, 2).
#'
#' @param data An object accepted by `exprs()`, or a plain numeric matrix of
#'   expression values.
#' @return The expression matrix. When the heuristic fires, non-positive
#'   values are set to NaN and the matrix is log2-transformed; otherwise the
#'   matrix is returned unchanged.
normalization <- function(data) {
  ex <- if (is.matrix(data)) data else exprs(data)
  qx <- as.numeric(quantile(ex, c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE))
  LogC <- (qx[5] > 100) ||
    (qx[6] - qx[1] > 50 && qx[2] > 0) ||
    (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)

  # isTRUE(): when the matrix holds no non-missing value every quantile is NA,
  # LogC is NA, and a bare if (LogC) would abort with
  # "missing value where TRUE/FALSE needed".
  if (isTRUE(LogC)) {
    # which() drops NA comparisons, so missing values are left untouched.
    ex[which(ex <= 0)] <- NaN
    return(log2(ex))
  }
  ex
}
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
52 |
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
# Log2 handling of the expression values, driven by the --transformation value:
#   "yes"  - always apply log2
#   "auto" - let normalization() decide
#   any other value leaves the values as downloaded.
# NOTE(review): the "yes" branch applies log2 directly, so zeros become -Inf,
# whereas normalization() turns non-positive values into NaN first - confirm
# whether the difference is intended.
if (transformation == "yes") {
  exprs(eset) <- log2(exprs(eset))
} else if (transformation == "auto") {
  exprs(eset) <- normalization(eset)
}

# Write the expression matrix as a tab-separated table; col.names = NA together
# with row.names = TRUE adds an empty header cell above the row names.
matrixData <- exprs(eset)
write.table(
  matrixData,
  file = GEOQueryData,
  sep = "\t",
  row.names = TRUE,
  col.names = NA
)
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
61 |
1024245abc70
planemo upload for repository https://github.com/sblanck/smagexp/tree/master/smagexp_tools commit 5974f806f344dbcc384b931492d7f023bfbbe03b
sblanck
parents:
diff
changeset
|
# Construction of the condition file.
# If the "source_name_ch1" field holds more than one distinct value (ignoring
# case), it is kept as the condition; otherwise the "description" field is used.
pheno <- pData(data1[[1]])

# A missing "source_name_ch1" column counts as "no usable data" and falls back
# to "description" instead of failing with "undefined columns selected".
has_source_conditions <- "source_name_ch1" %in% colnames(pheno) &&
  length(unique(tolower(pheno[, "source_name_ch1"]))) > 1
condition_field <- if (has_source_conditions) "source_name_ch1" else "description"
if (!condition_field %in% colnames(pheno)) {
  stop("No '", condition_field, "' field in the sample annotation.", call. = FALSE)
}

# One-column data frame, so the row names of pheno stay attached.
conditions <- pheno[condition_field]

# Free-text label per sample: accession, title, and the condition in its
# original case (built before the lower-casing below).
description <- paste0(
  as.vector(pheno[, "geo_accession"]), " ",
  as.vector(pheno[, "title"]), " ",
  as.vector(conditions[, 1])
)

# Conditions are stored lower-cased, matching the case-insensitive test above,
# and copied into the "source_name_ch1" column of the saved eset.
conditions[, 1] <- tolower(conditions[, 1])
pData(eset)["source_name_ch1"] <- conditions

write.table(
  cbind(conditions, description),
  quote = FALSE,
  col.names = FALSE,
  row.names = TRUE,
  file = conditionFile,
  sep = "\t"
)
save(eset, conditions, file = GEOQueryRData)