Mercurial > repos > yguitton > metams_rungc
comparison metams.r @ 0:2066efbafd7c draft
planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit 6384fcf4496a64affe0b8a173c3f7ea09a275ffb
author | yguitton |
---|---|
date | Wed, 13 Jul 2016 06:46:45 -0400 |
parents | |
children | 142fbe102a9d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2066efbafd7c |
---|---|
1 #!/usr/local/public/bin/Rscript --vanilla --slave --no-site-file | |
2 # metams.r version="0.99.9" | |
3 #created by Yann GUITTON | |
4 | |
5 | |
6 #Redirect all stdout to the log file | |
7 log_file=file("metams.log", open = "wt") | |
8 sink(log_file) | |
9 sink(log_file, type = "out") | |
10 | |
11 library(metaMS) | |
12 library(batch) #necessary for parseCommandArgs function | |
13 | |
14 source_local <- function(fname) { | |
15 argv <- commandArgs(trailingOnly = FALSE) | |
16 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) | |
17 source(paste(base_dir, fname, sep="/")) | |
18 } | |
19 print("step1") | |
20 | |
21 | |
22 listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects | |
23 print("new version 8") | |
24 | |
25 | |
26 print(listArguments) | |
27 | |
28 if (listArguments[["ri"]]!="NULL"){ | |
29 RIarg=read.table(listArguments[["ri"]], h=T) | |
30 colnames(RIarg)<-c("rt","RI") | |
31 # print(RIarg) | |
32 } else { | |
33 RIarg = NULL | |
34 # cat("Ri= ",RIarg) | |
35 } | |
36 | |
37 | |
38 DBarg=listArguments[["db"]] | |
39 # if (listArguments[["use_db"]]!="NULL"){ | |
40 if (DBarg!="NULL"){ | |
41 DBarg=listArguments[["db"]] | |
42 cat("Db= ",DBarg) | |
43 } else { | |
44 DBarg = NULL | |
45 cat("NO Db : ",DBarg) | |
46 } | |
47 | |
48 | |
49 | |
50 | |
51 #for unknown EIC printing | |
52 | |
53 if (listArguments[["unkn"]]!="NULL") { | |
54 unknarg<-"" | |
55 } else { | |
56 unknarg<-listArguments[["unkn"]] | |
57 } | |
58 | |
59 print(paste("Unkn:",unknarg)) | |
60 | |
61 #remove unused arguments | |
62 listArguments[["unkn"]]<-NULL | |
63 listArguments[["db"]] <- NULL | |
64 listArguments[["ri"]] <- NULL | |
65 | |
66 print(" step2") | |
67 | |
68 #runGC accept either a list of files a zip folder or an xset object from xcms.xcmsSet tool | |
69 | |
70 #CASE 2 from zip file | |
71 #necessary to unzip .zip file uploaded to Galaxy | |
72 #thanks to .zip file it's possible to upload many file as the same time conserving the tree hierarchy of directories | |
73 | |
74 if (!is.null(listArguments[["zipfile"]])){ | |
75 print("CASE 2 from zip file") | |
76 directory=unzip(listArguments[["zipfile"]]) | |
77 listArguments=append(list(directory), listArguments) | |
78 | |
79 metams_zip_file= listArguments[["zipfile"]] | |
80 listArguments[["zipfile"]]=NULL | |
81 | |
82 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
83 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") | |
84 #samples<-list.files(path=directory, pattern=filepattern, all.files=FALSE,recursive=TRUE,full.names=TRUE,ignore.case=FALSE) | |
85 samples<-directory[grep(filepattern, directory)] | |
86 # cat(samples) #debugg | |
87 #create sampleMetadata, get sampleMetadata and class | |
88 sampleMetadata<-xcms:::phenoDataFromPaths(samples) | |
89 sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata) | |
90 row.names(sampleMetadata)<-NULL | |
91 } else { | |
92 metams_zip_file="" | |
93 } | |
94 | |
95 #CASE 3 from xset is an .RData file necessary to use the xcmsSet object generated by xcms.xcmsSet given by previous tools | |
96 if (!is.null(listArguments[["xset"]])){ | |
97 print("CASE 3 from xset") | |
98 require(CAMERA, quietly = TRUE) | |
99 load(listArguments[["xset"]]) | |
100 cat(class(xset)) | |
101 xsetparam=listArguments[["xset"]] | |
102 listArguments[["xset"]]=NULL #remove xset from arguments | |
103 | |
104 #xset from xcms.xcmsSet is not well formatted for metaMS this function do the formatting | |
105 if (class(xset)=="xcmsSet"){ | |
106 | |
107 #treat case where Rdata came from xcmsSet with zip file entry | |
108 if(exists("zip_file")){ | |
109 if (zip_file!=""){ | |
110 directory=unzip(zip_file) | |
111 print("CASE 3 from xset and with ZIP input") | |
112 | |
113 } else { | |
114 print("CASE 3 from xset and with LIBRARY input") | |
115 } | |
116 } | |
117 #end zip file case | |
118 if (length(xset@rt$raw)>1){ | |
119 #create an exceptable list of xset for metaMS | |
120 xset.l<-vector("list",length(xset@rt$raw)) | |
121 for (i in 1:length(xset@rt$raw)){ | |
122 xset.l[[i]]<-new("xcmsSet") | |
123 xset.l[[i]]@peaks<-xset@peaks[which(xset@peaks[,"sample"]==i),] | |
124 df<-data.frame(class=xset@phenoData[i,]) | |
125 rownames(df)<-rownames(xset@phenoData)[i] | |
126 xset.l[[i]]@phenoData<-df | |
127 xset.l[[i]]@rt$raw<-xset@rt$raw[[i]] | |
128 xset.l[[i]]@rt$corrected<-xset@rt$corrected[[i]] | |
129 xset.l[[i]]@filepaths<-xset@filepaths[i] | |
130 xset.l[[i]]@profinfo<-xset@profinfo | |
131 } | |
132 } else { | |
133 xset.l<-xset | |
134 } | |
135 | |
136 } | |
137 #create sampleMetadata, get sampleMetadata and class | |
138 sampleMetadata<-xset@phenoData | |
139 sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata) | |
140 row.names(sampleMetadata)<-NULL | |
141 samples<-xset@filepaths | |
142 } else { | |
143 xsetparam<-NULL | |
144 } | |
145 | |
146 | |
147 | |
148 #Import the different functions | |
149 source_local("lib_metams.r") | |
150 | |
151 #load function for ploting EICs and pspectra | |
152 # source_local("plotUnknown.r") | |
153 | |
154 #settings process | |
155 if (listArguments[["settings"]]=="default") { | |
156 data(FEMsettings) | |
157 if (listArguments[["rtrange"]][1]!="NULL") { | |
158 rtrange=listArguments[["rtrange"]] | |
159 } else { | |
160 rtrange=NULL | |
161 } | |
162 | |
163 if (!is.null(DBarg)){ | |
164 manual <- read.msp(DBarg) | |
165 DBarg <- createSTDdbGC(stdInfo = NULL, settings = TSQXLS.GC, manualDB = manual) | |
166 } | |
167 nSlaves=listArguments[["nSlaves"]] | |
168 if(!metams_zip_file=="") { | |
169 resGC<-runGC(files=samples,settings=TSQXLS.GC, rtrange=rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) #default settings for GC from Wehrens et al | |
170 } | |
171 if(!is.null(xsetparam)){ | |
172 settingslist=TSQXLS.GC | |
173 if (class(xset.l[[1]])!="xsAnnotate"){ | |
174 cat("Process xsAnnotate") | |
175 xset<-lapply(xset.l, | |
176 function(x) { | |
177 y <- xsAnnotate(x, sample = 1) | |
178 capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm), | |
179 file = NULL) | |
180 z}) | |
181 | |
182 } | |
183 | |
184 resGC<-runGC(xset=xset,settings=TSQXLS.GC, rtrange=rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) #default settings for GC from Wehrens et al | |
185 } | |
186 } | |
187 | |
188 if (listArguments[["settings"]]=="User_defined") { | |
189 listArguments[["settings"]]=NULL #delete from the list of arguments | |
190 fwhmparam=listArguments[["fwhm"]] | |
191 rtdiffparam=listArguments[["rtdiff"]] | |
192 #RIdiffparam=listArguments[["RIdiff"]] #only if timeComparison = "RI" | |
193 minfeatparam=listArguments[["minfeat"]] | |
194 simthreshparam=listArguments[["simthreshold"]] | |
195 minclassfractionparam=listArguments[["minclassfraction"]] | |
196 minclasssizeparam=listArguments[["minclasssize"]] | |
197 if (listArguments[["rtrange"]]!="NULL") { | |
198 rtrange=listArguments[["rtrange"]] | |
199 cat("rtrange= ",rtrange) | |
200 } else { | |
201 rtrange=NULL | |
202 cat("rtrange= ",rtrange) | |
203 } | |
204 nSlaves=listArguments[["nSlaves"]] | |
205 | |
206 GALAXY.GC <- metaMSsettings("protocolName" = "GALAXY.GC", | |
207 "chrom" = "GC", | |
208 PeakPicking = list( | |
209 method = "matchedFilter", | |
210 step = 0.5, | |
211 steps = 2, | |
212 mzdiff = .5, | |
213 fwhm = fwhmparam, | |
214 snthresh = 2, | |
215 max = 500), | |
216 CAMERA = list(perfwhm = 1)) | |
217 | |
218 metaSetting(GALAXY.GC, "DBconstruction") <- list( | |
219 minintens = 0.0, | |
220 rttol = rtdiffparam, | |
221 intensityMeasure = "maxo", | |
222 DBthreshold = .80, | |
223 minfeat = minfeatparam) | |
224 metaSetting(GALAXY.GC, "match2DB") <- list( | |
225 simthresh = simthreshparam, | |
226 timeComparison = "rt", | |
227 rtdiff = rtdiffparam, | |
228 RIdiff = 5, | |
229 minfeat = minfeatparam) | |
230 #to be used when DB option will be available | |
231 # metaSetting(GALAXY.GC, "matchIrrelevants") <- list( | |
232 # irrelevantClasses = c("Bleeding", "Plasticizers"), | |
233 # timeComparison = "rt", | |
234 # RIdiff = 2, | |
235 # rtdiff = rtdiffparam, | |
236 # simthresh = simthreshparam) | |
237 metaSetting(GALAXY.GC, "betweenSamples") <- list( | |
238 min.class.fraction = minclassfractionparam, | |
239 min.class.size = minclasssizeparam, | |
240 timeComparison = "rt", | |
241 rtdiff = rtdiffparam, | |
242 RIdiff = 2, | |
243 simthresh = simthreshparam) | |
244 | |
245 | |
246 # files, xset, settings, rtrange = NULL, DB = NULL, | |
247 # removeArtefacts = TRUE, findUnknowns = nexp > 1, | |
248 # returnXset = FALSE, RIstandards = NULL, nSlaves = 0 | |
249 if (!is.null(DBarg)){ | |
250 manual <- read.msp(DBarg) | |
251 DBarg <- createSTDdbGC(stdInfo = NULL, settings = GALAXY.GC, manualDB = manual) | |
252 } | |
253 if (!metams_zip_file=="") { | |
254 resGC<-runGC(files=samples,settings=GALAXY.GC,rtrange = rtrange, DB= DBarg , removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) | |
255 } | |
256 if(!is.null(xsetparam)) { | |
257 settingslist=GALAXY.GC | |
258 if (class(xset.l[[1]])!="xsAnnotate") { | |
259 print("Process xsAnnotate") | |
260 xset<-lapply(xset.l, | |
261 function(x) { | |
262 y <- xsAnnotate(x, sample = 1) | |
263 capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm), | |
264 file = NULL) | |
265 z}) | |
266 | |
267 } | |
268 resGC<-runGC(xset=xset,settings=GALAXY.GC,rtrange = rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) | |
269 } | |
270 } | |
271 | |
272 #peakTable ordered by rt | |
273 peaktable<-resGC$PeakTable<-resGC$PeakTable[order(resGC$PeakTable[,"rt"]),] | |
274 write.table(peaktable, file="peaktable.tsv", sep="\t", row.names=FALSE) | |
275 | |
276 #peakTable for PCA | |
277 #dataMatrix | |
278 dataMatrix<-cbind(Name=resGC$PeakTable[,"Name"],resGC$PeakTable[,(colnames(resGC$PeakTable) %in% sampleMetadata[,1])]) | |
279 rownames(dataMatrix)<-NULL | |
280 write.table(dataMatrix, file="dataMatrix.tsv", sep="\t", row.names=FALSE, quote=FALSE) | |
281 | |
282 #variableMetadata | |
283 variableMetadata<-resGC$PeakTable[,!(colnames(resGC$PeakTable) %in% sampleMetadata[,1])] | |
284 rownames(variableMetadata)<-NULL | |
285 write.table(variableMetadata, file="variableMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE) | |
286 | |
287 #sampleMetadata | |
288 write.table(sampleMetadata, file="sampleMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE) | |
289 | |
290 #peak spectrum as MSP for DB search | |
291 write.msp(resGC$PseudoSpectra, file="peakspectra.msp", newFile = TRUE) | |
292 | |
293 #TIC/BPC picture generation | |
294 # use files as entry not xset that do not exist | |
295 | |
296 print("TICs") | |
297 #Use getTIC2s and getBPC2s because getTICs and getBPCs can exists due to transfert of function in Rdata | |
298 b<-getTIC2s(files = samples, pdfname="TICs_raw.pdf", rt="raw") | |
299 | |
300 print("BPCs") | |
301 c<-getBPC2s(files=samples, rt="raw", pdfname="BPCs_raw.pdf") | |
302 | |
303 print("Step QC plot") | |
304 | |
305 #Quality controls plots but only working in R (don't know why) | |
306 a<-plotUnknowns(resGC=resGC, unkn=unknarg) #use unknparam value | |
307 | |
308 # create a mergpdf | |
309 | |
310 #test | |
311 system(paste('gs -o TICsBPCs_merged.pdf -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress *Cs_raw.pdf')) | |
312 system(paste('gs -o GCMS_EIC.pdf -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress Unknown_*')) | |
313 | |
314 | |
315 ############################################TEST FUNCTION START################################################################# | |
316 | |
317 ############################################TEST FUNCTION END################################################################# | |
318 | |
319 | |
320 | |
321 | |
322 #zip files of all runGC outputs | |
323 | |
324 system(paste('ls . | grep -e "tsv$" -e "msp$" -e "GCMS_" | zip -r -@ "rungc.zip"')) | |
325 | |
326 | |
327 #delete the parameters to avoid the passage to the next tool in .RData image | |
328 rm(listArguments) | |
329 | |
330 #saving R data in .Rdata file to save the variables used in the present tool | |
331 save.image(paste("runGC","RData",sep=".")) | |
332 |