comparison metams.r @ 0:2066efbafd7c draft

planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit 6384fcf4496a64affe0b8a173c3f7ea09a275ffb
author yguitton
date Wed, 13 Jul 2016 06:46:45 -0400
parents
children 142fbe102a9d
comparison
equal deleted inserted replaced
-1:000000000000 0:2066efbafd7c
1 #!/usr/local/public/bin/Rscript --vanilla --slave --no-site-file
2 # metams.r version="0.99.9"
3 #created by Yann GUITTON
4
5
6 #Redirect all stdout to the log file
7 log_file=file("metams.log", open = "wt")
8 sink(log_file)
9 sink(log_file, type = "out")
10
11 library(metaMS)
12 library(batch) #necessary for parseCommandArgs function
13
14 source_local <- function(fname) {
15 argv <- commandArgs(trailingOnly = FALSE)
16 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
17 source(paste(base_dir, fname, sep="/"))
18 }
19 print("step1")
20
21
22 listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
23 print("new version 8")
24
25
26 print(listArguments)
27
28 if (listArguments[["ri"]]!="NULL"){
29 RIarg=read.table(listArguments[["ri"]], h=T)
30 colnames(RIarg)<-c("rt","RI")
31 # print(RIarg)
32 } else {
33 RIarg = NULL
34 # cat("Ri= ",RIarg)
35 }
36
37
38 DBarg=listArguments[["db"]]
39 # if (listArguments[["use_db"]]!="NULL"){
40 if (DBarg!="NULL"){
41 DBarg=listArguments[["db"]]
42 cat("Db= ",DBarg)
43 } else {
44 DBarg = NULL
45 cat("NO Db : ",DBarg)
46 }
47
48
49
50
51 #for unknown EIC printing
52
53 if (listArguments[["unkn"]]!="NULL") {
54 unknarg<-""
55 } else {
56 unknarg<-listArguments[["unkn"]]
57 }
58
59 print(paste("Unkn:",unknarg))
60
61 #remove unused arguments
62 listArguments[["unkn"]]<-NULL
63 listArguments[["db"]] <- NULL
64 listArguments[["ri"]] <- NULL
65
66 print(" step2")
67
68 #runGC accept either a list of files a zip folder or an xset object from xcms.xcmsSet tool
69
70 #CASE 2 from zip file
71 #necessary to unzip .zip file uploaded to Galaxy
72 #thanks to .zip file it's possible to upload many file as the same time conserving the tree hierarchy of directories
73
74 if (!is.null(listArguments[["zipfile"]])){
75 print("CASE 2 from zip file")
76 directory=unzip(listArguments[["zipfile"]])
77 listArguments=append(list(directory), listArguments)
78
79 metams_zip_file= listArguments[["zipfile"]]
80 listArguments[["zipfile"]]=NULL
81
82 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
83 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
84 #samples<-list.files(path=directory, pattern=filepattern, all.files=FALSE,recursive=TRUE,full.names=TRUE,ignore.case=FALSE)
85 samples<-directory[grep(filepattern, directory)]
86 # cat(samples) #debugg
87 #create sampleMetadata, get sampleMetadata and class
88 sampleMetadata<-xcms:::phenoDataFromPaths(samples)
89 sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata)
90 row.names(sampleMetadata)<-NULL
91 } else {
92 metams_zip_file=""
93 }
94
95 #CASE 3 from xset is an .RData file necessary to use the xcmsSet object generated by xcms.xcmsSet given by previous tools
96 if (!is.null(listArguments[["xset"]])){
97 print("CASE 3 from xset")
98 require(CAMERA, quietly = TRUE)
99 load(listArguments[["xset"]])
100 cat(class(xset))
101 xsetparam=listArguments[["xset"]]
102 listArguments[["xset"]]=NULL #remove xset from arguments
103
104 #xset from xcms.xcmsSet is not well formatted for metaMS this function do the formatting
105 if (class(xset)=="xcmsSet"){
106
107 #treat case where Rdata came from xcmsSet with zip file entry
108 if(exists("zip_file")){
109 if (zip_file!=""){
110 directory=unzip(zip_file)
111 print("CASE 3 from xset and with ZIP input")
112
113 } else {
114 print("CASE 3 from xset and with LIBRARY input")
115 }
116 }
117 #end zip file case
118 if (length(xset@rt$raw)>1){
119 #create an exceptable list of xset for metaMS
120 xset.l<-vector("list",length(xset@rt$raw))
121 for (i in 1:length(xset@rt$raw)){
122 xset.l[[i]]<-new("xcmsSet")
123 xset.l[[i]]@peaks<-xset@peaks[which(xset@peaks[,"sample"]==i),]
124 df<-data.frame(class=xset@phenoData[i,])
125 rownames(df)<-rownames(xset@phenoData)[i]
126 xset.l[[i]]@phenoData<-df
127 xset.l[[i]]@rt$raw<-xset@rt$raw[[i]]
128 xset.l[[i]]@rt$corrected<-xset@rt$corrected[[i]]
129 xset.l[[i]]@filepaths<-xset@filepaths[i]
130 xset.l[[i]]@profinfo<-xset@profinfo
131 }
132 } else {
133 xset.l<-xset
134 }
135
136 }
137 #create sampleMetadata, get sampleMetadata and class
138 sampleMetadata<-xset@phenoData
139 sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata)
140 row.names(sampleMetadata)<-NULL
141 samples<-xset@filepaths
142 } else {
143 xsetparam<-NULL
144 }
145
146
147
148 #Import the different functions
149 source_local("lib_metams.r")
150
151 #load function for ploting EICs and pspectra
152 # source_local("plotUnknown.r")
153
154 #settings process
155 if (listArguments[["settings"]]=="default") {
156 data(FEMsettings)
157 if (listArguments[["rtrange"]][1]!="NULL") {
158 rtrange=listArguments[["rtrange"]]
159 } else {
160 rtrange=NULL
161 }
162
163 if (!is.null(DBarg)){
164 manual <- read.msp(DBarg)
165 DBarg <- createSTDdbGC(stdInfo = NULL, settings = TSQXLS.GC, manualDB = manual)
166 }
167 nSlaves=listArguments[["nSlaves"]]
168 if(!metams_zip_file=="") {
169 resGC<-runGC(files=samples,settings=TSQXLS.GC, rtrange=rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) #default settings for GC from Wehrens et al
170 }
171 if(!is.null(xsetparam)){
172 settingslist=TSQXLS.GC
173 if (class(xset.l[[1]])!="xsAnnotate"){
174 cat("Process xsAnnotate")
175 xset<-lapply(xset.l,
176 function(x) {
177 y <- xsAnnotate(x, sample = 1)
178 capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm),
179 file = NULL)
180 z})
181
182 }
183
184 resGC<-runGC(xset=xset,settings=TSQXLS.GC, rtrange=rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) #default settings for GC from Wehrens et al
185 }
186 }
187
188 if (listArguments[["settings"]]=="User_defined") {
189 listArguments[["settings"]]=NULL #delete from the list of arguments
190 fwhmparam=listArguments[["fwhm"]]
191 rtdiffparam=listArguments[["rtdiff"]]
192 #RIdiffparam=listArguments[["RIdiff"]] #only if timeComparison = "RI"
193 minfeatparam=listArguments[["minfeat"]]
194 simthreshparam=listArguments[["simthreshold"]]
195 minclassfractionparam=listArguments[["minclassfraction"]]
196 minclasssizeparam=listArguments[["minclasssize"]]
197 if (listArguments[["rtrange"]]!="NULL") {
198 rtrange=listArguments[["rtrange"]]
199 cat("rtrange= ",rtrange)
200 } else {
201 rtrange=NULL
202 cat("rtrange= ",rtrange)
203 }
204 nSlaves=listArguments[["nSlaves"]]
205
206 GALAXY.GC <- metaMSsettings("protocolName" = "GALAXY.GC",
207 "chrom" = "GC",
208 PeakPicking = list(
209 method = "matchedFilter",
210 step = 0.5,
211 steps = 2,
212 mzdiff = .5,
213 fwhm = fwhmparam,
214 snthresh = 2,
215 max = 500),
216 CAMERA = list(perfwhm = 1))
217
218 metaSetting(GALAXY.GC, "DBconstruction") <- list(
219 minintens = 0.0,
220 rttol = rtdiffparam,
221 intensityMeasure = "maxo",
222 DBthreshold = .80,
223 minfeat = minfeatparam)
224 metaSetting(GALAXY.GC, "match2DB") <- list(
225 simthresh = simthreshparam,
226 timeComparison = "rt",
227 rtdiff = rtdiffparam,
228 RIdiff = 5,
229 minfeat = minfeatparam)
230 #to be used when DB option will be available
231 # metaSetting(GALAXY.GC, "matchIrrelevants") <- list(
232 # irrelevantClasses = c("Bleeding", "Plasticizers"),
233 # timeComparison = "rt",
234 # RIdiff = 2,
235 # rtdiff = rtdiffparam,
236 # simthresh = simthreshparam)
237 metaSetting(GALAXY.GC, "betweenSamples") <- list(
238 min.class.fraction = minclassfractionparam,
239 min.class.size = minclasssizeparam,
240 timeComparison = "rt",
241 rtdiff = rtdiffparam,
242 RIdiff = 2,
243 simthresh = simthreshparam)
244
245
246 # files, xset, settings, rtrange = NULL, DB = NULL,
247 # removeArtefacts = TRUE, findUnknowns = nexp > 1,
248 # returnXset = FALSE, RIstandards = NULL, nSlaves = 0
249 if (!is.null(DBarg)){
250 manual <- read.msp(DBarg)
251 DBarg <- createSTDdbGC(stdInfo = NULL, settings = GALAXY.GC, manualDB = manual)
252 }
253 if (!metams_zip_file=="") {
254 resGC<-runGC(files=samples,settings=GALAXY.GC,rtrange = rtrange, DB= DBarg , removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves)
255 }
256 if(!is.null(xsetparam)) {
257 settingslist=GALAXY.GC
258 if (class(xset.l[[1]])!="xsAnnotate") {
259 print("Process xsAnnotate")
260 xset<-lapply(xset.l,
261 function(x) {
262 y <- xsAnnotate(x, sample = 1)
263 capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm),
264 file = NULL)
265 z})
266
267 }
268 resGC<-runGC(xset=xset,settings=GALAXY.GC,rtrange = rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves)
269 }
270 }
271
272 #peakTable ordered by rt
273 peaktable<-resGC$PeakTable<-resGC$PeakTable[order(resGC$PeakTable[,"rt"]),]
274 write.table(peaktable, file="peaktable.tsv", sep="\t", row.names=FALSE)
275
276 #peakTable for PCA
277 #dataMatrix
278 dataMatrix<-cbind(Name=resGC$PeakTable[,"Name"],resGC$PeakTable[,(colnames(resGC$PeakTable) %in% sampleMetadata[,1])])
279 rownames(dataMatrix)<-NULL
280 write.table(dataMatrix, file="dataMatrix.tsv", sep="\t", row.names=FALSE, quote=FALSE)
281
282 #variableMetadata
283 variableMetadata<-resGC$PeakTable[,!(colnames(resGC$PeakTable) %in% sampleMetadata[,1])]
284 rownames(variableMetadata)<-NULL
285 write.table(variableMetadata, file="variableMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE)
286
287 #sampleMetadata
288 write.table(sampleMetadata, file="sampleMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE)
289
290 #peak spectrum as MSP for DB search
291 write.msp(resGC$PseudoSpectra, file="peakspectra.msp", newFile = TRUE)
292
293 #TIC/BPC picture generation
294 # use files as entry not xset that do not exist
295
296 print("TICs")
297 #Use getTIC2s and getBPC2s because getTICs and getBPCs can exists due to transfert of function in Rdata
298 b<-getTIC2s(files = samples, pdfname="TICs_raw.pdf", rt="raw")
299
300 print("BPCs")
301 c<-getBPC2s(files=samples, rt="raw", pdfname="BPCs_raw.pdf")
302
303 print("Step QC plot")
304
305 #Quality controls plots but only working in R (don't know why)
306 a<-plotUnknowns(resGC=resGC, unkn=unknarg) #use unknparam value
307
308 # create a mergpdf
309
310 #test
311 system(paste('gs -o TICsBPCs_merged.pdf -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress *Cs_raw.pdf'))
312 system(paste('gs -o GCMS_EIC.pdf -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress Unknown_*'))
313
314
315 ############################################TEST FUNCTION START#################################################################
316
317 ############################################TEST FUNCTION END#################################################################
318
319
320
321
322 #zip files of all runGC outputs
323
324 system(paste('ls . | grep -e "tsv$" -e "msp$" -e "GCMS_" | zip -r -@ "rungc.zip"'))
325
326
327 #delete the parameters to avoid the passage to the next tool in .RData image
328 rm(listArguments)
329
330 #saving R data in .Rdata file to save the variables used in the present tool
331 save.image(paste("runGC","RData",sep="."))
332