comparison lib_metams.r @ 0:2066efbafd7c draft

planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit 6384fcf4496a64affe0b8a173c3f7ea09a275ffb
author yguitton
date Wed, 13 Jul 2016 06:46:45 -0400
parents
children c75532b75ba1
comparison
equal deleted inserted replaced
-1:000000000000 0:2066efbafd7c
1 # lib_metams.r version 0.99.6
2 # R function for metaMS runGC under W4M
3 # author Yann GUITTON CNRS IRISA/LINA Idealg project 2014-2015
4
5
6 ##ADDITIONS FROM Y. Guitton
7 getBPC <- function(file,rtcor=NULL, ...) {
8 object <- xcmsRaw(file)
9 sel <- profRange(object, ...)
10 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE]))
11
12 }
13
14 getBPC2s <- function (files, pdfname="BPCs.pdf", rt = c("raw","corrected"), scanrange=NULL) {
15 require(xcms)
16
17
18
19 #create sampleMetadata, get sampleMetadata and class
20 sampleMetadata<-xcms:::phenoDataFromPaths(files)
21 class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table
22 classnames<-vector("list",length(class))
23 for (i in 1:length(class)){
24 classnames[[i]]<-which( sampleMetadata[,1]==class[i])
25 }
26
27 N <- dim(sampleMetadata)[1]
28
29
30
31 TIC <- vector("list",N)
32
33
34 for (j in 1:N) {
35
36 cat(files[j],"\n")
37 TIC[[j]] <- getBPC(files[j])
38 #good for raw
39 # seems strange for corrected
40 #errors if scanrange used in xcmsSetgeneration
41 if (!is.null(xcmsSet) && rt == "corrected")
42 rtcor <- xcmsSet@rt$corrected[[j]] else
43 rtcor <- NULL
44 TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
45 }
46
47 pdf(pdfname,w=16,h=10)
48 cols <- rainbow(N)
49 lty = 1:N
50 pch = 1:N
51 #search for max x and max y in BPCs
52 xlim = range(sapply(TIC, function(x) range(x[,1])))
53 ylim = range(sapply(TIC, function(x) range(x[,2])))
54 ylim = c(-ylim[2], ylim[2])
55
56
57 ##plot start
58
59 if (length(class)>2){
60 for (k in 1:(length(class)-1)){
61 for (l in (k+1):length(class)){
62 print(paste(class[k],"vs",class[l],sep=" "))
63 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
64 colvect<-NULL
65 for (j in 1:length(classnames[[k]])) {
66
67 tic <- TIC[[classnames[[k]][j]]]
68 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
69 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
70 colvect<-append(colvect,cols[classnames[[k]][j]])
71 }
72 for (j in 1:length(classnames[[l]])) {
73 # i=class2names[j]
74 tic <- TIC[[classnames[[l]][j]]]
75 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
76 colvect<-append(colvect,cols[classnames[[l]][j]])
77 }
78 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
79 }
80 }
81 }#end if length >2
82 if (length(class)==2){
83 k=1
84 l=2
85 colvect<-NULL
86 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
87
88 for (j in 1:length(classnames[[k]])) {
89
90 tic <- TIC[[classnames[[k]][j]]]
91 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
92 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
93 colvect<-append(colvect,cols[classnames[[k]][j]])
94 }
95 for (j in 1:length(classnames[[l]])) {
96 # i=class2names[j]
97 tic <- TIC[[classnames[[l]][j]]]
98 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
99 colvect<-append(colvect,cols[classnames[[l]][j]])
100 }
101 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
102
103 }#end length ==2
104 if (length(class)==1){
105 k=1
106 ylim = range(sapply(TIC, function(x) range(x[,2])))
107 colvect<-NULL
108 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
109
110 for (j in 1:length(classnames[[k]])) {
111
112 tic <- TIC[[classnames[[k]][j]]]
113 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
114 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
115 colvect<-append(colvect,cols[classnames[[k]][j]])
116 }
117
118 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
119
120 }#end length ==1
121 dev.off()
122
123 # invisible(TIC)
124 }
125
126 getTIC <- function(file,rtcor=NULL) {
127 object <- xcmsRaw(file)
128 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity)
129 }
130
131 ##
132 ## overlay TIC from all files in current folder or from xcmsSet, create pdf
133 ##
134 getTIC2s <- function(files, pdfname="TICs.pdf", rt=c("raw","corrected")) {
135
136 #create sampleMetadata, get sampleMetadata and class
137 sampleMetadata<-xcms:::phenoDataFromPaths(files)
138 class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table
139 classnames<-vector("list",length(class))
140 for (i in 1:length(class)){
141 classnames[[i]]<-which( sampleMetadata[,1]==class[i])
142 }
143
144 N <- dim(sampleMetadata)[1]
145 TIC <- vector("list",N)
146
147 for (i in 1:N) {
148 cat(files[i],"\n")
149 if (!is.null(xcmsSet) && rt == "corrected")
150 rtcor <- xcmsSet@rt$corrected[[i]]
151 else
152 rtcor <- NULL
153 TIC[[i]] <- getTIC(files[i],rtcor=rtcor)
154 }
155
156 pdf(pdfname,w=16,h=10)
157 cols <- rainbow(N)
158 lty = 1:N
159 pch = 1:N
160 #search for max x and max y in TICs
161 xlim = range(sapply(TIC, function(x) range(x[,1])))
162 ylim = range(sapply(TIC, function(x) range(x[,2])))
163 ylim = c(-ylim[2], ylim[2])
164
165
166 ##plot start
167 if (length(class)>2){
168 for (k in 1:(length(class)-1)){
169 for (l in (k+1):length(class)){
170 print(paste(class[k],"vs",class[l],sep=" "))
171 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
172 colvect<-NULL
173 for (j in 1:length(classnames[[k]])) {
174
175 tic <- TIC[[classnames[[k]][j]]]
176 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
177 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
178 colvect<-append(colvect,cols[classnames[[k]][j]])
179 }
180 for (j in 1:length(classnames[[l]])) {
181 # i=class2names[j]
182 tic <- TIC[[classnames[[l]][j]]]
183 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
184 colvect<-append(colvect,cols[classnames[[l]][j]])
185 }
186 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
187 }
188 }
189 }#end if length >2
190 if (length(class)==2){
191
192
193 k=1
194 l=2
195
196 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
197 colvect<-NULL
198 for (j in 1:length(classnames[[k]])) {
199
200 tic <- TIC[[classnames[[k]][j]]]
201 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
202 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
203 colvect<-append(colvect,cols[classnames[[k]][j]])
204 }
205 for (j in 1:length(classnames[[l]])) {
206 # i=class2names[j]
207 tic <- TIC[[classnames[[l]][j]]]
208 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
209 colvect<-append(colvect,cols[classnames[[l]][j]])
210 }
211 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
212
213 }#end length ==2
214 if (length(class)==1){
215 k=1
216 ylim = range(sapply(TIC, function(x) range(x[,2])))
217
218 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
219 colvect<-NULL
220 for (j in 1:length(classnames[[k]])) {
221
222 tic <- TIC[[classnames[[k]][j]]]
223 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
224 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
225 colvect<-append(colvect,cols[classnames[[k]][j]])
226 }
227
228 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
229
230 }#end length ==1
231 dev.off()
232
233 # invisible(TIC)
234 }
235
236
237 ##addition for quality control of peak picking
238 #metaMS EIC and pspectra plotting option
239 #version 20150512
240 #only for Galaxy
241
242 plotUnknowns<-function(resGC, unkn=""){
243
244
245 ##Annotation table each value is a pcgrp associated to the unknown
246 ##NOTE pcgrp index are different between xcmsSet and resGC due to filtering steps in metaMS
247 ##R. Wehrens give me some clues on that and we found a correction
248
249 mat<-matrix(ncol=length(resGC$xset), nrow=dim(resGC$PeakTable)[1])
250
251 for (j in 1: length(resGC$xset)){
252 test<-resGC$annotation[[j]]
253 print(paste("j=",j))
254 for (i in 1:dim(test)[1]){
255 if (as.numeric(row.names(test)[i])>dim(mat)[1]){
256 next
257 } else {
258 mat[as.numeric(row.names(test)[i]),j]<-test[i,1]
259 }
260 }
261 }
262 colnames(mat)<-colnames(resGC$PeakTable[,c((which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI")[length(which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI"))]+1):dim(resGC$PeakTable)[2])])
263
264 #debug
265
266 # print(dim(mat))
267 # print(mat[1:3,])
268 # write.table(mat, file="myannotationtable.tsv", sep="\t", row.names=FALSE)
269 #correction of annotation matrix due to pcgrp removal by quality check in runGCresult
270 #matrix of correspondance between an@pspectra and filtered pspectra from runGC
271
272 allPCGRPs <-
273 lapply(1:length(resGC$xset),
274 function(i) {
275 an <- resGC$xset[[i]]
276 huhn <- an@pspectra[which(sapply(an@pspectra, length) >=
277 metaSetting(resGC$settings,
278 "DBconstruction.minfeat"))]
279 matCORR<-cbind(1:length(huhn), match(huhn, an@pspectra))
280 })
281
282 if (unkn[1]==""){
283 #plot EIC and spectra for all unknown for comparative purpose
284
285
286 par (mar=c(5, 4, 4, 2) + 0.1)
287 for (l in 1:dim(resGC$PeakTable)[1]){ #l=2
288 #recordPlot
289 perpage=3 #if change change layout also!
290 num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page
291 my.plots <- vector(num.plots, mode='list')
292 dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf
293 # par(mfrow=c(perpage,2))
294 layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage))
295 # layout.show(6)
296 oma.saved <- par("oma")
297 par(oma = rep.int(0, 4))
298 par(oma = oma.saved)
299 o.par <- par(mar = rep.int(0, 4))
300 on.exit(par(o.par))
301 stop=0 #initialize
302 for (i in 1:num.plots) {
303 start=stop+1
304 stop=start+perpage-1 #
305 for (c in start:stop){
306 if (c <=dim(mat)[2]){
307
308 #get sample name
309 sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths)
310
311 #remove .cdf, .mzXML filepattern
312 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]",
313 "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
314 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),
315 collapse = "|")
316 sampname<-gsub(filepattern, "",sampname)
317
318 title1<-paste("unknown", l,"from",sampname, sep=" ")
319 an<-resGC$xset[[c]]
320
321 par (mar=c(0, 0, 0, 0) + 0.1)
322 plot.new()
323 box()
324 text(0.5, 0.5, title1, cex=2)
325 if (!is.na(mat[l,c])){
326 pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2]
327 if (pcgrp!=mat[l,c]) print ("pcgrp changed")
328 par (mar=c(3, 2.5, 3, 1.5) + 0.1)
329 plotEICs(an, pspec=pcgrp, maxlabel=2)
330 plotPsSpectrum(an, pspec=pcgrp, maxlabel=2)
331 } else {
332 plot.new()
333 box()
334 text(0.5, 0.5, "NOT FOUND", cex=2)
335 plot.new()
336 box()
337 text(0.5, 0.5, "NOT FOUND", cex=2)
338 }
339 }
340 }
341 # my.plots[[i]] <- recordPlot()
342 }
343 graphics.off()
344
345 # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE)
346 # for (my.plot in my.plots) {
347 # replayPlot(my.plot)
348 # }
349 # my.plots
350 # graphics.off()
351
352 }#end for l
353 }#end if unkn=""
354 else{
355 par (mar=c(5, 4, 4, 2) + 0.1)
356 l=unkn
357 if (length(l)==1){
358 #recordPlot
359 perpage=3 #if change change layout also!
360 num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page
361 my.plots <- vector(num.plots, mode='list')
362
363 dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf
364 # par(mfrow=c(perpage,2))
365 layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage))
366 # layout.show(6)
367 oma.saved <- par("oma")
368 par(oma = rep.int(0, 4))
369 par(oma = oma.saved)
370 o.par <- par(mar = rep.int(0, 4))
371 on.exit(par(o.par))
372 stop=0 #initialize
373 for (i in 1:num.plots) {
374 start=stop+1
375 stop=start+perpage-1 #
376 for (c in start:stop){
377 if (c <=dim(mat)[2]){
378
379 #get sample name
380 sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths)
381
382 #remove .cdf, .mzXML filepattern
383 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
384 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
385 sampname<-gsub(filepattern, "",sampname)
386
387 title1<-paste("unknown", l,"from",sampname, sep=" ")
388 an<-resGC$xset[[c]]
389
390 par (mar=c(0, 0, 0, 0) + 0.1)
391 plot.new()
392 box()
393 text(0.5, 0.5, title1, cex=2)
394 if (!is.na(mat[l,c])){
395 pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2]
396 if (pcgrp!=mat[l,c]) print ("pcgrp changed")
397 par (mar=c(3, 2.5, 3, 1.5) + 0.1)
398 plotEICs(an, pspec=pcgrp, maxlabel=2)
399 plotPsSpectrum(an, pspec=pcgrp, maxlabel=2)
400 } else {
401 plot.new()
402 box()
403 text(0.5, 0.5, "NOT FOUND", cex=2)
404 plot.new()
405 box()
406 text(0.5, 0.5, "NOT FOUND", cex=2)
407 }
408 }
409 }
410 # my.plots[[i]] <- recordPlot()
411 }
412 graphics.off()
413
414 # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE)
415 # for (my.plot in my.plots) {
416 # replayPlot(my.plot)
417 # }
418 # my.plots
419 # graphics.off()
420 } else {
421 par (mar=c(5, 4, 4, 2) + 0.1)
422 for (l in 1:length(unkn)){ #l=2
423 #recordPlot
424 perpage=3 #if change change layout also!
425 num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page
426 my.plots <- vector(num.plots, mode='list')
427 dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",unkn[l],".pdf", sep="")) #A4 pdf
428 # par(mfrow=c(perpage,2))
429 layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage))
430 # layout.show(6)
431 oma.saved <- par("oma")
432 par(oma = rep.int(0, 4))
433 par(oma = oma.saved)
434 o.par <- par(mar = rep.int(0, 4))
435 on.exit(par(o.par))
436 stop=0 #initialize
437 for (i in 1:num.plots) {
438 start=stop+1
439 stop=start+perpage-1 #
440 for (c in start:stop){
441 if (c <=dim(mat)[2]){
442
443 #get sample name
444 sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths)
445
446 #remove .cdf, .mzXML filepattern
447 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
448 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
449 sampname<-gsub(filepattern, "",sampname)
450
451 title1<-paste("unknown",unkn[l],"from",sampname, sep=" ")
452 an<-resGC$xset[[c]]
453
454 par (mar=c(0, 0, 0, 0) + 0.1)
455 plot.new()
456 box()
457 text(0.5, 0.5, title1, cex=2)
458 if (!is.na(mat[unkn[l],c])){
459 pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[unkn[l],c]),2]
460 if (pcgrp!=mat[unkn[l],c]) print ("pcgrp changed")
461 par (mar=c(3, 2.5, 3, 1.5) + 0.1)
462 plotEICs(an, pspec=pcgrp, maxlabel=2)
463 plotPsSpectrum(an, pspec=pcgrp, maxlabel=2)
464 } else {
465 plot.new()
466 box()
467 text(0.5, 0.5, "NOT FOUND", cex=2)
468 plot.new()
469 box()
470 text(0.5, 0.5, "NOT FOUND", cex=2)
471 }
472 }
473 }
474 # my.plots[[i]] <- recordPlot()
475 }
476 graphics.off()
477
478 # pdf(file=paste("Unknown_",unkn[l],".pdf", sep=""), onefile=TRUE)
479 # for (my.plot in my.plots) {
480 # replayPlot(my.plot)
481 # }
482 # my.plots
483 # graphics.off()
484
485 }#end for unkn[l]
486
487 }
488
489 }
490 } #end function
491
492
493