comparison lib.r @ 11:de0d85537ee3 draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit a6f5f18b3d6130f7d7fbb9f2df856838c6217797
author lecorguille
date Fri, 07 Apr 2017 07:37:05 -0400
parents ee29f0a6e361
children 91c71f3808f3
comparison
equal deleted inserted replaced
10:6caa60dcc217 11:de0d85537ee3
25 return (variableMetadata) 25 return (variableMetadata)
26 } 26 }
27 27
28 #@author G. Le Corguille 28 #@author G. Le Corguille
29 #This function format ions identifiers 29 #This function format ions identifiers
30 formatIonIdentifiers <- function(dataData, numDigitsRT=0, numDigitsMZ=0) { 30 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
31 return(make.unique(paste0("M",round(dataData[,"mz"],numDigitsMZ),"T",round(dataData[,"rt"],numDigitsRT)))) 31 splitDeco = strsplit(as.character(variableMetadata$name),"_")
32 idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
33 namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
34 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
35 return(variableMetadata)
32 } 36 }
33 37
34 #@author G. Le Corguille 38 #@author G. Le Corguille
35 # value: intensity values to be used into, maxo or intb 39 # value: intensity values to be used into, maxo or intb
36 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { 40 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) {
37 groups <- xset@groups 41 variableMetadata_dataMatrix = peakTable(xset, method="medret", value=intval)
38 values <- groupval(xset, "medret", value=intval) 42 variableMetadata_dataMatrix = cbind(name=groupnames(xset),variableMetadata_dataMatrix)
39 43
40 # renamming of the column rtmed to rt to fit with camera peaklist function output 44 dataMatrix = variableMetadata_dataMatrix[,(make.names(colnames(variableMetadata_dataMatrix)) %in% c("name", make.names(sampnames(xset))))]
41 colnames(groups)[colnames(groups)=="rtmed"] <- "rt" 45
42 colnames(groups)[colnames(groups)=="mzmed"] <- "mz" 46 variableMetadata = variableMetadata_dataMatrix[,!(make.names(colnames(variableMetadata_dataMatrix)) %in% c(make.names(sampnames(xset))))]
43 47 variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute)
44 ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) 48 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
45 groups = RTSecondToMinute(groups, convertRTMinute) 49
46 50 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F)
47 rownames(groups) = ids 51 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F)
48 rownames(values) = ids
49
50 #@TODO: add "name" as the first column name
51 #colnames(groups)[1] = "name"
52 #colnames(values)[1] = "name"
53
54 write.table(groups, file=variableMetadataOutput,sep="\t",quote=F,row.names = T,col.names = NA)
55 write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA)
56 } 52 }
57 53
58 #@author Y. Guitton 54 #@author Y. Guitton
59 getBPC <- function(file,rtcor=NULL, ...) { 55 getBPC <- function(file,rtcor=NULL, ...) {
60 object <- xcmsRaw(file) 56 object <- xcmsRaw(file)
61 sel <- profRange(object, ...) 57 sel <- profRange(object, ...)
62 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) 58 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE]))
63 #plotChrom(xcmsRaw(file), base=T) 59 #plotChrom(xcmsRaw(file), base=T)
64 } 60 }
65 61
66 #@author Y. Guitton 62 #@author Y. Guitton
67 getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { 63 getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) {
68 cat("Creating BIC pdf...\n") 64 cat("Creating BIC pdf...\n")
69 65
70 if (is.null(xcmsSet)) { 66 if (is.null(xcmsSet)) {
71 cat("Enter an xcmsSet \n") 67 cat("Enter an xcmsSet \n")
72 stop() 68 stop()
73 } else { 69 } else {
74 files <- filepaths(xcmsSet) 70 files <- filepaths(xcmsSet)
75 } 71 }
76 72
77 class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class 73 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
78 74
79 classnames<-vector("list",length(class)) 75 classnames<-vector("list",length(phenoDataClass))
80 for (i in 1:length(class)){ 76 for (i in 1:length(phenoDataClass)){
81 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) 77 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i])
82 } 78 }
83 79
84 N <- dim(phenoData(xcmsSet))[1] 80 N <- dim(phenoData(xcmsSet))[1]
85 81
86 TIC <- vector("list",N) 82 TIC <- vector("list",N)
87 83
88 84
89 for (j in 1:N) { 85 for (j in 1:N) {
90 86
91 TIC[[j]] <- getBPC(files[j]) 87 TIC[[j]] <- getBPC(files[j])
92 #good for raw 88 #good for raw
93 # seems strange for corrected 89 # seems strange for corrected
94 #errors if scanrange used in xcmsSetgeneration 90 #errors if scanrange used in xcmsSetgeneration
95 if (!is.null(xcmsSet) && rt == "corrected") 91 if (!is.null(xcmsSet) && rt == "corrected")
96 rtcor <- xcmsSet@rt$corrected[[j]] else 92 rtcor <- xcmsSet@rt$corrected[[j]]
97 rtcor <- NULL 93 else
98 94 rtcor <- NULL
99 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) 95
100 # TIC[[j]][,1]<-rtcor 96 TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
101 } 97 # TIC[[j]][,1]<-rtcor
102 98 }
103 99
104 100
105 pdf(pdfname,w=16,h=10) 101
106 cols <- rainbow(N) 102 pdf(pdfname,w=16,h=10)
107 lty = 1:N 103 cols <- rainbow(N)
108 pch = 1:N 104 lty = 1:N
109 #search for max x and max y in BPCs 105 pch = 1:N
110 xlim = range(sapply(TIC, function(x) range(x[,1]))) 106 #search for max x and max y in BPCs
111 ylim = range(sapply(TIC, function(x) range(x[,2]))) 107 xlim = range(sapply(TIC, function(x) range(x[,1])))
112 ylim = c(-ylim[2], ylim[2]) 108 ylim = range(sapply(TIC, function(x) range(x[,2])))
113 109 ylim = c(-ylim[2], ylim[2])
114 110
115 ##plot start 111
116 112 ##plot start
117 if (length(class)>2){ 113
118 for (k in 1:(length(class)-1)){ 114 if (length(phenoDataClass)>2){
119 for (l in (k+1):length(class)){ 115 for (k in 1:(length(phenoDataClass)-1)){
120 #print(paste(class[k],"vs",class[l],sep=" ")) 116 for (l in (k+1):length(phenoDataClass)){
121 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") 117 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
118 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
119 colvect<-NULL
120 for (j in 1:length(classnames[[k]])) {
121 tic <- TIC[[classnames[[k]][j]]]
122 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
123 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
124 colvect<-append(colvect,cols[classnames[[k]][j]])
125 }
126 for (j in 1:length(classnames[[l]])) {
127 # i=class2names[j]
128 tic <- TIC[[classnames[[l]][j]]]
129 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
130 colvect<-append(colvect,cols[classnames[[l]][j]])
131 }
132 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
133 }
134 }
135 }#end if length >2
136
137 if (length(phenoDataClass)==2){
138 k=1
139 l=2
140 colvect<-NULL
141 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
142
143 for (j in 1:length(classnames[[k]])) {
144
145 tic <- TIC[[classnames[[k]][j]]]
146 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
147 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
148 colvect<-append(colvect,cols[classnames[[k]][j]])
149 }
150 for (j in 1:length(classnames[[l]])) {
151 # i=class2names[j]
152 tic <- TIC[[classnames[[l]][j]]]
153 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
154 colvect<-append(colvect,cols[classnames[[l]][j]])
155 }
156 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
157
158 }#end length ==2
159
160 #case where only one class
161 if (length(phenoDataClass)==1){
162 k=1
163 ylim = range(sapply(TIC, function(x) range(x[,2])))
164 colvect<-NULL
165 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
166
167 for (j in 1:length(classnames[[k]])) {
168 tic <- TIC[[classnames[[k]][j]]]
169 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
170 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
171 colvect<-append(colvect,cols[classnames[[k]][j]])
172 }
173
174 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
175
176 }#end length ==1
177
178 dev.off() #pdf(pdfname,w=16,h=10)
179
180 invisible(TIC)
181 }
182
183
184
185 #@author Y. Guitton
186 getTIC <- function(file,rtcor=NULL) {
187 object <- xcmsRaw(file)
188 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity)
189 }
190
191 ##
192 ## overlay TIC from all files in current folder or from xcmsSet, create pdf
193 ##
194 #@author Y. Guitton
195 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) {
196 cat("Creating TIC pdf...\n")
197
198 if (is.null(xcmsSet)) {
199 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
200 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
201 if (is.null(files))
202 files <- getwd()
203 info <- file.info(files)
204 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
205 files <- c(files[!info$isdir], listed)
206 } else {
207 files <- filepaths(xcmsSet)
208 }
209
210 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
211 classnames<-vector("list",length(phenoDataClass))
212 for (i in 1:length(phenoDataClass)){
213 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i])
214 }
215
216 N <- length(files)
217 TIC <- vector("list",N)
218
219 for (i in 1:N) {
220 if (!is.null(xcmsSet) && rt == "corrected")
221 rtcor <- xcmsSet@rt$corrected[[i]] else
222 rtcor <- NULL
223 TIC[[i]] <- getTIC(files[i],rtcor=rtcor)
224 }
225
226 pdf(pdfname,w=16,h=10)
227 cols <- rainbow(N)
228 lty = 1:N
229 pch = 1:N
230 #search for max x and max y in TICs
231 xlim = range(sapply(TIC, function(x) range(x[,1])))
232 ylim = range(sapply(TIC, function(x) range(x[,2])))
233 ylim = c(-ylim[2], ylim[2])
234
235
236 ##plot start
237 if (length(phenoDataClass)>2){
238 for (k in 1:(length(phenoDataClass)-1)){
239 for (l in (k+1):length(phenoDataClass)){
240 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
241 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
242 colvect<-NULL
243 for (j in 1:length(classnames[[k]])) {
244 tic <- TIC[[classnames[[k]][j]]]
245 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
246 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
247 colvect<-append(colvect,cols[classnames[[k]][j]])
248 }
249 for (j in 1:length(classnames[[l]])) {
250 # i=class2names[j]
251 tic <- TIC[[classnames[[l]][j]]]
252 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
253 colvect<-append(colvect,cols[classnames[[l]][j]])
254 }
255 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
256 }
257 }
258 }#end if length >2
259 if (length(phenoDataClass)==2){
260 k=1
261 l=2
262
263 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
122 colvect<-NULL 264 colvect<-NULL
123 for (j in 1:length(classnames[[k]])) { 265 for (j in 1:length(classnames[[k]])) {
124 tic <- TIC[[classnames[[k]][j]]] 266 tic <- TIC[[classnames[[k]][j]]]
125 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 267 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
126 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 268 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
127 colvect<-append(colvect,cols[classnames[[k]][j]]) 269 colvect<-append(colvect,cols[classnames[[k]][j]])
128 } 270 }
129 for (j in 1:length(classnames[[l]])) { 271 for (j in 1:length(classnames[[l]])) {
130 # i=class2names[j] 272 # i=class2names[j]
131 tic <- TIC[[classnames[[l]][j]]] 273 tic <- TIC[[classnames[[l]][j]]]
132 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") 274 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
133 colvect<-append(colvect,cols[classnames[[l]][j]]) 275 colvect<-append(colvect,cols[classnames[[l]][j]])
134 } 276 }
135 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 277 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
136 } 278
137 } 279 }#end length ==2
138 }#end if length >2 280
139 281 #case where only one class
140 if (length(class)==2){ 282 if (length(phenoDataClass)==1){
141 k=1 283 k=1
142 l=2 284 ylim = range(sapply(TIC, function(x) range(x[,2])))
143 colvect<-NULL 285
144 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") 286 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
145
146 for (j in 1:length(classnames[[k]])) {
147
148 tic <- TIC[[classnames[[k]][j]]]
149 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
150 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
151 colvect<-append(colvect,cols[classnames[[k]][j]])
152 }
153 for (j in 1:length(classnames[[l]])) {
154 # i=class2names[j]
155 tic <- TIC[[classnames[[l]][j]]]
156 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
157 colvect<-append(colvect,cols[classnames[[l]][j]])
158 }
159 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
160
161 }#end length ==2
162
163 #case where only one class
164 if (length(class)==1){
165 k=1
166 ylim = range(sapply(TIC, function(x) range(x[,2])))
167 colvect<-NULL
168 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
169
170 for (j in 1:length(classnames[[k]])) {
171 tic <- TIC[[classnames[[k]][j]]]
172 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
173 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
174 colvect<-append(colvect,cols[classnames[[k]][j]])
175 }
176
177 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
178
179 }#end length ==1
180
181 dev.off() #pdf(pdfname,w=16,h=10)
182
183 invisible(TIC)
184 }
185
186
187
188 #@author Y. Guitton
189 getTIC <- function(file,rtcor=NULL) {
190 object <- xcmsRaw(file)
191 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity)
192 }
193
194 ##
195 ## overlay TIC from all files in current folder or from xcmsSet, create pdf
196 ##
197 #@author Y. Guitton
198 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) {
199 cat("Creating TIC pdf...\n")
200
201 if (is.null(xcmsSet)) {
202 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
203 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
204 if (is.null(files))
205 files <- getwd()
206 info <- file.info(files)
207 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
208 files <- c(files[!info$isdir], listed)
209 } else {
210 files <- filepaths(xcmsSet)
211 }
212
213 class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
214
215 classnames<-vector("list",length(class))
216 for (i in 1:length(class)){
217 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i])
218 }
219
220 N <- length(files)
221 TIC <- vector("list",N)
222
223 for (i in 1:N) {
224 if (!is.null(xcmsSet) && rt == "corrected")
225 rtcor <- xcmsSet@rt$corrected[[i]] else
226 rtcor <- NULL
227 TIC[[i]] <- getTIC(files[i],rtcor=rtcor)
228 }
229
230 pdf(pdfname,w=16,h=10)
231 cols <- rainbow(N)
232 lty = 1:N
233 pch = 1:N
234 #search for max x and max y in TICs
235 xlim = range(sapply(TIC, function(x) range(x[,1])))
236 ylim = range(sapply(TIC, function(x) range(x[,2])))
237 ylim = c(-ylim[2], ylim[2])
238
239
240 ##plot start
241 if (length(class)>2){
242 for (k in 1:(length(class)-1)){
243 for (l in (k+1):length(class)){
244 #print(paste(class[k],"vs",class[l],sep=" "))
245 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
246 colvect<-NULL 287 colvect<-NULL
247 for (j in 1:length(classnames[[k]])) { 288 for (j in 1:length(classnames[[k]])) {
248 289 tic <- TIC[[classnames[[k]][j]]]
249 tic <- TIC[[classnames[[k]][j]]] 290 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
250 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 291 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
251 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 292 colvect<-append(colvect,cols[classnames[[k]][j]])
252 colvect<-append(colvect,cols[classnames[[k]][j]]) 293 }
253 } 294
254 for (j in 1:length(classnames[[l]])) { 295 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
255 # i=class2names[j] 296
256 tic <- TIC[[classnames[[l]][j]]] 297 }#end length ==1
257 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") 298
258 colvect<-append(colvect,cols[classnames[[l]][j]]) 299 dev.off() #pdf(pdfname,w=16,h=10)
259 } 300
260 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 301 invisible(TIC)
261 }
262 }
263 }#end if length >2
264 if (length(class)==2){
265 k=1
266 l=2
267
268 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
269 colvect<-NULL
270 for (j in 1:length(classnames[[k]])) {
271 tic <- TIC[[classnames[[k]][j]]]
272 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
273 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
274 colvect<-append(colvect,cols[classnames[[k]][j]])
275 }
276 for (j in 1:length(classnames[[l]])) {
277 # i=class2names[j]
278 tic <- TIC[[classnames[[l]][j]]]
279 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
280 colvect<-append(colvect,cols[classnames[[l]][j]])
281 }
282 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
283
284 }#end length ==2
285
286 #case where only one class
287 if (length(class)==1){
288 k=1
289 ylim = range(sapply(TIC, function(x) range(x[,2])))
290
291 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
292 colvect<-NULL
293 for (j in 1:length(classnames[[k]])) {
294 tic <- TIC[[classnames[[k]][j]]]
295 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
296 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
297 colvect<-append(colvect,cols[classnames[[k]][j]])
298 }
299
300 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
301
302 }#end length ==1
303
304 dev.off() #pdf(pdfname,w=16,h=10)
305
306 invisible(TIC)
307 } 302 }
308 303
309 304
310 305
311 ## 306 ##
312 ## Get the polarities from all the samples of a condition 307 ## Get the polarities from all the samples of a condition
313 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 308 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
314 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 309 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
315 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { 310 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
316 cat("Creating the sampleMetadata file...\n") 311 cat("Creating the sampleMetadata file...\n")
317 312
318 #Create the sampleMetada dataframe 313 #Create the sampleMetada dataframe
319 sampleMetadata=xset@phenoData 314 sampleMetadata=xset@phenoData
320 sampleNamesOrigin=rownames(sampleMetadata) 315 sampleNamesOrigin=rownames(sampleMetadata)
321 sampleNamesMakeNames=make.names(sampleNamesOrigin) 316 sampleNamesMakeNames=make.names(sampleNamesOrigin)
322 317
323 if (any(duplicated(sampleNamesMakeNames))) { 318 if (any(duplicated(sampleNamesMakeNames))) {
324 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 319 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
325 for (sampleName in sampleNamesOrigin) { 320 for (sampleName in sampleNamesOrigin) {
326 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) 321 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
327 } 322 }
328 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 323 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
329 } 324 }
330 325
331 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { 326 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
332 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") 327 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
333 for (sampleName in sampleNamesOrigin) { 328 for (sampleName in sampleNamesOrigin) {
334 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) 329 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n"))
335 } 330 }
336 } 331 }
337 332
338 sampleMetadata$sampleMetadata=sampleNamesMakeNames 333 sampleMetadata$sampleMetadata=sampleNamesMakeNames
339 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns 334 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns
340 rownames(sampleMetadata)=NULL 335 rownames(sampleMetadata)=NULL
341 336
342 #Create a list of files name in the current directory 337 #Create a list of files name in the current directory
343 list_files=xset@filepaths 338 list_files=xset@filepaths
344 #For each sample file, the following actions are done 339 #For each sample file, the following actions are done
345 for (file in list_files){ 340 for (file in list_files){
346 #Check if the file is in the CDF format 341 #Check if the file is in the CDF format
347 if (!mzR:::netCDFIsFile(file)){ 342 if (!mzR:::netCDFIsFile(file)){
348 343
349 # If the column isn't exist, with add one filled with NA 344 # If the column isn't exist, with add one filled with NA
350 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA 345 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA
351 346
352 #Create a simple xcmsRaw object for each sample 347 #Create a simple xcmsRaw object for each sample
353 xcmsRaw=xcmsRaw(file) 348 xcmsRaw=xcmsRaw(file)
354 #Extract the polarity (a list of polarities) 349 #Extract the polarity (a list of polarities)
355 polarity=xcmsRaw@polarity 350 polarity=xcmsRaw@polarity
356 #Verify if all the scans have the same polarity 351 #Verify if all the scans have the same polarity
357 uniq_list=unique(polarity) 352 uniq_list=unique(polarity)
358 if (length(uniq_list)>1){ 353 if (length(uniq_list)>1){
359 polarity="mixed" 354 polarity="mixed"
360 } else { 355 } else {
361 polarity=as.character(uniq_list) 356 polarity=as.character(uniq_list)
362 } 357 }
363 #Transforms the character to obtain only the sample name 358 #Transforms the character to obtain only the sample name
364 filename=basename(file) 359 filename=basename(file)
365 library(tools) 360 library(tools)
366 samplename=file_path_sans_ext(filename) 361 samplename=file_path_sans_ext(filename)
367 362
368 #Set the polarity attribute 363 #Set the polarity attribute
369 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity 364 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity
370 365
371 #Delete xcmsRaw object because it creates a bug for the fillpeaks step 366 #Delete xcmsRaw object because it creates a bug for the fillpeaks step
372 rm(xcmsRaw) 367 rm(xcmsRaw)
373 } 368 }
374 369
375 } 370 }
376 371
377 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) 372 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput)
378 373
379 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) 374 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames))
380 375
381 } 376 }
382 377
383 378
384 ## 379 ##
385 ## This function check if xcms will found all the files 380 ## This function check if xcms will found all the files
386 ## 381 ##
387 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 382 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
388 checkFilesCompatibilityWithXcms <- function(directory) { 383 checkFilesCompatibilityWithXcms <- function(directory) {
389 cat("Checking files filenames compatibilities with xmcs...\n") 384 cat("Checking files filenames compatibilities with xmcs...\n")
390 # WHAT XCMS WILL FIND 385 # WHAT XCMS WILL FIND
391 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 386 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
392 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") 387 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
393 info <- file.info(directory) 388 info <- file.info(directory)
394 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) 389 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
395 files <- c(directory[!info$isdir], listed) 390 files <- c(directory[!info$isdir], listed)
396 files_abs <- file.path(getwd(), files) 391 files_abs <- file.path(getwd(), files)
397 exists <- file.exists(files_abs) 392 exists <- file.exists(files_abs)
398 files[exists] <- files_abs[exists] 393 files[exists] <- files_abs[exists]
399 files[exists] <- sub("//","/",files[exists]) 394 files[exists] <- sub("//","/",files[exists])
400 395
401 # WHAT IS ON THE FILESYSTEM 396 # WHAT IS ON THE FILESYSTEM
402 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) 397 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
403 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] 398 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
404 399
405 # COMPARISON 400 # COMPARISON
406 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { 401 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
407 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) 402 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
408 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) 403 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
409 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 404 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
410 405 }
411 }
412 } 406 }
413 407
414 408
415 409
416 ## 410 ##
417 ## This function check if XML contains special caracters. It also checks integrity and completness. 411 ## This function check if XML contains special caracters. It also checks integrity and completness.
418 ## 412 ##
419 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 413 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
420 checkXmlStructure <- function (directory) { 414 checkXmlStructure <- function (directory) {
421 cat("Checking XML structure...\n") 415 cat("Checking XML structure...\n")
422 416
423 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") 417 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
424 capture=system(cmd,intern=TRUE) 418 capture=system(cmd,intern=TRUE)
425 419
426 if (length(capture)>0){ 420 if (length(capture)>0){
427 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) 421 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
428 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) 422 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
429 write(capture, stderr()) 423 write(capture, stderr())
430 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") 424 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
431 } 425 }
432 426
433 } 427 }
434 428
435 429
436 ## 430 ##
437 ## This function check if XML contain special characters 431 ## This function check if XML contain special characters
438 ## 432 ##
439 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 433 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
440 deleteXmlBadCharacters<- function (directory) { 434 deleteXmlBadCharacters<- function (directory) {
441 cat("Checking Non ASCII characters in the XML...\n") 435 cat("Checking Non ASCII characters in the XML...\n")
442 436
443 processed=F 437 processed=F
444 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) 438 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE)
445 for (i in l){ 439 for (i in l){
446 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") 440 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="")
447 capture=suppressWarnings(system(cmd,intern=TRUE)) 441 capture=suppressWarnings(system(cmd,intern=TRUE))
448 if (length(capture)>0){ 442 if (length(capture)>0){
449 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) 443 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
450 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) 444 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
451 c=system(cmd,intern=TRUE) 445 c=system(cmd,intern=TRUE)
452 capture="" 446 capture=""
453 processed=T 447 processed=T
454 } 448 }
455 } 449 }
456 if (processed) cat("\n\n") 450 if (processed) cat("\n\n")
457 return(processed) 451 return(processed)
458 } 452 }
459 453
460 454
461 ## 455 ##
462 ## This function will compute MD5 checksum to check the data integrity 456 ## This function will compute MD5 checksum to check the data integrity
463 ## 457 ##
464 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 458 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
465 getMd5sum <- function (directory) { 459 getMd5sum <- function (directory) {
466 cat("Compute md5 checksum...\n") 460 cat("Compute md5 checksum...\n")
467 # WHAT XCMS WILL FIND 461 # WHAT XCMS WILL FIND
468 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 462 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
469 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") 463 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
470 info <- file.info(directory) 464 info <- file.info(directory)
471 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) 465 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
472 files <- c(directory[!info$isdir], listed) 466 files <- c(directory[!info$isdir], listed)
473 exists <- file.exists(files) 467 exists <- file.exists(files)
474 files <- files[exists] 468 files <- files[exists]
475 469
476 library(tools) 470 library(tools)
477 471
478 #cat("\n\n") 472 #cat("\n\n")
479 473
480 return(as.matrix(md5sum(files))) 474 return(as.matrix(md5sum(files)))
481 } 475 }
476
477
478 # This function get the raw file path from the arguments
479 getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) {
480 if (!is.null(listArguments[["zipfile"]])) zipfile = listArguments[["zipfile"]]
481 if (!is.null(listArguments[["zipfilePositive"]])) zipfile = listArguments[["zipfilePositive"]]
482 if (!is.null(listArguments[["zipfileNegative"]])) zipfile = listArguments[["zipfileNegative"]]
483
484 if (!is.null(listArguments[["singlefile_galaxyPath"]])) {
485 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPath"]];
486 singlefile_sampleNames = listArguments[["singlefile_sampleName"]]
487 }
488 if (!is.null(listArguments[["singlefile_galaxyPathPositive"]])) {
489 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathPositive"]];
490 singlefile_sampleNames = listArguments[["singlefile_sampleNamePositive"]]
491 }
492 if (!is.null(listArguments[["singlefile_galaxyPathNegative"]])) {
493 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathNegative"]];
494 singlefile_sampleNames = listArguments[["singlefile_sampleNameNegative"]]
495 }
496 if (exists("singlefile_galaxyPaths")){
497 singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,","))
498 singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,","))
499
500 singlefile=NULL
501 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
502 singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i]
503 singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i]
504 singlefile[[singlefile_sampleName]] = singlefile_galaxyPath
505 }
506 }
507 for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) {
508 listArguments[[argument]]=NULL
509 }
510 return(list(zipfile=zipfile, singlefile=singlefile, listArguments=listArguments))
511 }
512
513
514 # This function retrieve the raw file in the working directory
515 # - if zipfile: unzip the file with its directory tree
516 # - if singlefiles: set symlink with the good filename
517 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
518 if(!is.null(singlefile) && (length("singlefile")>0)) {
519 for (singlefile_sampleName in names(singlefile)) {
520 singlefile_galaxyPath = singlefile[[singlefile_sampleName]]
521 if(!file.exists(singlefile_galaxyPath)){
522 error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
523 print(error_message); stop(error_message)
524 }
525
526 file.symlink(singlefile_galaxyPath,singlefile_sampleName)
527 }
528 directory = "."
529
530 }
531 if(!is.null(zipfile) && (zipfile!="")) {
532 if(!file.exists(zipfile)){
533 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
534 print(error_message)
535 stop(error_message)
536 }
537
538 #list all file in the zip file
539 #zip_files=unzip(zipfile,list=T)[,"Name"]
540
541 #unzip
542 suppressWarnings(unzip(zipfile, unzip="unzip"))
543
544 #get the directory name
545 filesInZip=unzip(zipfile, list=T);
546 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])));
547 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
548 directory = "."
549 if (length(directories) == 1) directory = directories
550
551 cat("files_root_directory\t",directory,"\n")
552
553 }
554 return (directory)
555 }