Mercurial > repos > lecorguille > xcms_fillpeaks
comparison lib.r @ 11:de0d85537ee3 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit a6f5f18b3d6130f7d7fbb9f2df856838c6217797
author | lecorguille |
---|---|
date | Fri, 07 Apr 2017 07:37:05 -0400 |
parents | ee29f0a6e361 |
children | 91c71f3808f3 |
comparison
equal
deleted
inserted
replaced
10:6caa60dcc217 | 11:de0d85537ee3 |
---|---|
25 return (variableMetadata) | 25 return (variableMetadata) |
26 } | 26 } |
27 | 27 |
28 #@author G. Le Corguille | 28 #@author G. Le Corguille |
29 #This function format ions identifiers | 29 #This function format ions identifiers |
30 formatIonIdentifiers <- function(dataData, numDigitsRT=0, numDigitsMZ=0) { | 30 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { |
31 return(make.unique(paste0("M",round(dataData[,"mz"],numDigitsMZ),"T",round(dataData[,"rt"],numDigitsRT)))) | 31 splitDeco = strsplit(as.character(variableMetadata$name),"_") |
32 idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) | |
33 namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) | |
34 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | |
35 return(variableMetadata) | |
32 } | 36 } |
33 | 37 |
34 #@author G. Le Corguille | 38 #@author G. Le Corguille |
35 # value: intensity values to be used into, maxo or intb | 39 # value: intensity values to be used into, maxo or intb |
36 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { | 40 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { |
37 groups <- xset@groups | 41 variableMetadata_dataMatrix = peakTable(xset, method="medret", value=intval) |
38 values <- groupval(xset, "medret", value=intval) | 42 variableMetadata_dataMatrix = cbind(name=groupnames(xset),variableMetadata_dataMatrix) |
39 | 43 |
40 # renamming of the column rtmed to rt to fit with camera peaklist function output | 44 dataMatrix = variableMetadata_dataMatrix[,(make.names(colnames(variableMetadata_dataMatrix)) %in% c("name", make.names(sampnames(xset))))] |
41 colnames(groups)[colnames(groups)=="rtmed"] <- "rt" | 45 |
42 colnames(groups)[colnames(groups)=="mzmed"] <- "mz" | 46 variableMetadata = variableMetadata_dataMatrix[,!(make.names(colnames(variableMetadata_dataMatrix)) %in% c(make.names(sampnames(xset))))] |
43 | 47 variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute) |
44 ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | 48 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) |
45 groups = RTSecondToMinute(groups, convertRTMinute) | 49 |
46 | 50 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) |
47 rownames(groups) = ids | 51 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) |
48 rownames(values) = ids | |
49 | |
50 #@TODO: add "name" as the first column name | |
51 #colnames(groups)[1] = "name" | |
52 #colnames(values)[1] = "name" | |
53 | |
54 write.table(groups, file=variableMetadataOutput,sep="\t",quote=F,row.names = T,col.names = NA) | |
55 write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA) | |
56 } | 52 } |
57 | 53 |
58 #@author Y. Guitton | 54 #@author Y. Guitton |
59 getBPC <- function(file,rtcor=NULL, ...) { | 55 getBPC <- function(file,rtcor=NULL, ...) { |
60 object <- xcmsRaw(file) | 56 object <- xcmsRaw(file) |
61 sel <- profRange(object, ...) | 57 sel <- profRange(object, ...) |
62 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) | 58 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) |
63 #plotChrom(xcmsRaw(file), base=T) | 59 #plotChrom(xcmsRaw(file), base=T) |
64 } | 60 } |
65 | 61 |
66 #@author Y. Guitton | 62 #@author Y. Guitton |
67 getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { | 63 getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { |
68 cat("Creating BIC pdf...\n") | 64 cat("Creating BIC pdf...\n") |
69 | 65 |
70 if (is.null(xcmsSet)) { | 66 if (is.null(xcmsSet)) { |
71 cat("Enter an xcmsSet \n") | 67 cat("Enter an xcmsSet \n") |
72 stop() | 68 stop() |
73 } else { | 69 } else { |
74 files <- filepaths(xcmsSet) | 70 files <- filepaths(xcmsSet) |
75 } | 71 } |
76 | 72 |
77 class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class | 73 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class |
78 | 74 |
79 classnames<-vector("list",length(class)) | 75 classnames<-vector("list",length(phenoDataClass)) |
80 for (i in 1:length(class)){ | 76 for (i in 1:length(phenoDataClass)){ |
81 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) | 77 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) |
82 } | 78 } |
83 | 79 |
84 N <- dim(phenoData(xcmsSet))[1] | 80 N <- dim(phenoData(xcmsSet))[1] |
85 | 81 |
86 TIC <- vector("list",N) | 82 TIC <- vector("list",N) |
87 | 83 |
88 | 84 |
89 for (j in 1:N) { | 85 for (j in 1:N) { |
90 | 86 |
91 TIC[[j]] <- getBPC(files[j]) | 87 TIC[[j]] <- getBPC(files[j]) |
92 #good for raw | 88 #good for raw |
93 # seems strange for corrected | 89 # seems strange for corrected |
94 #errors if scanrange used in xcmsSetgeneration | 90 #errors if scanrange used in xcmsSetgeneration |
95 if (!is.null(xcmsSet) && rt == "corrected") | 91 if (!is.null(xcmsSet) && rt == "corrected") |
96 rtcor <- xcmsSet@rt$corrected[[j]] else | 92 rtcor <- xcmsSet@rt$corrected[[j]] |
97 rtcor <- NULL | 93 else |
98 | 94 rtcor <- NULL |
99 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) | 95 |
100 # TIC[[j]][,1]<-rtcor | 96 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) |
101 } | 97 # TIC[[j]][,1]<-rtcor |
102 | 98 } |
103 | 99 |
104 | 100 |
105 pdf(pdfname,w=16,h=10) | 101 |
106 cols <- rainbow(N) | 102 pdf(pdfname,w=16,h=10) |
107 lty = 1:N | 103 cols <- rainbow(N) |
108 pch = 1:N | 104 lty = 1:N |
109 #search for max x and max y in BPCs | 105 pch = 1:N |
110 xlim = range(sapply(TIC, function(x) range(x[,1]))) | 106 #search for max x and max y in BPCs |
111 ylim = range(sapply(TIC, function(x) range(x[,2]))) | 107 xlim = range(sapply(TIC, function(x) range(x[,1]))) |
112 ylim = c(-ylim[2], ylim[2]) | 108 ylim = range(sapply(TIC, function(x) range(x[,2]))) |
113 | 109 ylim = c(-ylim[2], ylim[2]) |
114 | 110 |
115 ##plot start | 111 |
116 | 112 ##plot start |
117 if (length(class)>2){ | 113 |
118 for (k in 1:(length(class)-1)){ | 114 if (length(phenoDataClass)>2){ |
119 for (l in (k+1):length(class)){ | 115 for (k in 1:(length(phenoDataClass)-1)){ |
120 #print(paste(class[k],"vs",class[l],sep=" ")) | 116 for (l in (k+1):length(phenoDataClass)){ |
121 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | 117 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) |
118 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
119 colvect<-NULL | |
120 for (j in 1:length(classnames[[k]])) { | |
121 tic <- TIC[[classnames[[k]][j]]] | |
122 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
123 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
124 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
125 } | |
126 for (j in 1:length(classnames[[l]])) { | |
127 # i=class2names[j] | |
128 tic <- TIC[[classnames[[l]][j]]] | |
129 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
130 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
131 } | |
132 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
133 } | |
134 } | |
135 }#end if length >2 | |
136 | |
137 if (length(phenoDataClass)==2){ | |
138 k=1 | |
139 l=2 | |
140 colvect<-NULL | |
141 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
142 | |
143 for (j in 1:length(classnames[[k]])) { | |
144 | |
145 tic <- TIC[[classnames[[k]][j]]] | |
146 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
147 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
148 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
149 } | |
150 for (j in 1:length(classnames[[l]])) { | |
151 # i=class2names[j] | |
152 tic <- TIC[[classnames[[l]][j]]] | |
153 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
154 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
155 } | |
156 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
157 | |
158 }#end length ==2 | |
159 | |
160 #case where only one class | |
161 if (length(phenoDataClass)==1){ | |
162 k=1 | |
163 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
164 colvect<-NULL | |
165 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
166 | |
167 for (j in 1:length(classnames[[k]])) { | |
168 tic <- TIC[[classnames[[k]][j]]] | |
169 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
170 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
171 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
172 } | |
173 | |
174 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | |
175 | |
176 }#end length ==1 | |
177 | |
178 dev.off() #pdf(pdfname,w=16,h=10) | |
179 | |
180 invisible(TIC) | |
181 } | |
182 | |
183 | |
184 | |
185 #@author Y. Guitton | |
186 getTIC <- function(file,rtcor=NULL) { | |
187 object <- xcmsRaw(file) | |
188 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) | |
189 } | |
190 | |
191 ## | |
192 ## overlay TIC from all files in current folder or from xcmsSet, create pdf | |
193 ## | |
194 #@author Y. Guitton | |
195 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { | |
196 cat("Creating TIC pdf...\n") | |
197 | |
198 if (is.null(xcmsSet)) { | |
199 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
200 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") | |
201 if (is.null(files)) | |
202 files <- getwd() | |
203 info <- file.info(files) | |
204 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) | |
205 files <- c(files[!info$isdir], listed) | |
206 } else { | |
207 files <- filepaths(xcmsSet) | |
208 } | |
209 | |
210 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class | |
211 classnames<-vector("list",length(phenoDataClass)) | |
212 for (i in 1:length(phenoDataClass)){ | |
213 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) | |
214 } | |
215 | |
216 N <- length(files) | |
217 TIC <- vector("list",N) | |
218 | |
219 for (i in 1:N) { | |
220 if (!is.null(xcmsSet) && rt == "corrected") | |
221 rtcor <- xcmsSet@rt$corrected[[i]] else | |
222 rtcor <- NULL | |
223 TIC[[i]] <- getTIC(files[i],rtcor=rtcor) | |
224 } | |
225 | |
226 pdf(pdfname,w=16,h=10) | |
227 cols <- rainbow(N) | |
228 lty = 1:N | |
229 pch = 1:N | |
230 #search for max x and max y in TICs | |
231 xlim = range(sapply(TIC, function(x) range(x[,1]))) | |
232 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
233 ylim = c(-ylim[2], ylim[2]) | |
234 | |
235 | |
236 ##plot start | |
237 if (length(phenoDataClass)>2){ | |
238 for (k in 1:(length(phenoDataClass)-1)){ | |
239 for (l in (k+1):length(phenoDataClass)){ | |
240 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) | |
241 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
242 colvect<-NULL | |
243 for (j in 1:length(classnames[[k]])) { | |
244 tic <- TIC[[classnames[[k]][j]]] | |
245 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
246 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
247 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
248 } | |
249 for (j in 1:length(classnames[[l]])) { | |
250 # i=class2names[j] | |
251 tic <- TIC[[classnames[[l]][j]]] | |
252 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
253 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
254 } | |
255 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
256 } | |
257 } | |
258 }#end if length >2 | |
259 if (length(phenoDataClass)==2){ | |
260 k=1 | |
261 l=2 | |
262 | |
263 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
122 colvect<-NULL | 264 colvect<-NULL |
123 for (j in 1:length(classnames[[k]])) { | 265 for (j in 1:length(classnames[[k]])) { |
124 tic <- TIC[[classnames[[k]][j]]] | 266 tic <- TIC[[classnames[[k]][j]]] |
125 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 267 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") |
126 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 268 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") |
127 colvect<-append(colvect,cols[classnames[[k]][j]]) | 269 colvect<-append(colvect,cols[classnames[[k]][j]]) |
128 } | 270 } |
129 for (j in 1:length(classnames[[l]])) { | 271 for (j in 1:length(classnames[[l]])) { |
130 # i=class2names[j] | 272 # i=class2names[j] |
131 tic <- TIC[[classnames[[l]][j]]] | 273 tic <- TIC[[classnames[[l]][j]]] |
132 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | 274 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") |
133 colvect<-append(colvect,cols[classnames[[l]][j]]) | 275 colvect<-append(colvect,cols[classnames[[l]][j]]) |
134 } | 276 } |
135 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 277 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) |
136 } | 278 |
137 } | 279 }#end length ==2 |
138 }#end if length >2 | 280 |
139 | 281 #case where only one class |
140 if (length(class)==2){ | 282 if (length(phenoDataClass)==1){ |
141 k=1 | 283 k=1 |
142 l=2 | 284 ylim = range(sapply(TIC, function(x) range(x[,2]))) |
143 colvect<-NULL | 285 |
144 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | 286 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") |
145 | |
146 for (j in 1:length(classnames[[k]])) { | |
147 | |
148 tic <- TIC[[classnames[[k]][j]]] | |
149 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
150 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
151 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
152 } | |
153 for (j in 1:length(classnames[[l]])) { | |
154 # i=class2names[j] | |
155 tic <- TIC[[classnames[[l]][j]]] | |
156 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
157 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
158 } | |
159 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
160 | |
161 }#end length ==2 | |
162 | |
163 #case where only one class | |
164 if (length(class)==1){ | |
165 k=1 | |
166 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
167 colvect<-NULL | |
168 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
169 | |
170 for (j in 1:length(classnames[[k]])) { | |
171 tic <- TIC[[classnames[[k]][j]]] | |
172 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
173 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
174 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
175 } | |
176 | |
177 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | |
178 | |
179 }#end length ==1 | |
180 | |
181 dev.off() #pdf(pdfname,w=16,h=10) | |
182 | |
183 invisible(TIC) | |
184 } | |
185 | |
186 | |
187 | |
188 #@author Y. Guitton | |
189 getTIC <- function(file,rtcor=NULL) { | |
190 object <- xcmsRaw(file) | |
191 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) | |
192 } | |
193 | |
194 ## | |
195 ## overlay TIC from all files in current folder or from xcmsSet, create pdf | |
196 ## | |
197 #@author Y. Guitton | |
198 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { | |
199 cat("Creating TIC pdf...\n") | |
200 | |
201 if (is.null(xcmsSet)) { | |
202 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
203 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") | |
204 if (is.null(files)) | |
205 files <- getwd() | |
206 info <- file.info(files) | |
207 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) | |
208 files <- c(files[!info$isdir], listed) | |
209 } else { | |
210 files <- filepaths(xcmsSet) | |
211 } | |
212 | |
213 class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class | |
214 | |
215 classnames<-vector("list",length(class)) | |
216 for (i in 1:length(class)){ | |
217 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) | |
218 } | |
219 | |
220 N <- length(files) | |
221 TIC <- vector("list",N) | |
222 | |
223 for (i in 1:N) { | |
224 if (!is.null(xcmsSet) && rt == "corrected") | |
225 rtcor <- xcmsSet@rt$corrected[[i]] else | |
226 rtcor <- NULL | |
227 TIC[[i]] <- getTIC(files[i],rtcor=rtcor) | |
228 } | |
229 | |
230 pdf(pdfname,w=16,h=10) | |
231 cols <- rainbow(N) | |
232 lty = 1:N | |
233 pch = 1:N | |
234 #search for max x and max y in TICs | |
235 xlim = range(sapply(TIC, function(x) range(x[,1]))) | |
236 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
237 ylim = c(-ylim[2], ylim[2]) | |
238 | |
239 | |
240 ##plot start | |
241 if (length(class)>2){ | |
242 for (k in 1:(length(class)-1)){ | |
243 for (l in (k+1):length(class)){ | |
244 #print(paste(class[k],"vs",class[l],sep=" ")) | |
245 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
246 colvect<-NULL | 287 colvect<-NULL |
247 for (j in 1:length(classnames[[k]])) { | 288 for (j in 1:length(classnames[[k]])) { |
248 | 289 tic <- TIC[[classnames[[k]][j]]] |
249 tic <- TIC[[classnames[[k]][j]]] | 290 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") |
250 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 291 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") |
251 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 292 colvect<-append(colvect,cols[classnames[[k]][j]]) |
252 colvect<-append(colvect,cols[classnames[[k]][j]]) | 293 } |
253 } | 294 |
254 for (j in 1:length(classnames[[l]])) { | 295 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) |
255 # i=class2names[j] | 296 |
256 tic <- TIC[[classnames[[l]][j]]] | 297 }#end length ==1 |
257 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | 298 |
258 colvect<-append(colvect,cols[classnames[[l]][j]]) | 299 dev.off() #pdf(pdfname,w=16,h=10) |
259 } | 300 |
260 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 301 invisible(TIC) |
261 } | |
262 } | |
263 }#end if length >2 | |
264 if (length(class)==2){ | |
265 k=1 | |
266 l=2 | |
267 | |
268 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
269 colvect<-NULL | |
270 for (j in 1:length(classnames[[k]])) { | |
271 tic <- TIC[[classnames[[k]][j]]] | |
272 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
273 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
274 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
275 } | |
276 for (j in 1:length(classnames[[l]])) { | |
277 # i=class2names[j] | |
278 tic <- TIC[[classnames[[l]][j]]] | |
279 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
280 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
281 } | |
282 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
283 | |
284 }#end length ==2 | |
285 | |
286 #case where only one class | |
287 if (length(class)==1){ | |
288 k=1 | |
289 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
290 | |
291 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
292 colvect<-NULL | |
293 for (j in 1:length(classnames[[k]])) { | |
294 tic <- TIC[[classnames[[k]][j]]] | |
295 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
296 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
297 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
298 } | |
299 | |
300 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | |
301 | |
302 }#end length ==1 | |
303 | |
304 dev.off() #pdf(pdfname,w=16,h=10) | |
305 | |
306 invisible(TIC) | |
307 } | 302 } |
308 | 303 |
309 | 304 |
310 | 305 |
311 ## | 306 ## |
312 ## Get the polarities from all the samples of a condition | 307 ## Get the polarities from all the samples of a condition |
313 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 308 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
314 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 309 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
315 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { | 310 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { |
316 cat("Creating the sampleMetadata file...\n") | 311 cat("Creating the sampleMetadata file...\n") |
317 | 312 |
318 #Create the sampleMetada dataframe | 313 #Create the sampleMetada dataframe |
319 sampleMetadata=xset@phenoData | 314 sampleMetadata=xset@phenoData |
320 sampleNamesOrigin=rownames(sampleMetadata) | 315 sampleNamesOrigin=rownames(sampleMetadata) |
321 sampleNamesMakeNames=make.names(sampleNamesOrigin) | 316 sampleNamesMakeNames=make.names(sampleNamesOrigin) |
322 | 317 |
323 if (any(duplicated(sampleNamesMakeNames))) { | 318 if (any(duplicated(sampleNamesMakeNames))) { |
324 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | 319 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) |
325 for (sampleName in sampleNamesOrigin) { | 320 for (sampleName in sampleNamesOrigin) { |
326 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | 321 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) |
327 } | 322 } |
328 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 323 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
329 } | 324 } |
330 | 325 |
331 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { | 326 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { |
332 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") | 327 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") |
333 for (sampleName in sampleNamesOrigin) { | 328 for (sampleName in sampleNamesOrigin) { |
334 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) | 329 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) |
335 } | 330 } |
336 } | 331 } |
337 | 332 |
338 sampleMetadata$sampleMetadata=sampleNamesMakeNames | 333 sampleMetadata$sampleMetadata=sampleNamesMakeNames |
339 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns | 334 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns |
340 rownames(sampleMetadata)=NULL | 335 rownames(sampleMetadata)=NULL |
341 | 336 |
342 #Create a list of files name in the current directory | 337 #Create a list of files name in the current directory |
343 list_files=xset@filepaths | 338 list_files=xset@filepaths |
344 #For each sample file, the following actions are done | 339 #For each sample file, the following actions are done |
345 for (file in list_files){ | 340 for (file in list_files){ |
346 #Check if the file is in the CDF format | 341 #Check if the file is in the CDF format |
347 if (!mzR:::netCDFIsFile(file)){ | 342 if (!mzR:::netCDFIsFile(file)){ |
348 | 343 |
349 # If the column isn't exist, with add one filled with NA | 344 # If the column isn't exist, with add one filled with NA |
350 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA | 345 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA |
351 | 346 |
352 #Create a simple xcmsRaw object for each sample | 347 #Create a simple xcmsRaw object for each sample |
353 xcmsRaw=xcmsRaw(file) | 348 xcmsRaw=xcmsRaw(file) |
354 #Extract the polarity (a list of polarities) | 349 #Extract the polarity (a list of polarities) |
355 polarity=xcmsRaw@polarity | 350 polarity=xcmsRaw@polarity |
356 #Verify if all the scans have the same polarity | 351 #Verify if all the scans have the same polarity |
357 uniq_list=unique(polarity) | 352 uniq_list=unique(polarity) |
358 if (length(uniq_list)>1){ | 353 if (length(uniq_list)>1){ |
359 polarity="mixed" | 354 polarity="mixed" |
360 } else { | 355 } else { |
361 polarity=as.character(uniq_list) | 356 polarity=as.character(uniq_list) |
362 } | 357 } |
363 #Transforms the character to obtain only the sample name | 358 #Transforms the character to obtain only the sample name |
364 filename=basename(file) | 359 filename=basename(file) |
365 library(tools) | 360 library(tools) |
366 samplename=file_path_sans_ext(filename) | 361 samplename=file_path_sans_ext(filename) |
367 | 362 |
368 #Set the polarity attribute | 363 #Set the polarity attribute |
369 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity | 364 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity |
370 | 365 |
371 #Delete xcmsRaw object because it creates a bug for the fillpeaks step | 366 #Delete xcmsRaw object because it creates a bug for the fillpeaks step |
372 rm(xcmsRaw) | 367 rm(xcmsRaw) |
373 } | 368 } |
374 | 369 |
375 } | 370 } |
376 | 371 |
377 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) | 372 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) |
378 | 373 |
379 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) | 374 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) |
380 | 375 |
381 } | 376 } |
382 | 377 |
383 | 378 |
384 ## | 379 ## |
385 ## This function check if xcms will found all the files | 380 ## This function check if xcms will found all the files |
386 ## | 381 ## |
387 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 382 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
388 checkFilesCompatibilityWithXcms <- function(directory) { | 383 checkFilesCompatibilityWithXcms <- function(directory) { |
389 cat("Checking files filenames compatibilities with xmcs...\n") | 384 cat("Checking files filenames compatibilities with xmcs...\n") |
390 # WHAT XCMS WILL FIND | 385 # WHAT XCMS WILL FIND |
391 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 386 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
392 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") | 387 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") |
393 info <- file.info(directory) | 388 info <- file.info(directory) |
394 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) | 389 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) |
395 files <- c(directory[!info$isdir], listed) | 390 files <- c(directory[!info$isdir], listed) |
396 files_abs <- file.path(getwd(), files) | 391 files_abs <- file.path(getwd(), files) |
397 exists <- file.exists(files_abs) | 392 exists <- file.exists(files_abs) |
398 files[exists] <- files_abs[exists] | 393 files[exists] <- files_abs[exists] |
399 files[exists] <- sub("//","/",files[exists]) | 394 files[exists] <- sub("//","/",files[exists]) |
400 | 395 |
401 # WHAT IS ON THE FILESYSTEM | 396 # WHAT IS ON THE FILESYSTEM |
402 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) | 397 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) |
403 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | 398 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] |
404 | 399 |
405 # COMPARISON | 400 # COMPARISON |
406 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | 401 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { |
407 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | 402 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) |
408 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | 403 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) |
409 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 404 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
410 | 405 } |
411 } | |
412 } | 406 } |
413 | 407 |
414 | 408 |
415 | 409 |
416 ## | 410 ## |
417 ## This function check if XML contains special caracters. It also checks integrity and completness. | 411 ## This function check if XML contains special caracters. It also checks integrity and completness. |
418 ## | 412 ## |
419 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 413 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
420 checkXmlStructure <- function (directory) { | 414 checkXmlStructure <- function (directory) { |
421 cat("Checking XML structure...\n") | 415 cat("Checking XML structure...\n") |
422 | 416 |
423 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | 417 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") |
424 capture=system(cmd,intern=TRUE) | 418 capture=system(cmd,intern=TRUE) |
425 | 419 |
426 if (length(capture)>0){ | 420 if (length(capture)>0){ |
427 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | 421 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) |
428 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | 422 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) |
429 write(capture, stderr()) | 423 write(capture, stderr()) |
430 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | 424 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") |
431 } | 425 } |
432 | 426 |
433 } | 427 } |
434 | 428 |
435 | 429 |
436 ## | 430 ## |
437 ## This function check if XML contain special characters | 431 ## This function check if XML contain special characters |
438 ## | 432 ## |
439 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 433 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
440 deleteXmlBadCharacters<- function (directory) { | 434 deleteXmlBadCharacters<- function (directory) { |
441 cat("Checking Non ASCII characters in the XML...\n") | 435 cat("Checking Non ASCII characters in the XML...\n") |
442 | 436 |
443 processed=F | 437 processed=F |
444 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) | 438 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) |
445 for (i in l){ | 439 for (i in l){ |
446 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") | 440 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") |
447 capture=suppressWarnings(system(cmd,intern=TRUE)) | 441 capture=suppressWarnings(system(cmd,intern=TRUE)) |
448 if (length(capture)>0){ | 442 if (length(capture)>0){ |
449 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | 443 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) |
450 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | 444 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) |
451 c=system(cmd,intern=TRUE) | 445 c=system(cmd,intern=TRUE) |
452 capture="" | 446 capture="" |
453 processed=T | 447 processed=T |
454 } | 448 } |
455 } | 449 } |
456 if (processed) cat("\n\n") | 450 if (processed) cat("\n\n") |
457 return(processed) | 451 return(processed) |
458 } | 452 } |
459 | 453 |
460 | 454 |
461 ## | 455 ## |
462 ## This function will compute MD5 checksum to check the data integrity | 456 ## This function will compute MD5 checksum to check the data integrity |
463 ## | 457 ## |
464 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 458 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
465 getMd5sum <- function (directory) { | 459 getMd5sum <- function (directory) { |
466 cat("Compute md5 checksum...\n") | 460 cat("Compute md5 checksum...\n") |
467 # WHAT XCMS WILL FIND | 461 # WHAT XCMS WILL FIND |
468 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 462 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
469 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") | 463 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") |
470 info <- file.info(directory) | 464 info <- file.info(directory) |
471 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) | 465 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) |
472 files <- c(directory[!info$isdir], listed) | 466 files <- c(directory[!info$isdir], listed) |
473 exists <- file.exists(files) | 467 exists <- file.exists(files) |
474 files <- files[exists] | 468 files <- files[exists] |
475 | 469 |
476 library(tools) | 470 library(tools) |
477 | 471 |
478 #cat("\n\n") | 472 #cat("\n\n") |
479 | 473 |
480 return(as.matrix(md5sum(files))) | 474 return(as.matrix(md5sum(files))) |
481 } | 475 } |
476 | |
477 | |
478 # This function get the raw file path from the arguments | |
479 getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) { | |
480 if (!is.null(listArguments[["zipfile"]])) zipfile = listArguments[["zipfile"]] | |
481 if (!is.null(listArguments[["zipfilePositive"]])) zipfile = listArguments[["zipfilePositive"]] | |
482 if (!is.null(listArguments[["zipfileNegative"]])) zipfile = listArguments[["zipfileNegative"]] | |
483 | |
484 if (!is.null(listArguments[["singlefile_galaxyPath"]])) { | |
485 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPath"]]; | |
486 singlefile_sampleNames = listArguments[["singlefile_sampleName"]] | |
487 } | |
488 if (!is.null(listArguments[["singlefile_galaxyPathPositive"]])) { | |
489 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathPositive"]]; | |
490 singlefile_sampleNames = listArguments[["singlefile_sampleNamePositive"]] | |
491 } | |
492 if (!is.null(listArguments[["singlefile_galaxyPathNegative"]])) { | |
493 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathNegative"]]; | |
494 singlefile_sampleNames = listArguments[["singlefile_sampleNameNegative"]] | |
495 } | |
496 if (exists("singlefile_galaxyPaths")){ | |
497 singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,",")) | |
498 singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,",")) | |
499 | |
500 singlefile=NULL | |
501 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | |
502 singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
503 singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] | |
504 singlefile[[singlefile_sampleName]] = singlefile_galaxyPath | |
505 } | |
506 } | |
507 for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { | |
508 listArguments[[argument]]=NULL | |
509 } | |
510 return(list(zipfile=zipfile, singlefile=singlefile, listArguments=listArguments)) | |
511 } | |
512 | |
513 | |
514 # This function retrieve the raw file in the working directory | |
515 # - if zipfile: unzip the file with its directory tree | |
516 # - if singlefiles: set symlink with the good filename | |
517 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | |
518 if(!is.null(singlefile) && (length("singlefile")>0)) { | |
519 for (singlefile_sampleName in names(singlefile)) { | |
520 singlefile_galaxyPath = singlefile[[singlefile_sampleName]] | |
521 if(!file.exists(singlefile_galaxyPath)){ | |
522 error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | |
523 print(error_message); stop(error_message) | |
524 } | |
525 | |
526 file.symlink(singlefile_galaxyPath,singlefile_sampleName) | |
527 } | |
528 directory = "." | |
529 | |
530 } | |
531 if(!is.null(zipfile) && (zipfile!="")) { | |
532 if(!file.exists(zipfile)){ | |
533 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | |
534 print(error_message) | |
535 stop(error_message) | |
536 } | |
537 | |
538 #list all file in the zip file | |
539 #zip_files=unzip(zipfile,list=T)[,"Name"] | |
540 | |
541 #unzip | |
542 suppressWarnings(unzip(zipfile, unzip="unzip")) | |
543 | |
544 #get the directory name | |
545 filesInZip=unzip(zipfile, list=T); | |
546 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); | |
547 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | |
548 directory = "." | |
549 if (length(directories) == 1) directory = directories | |
550 | |
551 cat("files_root_directory\t",directory,"\n") | |
552 | |
553 } | |
554 return (directory) | |
555 } |