comparison lib.r @ 14:833d2c821d9c draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 9f72e947d9c241d11221cad561f3525d27231857
author lecorguille
date Tue, 18 Sep 2018 16:11:06 -0400
parents c81275aee959
children 6a4d8753a330
comparison
equal deleted inserted replaced
13:13558e8a4778 14:833d2c821d9c
1 #Authors ABiMS TEAM 1 #@authors ABiMS TEAM, Y. Guitton
2 #Lib.r for Galaxy Workflow4Metabolomics xcms tools 2 # lib.r for Galaxy Workflow4Metabolomics xcms tools
3 # 3
4 #version 2.4: lecorguille 4 #@author G. Le Corguille
5 # add getPeaklistW4M 5 # solve an issue with batch if arguments are logical TRUE/FALSE
6 #version 2.3: yguitton 6 parseCommandArgs <- function(...) {
7 # correction for empty PDF when only 1 class 7 args <- batch::parseCommandArgs(...)
8 #version 2.2 8 for (key in names(args)) {
9 # correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet 9 if (args[key] %in% c("TRUE","FALSE"))
10 # Note if scanrange is used a warning is prompted in R console but do not stop PDF generation 10 args[key] = as.logical(args[key])
11 #version 2.1: yguitton 11 }
12 # Modifications made by Guitton Yann 12 return(args)
13 13 }
14 14
15 #@author G. Le Corguille 15 #@author G. Le Corguille
16 #This function convert if it is required the Retention Time in minutes 16 # This function will
17 # - load the packages
18 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) {
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
21
22 sessioninfo = sessionInfo()
23 cat(sessioninfo$R.version$version.string,"\n")
24 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
26 cat("Other loaded packages:\n")
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
28 }
29
30 #@author G. Le Corguille
31 # This function merge several chromBPI or chromTIC into one.
32 mergeChrom <- function(chrom_merged, chrom) {
33 if (is.null(chrom_merged)) return(NULL)
34 chrom_merged@.Data <- cbind(chrom_merged@.Data, chrom@.Data)
35 return(chrom_merged)
36 }
37
38 #@author G. Le Corguille
39 # This function merge several xdata into one.
40 mergeXData <- function(args) {
41 chromTIC <- NULL
42 chromBPI <- NULL
43 chromTIC_adjusted <- NULL
44 chromBPI_adjusted <- NULL
45 for(image in args$images) {
46
47 load(image)
48 # Handle infiles
49 if (!exists("singlefile")) singlefile <- NULL
50 if (!exists("zipfile")) zipfile <- NULL
51 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
52 zipfile <- rawFilePath$zipfile
53 singlefile <- rawFilePath$singlefile
54 retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)
55
56 if (exists("raw_data")) xdata <- raw_data
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
58
59 cat(sampleNamesList$sampleNamesOrigin,"\n")
60
61 if (!exists("xdata_merged")) {
62 xdata_merged <- xdata
63 singlefile_merged <- singlefile
64 md5sumList_merged <- md5sumList
65 sampleNamesList_merged <- sampleNamesList
66 chromTIC_merged <- chromTIC
67 chromBPI_merged <- chromBPI
68 chromTIC_adjusted_merged <- chromTIC_adjusted
69 chromBPI_adjusted_merged <- chromBPI_adjusted
70 } else {
71 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata)
72 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- .concatenate_OnDiskMSnExp(xdata_merged,xdata)
73 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
74
75 singlefile_merged <- c(singlefile_merged,singlefile)
76 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin)
77 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin)
78 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames)
79 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC)
80 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI)
81 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted)
82 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted)
83 }
84 }
85 rm(image)
86 xdata <- xdata_merged; rm(xdata_merged)
87 singlefile <- singlefile_merged; rm(singlefile_merged)
88 md5sumList <- md5sumList_merged; rm(md5sumList_merged)
89 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged)
90
91 if (!is.null(args$sampleMetadata)) {
92 cat("\tXSET PHENODATA SETTING...\n")
93 sampleMetadataFile <- args$sampleMetadata
94 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F)
95 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)]
96
97 if (any(is.na(pData(xdata)$sample_group))) {
98 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
99 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" "))
100 print(error_message)
101 stop(error_message)
102 }
103 }
104
105 if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData }
106 if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData }
107 if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData }
108 if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData }
109
110 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted))
111 }
112
113 #@author G. Le Corguille
114 # This function convert if it is required the Retention Time in minutes
17 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { 115 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
18 if (convertRTMinute){ 116 if (convertRTMinute){
19 #converting the retention times (seconds) into minutes 117 #converting the retention times (seconds) into minutes
20 print("converting the retention times into minutes in the variableMetadata") 118 print("converting the retention times into minutes in the variableMetadata")
21 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60 119 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60
22 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60 120 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60
23 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60 121 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60
24 } 122 }
25 return (variableMetadata) 123 return (variableMetadata)
26 } 124 }
27 125
28 #@author G. Le Corguille 126 #@author G. Le Corguille
29 #This function format ions identifiers 127 # This function format ions identifiers
30 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { 128 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
31 splitDeco = strsplit(as.character(variableMetadata$name),"_") 129 splitDeco <- strsplit(as.character(variableMetadata$name),"_")
32 idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) 130 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
33 namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) 131 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
34 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) 132 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
35 return(variableMetadata) 133 return(variableMetadata)
36 } 134 }
37 135
38 #@author G. Le Corguille 136 #@author G. Le Corguille
137 # This function convert the remain NA to 0 in the dataMatrix
138 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
139 if (naTOzero){
140 dataMatrix[is.na(dataMatrix)] <- 0
141 }
142 return (dataMatrix)
143 }
144
145 #@author G. Le Corguille
146 # Draw the plotChromPeakDensity 3 per page in a pdf file
147 getPlotChromPeakDensity <- function(xdata, mzdigit=4) {
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12)
149
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
151
152 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
153 names(group_colors) <- unique(xdata$sample_group)
154
155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
156 for (i in 1:nrow(featureDefinitions(xdata))) {
157 mzmin = featureDefinitions(xdata)[i,]$mzmin
158 mzmax = featureDefinitions(xdata)[i,]$mzmax
159 plotChromPeakDensity(xdata, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit)))
160 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
161 }
162
163 dev.off()
164 }
165
166 #@author G. Le Corguille
167 # Draw the plotChromPeakDensity 3 per page in a pdf file
168 getPlotAdjustedRtime <- function(xdata) {
169
170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12)
171
172 # Color by group
173 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
174 if (length(group_colors) > 1) {
175 names(group_colors) <- unique(xdata$sample_group)
176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
178 }
179
180 # Color by sample
181 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
182 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
183
184 dev.off()
185 }
186
187 #@author G. Le Corguille
39 # value: intensity values to be used into, maxo or intb 188 # value: intensity values to be used into, maxo or intb
40 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { 189 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput) {
41 variableMetadata_dataMatrix = peakTable(xset, method="medret", value=intval) 190 dataMatrix <- featureValues(xdata, method="medret", value=intval)
42 variableMetadata_dataMatrix = cbind(name=groupnames(xset),variableMetadata_dataMatrix) 191 colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix))
43 192 dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix)
44 dataMatrix = variableMetadata_dataMatrix[,(make.names(colnames(variableMetadata_dataMatrix)) %in% c("name", make.names(sampnames(xset))))] 193 variableMetadata <- featureDefinitions(xdata)
45 194 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt"
46 variableMetadata = variableMetadata_dataMatrix[,!(make.names(colnames(variableMetadata_dataMatrix)) %in% c(make.names(sampnames(xset))))] 195 variableMetadata = data.frame(name=groupnamesW4M(xdata), variableMetadata)
47 variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute) 196
48 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) 197 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
198 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
199 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)
49 200
50 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) 201 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F)
51 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) 202 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F)
52 } 203
53 204 }
54 #@author Y. Guitton 205
55 getBPC <- function(file,rtcor=NULL, ...) { 206 #@author G. Le Corguille
56 object <- xcmsRaw(file) 207 # It allow different of field separators
57 sel <- profRange(object, ...) 208 getDataFrameFromFile <- function(filename, header=T) {
58 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) 209 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F)
59 #plotChrom(xcmsRaw(file), base=T) 210 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F)
60 } 211 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F)
61 212 if (ncol(myDataFrame) < 2) {
62 #@author Y. Guitton 213 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
63 getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { 214 print(error_message)
64 cat("Creating BIC pdf...\n") 215 stop(error_message)
65 216 }
66 if (is.null(xcmsSet)) { 217 return(myDataFrame)
67 cat("Enter an xcmsSet \n") 218 }
68 stop() 219
69 } else { 220 #@author G. Le Corguille
70 files <- filepaths(xcmsSet) 221 # Draw the BPI and TIC graphics
71 } 222 # colored by sample names or class names
72 223 getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") {
73 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class 224
74 225 if (aggregationFun == "sum")
75 classnames<-vector("list",length(phenoDataClass)) 226 type="Total Ion Chromatograms"
76 for (i in 1:length(phenoDataClass)){ 227 else
77 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) 228 type="Base Peak Intensity Chromatograms"
78 } 229
79 230 adjusted="Raw"
80 N <- dim(phenoData(xcmsSet))[1] 231 if (hasAdjustedRtime(xdata))
81 232 adjusted="Adjusted"
82 TIC <- vector("list",N) 233
83 234 main <- paste(type,":",adjusted,"data")
84 235
85 for (j in 1:N) { 236 pdf(pdfname, width=16, height=10)
86 237
87 TIC[[j]] <- getBPC(files[j]) 238 # Color by group
88 #good for raw 239 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
89 # seems strange for corrected 240 if (length(group_colors) > 1) {
90 #errors if scanrange used in xcmsSetgeneration 241 names(group_colors) <- unique(xdata$sample_group)
91 if (!is.null(xcmsSet) && rt == "corrected") 242 plot(chrom, col = group_colors[chrom$sample_group], main=main)
92 rtcor <- xcmsSet@rt$corrected[[j]] 243 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
93 else 244 }
94 rtcor <- NULL 245
95 246 # Color by sample
96 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) 247 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main)
97 # TIC[[j]][,1]<-rtcor 248 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
98 } 249
99 250 dev.off()
100 251 }
101 252
102 pdf(pdfname,w=16,h=10) 253
103 cols <- rainbow(N) 254 # Get the polarities from all the samples of a condition
104 lty = 1:N
105 pch = 1:N
106 #search for max x and max y in BPCs
107 xlim = range(sapply(TIC, function(x) range(x[,1])))
108 ylim = range(sapply(TIC, function(x) range(x[,2])))
109 ylim = c(-ylim[2], ylim[2])
110
111
112 ##plot start
113
114 if (length(phenoDataClass)>2){
115 for (k in 1:(length(phenoDataClass)-1)){
116 for (l in (k+1):length(phenoDataClass)){
117 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
118 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
119 colvect<-NULL
120 for (j in 1:length(classnames[[k]])) {
121 tic <- TIC[[classnames[[k]][j]]]
122 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
123 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
124 colvect<-append(colvect,cols[classnames[[k]][j]])
125 }
126 for (j in 1:length(classnames[[l]])) {
127 # i=class2names[j]
128 tic <- TIC[[classnames[[l]][j]]]
129 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
130 colvect<-append(colvect,cols[classnames[[l]][j]])
131 }
132 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
133 }
134 }
135 }#end if length >2
136
137 if (length(phenoDataClass)==2){
138 k=1
139 l=2
140 colvect<-NULL
141 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
142
143 for (j in 1:length(classnames[[k]])) {
144
145 tic <- TIC[[classnames[[k]][j]]]
146 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
147 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
148 colvect<-append(colvect,cols[classnames[[k]][j]])
149 }
150 for (j in 1:length(classnames[[l]])) {
151 # i=class2names[j]
152 tic <- TIC[[classnames[[l]][j]]]
153 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
154 colvect<-append(colvect,cols[classnames[[l]][j]])
155 }
156 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
157
158 }#end length ==2
159
160 #case where only one class
161 if (length(phenoDataClass)==1){
162 k=1
163 ylim = range(sapply(TIC, function(x) range(x[,2])))
164 colvect<-NULL
165 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
166
167 for (j in 1:length(classnames[[k]])) {
168 tic <- TIC[[classnames[[k]][j]]]
169 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
170 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
171 colvect<-append(colvect,cols[classnames[[k]][j]])
172 }
173
174 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
175
176 }#end length ==1
177
178 dev.off() #pdf(pdfname,w=16,h=10)
179
180 invisible(TIC)
181 }
182
183
184
185 #@author Y. Guitton
186 getTIC <- function(file,rtcor=NULL) {
187 object <- xcmsRaw(file)
188 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity)
189 }
190
191 ##
192 ## overlay TIC from all files in current folder or from xcmsSet, create pdf
193 ##
194 #@author Y. Guitton
195 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) {
196 cat("Creating TIC pdf...\n")
197
198 if (is.null(xcmsSet)) {
199 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
200 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
201 if (is.null(files))
202 files <- getwd()
203 info <- file.info(files)
204 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
205 files <- c(files[!info$isdir], listed)
206 } else {
207 files <- filepaths(xcmsSet)
208 }
209
210 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
211 classnames<-vector("list",length(phenoDataClass))
212 for (i in 1:length(phenoDataClass)){
213 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i])
214 }
215
216 N <- length(files)
217 TIC <- vector("list",N)
218
219 for (i in 1:N) {
220 if (!is.null(xcmsSet) && rt == "corrected")
221 rtcor <- xcmsSet@rt$corrected[[i]] else
222 rtcor <- NULL
223 TIC[[i]] <- getTIC(files[i],rtcor=rtcor)
224 }
225
226 pdf(pdfname,w=16,h=10)
227 cols <- rainbow(N)
228 lty = 1:N
229 pch = 1:N
230 #search for max x and max y in TICs
231 xlim = range(sapply(TIC, function(x) range(x[,1])))
232 ylim = range(sapply(TIC, function(x) range(x[,2])))
233 ylim = c(-ylim[2], ylim[2])
234
235
236 ##plot start
237 if (length(phenoDataClass)>2){
238 for (k in 1:(length(phenoDataClass)-1)){
239 for (l in (k+1):length(phenoDataClass)){
240 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
241 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
242 colvect<-NULL
243 for (j in 1:length(classnames[[k]])) {
244 tic <- TIC[[classnames[[k]][j]]]
245 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
246 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
247 colvect<-append(colvect,cols[classnames[[k]][j]])
248 }
249 for (j in 1:length(classnames[[l]])) {
250 # i=class2names[j]
251 tic <- TIC[[classnames[[l]][j]]]
252 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
253 colvect<-append(colvect,cols[classnames[[l]][j]])
254 }
255 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
256 }
257 }
258 }#end if length >2
259 if (length(phenoDataClass)==2){
260 k=1
261 l=2
262
263 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
264 colvect<-NULL
265 for (j in 1:length(classnames[[k]])) {
266 tic <- TIC[[classnames[[k]][j]]]
267 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
268 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
269 colvect<-append(colvect,cols[classnames[[k]][j]])
270 }
271 for (j in 1:length(classnames[[l]])) {
272 # i=class2names[j]
273 tic <- TIC[[classnames[[l]][j]]]
274 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
275 colvect<-append(colvect,cols[classnames[[l]][j]])
276 }
277 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
278
279 }#end length ==2
280
281 #case where only one class
282 if (length(phenoDataClass)==1){
283 k=1
284 ylim = range(sapply(TIC, function(x) range(x[,2])))
285
286 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
287 colvect<-NULL
288 for (j in 1:length(classnames[[k]])) {
289 tic <- TIC[[classnames[[k]][j]]]
290 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
291 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
292 colvect<-append(colvect,cols[classnames[[k]][j]])
293 }
294
295 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
296
297 }#end length ==1
298
299 dev.off() #pdf(pdfname,w=16,h=10)
300
301 invisible(TIC)
302 }
303
304
305
306 ##
307 ## Get the polarities from all the samples of a condition
308 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 255 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
309 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 256 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
310 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { 257 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
311 cat("Creating the sampleMetadata file...\n") 258 cat("Creating the sampleMetadata file...\n")
312 259
313 #Create the sampleMetada dataframe 260 #Create the sampleMetada dataframe
314 sampleMetadata=xset@phenoData 261 sampleMetadata <- xdata@phenoData@data
315 sampleNamesOrigin=rownames(sampleMetadata) 262 rownames(sampleMetadata) <- NULL
316 sampleNamesMakeNames=make.names(sampleNamesOrigin) 263 colnames(sampleMetadata) <- c("sampleMetadata", "class")
264
265 sampleNamesOrigin <- sampleMetadata$sampleMetadata
266 sampleNamesMakeNames <- make.names(sampleNamesOrigin)
317 267
318 if (any(duplicated(sampleNamesMakeNames))) { 268 if (any(duplicated(sampleNamesMakeNames))) {
319 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 269 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
320 for (sampleName in sampleNamesOrigin) { 270 for (sampleName in sampleNamesOrigin) {
321 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) 271 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
328 for (sampleName in sampleNamesOrigin) { 278 for (sampleName in sampleNamesOrigin) {
329 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) 279 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n"))
330 } 280 }
331 } 281 }
332 282
333 sampleMetadata$sampleMetadata=sampleNamesMakeNames 283 sampleMetadata$sampleMetadata <- sampleNamesMakeNames
334 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns 284
335 rownames(sampleMetadata)=NULL 285
336
337 #Create a list of files name in the current directory
338 list_files=xset@filepaths
339 #For each sample file, the following actions are done 286 #For each sample file, the following actions are done
340 for (file in list_files){ 287 for (fileIdx in 1:length(fileNames(xdata))) {
341 #Check if the file is in the CDF format 288 #Check if the file is in the CDF format
342 if (!mzR:::netCDFIsFile(file)){ 289 if (!mzR:::netCDFIsFile(fileNames(xdata))) {
343 290
344 # If the column isn't exist, with add one filled with NA 291 # If the column isn't exist, with add one filled with NA
345 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA 292 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
346 293
347 #Create a simple xcmsRaw object for each sample
348 xcmsRaw=xcmsRaw(file)
349 #Extract the polarity (a list of polarities) 294 #Extract the polarity (a list of polarities)
350 polarity=xcmsRaw@polarity 295 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"]
351 #Verify if all the scans have the same polarity 296 #Verify if all the scans have the same polarity
352 uniq_list=unique(polarity) 297 uniq_list <- unique(polarity)
353 if (length(uniq_list)>1){ 298 if (length(uniq_list)>1){
354 polarity="mixed" 299 polarity <- "mixed"
355 } else { 300 } else {
356 polarity=as.character(uniq_list) 301 polarity <- as.character(uniq_list)
357 } 302 }
358 #Transforms the character to obtain only the sample name
359 filename=basename(file)
360 library(tools)
361 samplename=file_path_sans_ext(filename)
362 303
363 #Set the polarity attribute 304 #Set the polarity attribute
364 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity 305 sampleMetadata$polarity[fileIdx] <- polarity
365
366 #Delete xcmsRaw object because it creates a bug for the fillpeaks step
367 rm(xcmsRaw)
368 } 306 }
369 307
370 } 308 }
371 309
372 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) 310 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput)
373 311
374 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) 312 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames))
375 313
376 } 314 }
377 315
378 316
379 ## 317 # This function check if xcms will found all the files
380 ## This function check if xcms will found all the files
381 ##
382 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 318 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
383 checkFilesCompatibilityWithXcms <- function(directory) { 319 checkFilesCompatibilityWithXcms <- function(directory) {
384 cat("Checking files filenames compatibilities with xmcs...\n") 320 cat("Checking files filenames compatibilities with xmcs...\n")
385 # WHAT XCMS WILL FIND 321 # WHAT XCMS WILL FIND
386 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 322 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
387 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") 323 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
388 info <- file.info(directory) 324 info <- file.info(directory)
389 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) 325 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
390 files <- c(directory[!info$isdir], listed) 326 files <- c(directory[!info$isdir], listed)
391 files_abs <- file.path(getwd(), files) 327 files_abs <- file.path(getwd(), files)
392 exists <- file.exists(files_abs) 328 exists <- file.exists(files_abs)
393 files[exists] <- files_abs[exists] 329 files[exists] <- files_abs[exists]
394 files[exists] <- sub("//","/",files[exists]) 330 files[exists] <- sub("//","/",files[exists])
395 331
396 # WHAT IS ON THE FILESYSTEM 332 # WHAT IS ON THE FILESYSTEM
397 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) 333 filesystem_filepaths <- system(paste0("find \"$PWD/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T)
398 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] 334 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
399 335
400 # COMPARISON 336 # COMPARISON
401 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { 337 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
402 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) 338 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
403 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) 339 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
404 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 340 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
405 } 341 }
406 } 342 }
407 343
408 344
409 345 #This function list the compatible files within the directory as xcms did
410 ## 346 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
411 ## This function check if XML contains special caracters. It also checks integrity and completness. 347 getMSFiles <- function (directory) {
412 ## 348 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
349 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
350 info <- file.info(directory)
351 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
352 files <- c(directory[!info$isdir], listed)
353 exists <- file.exists(files)
354 files <- files[exists]
355 return(files)
356 }
357
358 # This function check if XML contains special caracters. It also checks integrity and completness.
413 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 359 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
414 checkXmlStructure <- function (directory) { 360 checkXmlStructure <- function (directory) {
415 cat("Checking XML structure...\n") 361 cat("Checking XML structure...\n")
416 362
417 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") 363 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
418 capture=system(cmd,intern=TRUE) 364 capture <- system(cmd, intern=TRUE)
419 365
420 if (length(capture)>0){ 366 if (length(capture)>0){
421 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) 367 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
422 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) 368 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
423 write(capture, stderr()) 369 write(capture, stderr())
425 } 371 }
426 372
427 } 373 }
428 374
429 375
430 ## 376 # This function check if XML contain special characters
431 ## This function check if XML contain special characters
432 ##
433 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 377 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
434 deleteXmlBadCharacters<- function (directory) { 378 deleteXmlBadCharacters<- function (directory) {
435 cat("Checking Non ASCII characters in the XML...\n") 379 cat("Checking Non ASCII characters in the XML...\n")
436 380
437 processed=F 381 processed <- F
438 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) 382 l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE)
439 for (i in l){ 383 for (i in l){
440 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") 384 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="")
441 capture=suppressWarnings(system(cmd,intern=TRUE)) 385 capture <- suppressWarnings(system(cmd, intern=TRUE))
442 if (length(capture)>0){ 386 if (length(capture)>0){
443 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) 387 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
444 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) 388 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
445 c=system(cmd,intern=TRUE) 389 c <- system(cmd, intern=TRUE)
446 capture="" 390 capture <- ""
447 processed=T 391 processed <- T
448 } 392 }
449 } 393 }
450 if (processed) cat("\n\n") 394 if (processed) cat("\n\n")
451 return(processed) 395 return(processed)
452 } 396 }
453 397
454 398
455 ## 399 # This function will compute MD5 checksum to check the data integrity
456 ## This function will compute MD5 checksum to check the data integrity
457 ##
458 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 400 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
459 getMd5sum <- function (directory) { 401 getMd5sum <- function (directory) {
460 cat("Compute md5 checksum...\n") 402 cat("Compute md5 checksum...\n")
461 # WHAT XCMS WILL FIND 403 # WHAT XCMS WILL FIND
462 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 404 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
463 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") 405 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
464 info <- file.info(directory) 406 info <- file.info(directory)
465 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) 407 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
466 files <- c(directory[!info$isdir], listed) 408 files <- c(directory[!info$isdir], listed)
467 exists <- file.exists(files) 409 exists <- file.exists(files)
468 files <- files[exists] 410 files <- files[exists]
469 411
470 library(tools) 412 library(tools)
474 return(as.matrix(md5sum(files))) 416 return(as.matrix(md5sum(files)))
475 } 417 }
476 418
477 419
478 # This function get the raw file path from the arguments 420 # This function get the raw file path from the arguments
479 getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) { 421 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
480 if (!is.null(listArguments[["zipfile"]])) zipfile = listArguments[["zipfile"]] 422 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") {
481 if (!is.null(listArguments[["zipfilePositive"]])) zipfile = listArguments[["zipfilePositive"]] 423 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
482 if (!is.null(listArguments[["zipfileNegative"]])) zipfile = listArguments[["zipfileNegative"]] 424
483 425 if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]]
484 if (!is.null(listArguments[["singlefile_galaxyPath"]])) { 426
485 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPath"]]; 427 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
486 singlefile_sampleNames = listArguments[["singlefile_sampleName"]] 428 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]]
487 } 429 singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]]
488 if (!is.null(listArguments[["singlefile_galaxyPathPositive"]])) { 430 }
489 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathPositive"]]; 431 if (exists("singlefile_galaxyPaths")){
490 singlefile_sampleNames = listArguments[["singlefile_sampleNamePositive"]] 432 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|"))
491 } 433 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|"))
492 if (!is.null(listArguments[["singlefile_galaxyPathNegative"]])) { 434
493 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathNegative"]]; 435 singlefile <- NULL
494 singlefile_sampleNames = listArguments[["singlefile_sampleNameNegative"]] 436 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
495 } 437 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
496 if (exists("singlefile_galaxyPaths")){ 438 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
497 singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,",")) 439 # In case, an url is used to import data within Galaxy
498 singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,",")) 440 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
499 441 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
500 singlefile=NULL 442 }
501 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { 443 }
502 singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] 444 return(list(zipfile=zipfile, singlefile=singlefile))
503 singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] 445 }
504 singlefile[[singlefile_sampleName]] = singlefile_galaxyPath
505 }
506 }
507 for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) {
508 listArguments[[argument]]=NULL
509 }
510 return(list(zipfile=zipfile, singlefile=singlefile, listArguments=listArguments))
511 }
512
513 446
514 # This function retrieve the raw file in the working directory 447 # This function retrieve the raw file in the working directory
515 # - if zipfile: unzip the file with its directory tree 448 # - if zipfile: unzip the file with its directory tree
516 # - if singlefiles: set symlink with the good filename 449 # - if singlefiles: set symlink with the good filename
450 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
517 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { 451 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
518 if(!is.null(singlefile) && (length("singlefile")>0)) { 452 if(!is.null(singlefile) && (length("singlefile")>0)) {
519 for (singlefile_sampleName in names(singlefile)) { 453 for (singlefile_sampleName in names(singlefile)) {
520 singlefile_galaxyPath = singlefile[[singlefile_sampleName]] 454 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
521 if(!file.exists(singlefile_galaxyPath)){ 455 if(!file.exists(singlefile_galaxyPath)){
522 error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") 456 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
523 print(error_message); stop(error_message) 457 print(error_message); stop(error_message)
524 } 458 }
525 459
526 file.symlink(singlefile_galaxyPath,singlefile_sampleName) 460 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
527 } 461 file.copy(singlefile_galaxyPath, singlefile_sampleName)
528 directory = "." 462
529 463 }
530 } 464 directory <- "."
531 if(!is.null(zipfile) && (zipfile!="")) { 465
466 }
467 if(!is.null(zipfile) && (zipfile != "")) {
532 if(!file.exists(zipfile)){ 468 if(!file.exists(zipfile)){
533 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") 469 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
534 print(error_message) 470 print(error_message)
535 stop(error_message) 471 stop(error_message)
536 } 472 }
537 473
538 #list all file in the zip file 474 #list all file in the zip file
539 #zip_files=unzip(zipfile,list=T)[,"Name"] 475 #zip_files <- unzip(zipfile,list=T)[,"Name"]
540 476
541 #unzip 477 #unzip
542 suppressWarnings(unzip(zipfile, unzip="unzip")) 478 suppressWarnings(unzip(zipfile, unzip="unzip"))
543 479
544 #get the directory name 480 #get the directory name
545 filesInZip=unzip(zipfile, list=T); 481 suppressWarnings(filesInZip <- unzip(zipfile, list=T))
546 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); 482 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])))
547 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] 483 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
548 directory = "." 484 directory <- "."
549 if (length(directories) == 1) directory = directories 485 if (length(directories) == 1) directory <- directories
550 486
551 cat("files_root_directory\t",directory,"\n") 487 cat("files_root_directory\t",directory,"\n")
552 488
553 } 489 }
554 return (directory) 490 return (directory)
555 } 491 }
492
493
494 # This function retrieve a xset like object
495 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
496 getxcmsSetObject <- function(xobject) {
497 # XCMS 1.x
498 if (class(xobject) == "xcmsSet")
499 return (xobject)
500 # XCMS 3.x
501 if (class(xobject) == "XCMSnExp") {
502 # Get the legacy xcmsSet object
503 suppressWarnings(xset <- as(xobject, 'xcmsSet'))
504 if (!is.null(xset@phenoData$sample_group))
505 sampclass(xset) <- xset@phenoData$sample_group
506 else
507 sampclass(xset) <- "."
508 return (xset)
509 }
510 }