comparison lib.r @ 14:11ab2081bd4a draft

"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit dff3ac53c7c305ff263e6358308db48cf9ed4e27"
author workflow4metabolomics
date Mon, 12 Apr 2021 09:35:02 +0000
parents 226fb89cacc4
children 7faf9b2d83f6
comparison
equal deleted inserted replaced
13:226fb89cacc4 14:11ab2081bd4a
4 #@author G. Le Corguille 4 #@author G. Le Corguille
5 # solve an issue with batch if arguments are logical TRUE/FALSE 5 # solve an issue with batch if arguments are logical TRUE/FALSE
6 parseCommandArgs <- function(...) { 6 parseCommandArgs <- function(...) {
7 args <- batch::parseCommandArgs(...) 7 args <- batch::parseCommandArgs(...)
8 for (key in names(args)) { 8 for (key in names(args)) {
9 if (args[key] %in% c("TRUE","FALSE")) 9 if (args[key] %in% c("TRUE", "FALSE"))
10 args[key] = as.logical(args[key]) 10 args[key] <- as.logical(args[key])
11 } 11 }
12 return(args) 12 return(args)
13 } 13 }
14 14
15 #@author G. Le Corguille 15 #@author G. Le Corguille
16 # This function will 16 # This function will
17 # - load the packages 17 # - load the packages
18 # - display the sessionInfo 18 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) { 19 loadAndDisplayPackages <- function(pkgs) {
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) 20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE)))
21 21
22 sessioninfo = sessionInfo() 22 sessioninfo <- sessionInfo()
23 cat(sessioninfo$R.version$version.string,"\n") 23 cat(sessioninfo$R.version$version.string, "\n")
24 cat("Main packages:\n") 24 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") 25 for (pkg in names(sessioninfo$otherPkgs)) {
26 cat(paste(pkg, packageVersion(pkg)), "\t")
27 }
28 cat("\n")
26 cat("Other loaded packages:\n") 29 cat("Other loaded packages:\n")
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") 30 for (pkg in names(sessioninfo$loadedOnly)) {
31 cat(paste(pkg, packageVersion(pkg)), "\t")
32 }
33 cat("\n")
28 } 34 }
29 35
30 #@author G. Le Corguille 36 #@author G. Le Corguille
31 # This function merge several chromBPI or chromTIC into one. 37 # This function merge several chromBPI or chromTIC into one.
32 mergeChrom <- function(chrom_merged, chrom) { 38 mergeChrom <- function(chrom_merged, chrom) {
41 chromTIC <- NULL 47 chromTIC <- NULL
42 chromBPI <- NULL 48 chromBPI <- NULL
43 chromTIC_adjusted <- NULL 49 chromTIC_adjusted <- NULL
44 chromBPI_adjusted <- NULL 50 chromBPI_adjusted <- NULL
45 md5sumList <- NULL 51 md5sumList <- NULL
46 for(image in args$images) { 52 for (image in args$images) {
47 53
48 load(image) 54 load(image)
49 # Handle infiles 55 # Handle infiles
50 if (!exists("singlefile")) singlefile <- NULL 56 if (!exists("singlefile")) singlefile <- NULL
51 if (!exists("zipfile")) zipfile <- NULL 57 if (!exists("zipfile")) zipfile <- NULL
52 rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args) 58 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args)
53 zipfile <- rawFilePath$zipfile 59 zipfile <- rawFilePath$zipfile
54 singlefile <- rawFilePath$singlefile 60 singlefile <- rawFilePath$singlefile
55 61
56 if (exists("raw_data")) xdata <- raw_data 62 if (exists("raw_data")) xdata <- raw_data
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") 63 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
58 64
59 cat(sampleNamesList$sampleNamesOrigin,"\n") 65 cat(sampleNamesList$sampleNamesOrigin, "\n")
60 66
61 if (!exists("xdata_merged")) { 67 if (!exists("xdata_merged")) {
62 xdata_merged <- xdata 68 xdata_merged <- xdata
63 singlefile_merged <- singlefile 69 singlefile_merged <- singlefile
64 md5sumList_merged <- md5sumList 70 md5sumList_merged <- md5sumList
66 chromTIC_merged <- chromTIC 72 chromTIC_merged <- chromTIC
67 chromBPI_merged <- chromBPI 73 chromBPI_merged <- chromBPI
68 chromTIC_adjusted_merged <- chromTIC_adjusted 74 chromTIC_adjusted_merged <- chromTIC_adjusted
69 chromBPI_adjusted_merged <- chromBPI_adjusted 75 chromBPI_adjusted_merged <- chromBPI_adjusted
70 } else { 76 } else {
71 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) 77 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged, xdata)
72 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged,xdata) 78 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata)
73 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") 79 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
74 80
75 singlefile_merged <- c(singlefile_merged,singlefile) 81 singlefile_merged <- c(singlefile_merged, singlefile)
76 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) 82 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin)
77 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) 83 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin)
78 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) 84 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames)
79 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) 85 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC)
80 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) 86 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI)
81 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) 87 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted)
82 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) 88 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted)
83 } 89 }
89 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) 95 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged)
90 96
91 if (!is.null(args$sampleMetadata)) { 97 if (!is.null(args$sampleMetadata)) {
92 cat("\tXSET PHENODATA SETTING...\n") 98 cat("\tXSET PHENODATA SETTING...\n")
93 sampleMetadataFile <- args$sampleMetadata 99 sampleMetadataFile <- args$sampleMetadata
94 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) 100 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = F)
95 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] 101 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)]
96 102
97 if (any(is.na(pData(xdata)$sample_group))) { 103 if (any(is.na(pData(xdata)$sample_group))) {
98 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] 104 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
99 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) 105 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " "))
100 print(error_message) 106 print(error_message)
101 stop(error_message) 107 stop(error_message)
102 } 108 }
103 } 109 }
104 110
105 if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData } 111 if (!is.null(chromTIC_merged)) {
106 if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData } 112 chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData
107 if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData } 113 }
108 if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData } 114 if (!is.null(chromBPI_merged)) {
109 115 chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData
110 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted)) 116 }
117 if (!is.null(chromTIC_adjusted_merged)) {
118 chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData
119 }
120 if (!is.null(chromBPI_adjusted_merged)) {
121 chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData
122 }
123
124 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted))
111 } 125 }
112 126
113 #@author G. Le Corguille 127 #@author G. Le Corguille
114 # This function convert if it is required the Retention Time in minutes 128 # This function convert if it is required the Retention Time in minutes
115 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { 129 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
116 if (convertRTMinute){ 130 if (convertRTMinute) {
117 #converting the retention times (seconds) into minutes 131 #converting the retention times (seconds) into minutes
118 print("converting the retention times into minutes in the variableMetadata") 132 print("converting the retention times into minutes in the variableMetadata")
119 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 133 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60
120 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 134 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60
121 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 135 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60
122 } 136 }
123 return (variableMetadata) 137 return(variableMetadata)
124 } 138 }
125 139
126 #@author G. Le Corguille 140 #@author G. Le Corguille
127 # This function format ions identifiers 141 # This function format ions identifiers
128 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { 142 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) {
129 splitDeco <- strsplit(as.character(variableMetadata$name),"_") 143 splitDeco <- strsplit(as.character(variableMetadata$name), "_")
130 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) 144 idsDeco <- sapply(splitDeco,
131 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) 145 function(x) {
132 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) 146 deco <- unlist(x)[2]; if (is.na(deco)) return("") else return(paste0("_", deco))
147 }
148 )
149 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco))
150 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))])
133 return(variableMetadata) 151 return(variableMetadata)
134 } 152 }
135 153
136 #@author G. Le Corguille 154 #@author G. Le Corguille
137 # This function convert the remain NA to 0 in the dataMatrix 155 # This function convert the remain NA to 0 in the dataMatrix
138 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { 156 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
139 if (naTOzero){ 157 if (naTOzero) {
140 dataMatrix[is.na(dataMatrix)] <- 0 158 dataMatrix[is.na(dataMatrix)] <- 0
141 } 159 }
142 return (dataMatrix) 160 return(dataMatrix)
143 } 161 }
144 162
145 #@author G. Le Corguille 163 #@author G. Le Corguille
146 # Draw the plotChromPeakDensity 3 per page in a pdf file 164 # Draw the plotChromPeakDensity 3 per page in a pdf file
147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { 165 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) {
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) 166 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12)
149 167
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) 168 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
151 169
152 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 170 if (length(unique(xdata$sample_group)) < 10) {
171 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
172 }else{
173 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
174 }
153 names(group_colors) <- unique(xdata$sample_group) 175 names(group_colors) <- unique(xdata$sample_group)
176 col_per_samp <- as.character(xdata$sample_group)
177 for (i in seq_len(length(group_colors))) {
178 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i]
179 }
154 180
155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) 181 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
156 for (i in 1:nrow(featureDefinitions(xdata))) { 182 for (i in seq_len(nrow(featureDefinitions(xdata)))) {
157 mzmin = featureDefinitions(xdata)[i,]$mzmin 183 mzmin <- featureDefinitions(xdata)[i, ]$mzmin
158 mzmax = featureDefinitions(xdata)[i,]$mzmax 184 mzmax <- featureDefinitions(xdata)[i, ]$mzmax
159 plotChromPeakDensity(xdata, param = param, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) 185 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit)))
160 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 186 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
161 } 187 }
162 188
163 dev.off() 189 dev.off()
164 } 190 }
165 191
166 #@author G. Le Corguille 192 #@author G. Le Corguille
167 # Draw the plotChromPeakDensity 3 per page in a pdf file 193 # Draw the plotChromPeakDensity 3 per page in a pdf file
168 getPlotAdjustedRtime <- function(xdata) { 194 getPlotAdjustedRtime <- function(xdata) {
169 195
170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) 196 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12)
171 197
172 # Color by group 198 # Color by group
173 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 199 if (length(unique(xdata$sample_group)) < 10) {
200 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
201 } else {
202 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
203 }
174 if (length(group_colors) > 1) { 204 if (length(group_colors) > 1) {
175 names(group_colors) <- unique(xdata$sample_group) 205 names(group_colors) <- unique(xdata$sample_group)
176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) 206 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 207 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
178 } 208 }
179 209
180 # Color by sample 210 # Color by sample
181 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) 211 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
182 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) 212 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
183 213
184 dev.off() 214 dev.off()
185 } 215 }
186 216
187 #@author G. Le Corguille 217 #@author G. Le Corguille
188 # value: intensity values to be used into, maxo or intb 218 # value: intensity values to be used into, maxo or intb
189 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { 219 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = F, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) {
190 dataMatrix <- featureValues(xdata, method="medret", value=intval) 220 dataMatrix <- featureValues(xdata, method = "medret", value = intval)
191 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) 221 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix)))
192 dataMatrix = cbind(name=groupnames(xdata), dataMatrix) 222 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix)
193 variableMetadata <- featureDefinitions(xdata) 223 variableMetadata <- featureDefinitions(xdata)
194 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" 224 colnames(variableMetadata)[1] <- "mz"; colnames(variableMetadata)[4] <- "rt"
195 variableMetadata = data.frame(name=groupnames(xdata), variableMetadata) 225 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata)
196 226
197 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) 227 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
198 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) 228 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ)
199 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) 229 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)
200 230
201 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export 231 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export
202 variableMetadata[,"peakidx"] <- vapply(variableMetadata[,"peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") 232 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",")
203 233
204 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) 234 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = F, row.names = F)
205 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) 235 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = F, row.names = F)
206 236
207 } 237 }
208 238
209 #@author G. Le Corguille 239 #@author G. Le Corguille
210 # It allow different of field separators 240 # It allow different of field separators
211 getDataFrameFromFile <- function(filename, header=T) { 241 getDataFrameFromFile <- function(filename, header = T) {
212 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) 242 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = F)
213 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) 243 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = F)
214 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) 244 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = F)
215 if (ncol(myDataFrame) < 2) { 245 if (ncol(myDataFrame) < 2) {
216 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" 246 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
217 print(error_message) 247 print(error_message)
218 stop(error_message) 248 stop(error_message)
219 } 249 }
220 return(myDataFrame) 250 return(myDataFrame)
221 } 251 }
222 252
223 #@author G. Le Corguille 253 #@author G. Le Corguille
224 # Draw the BPI and TIC graphics 254 # Draw the BPI and TIC graphics
225 # colored by sample names or class names 255 # colored by sample names or class names
226 getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { 256 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") {
227 257
228 if (aggregationFun == "sum") 258 if (aggregationFun == "sum")
229 type="Total Ion Chromatograms" 259 type <- "Total Ion Chromatograms"
230 else 260 else
231 type="Base Peak Intensity Chromatograms" 261 type <- "Base Peak Intensity Chromatograms"
232 262
233 adjusted="Raw" 263 adjusted <- "Raw"
234 if (hasAdjustedRtime(xdata)) 264 if (hasAdjustedRtime(xdata))
235 adjusted="Adjusted" 265 adjusted <- "Adjusted"
236 266
237 main <- paste(type,":",adjusted,"data") 267 main <- paste(type, ":", adjusted, "data")
238 268
239 pdf(pdfname, width=16, height=10) 269 pdf(pdfname, width = 16, height = 10)
240 270
241 # Color by group 271 # Color by group
242 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 272 if (length(unique(xdata$sample_group)) < 10) {
273 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
274 }else{
275 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
276 }
243 if (length(group_colors) > 1) { 277 if (length(group_colors) > 1) {
244 names(group_colors) <- unique(xdata$sample_group) 278 names(group_colors) <- unique(xdata$sample_group)
245 plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none") 279 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none")
246 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 280 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
247 } 281 }
248 282
249 # Color by sample 283 # Color by sample
250 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none") 284 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none")
251 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) 285 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
252 286
253 dev.off() 287 dev.off()
254 } 288 }
255 289
256 290
257 # Get the polarities from all the samples of a condition 291 # Get the polarities from all the samples of a condition
258 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 292 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
259 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 293 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
260 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { 294 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") {
261 cat("Creating the sampleMetadata file...\n") 295 cat("Creating the sampleMetadata file...\n")
262 296
263 #Create the sampleMetada dataframe 297 #Create the sampleMetada dataframe
264 sampleMetadata <- xdata@phenoData@data 298 sampleMetadata <- xdata@phenoData@data
265 rownames(sampleMetadata) <- NULL 299 rownames(sampleMetadata) <- NULL
269 sampleNamesMakeNames <- make.names(sampleNamesOrigin) 303 sampleNamesMakeNames <- make.names(sampleNamesOrigin)
270 304
271 if (any(duplicated(sampleNamesMakeNames))) { 305 if (any(duplicated(sampleNamesMakeNames))) {
272 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 306 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
273 for (sampleName in sampleNamesOrigin) { 307 for (sampleName in sampleNamesOrigin) {
274 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) 308 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr())
275 } 309 }
276 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 310 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
277 } 311 }
278 312
279 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { 313 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
280 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") 314 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
281 for (sampleName in sampleNamesOrigin) { 315 for (sampleName in sampleNamesOrigin) {
282 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) 316 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n"))
283 } 317 }
284 } 318 }
285 319
286 sampleMetadata$sample_name <- sampleNamesMakeNames 320 sampleMetadata$sample_name <- sampleNamesMakeNames
287 321
288 322
289 #For each sample file, the following actions are done 323 #For each sample file, the following actions are done
290 for (fileIdx in 1:length(fileNames(xdata))) { 324 for (fileIdx in seq_len(length(fileNames(xdata)))) {
291 #Check if the file is in the CDF format 325 #Check if the file is in the CDF format
292 if (!mzR:::netCDFIsFile(fileNames(xdata))) { 326 if (!mzR:::netCDFIsFile(fileNames(xdata))) {
293 327
294 # If the column isn't exist, with add one filled with NA 328 # If the column isn't exist, with add one filled with NA
295 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA 329 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
296 330
297 #Extract the polarity (a list of polarities) 331 #Extract the polarity (a list of polarities)
298 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] 332 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"]
299 #Verify if all the scans have the same polarity 333 #Verify if all the scans have the same polarity
300 uniq_list <- unique(polarity) 334 uniq_list <- unique(polarity)
301 if (length(uniq_list)>1){ 335 if (length(uniq_list) > 1) {
302 polarity <- "mixed" 336 polarity <- "mixed"
303 } else { 337 } else {
304 polarity <- as.character(uniq_list) 338 polarity <- as.character(uniq_list)
305 } 339 }
306 340
308 sampleMetadata$polarity[fileIdx] <- polarity 342 sampleMetadata$polarity[fileIdx] <- polarity
309 } 343 }
310 344
311 } 345 }
312 346
313 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) 347 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput)
314 348
315 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) 349 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames))
316 350
317 } 351 }
318 352
319 353
320 # This function will compute MD5 checksum to check the data integrity 354 # This function will compute MD5 checksum to check the data integrity
321 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 355 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
322 getMd5sum <- function (files) { 356 getMd5sum <- function(files) {
323 cat("Compute md5 checksum...\n") 357 cat("Compute md5 checksum...\n")
324 library(tools) 358 library(tools)
325 return(as.matrix(md5sum(files))) 359 return(as.matrix(md5sum(files)))
326 } 360 }
327 361
328 # This function retrieve the raw file in the working directory 362 # This function retrieve the raw file in the working directory
329 # - if zipfile: unzip the file with its directory tree 363 # - if zipfile: unzip the file with its directory tree
330 # - if singlefiles: set symlink with the good filename 364 # - if singlefiles: set symlink with the good filename
331 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 365 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
332 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") { 366 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") {
333 367
334 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") 368 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
335 369
336 # single - if the file are passed in the command arguments -> refresh singlefile 370 # single - if the file are passed in the command arguments -> refresh singlefile
337 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { 371 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) {
338 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|")) 372 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|"))
339 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|")) 373 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|"))
340 374
341 singlefile <- NULL 375 singlefile <- NULL
342 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { 376 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) {
343 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] 377 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
344 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] 378 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
345 # In case, an url is used to import data within Galaxy 379 # In case, an url is used to import data within Galaxy
346 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) 380 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1)
347 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath 381 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
348 } 382 }
349 } 383 }
350 # zipfile - if the file are passed in the command arguments -> refresh zipfile 384 # zipfile - if the file are passed in the command arguments -> refresh zipfile
351 if (!is.null(args[[paste0("zipfile",prefix)]])) 385 if (!is.null(args[[paste0("zipfile", prefix)]]))
352 zipfile <- args[[paste0("zipfile",prefix)]] 386 zipfile <- args[[paste0("zipfile", prefix)]]
353 387
354 # single 388 # single
355 if(!is.null(singlefile) && (length("singlefile")>0)) { 389 if (!is.null(singlefile) && (length("singlefile") > 0)) {
356 files <- vector() 390 files <- vector()
357 for (singlefile_sampleName in names(singlefile)) { 391 for (singlefile_sampleName in names(singlefile)) {
358 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] 392 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
359 if(!file.exists(singlefile_galaxyPath)){ 393 if (!file.exists(singlefile_galaxyPath)) {
360 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") 394 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!")
361 print(error_message); stop(error_message) 395 print(error_message); stop(error_message)
362 } 396 }
363 397
364 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) 398 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = T)))
365 file.copy(singlefile_galaxyPath, singlefile_sampleName) 399 file.copy(singlefile_galaxyPath, singlefile_sampleName)
366 files <- c(files, singlefile_sampleName) 400 files <- c(files, singlefile_sampleName)
367 } 401 }
368 } 402 }
369 # zipfile 403 # zipfile
370 if(!is.null(zipfile) && (zipfile != "")) { 404 if (!is.null(zipfile) && (zipfile != "")) {
371 if(!file.exists(zipfile)){ 405 if (!file.exists(zipfile)) {
372 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") 406 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!")
373 print(error_message) 407 print(error_message)
374 stop(error_message) 408 stop(error_message)
375 } 409 }
376 suppressWarnings(unzip(zipfile, unzip="unzip")) 410 suppressWarnings(unzip(zipfile, unzip = "unzip"))
377 411
378 #get the directory name 412 #get the directory name
379 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) 413 suppressWarnings(filesInZip <- unzip(zipfile, list = T))
380 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) 414 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
381 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] 415 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
382 directory <- "." 416 directory <- "."
383 if (length(directories) == 1) directory <- directories 417 if (length(directories) == 1) directory <- directories
384 418
385 cat("files_root_directory\t",directory,"\n") 419 cat("files_root_directory\t", directory, "\n")
386 420
387 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 421 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
388 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") 422 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
389 info <- file.info(directory) 423 info <- file.info(directory)
390 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) 424 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
391 files <- c(directory[!info$isdir], listed) 425 files <- c(directory[!info$isdir], listed)
392 exists <- file.exists(files) 426 exists <- file.exists(files)
393 files <- files[exists] 427 files <- files[exists]
394 428
395 } 429 }
396 return(list(zipfile=zipfile, singlefile=singlefile, files=files)) 430 return(list(zipfile = zipfile, singlefile = singlefile, files = files))
397 431
398 } 432 }
399 433
400 434
401 # This function retrieve a xset like object 435 # This function retrieve a xset like object
402 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 436 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
403 getxcmsSetObject <- function(xobject) { 437 getxcmsSetObject <- function(xobject) {
404 # XCMS 1.x 438 # XCMS 1.x
405 if (class(xobject) == "xcmsSet") 439 if (class(xobject) == "xcmsSet")
406 return (xobject) 440 return(xobject)
407 # XCMS 3.x 441 # XCMS 3.x
408 if (class(xobject) == "XCMSnExp") { 442 if (class(xobject) == "XCMSnExp") {
409 # Get the legacy xcmsSet object 443 # Get the legacy xcmsSet object
410 suppressWarnings(xset <- as(xobject, 'xcmsSet')) 444 suppressWarnings(xset <- as(xobject, "xcmsSet"))
411 if (!is.null(xset@phenoData$sample_group)) 445 if (!is.null(xset@phenoData$sample_group))
412 sampclass(xset) <- xset@phenoData$sample_group 446 sampclass(xset) <- xset@phenoData$sample_group
413 else 447 else
414 sampclass(xset) <- "." 448 sampclass(xset) <- "."
415 return (xset) 449 return(xset)
416 } 450 }
417 } 451 }