comparison lib.r @ 14:8846a03995d3 draft

planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics/ commit 2cb157bd9a8701a3d6874e084032cbd050b8953e
author workflow4metabolomics
date Mon, 11 Sep 2023 09:18:38 +0000
parents 024974037c4e
children
comparison
equal deleted inserted replaced
13:024974037c4e 14:8846a03995d3
2 # lib.r for Galaxy Workflow4Metabolomics xcms tools 2 # lib.r for Galaxy Workflow4Metabolomics xcms tools
3 3
4 #@author G. Le Corguille 4 #@author G. Le Corguille
5 # solve an issue with batch if arguments are logical TRUE/FALSE 5 # solve an issue with batch if arguments are logical TRUE/FALSE
6 parseCommandArgs <- function(...) { 6 parseCommandArgs <- function(...) {
7 args <- batch::parseCommandArgs(...) 7 args <- batch::parseCommandArgs(...)
8 for (key in names(args)) { 8 for (key in names(args)) {
9 if (args[key] %in% c("TRUE", "FALSE")) 9 if (args[key] %in% c("TRUE", "FALSE"))
10 args[key] <- as.logical(args[key]) 10 args[key] <- as.logical(args[key])
11 } 11 }
12 return(args) 12 return(args)
13 } 13 }
14 14
15 #@author G. Le Corguille 15 #@author G. Le Corguille
16 # This function will 16 # This function will
17 # - load the packages 17 # - load the packages
18 # - display the sessionInfo 18 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) { 19 loadAndDisplayPackages <- function(pkgs) {
20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE))) 20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE)))
21 21
22 sessioninfo <- sessionInfo() 22 sessioninfo <- sessionInfo()
23 cat(sessioninfo$R.version$version.string, "\n") 23 cat(sessioninfo$R.version$version.string, "\n")
24 cat("Main packages:\n") 24 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) { 25 for (pkg in names(sessioninfo$otherPkgs)) {
26 cat(paste(pkg, packageVersion(pkg)), "\t") 26 cat(paste(pkg, packageVersion(pkg)), "\t")
27 } 27 }
28 cat("\n") 28 cat("\n")
29 cat("Other loaded packages:\n") 29 cat("Other loaded packages:\n")
30 for (pkg in names(sessioninfo$loadedOnly)) { 30 for (pkg in names(sessioninfo$loadedOnly)) {
31 cat(paste(pkg, packageVersion(pkg)), "\t") 31 cat(paste(pkg, packageVersion(pkg)), "\t")
32 } 32 }
33 cat("\n") 33 cat("\n")
34 } 34 }
35 35
36 #@author G. Le Corguille 36 #@author G. Le Corguille
37 # This function merge several chromBPI or chromTIC into one. 37 # This function merge several chromBPI or chromTIC into one.
38 mergeChrom <- function(chrom_merged, chrom) { 38 mergeChrom <- function(chrom_merged, chrom) {
39 if (is.null(chrom_merged)) return(NULL) 39 if (is.null(chrom_merged)) return(NULL)
40 chrom_merged@.Data <- cbind(chrom_merged@.Data, chrom@.Data) 40 chrom_merged@.Data <- cbind(chrom_merged@.Data, chrom@.Data)
41 return(chrom_merged) 41 return(chrom_merged)
42 } 42 }
43 43
44 #@author G. Le Corguille 44 #@author G. Le Corguille
45 # This function merge several xdata into one. 45 # This function merge several xdata into one.
46 mergeXData <- function(args) { 46 mergeXData <- function(args) {
47 chromTIC <- NULL 47 chromTIC <- NULL
48 chromBPI <- NULL 48 chromBPI <- NULL
49 chromTIC_adjusted <- NULL 49 chromTIC_adjusted <- NULL
50 chromBPI_adjusted <- NULL 50 chromBPI_adjusted <- NULL
51 md5sumList <- NULL 51 md5sumList <- NULL
52 for (image in args$images) { 52 for (image in args$images) {
53 53
54 load(image) 54 load(image)
55 # Handle infiles 55 # Handle infiles
56 if (!exists("singlefile")) singlefile <- NULL 56 if (!exists("singlefile")) singlefile <- NULL
57 if (!exists("zipfile")) zipfile <- NULL 57 if (!exists("zipfile")) zipfile <- NULL
58 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args) 58 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args)
59 zipfile <- rawFilePath$zipfile 59 zipfile <- rawFilePath$zipfile
60 singlefile <- rawFilePath$singlefile 60 singlefile <- rawFilePath$singlefile
61 61
62 if (exists("raw_data")) xdata <- raw_data 62 if (exists("raw_data")) xdata <- raw_data
63 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") 63 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
64 64
65 cat(sampleNamesList$sampleNamesOrigin, "\n") 65 cat(sampleNamesList$sampleNamesOrigin, "\n")
66 66
67 if (!exists("xdata_merged")) { 67 if (!exists("xdata_merged")) {
68 xdata_merged <- xdata 68 xdata_merged <- xdata
69 singlefile_merged <- singlefile 69 singlefile_merged <- singlefile
70 md5sumList_merged <- md5sumList 70 md5sumList_merged <- md5sumList
71 sampleNamesList_merged <- sampleNamesList 71 sampleNamesList_merged <- sampleNamesList
72 chromTIC_merged <- chromTIC 72 chromTIC_merged <- chromTIC
73 chromBPI_merged <- chromBPI 73 chromBPI_merged <- chromBPI
74 chromTIC_adjusted_merged <- chromTIC_adjusted 74 chromTIC_adjusted_merged <- chromTIC_adjusted
75 chromBPI_adjusted_merged <- chromBPI_adjusted 75 chromBPI_adjusted_merged <- chromBPI_adjusted
76 } else { 76 } else {
77 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged, xdata) 77 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged, xdata)
78 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata) 78 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata)
79 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") 79 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
80 80
81 singlefile_merged <- c(singlefile_merged, singlefile) 81 singlefile_merged <- c(singlefile_merged, singlefile)
82 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin) 82 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin)
83 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin) 83 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin)
84 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames) 84 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames)
85 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) 85 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC)
86 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) 86 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI)
87 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) 87 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted)
88 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) 88 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted)
89 } 89 }
90 } 90 }
91 rm(image) 91 rm(image)
92 xdata <- xdata_merged; rm(xdata_merged) 92 xdata <- xdata_merged
93 singlefile <- singlefile_merged; rm(singlefile_merged) 93 rm(xdata_merged)
94 md5sumList <- md5sumList_merged; rm(md5sumList_merged) 94 singlefile <- singlefile_merged
95 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) 95 rm(singlefile_merged)
96 96 md5sumList <- md5sumList_merged
97 if (!is.null(args$sampleMetadata)) { 97 rm(md5sumList_merged)
98 cat("\tXSET PHENODATA SETTING...\n") 98 sampleNamesList <- sampleNamesList_merged
99 sampleMetadataFile <- args$sampleMetadata 99 rm(sampleNamesList_merged)
100 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = F) 100
101 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)] 101 if (!is.null(args$sampleMetadata)) {
102 102 cat("\tXSET PHENODATA SETTING...\n")
103 if (any(is.na(pData(xdata)$sample_group))) { 103 sampleMetadataFile <- args$sampleMetadata
104 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] 104 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = FALSE)
105 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " ")) 105 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)]
106 print(error_message) 106
107 stop(error_message) 107 if (any(is.na(pData(xdata)$sample_group))) {
108 } 108 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
109 } 109 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " "))
110 110 print(error_message)
111 if (!is.null(chromTIC_merged)) { 111 stop(error_message)
112 chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData 112 }
113 } 113 }
114 if (!is.null(chromBPI_merged)) { 114
115 chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData 115 if (!is.null(chromTIC_merged)) {
116 } 116 chromTIC <- chromTIC_merged
117 if (!is.null(chromTIC_adjusted_merged)) { 117 chromTIC@phenoData <- xdata@phenoData
118 chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData 118 }
119 } 119 if (!is.null(chromBPI_merged)) {
120 if (!is.null(chromBPI_adjusted_merged)) { 120 chromBPI <- chromBPI_merged
121 chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData 121 chromBPI@phenoData <- xdata@phenoData
122 } 122 }
123 123 if (!is.null(chromTIC_adjusted_merged)) {
124 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted)) 124 chromTIC_adjusted <- chromTIC_adjusted_merged
125 chromTIC_adjusted@phenoData <- xdata@phenoData
126 }
127 if (!is.null(chromBPI_adjusted_merged)) {
128 chromBPI_adjusted <- chromBPI_adjusted_merged
129 chromBPI_adjusted@phenoData <- xdata@phenoData
130 }
131
132 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted))
125 } 133 }
126 134
127 #@author G. Le Corguille 135 #@author G. Le Corguille
128 # This function convert if it is required the Retention Time in minutes 136 # This function convert if it is required the Retention Time in minutes
129 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { 137 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
130 if (convertRTMinute) { 138 if (convertRTMinute) {
131 #converting the retention times (seconds) into minutes 139 #converting the retention times (seconds) into minutes
132 print("converting the retention times into minutes in the variableMetadata") 140 print("converting the retention times into minutes in the variableMetadata")
133 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60 141 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60
134 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60 142 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60
135 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60 143 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60
136 } 144 }
137 return(variableMetadata) 145 return(variableMetadata)
138 } 146 }
139 147
140 #@author G. Le Corguille 148 #@author G. Le Corguille
141 # This function format ions identifiers 149 # This function format ions identifiers
142 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) { 150 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) {
143 splitDeco <- strsplit(as.character(variableMetadata$name), "_") 151 splitDeco <- strsplit(as.character(variableMetadata$name), "_")
144 idsDeco <- sapply(splitDeco, 152 idsDeco <- sapply(splitDeco,
145 function(x) { 153 function(x) {
146 deco <- unlist(x)[2]; if (is.na(deco)) return("") else return(paste0("_", deco)) 154 deco <- unlist(x)[2]
147 } 155 if (is.na(deco)) return("") else return(paste0("_", deco))
148 ) 156 }
149 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco)) 157 )
150 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))]) 158 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco))
151 return(variableMetadata) 159 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))])
160 return(variableMetadata)
152 } 161 }
153 162
154 #@author G. Le Corguille 163 #@author G. Le Corguille
155 # This function convert the remain NA to 0 in the dataMatrix 164 # This function convert the remain NA to 0 in the dataMatrix
156 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { 165 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
157 if (naTOzero) { 166 if (naTOzero) {
158 dataMatrix[is.na(dataMatrix)] <- 0 167 dataMatrix[is.na(dataMatrix)] <- 0
159 } 168 }
160 return(dataMatrix) 169 return(dataMatrix)
161 } 170 }
162 171
163 #@author G. Le Corguille 172 #@author G. Le Corguille
164 # Draw the plotChromPeakDensity 3 per page in a pdf file 173 # Draw the plotChromPeakDensity 3 per page in a pdf file
165 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) { 174 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) {
166 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12) 175 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12)
167 176
168 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) 177 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
169 178
170 if (length(unique(xdata$sample_group)) < 10) { 179 if (length(unique(xdata$sample_group)) < 10) {
171 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 180 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
172 }else{ 181 } else {
173 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") 182 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
174 } 183 }
175 names(group_colors) <- unique(xdata$sample_group) 184 names(group_colors) <- unique(xdata$sample_group)
176 col_per_samp <- as.character(xdata$sample_group) 185 col_per_samp <- as.character(xdata$sample_group)
177 for (i in seq_len(length(group_colors))) { 186 for (i in seq_len(length(group_colors))) {
178 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i] 187 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i]
179 } 188 }
180 189
181 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) 190 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
182 for (i in seq_len(nrow(featureDefinitions(xdata)))) { 191 for (i in seq_len(nrow(featureDefinitions(xdata)))) {
183 mzmin <- featureDefinitions(xdata)[i, ]$mzmin 192 mzmin <- featureDefinitions(xdata)[i, ]$mzmin
184 mzmax <- featureDefinitions(xdata)[i, ]$mzmax 193 mzmax <- featureDefinitions(xdata)[i, ]$mzmax
185 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit))) 194 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit)))
186 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) 195 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
187 } 196 }
188 197
189 dev.off() 198 dev.off()
190 } 199 }
191 200
192 #@author G. Le Corguille 201 #@author G. Le Corguille
193 # Draw the plotChromPeakDensity 3 per page in a pdf file 202 # Draw the plotChromPeakDensity 3 per page in a pdf file
194 getPlotAdjustedRtime <- function(xdata) { 203 getPlotAdjustedRtime <- function(xdata) {
195 204
196 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12) 205 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12)
197 206
198 # Color by group 207 # Color by group
199 if (length(unique(xdata$sample_group)) < 10) { 208 if (length(unique(xdata$sample_group)) < 10) {
200 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 209 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
201 } else { 210 } else {
202 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") 211 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
203 } 212 }
204 if (length(group_colors) > 1) { 213 if (length(group_colors) > 1) {
205 names(group_colors) <- unique(xdata$sample_group) 214 names(group_colors) <- unique(xdata$sample_group)
206 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) 215 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
207 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) 216 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
208 } 217 }
209 218
210 # Color by sample 219 # Color by sample
211 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) 220 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
212 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1) 221 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
213 222
214 dev.off() 223 dev.off()
215 } 224 }
216 225
217 #@author G. Le Corguille 226 #@author G. Le Corguille
218 # value: intensity values to be used into, maxo or intb 227 # value: intensity values to be used into, maxo or intb
219 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = F, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { 228 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = FALSE, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = TRUE, variableMetadataOutput, dataMatrixOutput, sampleNamesList) {
220 dataMatrix <- featureValues(xdata, method = "medret", value = intval) 229 dataMatrix <- featureValues(xdata, method = "medret", value = intval)
221 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) 230 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix)))
222 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix) 231 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix)
223 variableMetadata <- featureDefinitions(xdata) 232 variableMetadata <- featureDefinitions(xdata)
224 colnames(variableMetadata)[1] <- "mz"; colnames(variableMetadata)[4] <- "rt" 233 colnames(variableMetadata)[1] <- "mz"
225 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata) 234 colnames(variableMetadata)[4] <- "rt"
226 235 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata)
227 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) 236
228 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ) 237 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
229 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) 238 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ)
230 239 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)
231 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export 240
232 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") 241 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export
233 242 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",")
234 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = F, row.names = F) 243
235 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = F, row.names = F) 244 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = FALSE, row.names = FALSE)
245 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = FALSE, row.names = FALSE)
236 246
237 } 247 }
238 248
239 #@author G. Le Corguille 249 #@author G. Le Corguille
240 # It allow different of field separators 250 # It allow different of field separators
241 getDataFrameFromFile <- function(filename, header = T) { 251 getDataFrameFromFile <- function(filename, header = TRUE) {
242 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = F) 252 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = FALSE)
243 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = F) 253 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = FALSE)
244 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = F) 254 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = FALSE)
245 if (ncol(myDataFrame) < 2) { 255 if (ncol(myDataFrame) < 2) {
246 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" 256 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
247 print(error_message) 257 print(error_message)
248 stop(error_message) 258 stop(error_message)
249 } 259 }
250 return(myDataFrame) 260 return(myDataFrame)
251 } 261 }
252 262
253 #@author G. Le Corguille 263 #@author G. Le Corguille
254 # Draw the BPI and TIC graphics 264 # Draw the BPI and TIC graphics
255 # colored by sample names or class names 265 # colored by sample names or class names
256 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") { 266 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") {
257 267
258 if (aggregationFun == "sum") 268 if (aggregationFun == "sum")
259 type <- "Total Ion Chromatograms" 269 type <- "Total Ion Chromatograms"
260 else 270 else
261 type <- "Base Peak Intensity Chromatograms" 271 type <- "Base Peak Intensity Chromatograms"
262 272
263 adjusted <- "Raw" 273 adjusted <- "Raw"
264 if (hasAdjustedRtime(xdata)) 274 if (hasAdjustedRtime(xdata))
265 adjusted <- "Adjusted" 275 adjusted <- "Adjusted"
266 276
267 main <- paste(type, ":", adjusted, "data") 277 main <- paste(type, ":", adjusted, "data")
268 278
269 pdf(pdfname, width = 16, height = 10) 279 pdf(pdfname, width = 16, height = 10)
270 280
271 # Color by group 281 # Color by group
272 if (length(unique(xdata$sample_group)) < 10) { 282 if (length(unique(xdata$sample_group)) < 10) {
273 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") 283 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
274 }else{ 284 } else {
275 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") 285 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3")
276 } 286 }
277 if (length(group_colors) > 1) { 287 if (length(group_colors) > 1) {
278 names(group_colors) <- unique(xdata$sample_group) 288 names(group_colors) <- unique(xdata$sample_group)
279 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none") 289 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none")
280 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) 290 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1)
281 } 291 }
282 292
283 # Color by sample 293 # Color by sample
284 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none") 294 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none")
285 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1) 295 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1)
286 296
287 dev.off() 297 dev.off()
288 } 298 }
289 299
290 300
291 # Get the polarities from all the samples of a condition 301 # Get the polarities from all the samples of a condition
292 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 302 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
293 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 303 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
294 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") { 304 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") {
295 cat("Creating the sampleMetadata file...\n") 305 cat("Creating the sampleMetadata file...\n")
296 306
297 #Create the sampleMetada dataframe 307 #Create the sampleMetada dataframe
298 sampleMetadata <- xdata@phenoData@data 308 sampleMetadata <- xdata@phenoData@data
299 rownames(sampleMetadata) <- NULL 309 rownames(sampleMetadata) <- NULL
300 colnames(sampleMetadata) <- c("sample_name", "class") 310 colnames(sampleMetadata) <- c("sample_name", "class")
301 311
302 sampleNamesOrigin <- sampleMetadata$sample_name 312 sampleNamesOrigin <- sampleMetadata$sample_name
303 sampleNamesMakeNames <- make.names(sampleNamesOrigin) 313 sampleNamesMakeNames <- make.names(sampleNamesOrigin)
304 314
305 if (any(duplicated(sampleNamesMakeNames))) { 315 if (any(duplicated(sampleNamesMakeNames))) {
306 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 316 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
307 for (sampleName in sampleNamesOrigin) { 317 for (sampleName in sampleNamesOrigin) {
308 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr()) 318 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr())
309 } 319 }
310 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 320 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
311 } 321 }
312 322
313 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { 323 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
314 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") 324 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
315 for (sampleName in sampleNamesOrigin) { 325 for (sampleName in sampleNamesOrigin) {
316 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n")) 326 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n"))
317 } 327 }
318 } 328 }
319 329
320 sampleMetadata$sample_name <- sampleNamesMakeNames 330 sampleMetadata$sample_name <- sampleNamesMakeNames
321 331
322 332
323 #For each sample file, the following actions are done 333 #For each sample file, the following actions are done
324 for (fileIdx in seq_len(length(fileNames(xdata)))) { 334 for (fileIdx in seq_len(length(fileNames(xdata)))) {
325 #Check if the file is in the CDF format 335 #Check if the file is in the CDF format
326 if (!mzR:::netCDFIsFile(fileNames(xdata))) { 336 if (!mzR:::netCDFIsFile(fileNames(xdata))) {
327 337
328 # If the column isn't exist, with add one filled with NA 338 # If the column isn't exist, with add one filled with NA
329 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA 339 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
330 340
331 #Extract the polarity (a list of polarities) 341 #Extract the polarity (a list of polarities)
332 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"] 342 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"]
333 #Verify if all the scans have the same polarity 343 #Verify if all the scans have the same polarity
334 uniq_list <- unique(polarity) 344 uniq_list <- unique(polarity)
335 if (length(uniq_list) > 1) { 345 if (length(uniq_list) > 1) {
336 polarity <- "mixed" 346 polarity <- "mixed"
337 } else { 347 } else {
338 polarity <- as.character(uniq_list) 348 polarity <- as.character(uniq_list)
339 } 349 }
340 350
341 #Set the polarity attribute 351 #Set the polarity attribute
342 sampleMetadata$polarity[fileIdx] <- polarity 352 sampleMetadata$polarity[fileIdx] <- polarity
343 } 353 }
344 354
345 } 355 }
346 356
347 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput) 357 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput)
348 358
349 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames)) 359 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames))
350 360
351 } 361 }
352 362
353 363
354 # This function will compute MD5 checksum to check the data integrity 364 # This function will compute MD5 checksum to check the data integrity
355 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 365 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
356 getMd5sum <- function(files) { 366 getMd5sum <- function(files) {
357 cat("Compute md5 checksum...\n") 367 cat("Compute md5 checksum...\n")
358 library(tools) 368 library(tools)
359 return(as.matrix(md5sum(files))) 369 return(as.matrix(md5sum(files)))
360 } 370 }
361 371
362 # This function retrieve the raw file in the working directory 372 # This function retrieve the raw file in the working directory
363 # - if zipfile: unzip the file with its directory tree 373 # - if zipfile: unzip the file with its directory tree
364 # - if singlefiles: set symlink with the good filename 374 # - if singlefiles: set symlink with the good filename
365 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 375 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
366 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") { 376 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") {
367 377
368 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") 378 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
369 379
370 # single - if the file are passed in the command arguments -> refresh singlefile 380 # single - if the file are passed in the command arguments -> refresh singlefile
371 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) { 381 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) {
372 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|")) 382 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|"))
373 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|")) 383 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|"))
374 384
375 singlefile <- NULL 385 singlefile <- NULL
376 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) { 386 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) {
377 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] 387 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
378 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] 388 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
379 # In case, an url is used to import data within Galaxy 389 # In case, an url is used to import data within Galaxy
380 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1) 390 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1)
381 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath 391 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
392 }
393 }
394 # zipfile - if the file are passed in the command arguments -> refresh zipfile
395 if (!is.null(args[[paste0("zipfile", prefix)]]))
396 zipfile <- args[[paste0("zipfile", prefix)]]
397
398 # single
399 if (!is.null(singlefile) && (length("singlefile") > 0)) {
400 files <- vector()
401 for (singlefile_sampleName in names(singlefile)) {
402 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
403 if (!file.exists(singlefile_galaxyPath)) {
404 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!")
405 print(error_message)
406 stop(error_message)
382 } 407 }
383 } 408
384 # zipfile - if the file are passed in the command arguments -> refresh zipfile 409 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = TRUE)))
385 if (!is.null(args[[paste0("zipfile", prefix)]])) 410 file.copy(singlefile_galaxyPath, singlefile_sampleName)
386 zipfile <- args[[paste0("zipfile", prefix)]] 411 files <- c(files, singlefile_sampleName)
387 412 }
388 # single 413 }
389 if (!is.null(singlefile) && (length("singlefile") > 0)) { 414 # zipfile
390 files <- vector() 415 if (!is.null(zipfile) && (zipfile != "")) {
391 for (singlefile_sampleName in names(singlefile)) { 416 if (!file.exists(zipfile)) {
392 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] 417 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!")
393 if (!file.exists(singlefile_galaxyPath)) { 418 print(error_message)
394 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!") 419 stop(error_message)
395 print(error_message); stop(error_message) 420 }
396 } 421 suppressWarnings(unzip(zipfile, unzip = "unzip"))
397 422
398 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = T))) 423 #get the directory name
399 file.copy(singlefile_galaxyPath, singlefile_sampleName) 424 suppressWarnings(filesInZip <- unzip(zipfile, list = TRUE))
400 files <- c(files, singlefile_sampleName) 425 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
401 } 426 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
402 } 427 directory <- "."
403 # zipfile 428 if (length(directories) == 1) directory <- directories
404 if (!is.null(zipfile) && (zipfile != "")) { 429
405 if (!file.exists(zipfile)) { 430 cat("files_root_directory\t", directory, "\n")
406 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!") 431
407 print(error_message) 432 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
408 stop(error_message) 433 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
409 } 434 info <- file.info(directory)
410 suppressWarnings(unzip(zipfile, unzip = "unzip")) 435 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
411 436 files <- c(directory[!info$isdir], listed)
412 #get the directory name 437 exists <- file.exists(files)
413 suppressWarnings(filesInZip <- unzip(zipfile, list = T)) 438 files <- files[exists]
414 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1]))) 439
415 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] 440 }
416 directory <- "." 441 return(list(zipfile = zipfile, singlefile = singlefile, files = files))
417 if (length(directories) == 1) directory <- directories
418
419 cat("files_root_directory\t", directory, "\n")
420
421 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
422 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
423 info <- file.info(directory)
424 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
425 files <- c(directory[!info$isdir], listed)
426 exists <- file.exists(files)
427 files <- files[exists]
428
429 }
430 return(list(zipfile = zipfile, singlefile = singlefile, files = files))
431
432 } 442 }
433 443
434 444
435 # This function retrieve a xset like object 445 # This function retrieve a xset like object
436 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 446 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
437 getxcmsSetObject <- function(xobject) { 447 getxcmsSetObject <- function(xobject) {
438 # XCMS 1.x 448 # XCMS 1.x
439 if (class(xobject) == "xcmsSet") 449 if (class(xobject) == "xcmsSet")
440 return(xobject) 450 return(xobject)
441 # XCMS 3.x 451 # XCMS 3.x
442 if (class(xobject) == "XCMSnExp") { 452 if (class(xobject) == "XCMSnExp") {
443 # Get the legacy xcmsSet object 453 # Get the legacy xcmsSet object
444 suppressWarnings(xset <- as(xobject, "xcmsSet")) 454 suppressWarnings(xset <- as(xobject, "xcmsSet"))
445 if (!is.null(xset@phenoData$sample_group)) 455 if (!is.null(xset@phenoData$sample_group))
446 sampclass(xset) <- xset@phenoData$sample_group 456 sampclass(xset) <- xset@phenoData$sample_group
447 else 457 else
448 sampclass(xset) <- "." 458 sampclass(xset) <- "."
449 return(xset) 459 return(xset)
450 } 460 }
451 } 461 }