Mercurial > repos > lecorguille > xcms_merge
comparison lib.r @ 14:5bd125a3f3b0 draft
"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit dcc90f9cf76e6980c0a7d9698c89fab826e7adae"
author | workflow4metabolomics |
---|---|
date | Wed, 07 Apr 2021 12:07:13 +0000 |
parents | 39797c768bba |
children | 58b5a4b6e1da |
comparison
equal
deleted
inserted
replaced
13:39797c768bba | 14:5bd125a3f3b0 |
---|---|
4 #@author G. Le Corguille | 4 #@author G. Le Corguille |
5 # solve an issue with batch if arguments are logical TRUE/FALSE | 5 # solve an issue with batch if arguments are logical TRUE/FALSE |
6 parseCommandArgs <- function(...) { | 6 parseCommandArgs <- function(...) { |
7 args <- batch::parseCommandArgs(...) | 7 args <- batch::parseCommandArgs(...) |
8 for (key in names(args)) { | 8 for (key in names(args)) { |
9 if (args[key] %in% c("TRUE","FALSE")) | 9 if (args[key] %in% c("TRUE", "FALSE")) |
10 args[key] = as.logical(args[key]) | 10 args[key] <- as.logical(args[key]) |
11 } | 11 } |
12 return(args) | 12 return(args) |
13 } | 13 } |
14 | 14 |
15 #@author G. Le Corguille | 15 #@author G. Le Corguille |
16 # This function will | 16 # This function will |
17 # - load the packages | 17 # - load the packages |
18 # - display the sessionInfo | 18 # - display the sessionInfo |
19 loadAndDisplayPackages <- function(pkgs) { | 19 loadAndDisplayPackages <- function(pkgs) { |
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | 20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE))) |
21 | 21 |
22 sessioninfo = sessionInfo() | 22 sessioninfo <- sessionInfo() |
23 cat(sessioninfo$R.version$version.string,"\n") | 23 cat(sessioninfo$R.version$version.string, "\n") |
24 cat("Main packages:\n") | 24 cat("Main packages:\n") |
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | 25 for (pkg in names(sessioninfo$otherPkgs)) { |
26 cat(paste(pkg, packageVersion(pkg)), "\t") | |
27 } | |
28 cat("\n") | |
26 cat("Other loaded packages:\n") | 29 cat("Other loaded packages:\n") |
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | 30 for (pkg in names(sessioninfo$loadedOnly)) { |
31 cat(paste(pkg, packageVersion(pkg)), "\t") | |
32 } | |
33 cat("\n") | |
28 } | 34 } |
29 | 35 |
30 #@author G. Le Corguille | 36 #@author G. Le Corguille |
31 # This function merge several chromBPI or chromTIC into one. | 37 # This function merge several chromBPI or chromTIC into one. |
32 mergeChrom <- function(chrom_merged, chrom) { | 38 mergeChrom <- function(chrom_merged, chrom) { |
41 chromTIC <- NULL | 47 chromTIC <- NULL |
42 chromBPI <- NULL | 48 chromBPI <- NULL |
43 chromTIC_adjusted <- NULL | 49 chromTIC_adjusted <- NULL |
44 chromBPI_adjusted <- NULL | 50 chromBPI_adjusted <- NULL |
45 md5sumList <- NULL | 51 md5sumList <- NULL |
46 for(image in args$images) { | 52 for (image in args$images) { |
47 | 53 |
48 load(image) | 54 load(image) |
49 # Handle infiles | 55 # Handle infiles |
50 if (!exists("singlefile")) singlefile <- NULL | 56 if (!exists("singlefile")) singlefile <- NULL |
51 if (!exists("zipfile")) zipfile <- NULL | 57 if (!exists("zipfile")) zipfile <- NULL |
52 rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args) | 58 rawFilePath <- retrieveRawfileInTheWorkingDir(singlefile, zipfile, args) |
53 zipfile <- rawFilePath$zipfile | 59 zipfile <- rawFilePath$zipfile |
54 singlefile <- rawFilePath$singlefile | 60 singlefile <- rawFilePath$singlefile |
55 | 61 |
56 if (exists("raw_data")) xdata <- raw_data | 62 if (exists("raw_data")) xdata <- raw_data |
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") | 63 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") |
58 | 64 |
59 cat(sampleNamesList$sampleNamesOrigin,"\n") | 65 cat(sampleNamesList$sampleNamesOrigin, "\n") |
60 | 66 |
61 if (!exists("xdata_merged")) { | 67 if (!exists("xdata_merged")) { |
62 xdata_merged <- xdata | 68 xdata_merged <- xdata |
63 singlefile_merged <- singlefile | 69 singlefile_merged <- singlefile |
64 md5sumList_merged <- md5sumList | 70 md5sumList_merged <- md5sumList |
66 chromTIC_merged <- chromTIC | 72 chromTIC_merged <- chromTIC |
67 chromBPI_merged <- chromBPI | 73 chromBPI_merged <- chromBPI |
68 chromTIC_adjusted_merged <- chromTIC_adjusted | 74 chromTIC_adjusted_merged <- chromTIC_adjusted |
69 chromBPI_adjusted_merged <- chromBPI_adjusted | 75 chromBPI_adjusted_merged <- chromBPI_adjusted |
70 } else { | 76 } else { |
71 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) | 77 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged, xdata) |
72 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged,xdata) | 78 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged, xdata) |
73 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") | 79 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") |
74 | 80 |
75 singlefile_merged <- c(singlefile_merged,singlefile) | 81 singlefile_merged <- c(singlefile_merged, singlefile) |
76 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) | 82 md5sumList_merged$origin <- rbind(md5sumList_merged$origin, md5sumList$origin) |
77 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) | 83 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin, sampleNamesList$sampleNamesOrigin) |
78 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) | 84 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames, sampleNamesList$sampleNamesMakeNames) |
79 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) | 85 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) |
80 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) | 86 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) |
81 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) | 87 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) |
82 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) | 88 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) |
83 } | 89 } |
89 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) | 95 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) |
90 | 96 |
91 if (!is.null(args$sampleMetadata)) { | 97 if (!is.null(args$sampleMetadata)) { |
92 cat("\tXSET PHENODATA SETTING...\n") | 98 cat("\tXSET PHENODATA SETTING...\n") |
93 sampleMetadataFile <- args$sampleMetadata | 99 sampleMetadataFile <- args$sampleMetadata |
94 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) | 100 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header = F) |
95 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] | 101 xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name, sampleMetadata$V1)] |
96 | 102 |
97 if (any(is.na(pData(xdata)$sample_group))) { | 103 if (any(is.na(pData(xdata)$sample_group))) { |
98 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] | 104 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] |
99 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) | 105 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse = " ")) |
100 print(error_message) | 106 print(error_message) |
101 stop(error_message) | 107 stop(error_message) |
102 } | 108 } |
103 } | 109 } |
104 | 110 |
105 if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData } | 111 if (!is.null(chromTIC_merged)) { |
106 if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData } | 112 chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData |
107 if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData } | 113 } |
108 if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData } | 114 if (!is.null(chromBPI_merged)) { |
109 | 115 chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData |
110 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted)) | 116 } |
117 if (!is.null(chromTIC_adjusted_merged)) { | |
118 chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData | |
119 } | |
120 if (!is.null(chromBPI_adjusted_merged)) { | |
121 chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData | |
122 } | |
123 | |
124 return(list("xdata" = xdata, "singlefile" = singlefile, "md5sumList" = md5sumList, "sampleNamesList" = sampleNamesList, "chromTIC" = chromTIC, "chromBPI" = chromBPI, "chromTIC_adjusted" = chromTIC_adjusted, "chromBPI_adjusted" = chromBPI_adjusted)) | |
111 } | 125 } |
112 | 126 |
113 #@author G. Le Corguille | 127 #@author G. Le Corguille |
114 # This function convert if it is required the Retention Time in minutes | 128 # This function convert if it is required the Retention Time in minutes |
115 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | 129 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { |
116 if (convertRTMinute){ | 130 if (convertRTMinute) { |
117 #converting the retention times (seconds) into minutes | 131 #converting the retention times (seconds) into minutes |
118 print("converting the retention times into minutes in the variableMetadata") | 132 print("converting the retention times into minutes in the variableMetadata") |
119 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 | 133 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60 |
120 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 | 134 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60 |
121 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 | 135 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60 |
122 } | 136 } |
123 return (variableMetadata) | 137 return(variableMetadata) |
124 } | 138 } |
125 | 139 |
126 #@author G. Le Corguille | 140 #@author G. Le Corguille |
127 # This function format ions identifiers | 141 # This function format ions identifiers |
128 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { | 142 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) { |
129 splitDeco <- strsplit(as.character(variableMetadata$name),"_") | 143 splitDeco <- strsplit(as.character(variableMetadata$name), "_") |
130 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) | 144 idsDeco <- sapply(splitDeco, |
131 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) | 145 function(x) { |
132 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | 146 deco <- unlist(x)[2]; if (is.na(deco)) return("") else return(paste0("_", deco)) |
147 } | |
148 ) | |
149 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco)) | |
150 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))]) | |
133 return(variableMetadata) | 151 return(variableMetadata) |
134 } | 152 } |
135 | 153 |
136 #@author G. Le Corguille | 154 #@author G. Le Corguille |
137 # This function convert the remain NA to 0 in the dataMatrix | 155 # This function convert the remain NA to 0 in the dataMatrix |
138 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { | 156 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { |
139 if (naTOzero){ | 157 if (naTOzero) { |
140 dataMatrix[is.na(dataMatrix)] <- 0 | 158 dataMatrix[is.na(dataMatrix)] <- 0 |
141 } | 159 } |
142 return (dataMatrix) | 160 return(dataMatrix) |
143 } | 161 } |
144 | 162 |
145 #@author G. Le Corguille | 163 #@author G. Le Corguille |
146 # Draw the plotChromPeakDensity 3 per page in a pdf file | 164 # Draw the plotChromPeakDensity 3 per page in a pdf file |
147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { | 165 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit = 4) { |
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) | 166 pdf(file = "plotChromPeakDensity.pdf", width = 16, height = 12) |
149 | 167 |
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) | 168 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) |
151 | 169 |
152 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | 170 if (length(unique(xdata$sample_group)) < 10) { |
171 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | |
172 }else{ | |
173 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") | |
174 } | |
153 names(group_colors) <- unique(xdata$sample_group) | 175 names(group_colors) <- unique(xdata$sample_group) |
176 col_per_samp <- as.character(xdata$sample_group) | |
177 for (i in seq_len(length(group_colors))) { | |
178 col_per_samp[col_per_samp == (names(group_colors)[i])] <- group_colors[i] | |
179 } | |
154 | 180 |
155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) | 181 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) |
156 for (i in 1:nrow(featureDefinitions(xdata))) { | 182 for (i in seq_len(nrow(featureDefinitions(xdata)))) { |
157 mzmin = featureDefinitions(xdata)[i,]$mzmin | 183 mzmin <- featureDefinitions(xdata)[i, ]$mzmin |
158 mzmax = featureDefinitions(xdata)[i,]$mzmax | 184 mzmax <- featureDefinitions(xdata)[i, ]$mzmax |
159 plotChromPeakDensity(xdata, param = param, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) | 185 plotChromPeakDensity(xdata, param = param, mz = c(mzmin, mzmax), col = col_per_samp, pch = 16, xlim = xlim, main = paste(round(mzmin, mzdigit), round(mzmax, mzdigit))) |
160 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 186 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) |
161 } | 187 } |
162 | 188 |
163 dev.off() | 189 dev.off() |
164 } | 190 } |
165 | 191 |
166 #@author G. Le Corguille | 192 #@author G. Le Corguille |
167 # Draw the plotChromPeakDensity 3 per page in a pdf file | 193 # Draw the plotChromPeakDensity 3 per page in a pdf file |
168 getPlotAdjustedRtime <- function(xdata) { | 194 getPlotAdjustedRtime <- function(xdata) { |
169 | 195 |
170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) | 196 pdf(file = "raw_vs_adjusted_rt.pdf", width = 16, height = 12) |
171 | 197 |
172 # Color by group | 198 # Color by group |
173 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | 199 if (length(unique(xdata$sample_group)) < 10) { |
200 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | |
201 } else { | |
202 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") | |
203 } | |
174 if (length(group_colors) > 1) { | 204 if (length(group_colors) > 1) { |
175 names(group_colors) <- unique(xdata$sample_group) | 205 names(group_colors) <- unique(xdata$sample_group) |
176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) | 206 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) |
177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 207 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) |
178 } | 208 } |
179 | 209 |
180 # Color by sample | 210 # Color by sample |
181 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) | 211 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) |
182 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | 212 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1) |
183 | 213 |
184 dev.off() | 214 dev.off() |
185 } | 215 } |
186 | 216 |
187 #@author G. Le Corguille | 217 #@author G. Le Corguille |
188 # value: intensity values to be used into, maxo or intb | 218 # value: intensity values to be used into, maxo or intb |
189 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { | 219 getPeaklistW4M <- function(xdata, intval = "into", convertRTMinute = F, numDigitsMZ = 4, numDigitsRT = 0, naTOzero = T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) { |
190 dataMatrix <- featureValues(xdata, method="medret", value=intval) | 220 dataMatrix <- featureValues(xdata, method = "medret", value = intval) |
191 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) | 221 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix))) |
192 dataMatrix = cbind(name=groupnames(xdata), dataMatrix) | 222 dataMatrix <- cbind(name = groupnames(xdata), dataMatrix) |
193 variableMetadata <- featureDefinitions(xdata) | 223 variableMetadata <- featureDefinitions(xdata) |
194 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" | 224 colnames(variableMetadata)[1] <- "mz"; colnames(variableMetadata)[4] <- "rt" |
195 variableMetadata = data.frame(name=groupnames(xdata), variableMetadata) | 225 variableMetadata <- data.frame(name = groupnames(xdata), variableMetadata) |
196 | 226 |
197 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) | 227 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) |
198 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | 228 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ) |
199 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) | 229 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) |
200 | 230 |
201 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export | 231 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export |
202 variableMetadata[,"peakidx"] <- vapply(variableMetadata[,"peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") | 232 variableMetadata[, "peakidx"] <- vapply(variableMetadata[, "peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",") |
203 | 233 |
204 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) | 234 write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = F, row.names = F) |
205 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) | 235 write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = F, row.names = F) |
206 | 236 |
207 } | 237 } |
208 | 238 |
209 #@author G. Le Corguille | 239 #@author G. Le Corguille |
210 # It allow different of field separators | 240 # It allow different of field separators |
211 getDataFrameFromFile <- function(filename, header=T) { | 241 getDataFrameFromFile <- function(filename, header = T) { |
212 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) | 242 myDataFrame <- read.table(filename, header = header, sep = ";", stringsAsFactors = F) |
213 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) | 243 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = "\t", stringsAsFactors = F) |
214 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) | 244 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header = header, sep = ",", stringsAsFactors = F) |
215 if (ncol(myDataFrame) < 2) { | 245 if (ncol(myDataFrame) < 2) { |
216 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" | 246 error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" |
217 print(error_message) | 247 print(error_message) |
218 stop(error_message) | 248 stop(error_message) |
219 } | 249 } |
220 return(myDataFrame) | 250 return(myDataFrame) |
221 } | 251 } |
222 | 252 |
223 #@author G. Le Corguille | 253 #@author G. Le Corguille |
224 # Draw the BPI and TIC graphics | 254 # Draw the BPI and TIC graphics |
225 # colored by sample names or class names | 255 # colored by sample names or class names |
226 getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { | 256 getPlotChromatogram <- function(chrom, xdata, pdfname = "Chromatogram.pdf", aggregationFun = "max") { |
227 | 257 |
228 if (aggregationFun == "sum") | 258 if (aggregationFun == "sum") |
229 type="Total Ion Chromatograms" | 259 type <- "Total Ion Chromatograms" |
230 else | 260 else |
231 type="Base Peak Intensity Chromatograms" | 261 type <- "Base Peak Intensity Chromatograms" |
232 | 262 |
233 adjusted="Raw" | 263 adjusted <- "Raw" |
234 if (hasAdjustedRtime(xdata)) | 264 if (hasAdjustedRtime(xdata)) |
235 adjusted="Adjusted" | 265 adjusted <- "Adjusted" |
236 | 266 |
237 main <- paste(type,":",adjusted,"data") | 267 main <- paste(type, ":", adjusted, "data") |
238 | 268 |
239 pdf(pdfname, width=16, height=10) | 269 pdf(pdfname, width = 16, height = 10) |
240 | 270 |
241 # Color by group | 271 # Color by group |
242 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | 272 if (length(unique(xdata$sample_group)) < 10) { |
273 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") | |
274 }else{ | |
275 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette = "Dark 3") | |
276 } | |
243 if (length(group_colors) > 1) { | 277 if (length(group_colors) > 1) { |
244 names(group_colors) <- unique(xdata$sample_group) | 278 names(group_colors) <- unique(xdata$sample_group) |
245 plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none") | 279 plot(chrom, col = group_colors[chrom$sample_group], main = main, peakType = "none") |
246 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | 280 legend("topright", legend = names(group_colors), col = group_colors, cex = 0.8, lty = 1) |
247 } | 281 } |
248 | 282 |
249 # Color by sample | 283 # Color by sample |
250 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none") | 284 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main = main, peakType = "none") |
251 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | 285 legend("topright", legend = xdata@phenoData@data$sample_name, col = rainbow(length(xdata@phenoData@data$sample_name)), cex = 0.8, lty = 1) |
252 | 286 |
253 dev.off() | 287 dev.off() |
254 } | 288 } |
255 | 289 |
256 | 290 |
257 # Get the polarities from all the samples of a condition | 291 # Get the polarities from all the samples of a condition |
258 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 292 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
259 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 293 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
260 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { | 294 getSampleMetadata <- function(xdata = NULL, sampleMetadataOutput = "sampleMetadata.tsv") { |
261 cat("Creating the sampleMetadata file...\n") | 295 cat("Creating the sampleMetadata file...\n") |
262 | 296 |
263 #Create the sampleMetada dataframe | 297 #Create the sampleMetada dataframe |
264 sampleMetadata <- xdata@phenoData@data | 298 sampleMetadata <- xdata@phenoData@data |
265 rownames(sampleMetadata) <- NULL | 299 rownames(sampleMetadata) <- NULL |
269 sampleNamesMakeNames <- make.names(sampleNamesOrigin) | 303 sampleNamesMakeNames <- make.names(sampleNamesOrigin) |
270 | 304 |
271 if (any(duplicated(sampleNamesMakeNames))) { | 305 if (any(duplicated(sampleNamesMakeNames))) { |
272 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | 306 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) |
273 for (sampleName in sampleNamesOrigin) { | 307 for (sampleName in sampleNamesOrigin) { |
274 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | 308 write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr()) |
275 } | 309 } |
276 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 310 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
277 } | 311 } |
278 | 312 |
279 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { | 313 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { |
280 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") | 314 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") |
281 for (sampleName in sampleNamesOrigin) { | 315 for (sampleName in sampleNamesOrigin) { |
282 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) | 316 cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n")) |
283 } | 317 } |
284 } | 318 } |
285 | 319 |
286 sampleMetadata$sample_name <- sampleNamesMakeNames | 320 sampleMetadata$sample_name <- sampleNamesMakeNames |
287 | 321 |
288 | 322 |
289 #For each sample file, the following actions are done | 323 #For each sample file, the following actions are done |
290 for (fileIdx in 1:length(fileNames(xdata))) { | 324 for (fileIdx in seq_len(length(fileNames(xdata)))) { |
291 #Check if the file is in the CDF format | 325 #Check if the file is in the CDF format |
292 if (!mzR:::netCDFIsFile(fileNames(xdata))) { | 326 if (!mzR:::netCDFIsFile(fileNames(xdata))) { |
293 | 327 |
294 # If the column isn't exist, with add one filled with NA | 328 # If the column isn't exist, with add one filled with NA |
295 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA | 329 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA |
296 | 330 |
297 #Extract the polarity (a list of polarities) | 331 #Extract the polarity (a list of polarities) |
298 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] | 332 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx, "polarity"] |
299 #Verify if all the scans have the same polarity | 333 #Verify if all the scans have the same polarity |
300 uniq_list <- unique(polarity) | 334 uniq_list <- unique(polarity) |
301 if (length(uniq_list)>1){ | 335 if (length(uniq_list) > 1) { |
302 polarity <- "mixed" | 336 polarity <- "mixed" |
303 } else { | 337 } else { |
304 polarity <- as.character(uniq_list) | 338 polarity <- as.character(uniq_list) |
305 } | 339 } |
306 | 340 |
308 sampleMetadata$polarity[fileIdx] <- polarity | 342 sampleMetadata$polarity[fileIdx] <- polarity |
309 } | 343 } |
310 | 344 |
311 } | 345 } |
312 | 346 |
313 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) | 347 write.table(sampleMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = sampleMetadataOutput) |
314 | 348 |
315 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) | 349 return(list("sampleNamesOrigin" = sampleNamesOrigin, "sampleNamesMakeNames" = sampleNamesMakeNames)) |
316 | 350 |
317 } | 351 } |
318 | 352 |
319 | 353 |
320 # This function will compute MD5 checksum to check the data integrity | 354 # This function will compute MD5 checksum to check the data integrity |
321 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 355 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
322 getMd5sum <- function (files) { | 356 getMd5sum <- function(files) { |
323 cat("Compute md5 checksum...\n") | 357 cat("Compute md5 checksum...\n") |
324 library(tools) | 358 library(tools) |
325 return(as.matrix(md5sum(files))) | 359 return(as.matrix(md5sum(files))) |
326 } | 360 } |
327 | 361 |
328 # This function retrieve the raw file in the working directory | 362 # This function retrieve the raw file in the working directory |
329 # - if zipfile: unzip the file with its directory tree | 363 # - if zipfile: unzip the file with its directory tree |
330 # - if singlefiles: set symlink with the good filename | 364 # - if singlefiles: set symlink with the good filename |
331 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 365 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
332 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") { | 366 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile, args, prefix = "") { |
333 | 367 |
334 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | 368 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") |
335 | 369 |
336 # single - if the file are passed in the command arguments -> refresh singlefile | 370 # single - if the file are passed in the command arguments -> refresh singlefile |
337 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | 371 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) { |
338 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|")) | 372 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath", prefix)]], "\\|")) |
339 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|")) | 373 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName", prefix)]], "\\|")) |
340 | 374 |
341 singlefile <- NULL | 375 singlefile <- NULL |
342 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | 376 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) { |
343 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | 377 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] |
344 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | 378 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] |
345 # In case, an url is used to import data within Galaxy | 379 # In case, an url is used to import data within Galaxy |
346 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | 380 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1) |
347 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | 381 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath |
348 } | 382 } |
349 } | 383 } |
350 # zipfile - if the file are passed in the command arguments -> refresh zipfile | 384 # zipfile - if the file are passed in the command arguments -> refresh zipfile |
351 if (!is.null(args[[paste0("zipfile",prefix)]])) | 385 if (!is.null(args[[paste0("zipfile", prefix)]])) |
352 zipfile <- args[[paste0("zipfile",prefix)]] | 386 zipfile <- args[[paste0("zipfile", prefix)]] |
353 | 387 |
354 # single | 388 # single |
355 if(!is.null(singlefile) && (length("singlefile")>0)) { | 389 if (!is.null(singlefile) && (length("singlefile") > 0)) { |
356 files <- vector() | 390 files <- vector() |
357 for (singlefile_sampleName in names(singlefile)) { | 391 for (singlefile_sampleName in names(singlefile)) { |
358 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | 392 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] |
359 if(!file.exists(singlefile_galaxyPath)){ | 393 if (!file.exists(singlefile_galaxyPath)) { |
360 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | 394 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!") |
361 print(error_message); stop(error_message) | 395 print(error_message); stop(error_message) |
362 } | 396 } |
363 | 397 |
364 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | 398 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = T))) |
365 file.copy(singlefile_galaxyPath, singlefile_sampleName) | 399 file.copy(singlefile_galaxyPath, singlefile_sampleName) |
366 files <- c(files, singlefile_sampleName) | 400 files <- c(files, singlefile_sampleName) |
367 } | 401 } |
368 } | 402 } |
369 # zipfile | 403 # zipfile |
370 if(!is.null(zipfile) && (zipfile != "")) { | 404 if (!is.null(zipfile) && (zipfile != "")) { |
371 if(!file.exists(zipfile)){ | 405 if (!file.exists(zipfile)) { |
372 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 406 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!") |
373 print(error_message) | 407 print(error_message) |
374 stop(error_message) | 408 stop(error_message) |
375 } | 409 } |
376 suppressWarnings(unzip(zipfile, unzip="unzip")) | 410 suppressWarnings(unzip(zipfile, unzip = "unzip")) |
377 | 411 |
378 #get the directory name | 412 #get the directory name |
379 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | 413 suppressWarnings(filesInZip <- unzip(zipfile, list = T)) |
380 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | 414 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1]))) |
381 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | 415 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] |
382 directory <- "." | 416 directory <- "." |
383 if (length(directories) == 1) directory <- directories | 417 if (length(directories) == 1) directory <- directories |
384 | 418 |
385 cat("files_root_directory\t",directory,"\n") | 419 cat("files_root_directory\t", directory, "\n") |
386 | 420 |
387 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 421 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
388 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | 422 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") |
389 info <- file.info(directory) | 423 info <- file.info(directory) |
390 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | 424 listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) |
391 files <- c(directory[!info$isdir], listed) | 425 files <- c(directory[!info$isdir], listed) |
392 exists <- file.exists(files) | 426 exists <- file.exists(files) |
393 files <- files[exists] | 427 files <- files[exists] |
394 | 428 |
395 } | 429 } |
396 return(list(zipfile=zipfile, singlefile=singlefile, files=files)) | 430 return(list(zipfile = zipfile, singlefile = singlefile, files = files)) |
397 | 431 |
398 } | 432 } |
399 | 433 |
400 | 434 |
401 # This function retrieve a xset like object | 435 # This function retrieve a xset like object |
402 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 436 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
403 getxcmsSetObject <- function(xobject) { | 437 getxcmsSetObject <- function(xobject) { |
404 # XCMS 1.x | 438 # XCMS 1.x |
405 if (class(xobject) == "xcmsSet") | 439 if (class(xobject) == "xcmsSet") |
406 return (xobject) | 440 return(xobject) |
407 # XCMS 3.x | 441 # XCMS 3.x |
408 if (class(xobject) == "XCMSnExp") { | 442 if (class(xobject) == "XCMSnExp") { |
409 # Get the legacy xcmsSet object | 443 # Get the legacy xcmsSet object |
410 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | 444 suppressWarnings(xset <- as(xobject, "xcmsSet")) |
411 if (!is.null(xset@phenoData$sample_group)) | 445 if (!is.null(xset@phenoData$sample_group)) |
412 sampclass(xset) <- xset@phenoData$sample_group | 446 sampclass(xset) <- xset@phenoData$sample_group |
413 else | 447 else |
414 sampclass(xset) <- "." | 448 sampclass(xset) <- "." |
415 return (xset) | 449 return(xset) |
416 } | 450 } |
417 } | 451 } |