Mercurial > repos > workflow4metabolomics > camera_groupcorr
comparison lib.r @ 0:cb57be5de070 draft default tip
planemo upload commit 24d44ee26b7c23380c2b42fae2f7f6e58472100d
| author | workflow4metabolomics |
|---|---|
| date | Sun, 24 Nov 2024 21:29:48 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cb57be5de070 |
|---|---|
| 1 # lib.r | |
| 2 | |
| 3 # @author G. Le Corguille | |
| 4 # solve an issue with batch if arguments are logical TRUE/FALSE | |
| 5 parseCommandArgs <- function(...) { | |
| 6 args <- batch::parseCommandArgs(...) | |
| 7 for (key in names(args)) { | |
| 8 if (args[key] %in% c("TRUE", "FALSE")) { | |
| 9 args[key] <- as.logical(args[key]) | |
| 10 } | |
| 11 } | |
| 12 return(args) | |
| 13 } | |
| 14 | |
| 15 # @author G. Le Corguille | |
| 16 # This function will | |
| 17 # - load the packages | |
| 18 # - display the sessionInfo | |
| 19 loadAndDisplayPackages <- function(pkgs) { | |
| 20 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE))) | |
| 21 | |
| 22 sessioninfo <- sessionInfo() | |
| 23 cat(sessioninfo$R.version$version.string, "\n") | |
| 24 cat("Main packages:\n") | |
| 25 for (pkg in names(sessioninfo$otherPkgs)) { | |
| 26 cat(paste(pkg, packageVersion(pkg)), "\t") | |
| 27 } | |
| 28 cat("\n") | |
| 29 cat("Other loaded packages:\n") | |
| 30 for (pkg in names(sessioninfo$loadedOnly)) { | |
| 31 cat(paste(pkg, packageVersion(pkg)), "\t") | |
| 32 } | |
| 33 cat("\n") | |
| 34 } | |
| 35 | |
| 36 # This function retrieve a xset like object | |
| 37 # @author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
| 38 getxcmsSetObject <- function(xobject) { | |
| 39 # XCMS 1.x | |
| 40 if (class(xobject) == "xcmsSet") { | |
| 41 return(xobject) | |
| 42 } | |
| 43 # XCMS 3.x | |
| 44 if (class(xobject) == "XCMSnExp") { | |
| 45 # Get the legacy xcmsSet object | |
| 46 suppressWarnings(xset <- as(xobject, "xcmsSet")) | |
| 47 if (is.null(xset@phenoData$sample_group)) { | |
| 48 sampclass(xset) <- "." | |
| 49 } else { | |
| 50 sampclass(xset) <- xset@phenoData$sample_group | |
| 51 } | |
| 52 if (!is.null(xset@phenoData$sample_name)) { | |
| 53 rownames(xset@phenoData) <- xset@phenoData$sample_name | |
| 54 } | |
| 55 return(xset) | |
| 56 } | |
| 57 } | |
| 58 | |
| 59 # @author G. Le Corguille | |
| 60 # The function create a pdf from the different png generated by diffreport | |
| 61 diffreport_png2pdf <- function(filebase) { | |
| 62 dir.create("pdf") | |
| 63 | |
| 64 pdfEicOutput <- paste0("pdf/", filebase, "-eic_pdf.pdf") | |
| 65 pdfBoxOutput <- paste0("pdf/", filebase, "-box_pdf.pdf") | |
| 66 | |
| 67 system(paste0("gm convert ", filebase, "_eic/*.png ", pdfEicOutput)) | |
| 68 system(paste0("gm convert ", filebase, "_box/*.png ", pdfBoxOutput)) | |
| 69 } | |
| 70 | |
| 71 # @author G. Le Corguille | |
| 72 # The function create a zip archive from the different png generated by diffreport | |
| 73 diffreport_png2zip <- function() { | |
| 74 zip("eic.zip", dir(pattern = "_eic"), zip = Sys.which("zip")) | |
| 75 zip("box.zip", dir(pattern = "_box"), zip = Sys.which("zip")) | |
| 76 } | |
| 77 | |
| 78 # The function create a zip archive from the different tabular generated by diffreport | |
| 79 diffreport_tabular2zip <- function() { | |
| 80 zip("tabular.zip", dir(pattern = "tabular/*"), zip = Sys.which("zip")) | |
| 81 } | |
| 82 | |
| 83 # @author G. Le Corguille | |
| 84 # This function convert if it is required the Retention Time in minutes | |
| 85 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | |
| 86 if (convertRTMinute) { | |
| 87 # converting the retention times (seconds) into minutes | |
| 88 print("converting the retention times into minutes in the variableMetadata") | |
| 89 variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60 | |
| 90 variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60 | |
| 91 variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60 | |
| 92 } | |
| 93 return(variableMetadata) | |
| 94 } | |
| 95 | |
| 96 # @author G. Le Corguille | |
| 97 # This function format ions identifiers | |
| 98 formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) { | |
| 99 splitDeco <- strsplit(as.character(variableMetadata$name), "_") | |
| 100 idsDeco <- sapply(splitDeco, function(x) { | |
| 101 deco <- unlist(x)[2] | |
| 102 if (is.na(deco)) { | |
| 103 return("") | |
| 104 } else { | |
| 105 return(paste0("_", deco)) | |
| 106 } | |
| 107 }) | |
| 108 namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco)) | |
| 109 variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))]) | |
| 110 return(variableMetadata) | |
| 111 } | |
| 112 | |
| 113 # The function annotateDiffreport without the corr function which bugs | |
| 114 annotatediff <- function(xset = xset, args = args, variableMetadataOutput = "variableMetadata.tsv") { | |
| 115 # Resolve the bug with x11, with the function png | |
| 116 options(bitmapType = "cairo") | |
| 117 | |
| 118 # Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. | |
| 119 res <- try(is.null(xset@filled)) | |
| 120 | |
| 121 # ------ annot ------- | |
| 122 args$calcCiS <- as.logical(args$calcCiS) | |
| 123 args$calcIso <- as.logical(args$calcIso) | |
| 124 args$calcCaS <- as.logical(args$calcCaS) | |
| 125 | |
| 126 # common parameters | |
| 127 args4annotate <- list( | |
| 128 object = xset, | |
| 129 nSlaves = args$nSlaves, sigma = args$sigma, perfwhm = args$perfwhm, | |
| 130 maxcharge = args$maxcharge, maxiso = args$maxiso, minfrac = args$minfrac, | |
| 131 ppm = args$ppm, mzabs = args$mzabs, quick = args$quick, | |
| 132 polarity = args$polarity, max_peaks = args$max_peaks, intval = args$intval | |
| 133 ) | |
| 134 | |
| 135 if (args$quick == FALSE) { | |
| 136 args4annotate <- append( | |
| 137 args4annotate, | |
| 138 list( | |
| 139 graphMethod = args$graphMethod, cor_eic_th = args$cor_eic_th, pval = args$pval, | |
| 140 calcCiS = args$calcCiS, calcIso = args$calcIso, calcCaS = args$calcCaS | |
| 141 ) | |
| 142 ) | |
| 143 # no ruleset | |
| 144 if (!is.null(args$multiplier)) { | |
| 145 args4annotate <- append( | |
| 146 args4annotate, | |
| 147 list(multiplier = args$multiplier) | |
| 148 ) | |
| 149 } else { # ruleset | |
| 150 rulset <- read.table(args$rules, h = TRUE, sep = ";") | |
| 151 if (ncol(rulset) < 4) rulset <- read.table(args$rules, h = TRUE, sep = "\t") | |
| 152 if (ncol(rulset) < 4) rulset <- read.table(args$rules, h = TRUE, sep = ",") | |
| 153 if (ncol(rulset) < 4) { | |
| 154 error_message <- "Your ruleset file seems not well formatted. The column separators accepted are ; , and tabulation" | |
| 155 print(error_message) | |
| 156 stop(error_message) | |
| 157 } | |
| 158 | |
| 159 args4annotate <- append( | |
| 160 args4annotate, | |
| 161 list(rules = rulset) | |
| 162 ) | |
| 163 } | |
| 164 } | |
| 165 | |
| 166 | |
| 167 # launch annotate | |
| 168 xa <- do.call("annotate", args4annotate) | |
| 169 peakList <- getPeaklist(xa, intval = args$intval) | |
| 170 peakList <- cbind(groupnames(xa@xcmsSet), peakList) | |
| 171 colnames(peakList)[1] <- c("name") | |
| 172 | |
| 173 # --- Multi condition : diffreport --- | |
| 174 diffrepOri <- NULL | |
| 175 if (!is.null(args$runDiffreport) && nlevels(sampclass(xset)) >= 2) { | |
| 176 # Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. | |
| 177 res <- try(is.null(xset@filled)) | |
| 178 classes <- levels(sampclass(xset)) | |
| 179 for (i in seq_len(length(classes) - 1)) { | |
| 180 for (n in seq_len(length(classes))) { | |
| 181 if (i + n <= length(classes)) { | |
| 182 filebase <- paste(classes[i], class2 = classes[i + n], sep = "-vs-") | |
| 183 | |
| 184 diffrep <- diffreport( | |
| 185 object = xset, class1 = classes[i], class2 = classes[i + n], | |
| 186 filebase = filebase, eicmax = args$eicmax, eicwidth = args$eicwidth, | |
| 187 sortpval = TRUE, value = args$value, h = args$h, w = args$w, mzdec = args$mzdec, missing = 0 | |
| 188 ) | |
| 189 | |
| 190 diffrepOri <- diffrep | |
| 191 | |
| 192 # renamming of the column rtmed to rt to fit with camera peaklist function output | |
| 193 colnames(diffrep)[colnames(diffrep) == "rtmed"] <- "rt" | |
| 194 colnames(diffrep)[colnames(diffrep) == "mzmed"] <- "mz" | |
| 195 | |
| 196 # combines results and reorder columns | |
| 197 diffrep <- merge(peakList, diffrep[, c("name", "fold", "tstat", "pvalue")], by.x = "name", by.y = "name", sort = FALSE) | |
| 198 diffrep <- cbind(diffrep[, !(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))], diffrep[, (colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))]) | |
| 199 | |
| 200 diffrep <- RTSecondToMinute(diffrep, args$convertRTMinute) | |
| 201 diffrep <- formatIonIdentifiers(diffrep, numDigitsRT = args$numDigitsRT, numDigitsMZ = args$numDigitsMZ) | |
| 202 | |
| 203 if (args$sortpval) { | |
| 204 diffrep <- diffrep[order(diffrep$pvalue), ] | |
| 205 } | |
| 206 | |
| 207 dir.create("tabular", showWarnings = FALSE) | |
| 208 write.table(diffrep, sep = "\t", quote = FALSE, row.names = FALSE, file = paste("tabular/", filebase, "_tsv.tabular", sep = "")) | |
| 209 | |
| 210 if (args$eicmax != 0) { | |
| 211 if (args$png2 == "pdf") { | |
| 212 diffreport_png2pdf(filebase) | |
| 213 } | |
| 214 if (args$png2 == "zip") { | |
| 215 diffreport_png2zip() | |
| 216 } | |
| 217 } | |
| 218 } | |
| 219 } | |
| 220 } | |
| 221 if (args$tabular2 == "zip") { | |
| 222 diffreport_tabular2zip() | |
| 223 } | |
| 224 } | |
| 225 | |
| 226 # --- variableMetadata --- | |
| 227 variableMetadata <- peakList[, !(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] | |
| 228 variableMetadata <- RTSecondToMinute(variableMetadata, args$convertRTMinute) | |
| 229 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = args$numDigitsRT, numDigitsMZ = args$numDigitsMZ) | |
| 230 # if we have 2 conditions, we keep stat of diffrep | |
| 231 if (!is.null(args$runDiffreport) && nlevels(sampclass(xset)) == 2) { | |
| 232 variableMetadata <- merge(variableMetadata, diffrep[, c("name", "fold", "tstat", "pvalue")], by.x = "name", by.y = "name", sort = FALSE) | |
| 233 if (exists("args[[\"sortpval\"]]")) { | |
| 234 variableMetadata <- variableMetadata[order(variableMetadata$pvalue), ] | |
| 235 } | |
| 236 } | |
| 237 | |
| 238 variableMetadataOri <- variableMetadata | |
| 239 write.table(variableMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = variableMetadataOutput) | |
| 240 | |
| 241 return(list("xa" = xa, "diffrep" = diffrepOri, "variableMetadata" = variableMetadataOri)) | |
| 242 } | |
| 243 | |
| 244 | |
| 245 combinexsAnnos_function <- function( | |
| 246 xaP, xaN, diffrepP = NULL, diffrepN = NULL, | |
| 247 pos = TRUE, tol = 2, ruleset = NULL, keep_meta = TRUE, convertRTMinute = FALSE, numDigitsMZ = 0, | |
| 248 numDigitsRT = 0, variableMetadataOutput = "variableMetadata.tsv") { | |
| 249 # Load the two Rdata to extract the xset objects from positive and negative mode | |
| 250 cat("\tObject xset from positive mode\n") | |
| 251 print(xaP) | |
| 252 cat("\n") | |
| 253 | |
| 254 cat("\tObject xset from negative mode\n") | |
| 255 print(xaN) | |
| 256 cat("\n") | |
| 257 | |
| 258 cat("\n") | |
| 259 cat("\tCombining...\n") | |
| 260 # Convert the string to numeric for creating matrix | |
| 261 row <- as.numeric(strsplit(ruleset, ",")[[1]][1]) | |
| 262 column <- as.numeric(strsplit(ruleset, ",")[[1]][2]) | |
| 263 ruleset <- cbind(row, column) | |
| 264 # Test if the file comes from an older version tool | |
| 265 if ((!is.null(xaP)) && (!is.null(xaN))) { | |
| 266 # Launch the combinexsannos function from CAMERA | |
| 267 cAnnot <- combinexsAnnos(xaP, xaN, pos = pos, tol = tol, ruleset = ruleset) | |
| 268 } else { | |
| 269 stop("You must relauch the CAMERA.annotate step with the lastest version.") | |
| 270 } | |
| 271 | |
| 272 if (pos) { | |
| 273 xa <- xaP | |
| 274 mode <- "neg. Mode" | |
| 275 } else { | |
| 276 xa <- xaN | |
| 277 mode <- "pos. Mode" | |
| 278 } | |
| 279 | |
| 280 peakList <- getPeaklist(xa) | |
| 281 peakList <- cbind(groupnames(xa@xcmsSet), peakList) | |
| 282 colnames(peakList)[1] <- c("name") | |
| 283 variableMetadata <- cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup", mode)]) | |
| 284 variableMetadata <- variableMetadata[, !(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))] | |
| 285 | |
| 286 # Test if there are more than two classes (conditions) | |
| 287 if (nlevels(sampclass(xaP@xcmsSet)) == 2 && (!is.null(diffrepN)) && (!is.null(diffrepP))) { | |
| 288 diffrepP <- diffrepP[, c("name", "fold", "tstat", "pvalue")] | |
| 289 colnames(diffrepP) <- paste("P.", colnames(diffrepP), sep = "") | |
| 290 diffrepN <- diffrepN[, c("name", "fold", "tstat", "pvalue")] | |
| 291 colnames(diffrepN) <- paste("N.", colnames(diffrepN), sep = "") | |
| 292 | |
| 293 variableMetadata <- merge(variableMetadata, diffrepP, by.x = "name", by.y = "P.name") | |
| 294 variableMetadata <- merge(variableMetadata, diffrepN, by.x = "name", by.y = "N.name") | |
| 295 } | |
| 296 | |
| 297 rownames(variableMetadata) <- NULL | |
| 298 # TODO: checker colnames(variableMetadata)[1:2] = c("name", "mz/rt"); | |
| 299 | |
| 300 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) | |
| 301 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ) | |
| 302 | |
| 303 # If the user want to keep only the metabolites which match a difference | |
| 304 if (keep_meta) { | |
| 305 variableMetadata <- variableMetadata[variableMetadata[, c(mode)] != "", ] | |
| 306 } | |
| 307 | |
| 308 # Write the output into a tsv file | |
| 309 write.table(variableMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = variableMetadataOutput) | |
| 310 return(variableMetadata) | |
| 311 } | |
| 312 | |
| 313 # This function get the raw file path from the arguments | |
| 314 getRawfilePathFromArguments <- function(singlefile, zipfile, args) { | |
| 315 if (!is.null(args$zipfile)) zipfile <- args$zipfile | |
| 316 if (!is.null(args$zipfilePositive)) zipfile <- args$zipfilePositive | |
| 317 if (!is.null(args$zipfileNegative)) zipfile <- args$zipfileNegative | |
| 318 | |
| 319 if (!is.null(args$singlefile_galaxyPath)) { | |
| 320 singlefile_galaxyPaths <- args$singlefile_galaxyPath | |
| 321 singlefile_sampleNames <- args$singlefile_sampleName | |
| 322 } | |
| 323 if (!is.null(args$singlefile_galaxyPathPositive)) { | |
| 324 singlefile_galaxyPaths <- args$singlefile_galaxyPathPositive | |
| 325 singlefile_sampleNames <- args$singlefile_sampleNamePositive | |
| 326 } | |
| 327 if (!is.null(args$singlefile_galaxyPathNegative)) { | |
| 328 singlefile_galaxyPaths <- args$singlefile_galaxyPathNegative | |
| 329 singlefile_sampleNames <- args$singlefile_sampleNameNegative | |
| 330 } | |
| 331 if (exists("singlefile_galaxyPaths")) { | |
| 332 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths, ",")) | |
| 333 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames, ",")) | |
| 334 | |
| 335 singlefile <- NULL | |
| 336 for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) { | |
| 337 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
| 338 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | |
| 339 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | |
| 340 } | |
| 341 } | |
| 342 for (argument in c( | |
| 343 "zipfile", "zipfilePositive", "zipfileNegative", | |
| 344 "singlefile_galaxyPath", "singlefile_sampleName", | |
| 345 "singlefile_galaxyPathPositive", "singlefile_sampleNamePositive", | |
| 346 "singlefile_galaxyPathNegative", "singlefile_sampleNameNegative" | |
| 347 )) { | |
| 348 args[[argument]] <- NULL | |
| 349 } | |
| 350 return(list(zipfile = zipfile, singlefile = singlefile, args = args)) | |
| 351 } | |
| 352 | |
| 353 | |
| 354 # This function retrieve the raw file in the working directory | |
| 355 # - if zipfile: unzip the file with its directory tree | |
| 356 # - if singlefiles: set symlink with the good filename | |
| 357 retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile) { | |
| 358 if (!is.null(singlefile) && (length("singlefile") > 0)) { | |
| 359 for (singlefile_sampleName in names(singlefile)) { | |
| 360 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | |
| 361 if (!file.exists(singlefile_galaxyPath)) { | |
| 362 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!") | |
| 363 print(error_message) | |
| 364 stop(error_message) | |
| 365 } | |
| 366 | |
| 367 file.symlink(singlefile_galaxyPath, singlefile_sampleName) | |
| 368 } | |
| 369 directory <- "." | |
| 370 } | |
| 371 if (!is.null(zipfile) && (zipfile != "")) { | |
| 372 if (!file.exists(zipfile)) { | |
| 373 error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!") | |
| 374 print(error_message) | |
| 375 stop(error_message) | |
| 376 } | |
| 377 | |
| 378 # unzip | |
| 379 suppressWarnings(unzip(zipfile, unzip = "unzip")) | |
| 380 | |
| 381 # get the directory name | |
| 382 filesInZip <- unzip(zipfile, list = TRUE) | |
| 383 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1]))) | |
| 384 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | |
| 385 directory <- "." | |
| 386 if (length(directories) == 1) directory <- directories | |
| 387 | |
| 388 cat("files_root_directory\t", directory, "\n") | |
| 389 } | |
| 390 return(directory) | |
| 391 } |
