Mercurial > repos > lecorguille > ipo
comparison lib.r @ 2:8e5f667359cb draft default tip
planemo upload for repository https://github.com/rietho/IPO commit d25c744220e416cce158161fa7dc3b0f153a5c11
| author | workflow4metabolomics |
|---|---|
| date | Mon, 11 Sep 2023 22:37:32 +0000 |
| parents | ae8de756dfcf |
| children |
comparison
equal
deleted
inserted
replaced
| 1:ae8de756dfcf | 2:8e5f667359cb |
|---|---|
| 1 #@author G. Le Corguille | 1 #@author G. Le Corguille |
| 2 # solve an issue with batch if arguments are logical TRUE/FALSE | 2 # solve an issue with batch if arguments are logical TRUE/FALSE |
| 3 parseCommandArgs <- function(...) { | 3 parseCommandArgs <- function(...) { |
| 4 args <- batch::parseCommandArgs(...) | 4 args <- batch::parseCommandArgs(...) |
| 5 for (key in names(args)) { | 5 for (key in names(args)) { |
| 6 if (args[key] %in% c("TRUE","FALSE")) | 6 if (args[key] %in% c("TRUE", "FALSE")) |
| 7 args[key] = as.logical(args[key]) | 7 args[key] <- as.logical(args[key]) |
| 8 } | 8 } |
| 9 return(args) | 9 return(args) |
| 10 } | 10 } |
| 11 | 11 |
| 12 #@author G. Le Corguille | 12 #@author G. Le Corguille |
| 13 # This function will | 13 # This function will |
| 14 # - load the packages | 14 # - load the packages |
| 15 # - display the sessionInfo | 15 # - display the sessionInfo |
| 16 loadAndDisplayPackages <- function(pkgs) { | 16 loadAndDisplayPackages <- function(pkgs) { |
| 17 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | 17 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE))) |
| 18 sessioninfo = sessionInfo() | 18 sessioninfo <- sessionInfo() |
| 19 cat(sessioninfo$R.version$version.string,"\n") | 19 cat(sessioninfo$R.version$version.string, "\n") |
| 20 cat("Main packages:\n") | 20 cat("Main packages:\n") |
| 21 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | 21 for (pkg in names(sessioninfo$otherPkgs)) { |
| 22 cat("Other loaded packages:\n") | 22 cat(paste(pkg, packageVersion(pkg)), "\t") |
| 23 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | 23 } |
| 24 cat("\n") | |
| 25 cat("Other loaded packages:\n") | |
| 26 for (pkg in names(sessioninfo$loadedOnly)) { | |
| 27 cat(paste(pkg, packageVersion(pkg)), "\t") | |
| 28 } | |
| 29 cat("\n") | |
| 24 } | 30 } |
| 25 | 31 |
| 26 ## | 32 ## |
| 27 ## This function launch IPO functions to get the best parameters for xcmsSet | 33 ## This function launch IPO functions to get the best parameters for xcmsSet |
| 28 ## A sample among the whole dataset is used to save time | 34 ## A sample among the whole dataset is used to save time |
| 29 ## | 35 ## |
| 30 ipo4xcmsSet = function(directory, parametersOutput, args, samplebyclass=4) { | 36 ipo4xcmsSet <- function(directory, parametersOutput, args, samplebyclass = 4) { |
| 31 setwd(directory) | 37 setwd(directory) |
| 32 | 38 |
| 33 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" | 39 files <- list.files(".", recursive = TRUE) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" |
| 34 files = files[!files %in% c("conda_activate.log", "log.txt")] | 40 files <- files[!files %in% c("conda_activate.log", "log.txt")] |
| 35 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" | 41 files_classes <- basename(dirname(files)) # "KO", "KO", "WT", "WT" |
| 36 | 42 |
| 37 mzmlfile = files | 43 mzmlfile <- files |
| 38 if (samplebyclass > 0) { | 44 if (samplebyclass > 0) { |
| 39 #random selection of N files for IPO in each class | 45 #random selection of N files for IPO in each class |
| 40 classes<-unique(basename(dirname(files))) | 46 classes <- unique(basename(dirname(files))) |
| 41 mzmlfile = NULL | 47 mzmlfile <- NULL |
| 42 for (class_i in classes){ | 48 for (class_i in classes){ |
| 43 files_class_i = files[files_classes==class_i] | 49 files_class_i <- files[files_classes == class_i] |
| 44 if (samplebyclass > length(files_class_i)) { | 50 if (samplebyclass > length(files_class_i)) { |
| 45 mzmlfile = c(mzmlfile, files_class_i) | 51 mzmlfile <- c(mzmlfile, files_class_i) |
| 46 } else { | 52 } else { |
| 47 mzmlfile = c(mzmlfile,sample(files_class_i,samplebyclass)) | 53 mzmlfile <- c(mzmlfile, sample(files_class_i, samplebyclass)) |
| 48 } | 54 } |
| 49 } | 55 } |
| 50 } | 56 } |
| 51 #@TODO: else, must we keep the RData to been use directly by group? | 57 #@TODO: else, must we keep the RData to been use directly by group? |
| 52 | 58 |
| 53 cat("\t\tSamples used:\n") | 59 cat("\t\tSamples used:\n") |
| 54 print(mzmlfile) | 60 print(mzmlfile) |
| 55 | 61 |
| 56 peakpickingParameters = getDefaultXcmsSetStartingParams(args$method) #get default parameters of IPO | 62 peakpickingParameters <- getDefaultXcmsSetStartingParams(args$method) #get default parameters of IPO |
| 57 | 63 |
| 58 # filter args to only get releavant parameters and complete with those that are not declared | 64 # filter args to only get releavant parameters and complete with those that are not declared |
| 59 peakpickingParametersUser = c(args[names(args) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(args))]) | 65 peakpickingParametersUser <- c(args[names(args) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(args))]) |
| 60 peakpickingParametersUser$verbose.columns = TRUE | 66 peakpickingParametersUser$verbose.columns <- TRUE |
| 61 #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement | 67 resultPeakpicking <- optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves = args$nSlaves, subdir = "./IPO_results") #some images generated by IPO |
| 62 resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO | 68 |
| 63 | 69 # export |
| 64 # export | 70 peakpicking_best_params <- resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves", "verbose.columns"))] |
| 65 resultPeakpicking_best_settings_parameters = resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves","verbose.columns"))] | 71 write.table(t(as.data.frame(peakpicking_best_params)), file = parametersOutput, sep = "\t", row.names = TRUE, col.names = FALSE, quote = FALSE) #can be read by user |
| 66 write.table(t(as.data.frame(resultPeakpicking_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user | 72 |
| 67 | 73 return(resultPeakpicking$best_settings$xset) |
| 68 return (resultPeakpicking$best_settings$xset) | |
| 69 } | 74 } |
| 70 | 75 |
| 71 ## | 76 ## |
| 72 ## This function launch IPO functions to get the best parameters for group and retcor | 77 ## This function launch IPO functions to get the best parameters for group and retcor |
| 73 ## | 78 ## |
| 74 ipo4retgroup = function(xset, directory, parametersOutput, args, samplebyclass=4) { | 79 ipo4retgroup <- function(xset, directory, parametersOutput, args, samplebyclass = 4) { |
| 75 setwd(directory) | 80 setwd(directory) |
| 76 | 81 |
| 77 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" | 82 files <- list.files(".", recursive = TRUE) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" |
| 78 files = files[!files %in% c("conda_activate.log", "log.txt")] | 83 files <- files[!files %in% c("conda_activate.log", "log.txt")] |
| 79 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" | 84 files_classes <- basename(dirname(files)) # "KO", "KO", "WT", "WT" |
| 80 | 85 |
| 81 retcorGroupParameters = getDefaultRetGroupStartingParams(args$retcorMethod) #get default parameters of IPO | 86 retcorGroupParameters <- getDefaultRetGroupStartingParams(args$retcorMethod) #get default parameters of IPO |
| 82 print(retcorGroupParameters) | 87 print(retcorGroupParameters) |
| 83 # filter args to only get releavant parameters and complete with those that are not declared | 88 # filter args to only get releavant parameters and complete with those that are not declared |
| 84 retcorGroupParametersUser = c(args[names(args) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(args))]) | 89 retcorGroupParametersUser <- c(args[names(args) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(args))]) |
| 85 print("retcorGroupParametersUser") | 90 print("retcorGroupParametersUser") |
| 86 print(retcorGroupParametersUser) | 91 print(retcorGroupParametersUser) |
| 87 resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO | 92 resultRetcorGroup <- optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves = args$nSlaves, subdir = "../IPO_results") #some images generated by IPO |
| 88 | 93 |
| 89 # export | 94 # export |
| 90 resultRetcorGroup_best_settings_parameters = resultRetcorGroup$best_settings | 95 resultRetcorGroup_best_params <- resultRetcorGroup$best_settings |
| 91 write.table(t(as.data.frame(resultRetcorGroup_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user | 96 write.table(t(as.data.frame(resultRetcorGroup_best_params)), file = parametersOutput, sep = "\t", row.names = TRUE, col.names = FALSE, quote = FALSE) #can be read by user |
| 92 } | 97 } |
| 93 | 98 |
| 94 | 99 |
| 95 | 100 |
| 96 # This function check if XML contains special caracters. It also checks integrity and completness. | 101 # This function check if XML contains special caracters. It also checks integrity and completness. |
| 97 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 102 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
| 98 checkXmlStructure <- function (directory) { | 103 checkXmlStructure <- function(directory) { |
| 99 cat("Checking XML structure...\n") | 104 cat("Checking XML structure...\n") |
| 100 | 105 |
| 101 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | 106 cmd <- paste0("IFS=$'\n'; for xml in $(find '", directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") |
| 102 capture <- system(cmd, intern=TRUE) | 107 capture <- system(cmd, intern = TRUE) |
| 103 | 108 |
| 104 if (length(capture)>0){ | 109 if (length(capture) > 0) { |
| 105 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | |
| 106 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | 110 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) |
| 107 write(capture, stderr()) | 111 write(capture, stderr()) |
| 108 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | 112 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") |
| 109 } | 113 } |
| 110 | 114 |
| 111 } | 115 } |
| 112 | 116 |
| 113 | 117 |
| 114 # This function get the raw file path from the arguments | 118 # This function get the raw file path from the arguments |
| 115 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 119 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
| 116 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") { | 120 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix = "") { |
| 117 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | 121 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") |
| 118 | 122 |
| 119 if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]] | 123 if (!is.null(args[[paste0("zipfile", prefix)]])) zipfile <- args[[paste0("zipfile", prefix)]] |
| 120 | 124 |
| 121 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | 125 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) { |
| 122 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]] | 126 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath", prefix)]] |
| 123 singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]] | 127 singlefile_sampleNames <- args[[paste0("singlefile_sampleName", prefix)]] |
| 124 } | 128 } |
| 125 if (exists("singlefile_galaxyPaths")){ | 129 if (exists("singlefile_galaxyPaths")) { |
| 126 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) | 130 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths, "\\|")) |
| 127 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) | 131 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames, "\\|")) |
| 128 | 132 |
| 129 singlefile <- NULL | 133 singlefile <- NULL |
| 130 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | 134 for (singlefile_galaxyPath_i in seq_along(length(singlefile_galaxyPaths))) { |
| 131 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | 135 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] |
| 132 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | 136 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] |
| 133 # In case, an url is used to import data within Galaxy | 137 # In case, an url is used to import data within Galaxy |
| 134 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | 138 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1) |
| 135 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | 139 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath |
| 136 } | 140 } |
| 137 } | 141 } |
| 138 return(list(zipfile=zipfile, singlefile=singlefile)) | 142 return(list(zipfile = zipfile, singlefile = singlefile)) |
| 139 } | 143 } |
| 140 | 144 |
| 141 # This function retrieve the raw file in the working directory | 145 # This function retrieve the raw file in the working directory |
| 142 # - if zipfile: unzip the file with its directory tree | 146 # - if zipfile: unzip the file with its directory tree |
| 143 # - if singlefiles: set symlink with the good filename | 147 # - if singlefiles: set symlink with the good filename |
| 144 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 148 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
| 145 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | 149 retrieveRawfileInWD <- function(singlefile, zipfile) { |
| 146 if(!is.null(singlefile) && (length("singlefile")>0)) { | 150 if (!is.null(singlefile) && (length("singlefile") > 0)) { |
| 147 for (singlefile_sampleName in names(singlefile)) { | 151 for (singlefile_sampleName in names(singlefile)) { |
| 148 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | 152 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] |
| 149 if(!file.exists(singlefile_galaxyPath)){ | 153 if (!file.exists(singlefile_galaxyPath)) { |
| 150 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | 154 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!") |
| 151 print(error_message); stop(error_message) | 155 print(error_message) |
| 152 } | 156 stop(error_message) |
| 153 | 157 } |
| 154 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | 158 |
| 155 file.copy(singlefile_galaxyPath, singlefile_sampleName) | 159 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = TRUE))) |
| 156 | 160 file.copy(singlefile_galaxyPath, singlefile_sampleName) |
| 157 } | 161 |
| 158 directory <- "." | 162 } |
| 159 | 163 directory <- "." |
| 160 } | 164 |
| 161 if(!is.null(zipfile) && (zipfile != "")) { | 165 } |
| 162 if(!file.exists(zipfile)){ | 166 if (!is.null(zipfile) && (zipfile != "")) { |
| 163 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 167 if (!file.exists(zipfile)) { |
| 164 print(error_message) | 168 error_message <- paste( |
| 165 stop(error_message) | 169 "Cannot access the Zip file:", |
| 166 } | 170 zipfile, |
| 167 | 171 ". Please, contact your administrator ... if you have one!" |
| 168 #list all file in the zip file | 172 ) |
| 169 #zip_files <- unzip(zipfile,list=T)[,"Name"] | 173 print(error_message) |
| 170 | 174 stop(error_message) |
| 171 #unzip | 175 } |
| 172 suppressWarnings(unzip(zipfile, unzip="unzip")) | 176 |
| 173 | 177 #unzip |
| 174 #get the directory name | 178 suppressWarnings(unzip(zipfile, unzip = "unzip")) |
| 175 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | 179 |
| 176 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | 180 #get the directory name |
| 177 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | 181 suppressWarnings(filesInZip <- unzip(zipfile, list = TRUE)) |
| 178 directory <- "." | 182 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1]))) |
| 179 if (length(directories) == 1) directory <- directories | 183 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] |
| 180 | 184 directory <- "." |
| 181 cat("files_root_directory\t",directory,"\n") | 185 if (length(directories) == 1) directory <- directories |
| 182 | 186 |
| 183 } | 187 cat("files_root_directory\t", directory, "\n") |
| 184 return (directory) | 188 |
| 189 } | |
| 190 return(directory) | |
| 185 } | 191 } |
| 186 | 192 |
| 187 | 193 |
| 188 # This function retrieve a xset like object | 194 # This function retrieve a xset like object |
| 189 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 195 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
| 190 getxcmsSetObject <- function(xobject) { | 196 getxcmsSetObject <- function(xobject) { |
| 191 # XCMS 1.x | 197 # XCMS 1.x |
| 192 if (class(xobject) == "xcmsSet") | 198 if (class(xobject) == "xcmsSet") |
| 193 return (xobject) | 199 return(xobject) |
| 194 # XCMS 3.x | 200 # XCMS 3.x |
| 195 if (class(xobject) == "XCMSnExp") { | 201 if (class(xobject) == "XCMSnExp") { |
| 196 # Get the legacy xcmsSet object | 202 # Get the legacy xcmsSet object |
| 197 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | 203 suppressWarnings(xset <- as(xobject, "xcmsSet")) |
| 198 if (!is.null(xset@phenoData$sample_group)) | 204 if (!is.null(xset@phenoData$sample_group)) |
| 199 sampclass(xset) <- xset@phenoData$sample_group | 205 sampclass(xset) <- xset@phenoData$sample_group |
| 200 else | 206 else |
| 201 sampclass(xset) <- "." | 207 sampclass(xset) <- "." |
| 202 return (xset) | 208 return(xset) |
| 203 } | 209 } |
| 204 } | 210 } |
