Mercurial > repos > lecorguille > ipo
comparison lib.r @ 2:8e5f667359cb draft default tip
planemo upload for repository https://github.com/rietho/IPO commit d25c744220e416cce158161fa7dc3b0f153a5c11
author | workflow4metabolomics |
---|---|
date | Mon, 11 Sep 2023 22:37:32 +0000 |
parents | ae8de756dfcf |
children |
comparison
equal
deleted
inserted
replaced
1:ae8de756dfcf | 2:8e5f667359cb |
---|---|
1 #@author G. Le Corguille | 1 #@author G. Le Corguille |
2 # solve an issue with batch if arguments are logical TRUE/FALSE | 2 # solve an issue with batch if arguments are logical TRUE/FALSE |
3 parseCommandArgs <- function(...) { | 3 parseCommandArgs <- function(...) { |
4 args <- batch::parseCommandArgs(...) | 4 args <- batch::parseCommandArgs(...) |
5 for (key in names(args)) { | 5 for (key in names(args)) { |
6 if (args[key] %in% c("TRUE","FALSE")) | 6 if (args[key] %in% c("TRUE", "FALSE")) |
7 args[key] = as.logical(args[key]) | 7 args[key] <- as.logical(args[key]) |
8 } | 8 } |
9 return(args) | 9 return(args) |
10 } | 10 } |
11 | 11 |
12 #@author G. Le Corguille | 12 #@author G. Le Corguille |
13 # This function will | 13 # This function will |
14 # - load the packages | 14 # - load the packages |
15 # - display the sessionInfo | 15 # - display the sessionInfo |
16 loadAndDisplayPackages <- function(pkgs) { | 16 loadAndDisplayPackages <- function(pkgs) { |
17 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | 17 for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE))) |
18 sessioninfo = sessionInfo() | 18 sessioninfo <- sessionInfo() |
19 cat(sessioninfo$R.version$version.string,"\n") | 19 cat(sessioninfo$R.version$version.string, "\n") |
20 cat("Main packages:\n") | 20 cat("Main packages:\n") |
21 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | 21 for (pkg in names(sessioninfo$otherPkgs)) { |
22 cat("Other loaded packages:\n") | 22 cat(paste(pkg, packageVersion(pkg)), "\t") |
23 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | 23 } |
24 cat("\n") | |
25 cat("Other loaded packages:\n") | |
26 for (pkg in names(sessioninfo$loadedOnly)) { | |
27 cat(paste(pkg, packageVersion(pkg)), "\t") | |
28 } | |
29 cat("\n") | |
24 } | 30 } |
25 | 31 |
26 ## | 32 ## |
27 ## This function launch IPO functions to get the best parameters for xcmsSet | 33 ## This function launch IPO functions to get the best parameters for xcmsSet |
28 ## A sample among the whole dataset is used to save time | 34 ## A sample among the whole dataset is used to save time |
29 ## | 35 ## |
30 ipo4xcmsSet = function(directory, parametersOutput, args, samplebyclass=4) { | 36 ipo4xcmsSet <- function(directory, parametersOutput, args, samplebyclass = 4) { |
31 setwd(directory) | 37 setwd(directory) |
32 | 38 |
33 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" | 39 files <- list.files(".", recursive = TRUE) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" |
34 files = files[!files %in% c("conda_activate.log", "log.txt")] | 40 files <- files[!files %in% c("conda_activate.log", "log.txt")] |
35 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" | 41 files_classes <- basename(dirname(files)) # "KO", "KO", "WT", "WT" |
36 | 42 |
37 mzmlfile = files | 43 mzmlfile <- files |
38 if (samplebyclass > 0) { | 44 if (samplebyclass > 0) { |
39 #random selection of N files for IPO in each class | 45 #random selection of N files for IPO in each class |
40 classes<-unique(basename(dirname(files))) | 46 classes <- unique(basename(dirname(files))) |
41 mzmlfile = NULL | 47 mzmlfile <- NULL |
42 for (class_i in classes){ | 48 for (class_i in classes){ |
43 files_class_i = files[files_classes==class_i] | 49 files_class_i <- files[files_classes == class_i] |
44 if (samplebyclass > length(files_class_i)) { | 50 if (samplebyclass > length(files_class_i)) { |
45 mzmlfile = c(mzmlfile, files_class_i) | 51 mzmlfile <- c(mzmlfile, files_class_i) |
46 } else { | 52 } else { |
47 mzmlfile = c(mzmlfile,sample(files_class_i,samplebyclass)) | 53 mzmlfile <- c(mzmlfile, sample(files_class_i, samplebyclass)) |
48 } | 54 } |
49 } | 55 } |
50 } | 56 } |
51 #@TODO: else, must we keep the RData to been use directly by group? | 57 #@TODO: else, must we keep the RData to been use directly by group? |
52 | 58 |
53 cat("\t\tSamples used:\n") | 59 cat("\t\tSamples used:\n") |
54 print(mzmlfile) | 60 print(mzmlfile) |
55 | 61 |
56 peakpickingParameters = getDefaultXcmsSetStartingParams(args$method) #get default parameters of IPO | 62 peakpickingParameters <- getDefaultXcmsSetStartingParams(args$method) #get default parameters of IPO |
57 | 63 |
58 # filter args to only get releavant parameters and complete with those that are not declared | 64 # filter args to only get releavant parameters and complete with those that are not declared |
59 peakpickingParametersUser = c(args[names(args) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(args))]) | 65 peakpickingParametersUser <- c(args[names(args) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(args))]) |
60 peakpickingParametersUser$verbose.columns = TRUE | 66 peakpickingParametersUser$verbose.columns <- TRUE |
61 #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement | 67 resultPeakpicking <- optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves = args$nSlaves, subdir = "./IPO_results") #some images generated by IPO |
62 resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO | 68 |
63 | 69 # export |
64 # export | 70 peakpicking_best_params <- resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves", "verbose.columns"))] |
65 resultPeakpicking_best_settings_parameters = resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves","verbose.columns"))] | 71 write.table(t(as.data.frame(peakpicking_best_params)), file = parametersOutput, sep = "\t", row.names = TRUE, col.names = FALSE, quote = FALSE) #can be read by user |
66 write.table(t(as.data.frame(resultPeakpicking_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user | 72 |
67 | 73 return(resultPeakpicking$best_settings$xset) |
68 return (resultPeakpicking$best_settings$xset) | |
69 } | 74 } |
70 | 75 |
71 ## | 76 ## |
72 ## This function launch IPO functions to get the best parameters for group and retcor | 77 ## This function launch IPO functions to get the best parameters for group and retcor |
73 ## | 78 ## |
74 ipo4retgroup = function(xset, directory, parametersOutput, args, samplebyclass=4) { | 79 ipo4retgroup <- function(xset, directory, parametersOutput, args, samplebyclass = 4) { |
75 setwd(directory) | 80 setwd(directory) |
76 | 81 |
77 files = list.files(".", recursive=T) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" | 82 files <- list.files(".", recursive = TRUE) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF" |
78 files = files[!files %in% c("conda_activate.log", "log.txt")] | 83 files <- files[!files %in% c("conda_activate.log", "log.txt")] |
79 files_classes = basename(dirname(files)) # "KO", "KO", "WT", "WT" | 84 files_classes <- basename(dirname(files)) # "KO", "KO", "WT", "WT" |
80 | 85 |
81 retcorGroupParameters = getDefaultRetGroupStartingParams(args$retcorMethod) #get default parameters of IPO | 86 retcorGroupParameters <- getDefaultRetGroupStartingParams(args$retcorMethod) #get default parameters of IPO |
82 print(retcorGroupParameters) | 87 print(retcorGroupParameters) |
83 # filter args to only get releavant parameters and complete with those that are not declared | 88 # filter args to only get releavant parameters and complete with those that are not declared |
84 retcorGroupParametersUser = c(args[names(args) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(args))]) | 89 retcorGroupParametersUser <- c(args[names(args) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(args))]) |
85 print("retcorGroupParametersUser") | 90 print("retcorGroupParametersUser") |
86 print(retcorGroupParametersUser) | 91 print(retcorGroupParametersUser) |
87 resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO | 92 resultRetcorGroup <- optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves = args$nSlaves, subdir = "../IPO_results") #some images generated by IPO |
88 | 93 |
89 # export | 94 # export |
90 resultRetcorGroup_best_settings_parameters = resultRetcorGroup$best_settings | 95 resultRetcorGroup_best_params <- resultRetcorGroup$best_settings |
91 write.table(t(as.data.frame(resultRetcorGroup_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=T, col.names=F, quote=F) #can be read by user | 96 write.table(t(as.data.frame(resultRetcorGroup_best_params)), file = parametersOutput, sep = "\t", row.names = TRUE, col.names = FALSE, quote = FALSE) #can be read by user |
92 } | 97 } |
93 | 98 |
94 | 99 |
95 | 100 |
96 # This function check if XML contains special caracters. It also checks integrity and completness. | 101 # This function check if XML contains special caracters. It also checks integrity and completness. |
97 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 102 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
98 checkXmlStructure <- function (directory) { | 103 checkXmlStructure <- function(directory) { |
99 cat("Checking XML structure...\n") | 104 cat("Checking XML structure...\n") |
100 | 105 |
101 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | 106 cmd <- paste0("IFS=$'\n'; for xml in $(find '", directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") |
102 capture <- system(cmd, intern=TRUE) | 107 capture <- system(cmd, intern = TRUE) |
103 | 108 |
104 if (length(capture)>0){ | 109 if (length(capture) > 0) { |
105 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | |
106 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | 110 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) |
107 write(capture, stderr()) | 111 write(capture, stderr()) |
108 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | 112 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") |
109 } | 113 } |
110 | 114 |
111 } | 115 } |
112 | 116 |
113 | 117 |
114 # This function get the raw file path from the arguments | 118 # This function get the raw file path from the arguments |
115 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 119 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
116 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") { | 120 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix = "") { |
117 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") | 121 if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") |
118 | 122 |
119 if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]] | 123 if (!is.null(args[[paste0("zipfile", prefix)]])) zipfile <- args[[paste0("zipfile", prefix)]] |
120 | 124 |
121 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { | 125 if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) { |
122 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]] | 126 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath", prefix)]] |
123 singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]] | 127 singlefile_sampleNames <- args[[paste0("singlefile_sampleName", prefix)]] |
124 } | 128 } |
125 if (exists("singlefile_galaxyPaths")){ | 129 if (exists("singlefile_galaxyPaths")) { |
126 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) | 130 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths, "\\|")) |
127 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) | 131 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames, "\\|")) |
128 | 132 |
129 singlefile <- NULL | 133 singlefile <- NULL |
130 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | 134 for (singlefile_galaxyPath_i in seq_along(length(singlefile_galaxyPaths))) { |
131 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | 135 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] |
132 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | 136 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] |
133 # In case, an url is used to import data within Galaxy | 137 # In case, an url is used to import data within Galaxy |
134 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) | 138 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1) |
135 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | 139 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath |
136 } | 140 } |
137 } | 141 } |
138 return(list(zipfile=zipfile, singlefile=singlefile)) | 142 return(list(zipfile = zipfile, singlefile = singlefile)) |
139 } | 143 } |
140 | 144 |
141 # This function retrieve the raw file in the working directory | 145 # This function retrieve the raw file in the working directory |
142 # - if zipfile: unzip the file with its directory tree | 146 # - if zipfile: unzip the file with its directory tree |
143 # - if singlefiles: set symlink with the good filename | 147 # - if singlefiles: set symlink with the good filename |
144 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 148 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
145 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | 149 retrieveRawfileInWD <- function(singlefile, zipfile) { |
146 if(!is.null(singlefile) && (length("singlefile")>0)) { | 150 if (!is.null(singlefile) && (length("singlefile") > 0)) { |
147 for (singlefile_sampleName in names(singlefile)) { | 151 for (singlefile_sampleName in names(singlefile)) { |
148 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | 152 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] |
149 if(!file.exists(singlefile_galaxyPath)){ | 153 if (!file.exists(singlefile_galaxyPath)) { |
150 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | 154 error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!") |
151 print(error_message); stop(error_message) | 155 print(error_message) |
152 } | 156 stop(error_message) |
153 | 157 } |
154 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | 158 |
155 file.copy(singlefile_galaxyPath, singlefile_sampleName) | 159 if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = TRUE))) |
156 | 160 file.copy(singlefile_galaxyPath, singlefile_sampleName) |
157 } | 161 |
158 directory <- "." | 162 } |
159 | 163 directory <- "." |
160 } | 164 |
161 if(!is.null(zipfile) && (zipfile != "")) { | 165 } |
162 if(!file.exists(zipfile)){ | 166 if (!is.null(zipfile) && (zipfile != "")) { |
163 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 167 if (!file.exists(zipfile)) { |
164 print(error_message) | 168 error_message <- paste( |
165 stop(error_message) | 169 "Cannot access the Zip file:", |
166 } | 170 zipfile, |
167 | 171 ". Please, contact your administrator ... if you have one!" |
168 #list all file in the zip file | 172 ) |
169 #zip_files <- unzip(zipfile,list=T)[,"Name"] | 173 print(error_message) |
170 | 174 stop(error_message) |
171 #unzip | 175 } |
172 suppressWarnings(unzip(zipfile, unzip="unzip")) | 176 |
173 | 177 #unzip |
174 #get the directory name | 178 suppressWarnings(unzip(zipfile, unzip = "unzip")) |
175 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | 179 |
176 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | 180 #get the directory name |
177 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | 181 suppressWarnings(filesInZip <- unzip(zipfile, list = TRUE)) |
178 directory <- "." | 182 directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1]))) |
179 if (length(directories) == 1) directory <- directories | 183 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] |
180 | 184 directory <- "." |
181 cat("files_root_directory\t",directory,"\n") | 185 if (length(directories) == 1) directory <- directories |
182 | 186 |
183 } | 187 cat("files_root_directory\t", directory, "\n") |
184 return (directory) | 188 |
189 } | |
190 return(directory) | |
185 } | 191 } |
186 | 192 |
187 | 193 |
188 # This function retrieve a xset like object | 194 # This function retrieve a xset like object |
189 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 195 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
190 getxcmsSetObject <- function(xobject) { | 196 getxcmsSetObject <- function(xobject) { |
191 # XCMS 1.x | 197 # XCMS 1.x |
192 if (class(xobject) == "xcmsSet") | 198 if (class(xobject) == "xcmsSet") |
193 return (xobject) | 199 return(xobject) |
194 # XCMS 3.x | 200 # XCMS 3.x |
195 if (class(xobject) == "XCMSnExp") { | 201 if (class(xobject) == "XCMSnExp") { |
196 # Get the legacy xcmsSet object | 202 # Get the legacy xcmsSet object |
197 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | 203 suppressWarnings(xset <- as(xobject, "xcmsSet")) |
198 if (!is.null(xset@phenoData$sample_group)) | 204 if (!is.null(xset@phenoData$sample_group)) |
199 sampclass(xset) <- xset@phenoData$sample_group | 205 sampclass(xset) <- xset@phenoData$sample_group |
200 else | 206 else |
201 sampclass(xset) <- "." | 207 sampclass(xset) <- "." |
202 return (xset) | 208 return(xset) |
203 } | 209 } |
204 } | 210 } |