annotate lib.r @ 0:ac5f2936575b draft

planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
author lecorguille
date Thu, 03 Aug 2017 06:00:00 -0400
parents
children ae8de756dfcf
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
1 ##
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
2 ## This function launches the IPO functions to get the best parameters for xcmsSet
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
3 ## A sample among the whole dataset is used to save time
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
4 ##
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
# Optimise the xcmsSet (peak picking) parameters with IPO on a subset of the
# files found under `directory`, and export the best parameter set.
#
# Arguments:
#   directory         directory scanned recursively for input files. It becomes
#                     the working directory and is NOT restored on exit.
#   parametersOutput  path of the tab-separated file that receives the best
#                     parameters (parameter names are written as row names).
#   listArguments     named list of user arguments. "method" is passed to
#                     getDefaultXcmsSetStartingParams(); entries whose name
#                     matches an IPO default replace that default.
#   samplebyclass     number of files drawn at random in each class (a class is
#                     the name of the folder holding the file). When it is not
#                     greater than 0, every file is used.
#
# Returns the `xset` element of the best settings reported by optimizeXcmsSet().
# Stops with an error when no file is found under `directory`.
ipo4xcmsSet <- function(directory, parametersOutput, listArguments, samplebyclass=4) {
    # NOTE(review): the working directory is left changed on purpose; the
    # relative `subdir` below (and a relative `parametersOutput`) resolve
    # against it. Confirm whether callers also rely on it afterwards.
    setwd(directory)

    files <- list.files(".", recursive=TRUE) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
    if (length(files) == 0) {
        # Fail here with an explicit message instead of handing an empty
        # selection to IPO.
        stop("ERROR: no file found in ", directory, ", IPO cannot optimize the xcmsSet parameters")
    }
    files_classes <- basename(dirname(files)) # "KO", "KO", "WT", "WT"

    mzmlfile <- files
    if (samplebyclass > 0) {
        # Random selection of N files for IPO in each class. Classes are
        # visited in order of first appearance so the random draws happen in
        # a stable order; a class with fewer than N files is kept entirely.
        files_by_class <- split(files, factor(files_classes, levels=unique(files_classes)))
        mzmlfile <- unlist(lapply(files_by_class, function(files_class_i) {
            if (samplebyclass > length(files_class_i)) {
                files_class_i
            } else {
                sample(files_class_i, samplebyclass)
            }
        }), use.names=FALSE)
    }
    #@TODO: else, must we keep the RData to been use directly by group?

    cat("\t\tSamples used:\n")
    print(mzmlfile)

    # Default starting parameters of IPO for the requested method
    peakpickingParameters <- getDefaultXcmsSetStartingParams(listArguments[["method"]])

    # Keep only the user arguments IPO knows about, then complete them with
    # the IPO defaults that the user did not declare
    declared_by_user <- names(listArguments) %in% names(peakpickingParameters)
    missing_from_user <- !(names(peakpickingParameters) %in% names(listArguments))
    peakpickingParametersUser <- c(listArguments[declared_by_user], peakpickingParameters[missing_from_user])
    peakpickingParametersUser$verbose.columns <- TRUE

    #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement
    # `[[` gives exact name matching (NULL when "nSlaves" is absent);
    # IPO also writes some images in `subdir`
    resultPeakpicking <- optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=peakpickingParametersUser[["nSlaves"]], subdir="../IPO_results")

    # Export: everything except the two technical entries, in a layout that
    # can be read by the user
    best_parameters <- resultPeakpicking$best_settings$parameters
    resultPeakpicking_best_settings_parameters <- best_parameters[!(names(best_parameters) %in% c("nSlaves","verbose.columns"))]
    write.table(t(as.data.frame(resultPeakpicking_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=TRUE, col.names=FALSE, quote=FALSE)

    return(resultPeakpicking$best_settings$xset)
}
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
45
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
46 ##
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
47 ## This function launches the IPO functions to get the best parameters for group and retcor
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
48 ##
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
# Optimise the retention time correction and grouping parameters with IPO and
# export the best parameter set.
#
# Arguments:
#   xset              object handed as-is to optimizeRetGroup().
#   directory         directory that becomes the working directory (it is NOT
#                     restored on exit).
#   parametersOutput  path of the tab-separated file that receives the best
#                     settings (names are written as row names).
#   listArguments     named list of user arguments. "retcorMethod" is passed to
#                     getDefaultRetGroupStartingParams(), "nSlaves" to
#                     optimizeRetGroup(); entries whose name matches an IPO
#                     default replace that default.
#   samplebyclass     kept for signature compatibility; not used here.
#
# Returns the value of write.table(), i.e. nothing useful: the function is
# called for the file it writes.
ipo4retgroup <- function(xset, directory, parametersOutput, listArguments, samplebyclass=4) {
    # NOTE(review): the working directory is left changed on purpose; the
    # relative `subdir` below (and a relative `parametersOutput`) resolve
    # against it. Confirm whether callers also rely on it afterwards.
    setwd(directory)

    # Default starting parameters of IPO for the requested retcor method
    retcorGroupParameters <- getDefaultRetGroupStartingParams(listArguments[["retcorMethod"]])
    print(retcorGroupParameters)

    # Keep only the user arguments IPO knows about, then complete them with
    # the IPO defaults that the user did not declare
    declared_by_user <- names(listArguments) %in% names(retcorGroupParameters)
    missing_from_user <- !(names(retcorGroupParameters) %in% names(listArguments))
    retcorGroupParametersUser <- c(listArguments[declared_by_user], retcorGroupParameters[missing_from_user])
    print("retcorGroupParametersUser")
    print(retcorGroupParametersUser)

    # IPO also writes some images in `subdir`
    resultRetcorGroup <- optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=listArguments[["nSlaves"]], subdir="../IPO_results")

    # Export in a layout that can be read by the user
    resultRetcorGroup_best_settings_parameters <- resultRetcorGroup$best_settings
    write.table(t(as.data.frame(resultRetcorGroup_best_settings_parameters)), file=parametersOutput, sep="\t", row.names=TRUE, col.names=FALSE, quote=FALSE)
}
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
67
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
68
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
69
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
70
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
71 ##
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
72 ## This function checks whether xcms will find all the files
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
73 ##
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
74 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
# Check that every raw data file present under `directory` is also part of the
# file list built the way xcms builds it (list.files() with the extension
# pattern below).
#
# Arguments:
#   directory  path, relative to the current working directory, of the folder
#              (or file) to check.
#
# Stops with an error, after listing the offending paths on stderr, when at
# least one file reported by `find` is missing from the R-side list.
# Requires a `find` executable reachable from the shell used by system().
checkFilesCompatibilityWithXcms <- function(directory) {
    cat("Checking files filenames compatibilities with xmcs...\n")

    # WHAT XCMS WILL FIND
    extensions <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
    filepattern <- paste(paste0("\\.", extensions, "$"), collapse = "|")
    info <- file.info(directory)
    listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
    files <- c(directory[!info$isdir], listed)
    # Turn the paths into absolute ones whenever the absolute form exists
    files_abs <- file.path(getwd(), files)
    exists <- file.exists(files_abs)
    files[exists] <- files_abs[exists]
    files[exists] <- sub("//","/",files[exists])

    # WHAT IS ON THE FILESYSTEM
    # The search root is built from getwd(), like `files_abs` above, so both
    # listings share the same prefix; shQuote() keeps a path containing
    # spaces or shell metacharacters as a single `find` argument.
    search_root <- paste0(getwd(), "/", directory)
    find_cmd <- paste0("find ", shQuote(search_root), " -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"")
    filesystem_filepaths <- system(find_cmd, intern=TRUE)
    filesystem_filepaths <- filesystem_filepaths[grepl(filepattern, filesystem_filepaths, perl=TRUE)]

    # COMPARISON
    not_imported <- filesystem_filepaths[!(filesystem_filepaths %in% files)]
    if (length(not_imported) > 0) {
        write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
        write(not_imported,stderr())
        stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
    }
}
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
100
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
101
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
102
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
103 ##
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
104 ## This function checks whether the XML files contain special characters. It also checks their integrity and completeness.
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
105 ##
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
106 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
ac5f2936575b planemo upload commit 131562ad89c33a2f87754936ce3c8fe6899484c0
lecorguille
parents:
diff changeset
# Validate with xmllint every XML-like file found under `directory`.
#
# Arguments:
#   directory  folder searched recursively for files whose name matches
#              '*.*ml*' (case-insensitive); hidden files and paths containing
#              "conda-env" are skipped.
#
# Stops with an error, after listing the offending paths on stderr, when
# xmllint returns a non-zero status for at least one file.
# Requires `find` and `xmllint` executables reachable from the shell.
checkXmlStructure <- function (directory) {
    cat("Checking XML structure...\n")

    # shQuote() keeps a directory containing spaces or shell metacharacters
    # as a single `find` argument.
    find_cmd <- paste("find", shQuote(directory), "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'")
    # A non-zero exit status of `find` is not reported as an R warning:
    # only the paths it printed matter here.
    xml_files <- suppressWarnings(system(find_cmd, intern=TRUE))

    # One xmllint run per file; its output is discarded, only the exit status
    # tells whether the file could be parsed.
    is_invalid <- vapply(xml_files, function(xml) {
        status <- system2("xmllint", c("--nonet", "--noout", shQuote(xml)), stdout=FALSE, stderr=FALSE)
        status > 0
    }, logical(1), USE.NAMES=FALSE)
    capture <- xml_files[is_invalid]

    if (length(capture) > 0) {
        write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
        write(capture, stderr())
        stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
    }
}