comparison createDatabase.R @ 6:d4a17be5429a draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author computational-metabolomics
date Thu, 04 Mar 2021 12:34:06 +0000
parents 0fe389e3f871
children
comparison
equal deleted inserted replaced
5:2f7cd31eba49 6:d4a17be5429a
1 library(msPurity) 1 library(msPurity)
2 library(optparse) 2 library(optparse)
3 library(xcms) 3 library(xcms)
4 library(CAMERA) 4 library(CAMERA)
5 print(sessionInfo()) 5 print(sessionInfo())
6 print('CREATING DATABASE') 6 print("CREATING DATABASE")
7 7
8 xset_pa_filename_fix <- function(opt, pa, xset){ 8 xset_pa_filename_fix <- function(opt, pa, xset) {
9 9
10 if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)){ 10 if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)) {
11 # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables 11 # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables
12 # needs to be done due to Galaxy moving the files around and screwing up any links to files 12 # needs to be done due to Galaxy moving the files around and screwing up any links to files
13 13
14 filepaths <- trimws(strsplit(opt$mzML_files, ',')[[1]]) 14 filepaths <- trimws(strsplit(opt$mzML_files, ",")[[1]])
15 filepaths <- filepaths[filepaths != ""] 15 filepaths <- filepaths[filepaths != ""]
16 new_names <- basename(filepaths)
17 16
18 galaxy_names <- trimws(strsplit(opt$galaxy_names, ',')[[1]]) 17 galaxy_names <- trimws(strsplit(opt$galaxy_names, ",")[[1]])
19 galaxy_names <- galaxy_names[galaxy_names != ""] 18 galaxy_names <- galaxy_names[galaxy_names != ""]
20 19
21 nsave <- names(pa@fileList) 20 nsave <- names(pa@fileList)
22 old_filenames <- basename(pa@fileList) 21 old_filenames <- basename(pa@fileList)
23 pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)] 22 pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)]
26 pa@puritydf$filename <- basename(pa@fileList[match(pa@puritydf$filename, old_filenames)]) 25 pa@puritydf$filename <- basename(pa@fileList[match(pa@puritydf$filename, old_filenames)])
27 pa@grped_df$filename <- basename(pa@fileList[match(pa@grped_df$filename, old_filenames)]) 26 pa@grped_df$filename <- basename(pa@fileList[match(pa@grped_df$filename, old_filenames)])
28 } 27 }
29 28
30 29
31 if(!all(basename(pa@fileList)==basename(xset@filepaths))){ 30 if (!all(basename(pa@fileList) == basename(xset@filepaths))) {
32 if(!all(names(pa@fileList)==basename(xset@filepaths))){ 31 if (!all(names(pa@fileList) == basename(xset@filepaths))) {
33 print('FILELISTS DO NOT MATCH') 32 print("FILELISTS DO NOT MATCH")
34 message('FILELISTS DO NOT MATCH') 33 message("FILELISTS DO NOT MATCH")
35 quit(status = 1) 34 quit(status = 1)
36 }else{ 35 }else{
37 xset@filepaths <- unname(pa@fileList) 36 xset@filepaths <- unname(pa@fileList)
38 } 37 }
39 } 38 }
46 45
47 46
48 47
49 48
50 option_list <- list( 49 option_list <- list(
51 make_option(c("-o", "--outDir"), type="character"), 50 make_option(c("-o", "--outDir"), type = "character"),
52 make_option("--pa", type="character"), 51 make_option("--pa", type = "character"),
53 make_option("--xset_xa", type="character"), 52 make_option("--xset_xa", type = "character"),
54 make_option("--xcms_camera_option", type="character"), 53 make_option("--xcms_camera_option", type = "character"),
55 make_option("--eic", action="store_true"), 54 make_option("--eic", action = "store_true"),
56 make_option("--cores", default=4), 55 make_option("--cores", default = 4),
57 make_option("--mzML_files", type="character"), 56 make_option("--mzML_files", type = "character"),
58 make_option("--galaxy_names", type="character"), 57 make_option("--galaxy_names", type = "character"),
59 make_option("--grpPeaklist", type="character") 58 make_option("--grpPeaklist", type = "character")
60 ) 59 )
61 60
62 61
63 # store options 62 # store options
64 opt<- parse_args(OptionParser(option_list=option_list)) 63 opt <- parse_args(OptionParser(option_list = option_list))
65 print(opt) 64 print(opt)
66 65
67 loadRData <- function(rdata_path, name){ 66 loadRData <- function(rdata_path, name) {
68 #loads an RData file, and returns the named xset object if it is there 67 #loads an RData file, and returns the named xset object if it is there
69 load(rdata_path) 68 load(rdata_path)
70 return(get(ls()[ls() %in% name])) 69 return(get(ls()[ls() %in% name]))
71 } 70 }
72 71
73 getxcmsSetObject <- function(xobject) { 72 getxcmsSetObject <- function(xobject) {
74 # XCMS 1.x 73 # XCMS 1.x
75 if (class(xobject) == "xcmsSet") 74 if (class(xobject) == "xcmsSet")
76 return (xobject) 75 return(xobject)
77 # XCMS 3.x 76 # XCMS 3.x
78 if (class(xobject) == "XCMSnExp") { 77 if (class(xobject) == "XCMSnExp") {
79 # Get the legacy xcmsSet object 78 # Get the legacy xcmsSet object
80 suppressWarnings(xset <- as(xobject, 'xcmsSet')) 79 suppressWarnings(xset <- as(xobject, "xcmsSet"))
81 sampclass(xset) <- xset@phenoData$sample_group 80 xcms::sampclass(xset) <- xset@phenoData$sample_group
82 return (xset) 81 return(xset)
83 } 82 }
84 } 83 }
85 84
86 85
87 print(paste('pa', opt$pa)) 86 print(paste("pa", opt$pa))
88 print(opt$xset) 87 print(opt$xset)
89 88
90 print(opt$xcms_camera_option) 89 print(opt$xcms_camera_option)
91 # Requires 90 # Requires
92 pa <- loadRData(opt$pa, 'pa') 91 pa <- loadRData(opt$pa, "pa")
93 92
94 93
95 print(pa@fileList) 94 print(pa@fileList)
96 95
97 # Missing list element causes failures (should be updated 96 # Missing list element causes failures (should be updated
98 # in msPurity R package for future releases) 97 # in msPurity R package for future releases)
99 if (!exists('allfrag', where=pa@filter_frag_params)){ 98 if (!exists("allfrag", where = pa@filter_frag_params)) {
100 pa@filter_frag_params$allfrag <- FALSE 99 pa@filter_frag_params$allfrag <- FALSE
101 } 100 }
102 101
103 if (opt$xcms_camera_option=='xcms'){ 102 if (opt$xcms_camera_option == "xcms") {
104 103
105 xset <- loadRData(opt$xset, c('xset','xdata')) 104 xset <- loadRData(opt$xset, c("xset", "xdata"))
106 xset <- getxcmsSetObject(xset) 105 xset <- getxcmsSetObject(xset)
107 fix <- xset_pa_filename_fix(opt, pa, xset) 106 fix <- xset_pa_filename_fix(opt, pa, xset)
108 pa <- fix[[1]] 107 pa <- fix[[1]]
109 xset <- fix[[2]] 108 xset <- fix[[2]]
110 xa <- NULL 109 xa <- NULL
111 }else{ 110 }else{
112 111
113 xa <- loadRData(opt$xset, 'xa') 112 xa <- loadRData(opt$xset, "xa")
114 fix <- xset_pa_filename_fix(opt, pa, xa@xcmsSet) 113 fix <- xset_pa_filename_fix(opt, pa, xa@xcmsSet)
115 pa <- fix[[1]] 114 pa <- fix[[1]]
116 xa@xcmsSet <- fix[[2]] 115 xa@xcmsSet <- fix[[2]]
117 xset <- NULL 116 xset <- NULL
118 } 117 }
119 118
120 119
121 120 if (is.null(opt$grpPeaklist)) {
122 if(is.null(opt$grpPeaklist)){ 121 grpPeaklist <- NA
123 grpPeaklist = NA
124 }else{ 122 }else{
125 grpPeaklist = opt$grpPeaklist 123 grpPeaklist <- opt$grpPeaklist
126 } 124 }
127 125
128
129
130 dbPth <- msPurity::createDatabase(pa, 126 dbPth <- msPurity::createDatabase(pa,
131 xset=xset, 127 xset = xset,
132 xsa=xa, 128 xsa = xa,
133 outDir=opt$outDir, 129 outDir = opt$outDir,
134 grpPeaklist=grpPeaklist, 130 grpPeaklist = grpPeaklist,
135 dbName='createDatabase_output.sqlite' 131 dbName = "createDatabase_output.sqlite"
136 ) 132 )
137 133
138 134
139 135
140 136
141 137
142 if (!is.null(opt$eic)){ 138 if (!is.null(opt$eic)) {
143 139
144 if (is.null(xset)){ 140 if (is.null(xset)) {
145 xset <- xa@xcmsSet 141 xset <- xa@xcmsSet
146 } 142 }
147 # previous check should have matched filelists together 143 # previous check should have matched filelists together
148 xset@filepaths <- unname(pa@fileList) 144 xset@filepaths <- unname(pa@fileList)
149 145
150 convert2Raw <- function(x, xset){ 146 convert2Raw <- function(x, xset) {
151 sid <- unique(x$sample) 147 sid <- unique(x$sample)
152 # for each file get list of peaks 148 # for each file get list of peaks
153 x$rt_raw <- xset@rt$raw[[sid]][match(x$rt, xset@rt$corrected[[sid]])] 149 x$rt_raw <- xset@rt$raw[[sid]][match(x$rt, xset@rt$corrected[[sid]])]
154 x$rtmin_raw <- xset@rt$raw[[sid]][match(x$rtmin, xset@rt$corrected[[sid]])] 150 x$rtmin_raw <- xset@rt$raw[[sid]][match(x$rtmin, xset@rt$corrected[[sid]])]
155 x$rtmax_raw <- xset@rt$raw[[sid]][match(x$rtmax, xset@rt$corrected[[sid]])] 151 x$rtmax_raw <- xset@rt$raw[[sid]][match(x$rtmax, xset@rt$corrected[[sid]])]
156 return(x) 152 return(x)
157 153
158 } 154 }
159 155
160 xset@peaks <- as.matrix(plyr::ddply(data.frame(xset@peaks), ~ sample, convert2Raw, xset=xset)) 156 xset@peaks <- as.matrix(
157 plyr::ddply(data.frame(xset@peaks), ~ sample, convert2Raw, xset = xset))
161 158
162 # Saves the EICS into the previously created database 159 # Saves the EICS into the previously created database
163 px <- msPurity::purityX(xset, 160 px <- msPurity::purityX(xset,
164 saveEIC = TRUE, 161 saveEIC = TRUE,
165 cores=1, 162 cores = 1,
166 sqlitePth=dbPth, 163 sqlitePth = dbPth,
167 rtrawColumns = TRUE) 164 rtrawColumns = TRUE)
168 165
169 } 166 }
170 167
171 closeAllConnections() 168 closeAllConnections()