Mercurial > repos > computational-metabolomics > mspurity_createdatabase
comparison createDatabase.R @ 6:2f71b3495221 draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
| author | computational-metabolomics |
|---|---|
| date | Thu, 04 Mar 2021 12:27:21 +0000 |
| parents | 3d36a391612c |
| children | efd14b326007 |
comparison
equal
deleted
inserted
replaced
| 5:3ec6fd8e4c17 | 6:2f71b3495221 |
|---|---|
| 1 library(msPurity) | 1 library(msPurity) |
| 2 library(optparse) | 2 library(optparse) |
| 3 library(xcms) | 3 library(xcms) |
| 4 library(CAMERA) | 4 library(CAMERA) |
| 5 print(sessionInfo()) | 5 print(sessionInfo()) |
| 6 print('CREATING DATABASE') | 6 print("CREATING DATABASE") |
| 7 | 7 |
| 8 xset_pa_filename_fix <- function(opt, pa, xset){ | 8 xset_pa_filename_fix <- function(opt, pa, xset) { |
| 9 | 9 |
| 10 if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)){ | 10 if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)) { |
| 11 # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables | 11 # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables |
| 12 # needs to be done due to Galaxy moving the files around and screwing up any links to files | 12 # needs to be done due to Galaxy moving the files around and screwing up any links to files |
| 13 | 13 |
| 14 filepaths <- trimws(strsplit(opt$mzML_files, ',')[[1]]) | 14 filepaths <- trimws(strsplit(opt$mzML_files, ",")[[1]]) |
| 15 filepaths <- filepaths[filepaths != ""] | 15 filepaths <- filepaths[filepaths != ""] |
| 16 new_names <- basename(filepaths) | |
| 17 | 16 |
| 18 galaxy_names <- trimws(strsplit(opt$galaxy_names, ',')[[1]]) | 17 galaxy_names <- trimws(strsplit(opt$galaxy_names, ",")[[1]]) |
| 19 galaxy_names <- galaxy_names[galaxy_names != ""] | 18 galaxy_names <- galaxy_names[galaxy_names != ""] |
| 20 | 19 |
| 21 nsave <- names(pa@fileList) | 20 nsave <- names(pa@fileList) |
| 22 old_filenames <- basename(pa@fileList) | 21 old_filenames <- basename(pa@fileList) |
| 23 pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)] | 22 pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)] |
| 26 pa@puritydf$filename <- basename(pa@fileList[match(pa@puritydf$filename, old_filenames)]) | 25 pa@puritydf$filename <- basename(pa@fileList[match(pa@puritydf$filename, old_filenames)]) |
| 27 pa@grped_df$filename <- basename(pa@fileList[match(pa@grped_df$filename, old_filenames)]) | 26 pa@grped_df$filename <- basename(pa@fileList[match(pa@grped_df$filename, old_filenames)]) |
| 28 } | 27 } |
| 29 | 28 |
| 30 | 29 |
| 31 if(!all(basename(pa@fileList)==basename(xset@filepaths))){ | 30 if (!all(basename(pa@fileList) == basename(xset@filepaths))) { |
| 32 if(!all(names(pa@fileList)==basename(xset@filepaths))){ | 31 if (!all(names(pa@fileList) == basename(xset@filepaths))) { |
| 33 print('FILELISTS DO NOT MATCH') | 32 print("FILELISTS DO NOT MATCH") |
| 34 message('FILELISTS DO NOT MATCH') | 33 message("FILELISTS DO NOT MATCH") |
| 35 quit(status = 1) | 34 quit(status = 1) |
| 36 }else{ | 35 }else{ |
| 37 xset@filepaths <- unname(pa@fileList) | 36 xset@filepaths <- unname(pa@fileList) |
| 38 } | 37 } |
| 39 } | 38 } |
| 46 | 45 |
| 47 | 46 |
| 48 | 47 |
| 49 | 48 |
| 50 option_list <- list( | 49 option_list <- list( |
| 51 make_option(c("-o", "--outDir"), type="character"), | 50 make_option(c("-o", "--outDir"), type = "character"), |
| 52 make_option("--pa", type="character"), | 51 make_option("--pa", type = "character"), |
| 53 make_option("--xset_xa", type="character"), | 52 make_option("--xset_xa", type = "character"), |
| 54 make_option("--xcms_camera_option", type="character"), | 53 make_option("--xcms_camera_option", type = "character"), |
| 55 make_option("--eic", action="store_true"), | 54 make_option("--eic", action = "store_true"), |
| 56 make_option("--cores", default=4), | 55 make_option("--cores", default = 4), |
| 57 make_option("--mzML_files", type="character"), | 56 make_option("--mzML_files", type = "character"), |
| 58 make_option("--galaxy_names", type="character"), | 57 make_option("--galaxy_names", type = "character"), |
| 59 make_option("--grpPeaklist", type="character") | 58 make_option("--grpPeaklist", type = "character") |
| 60 ) | 59 ) |
| 61 | 60 |
| 62 | 61 |
| 63 # store options | 62 # store options |
| 64 opt<- parse_args(OptionParser(option_list=option_list)) | 63 opt <- parse_args(OptionParser(option_list = option_list)) |
| 65 print(opt) | 64 print(opt) |
| 66 | 65 |
| 67 loadRData <- function(rdata_path, name){ | 66 loadRData <- function(rdata_path, name) { |
| 68 #loads an RData file, and returns the named xset object if it is there | 67 #loads an RData file, and returns the named xset object if it is there |
| 69 load(rdata_path) | 68 load(rdata_path) |
| 70 return(get(ls()[ls() %in% name])) | 69 return(get(ls()[ls() %in% name])) |
| 71 } | 70 } |
| 72 | 71 |
| 73 getxcmsSetObject <- function(xobject) { | 72 getxcmsSetObject <- function(xobject) { |
| 74 # XCMS 1.x | 73 # XCMS 1.x |
| 75 if (class(xobject) == "xcmsSet") | 74 if (class(xobject) == "xcmsSet") |
| 76 return (xobject) | 75 return(xobject) |
| 77 # XCMS 3.x | 76 # XCMS 3.x |
| 78 if (class(xobject) == "XCMSnExp") { | 77 if (class(xobject) == "XCMSnExp") { |
| 79 # Get the legacy xcmsSet object | 78 # Get the legacy xcmsSet object |
| 80 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | 79 suppressWarnings(xset <- as(xobject, "xcmsSet")) |
| 81 sampclass(xset) <- xset@phenoData$sample_group | 80 xcms::sampclass(xset) <- xset@phenoData$sample_group |
| 82 return (xset) | 81 return(xset) |
| 83 } | 82 } |
| 84 } | 83 } |
| 85 | 84 |
| 86 | 85 |
| 87 print(paste('pa', opt$pa)) | 86 print(paste("pa", opt$pa)) |
| 88 print(opt$xset) | 87 print(opt$xset) |
| 89 | 88 |
| 90 print(opt$xcms_camera_option) | 89 print(opt$xcms_camera_option) |
| 91 # Requires | 90 # Requires |
| 92 pa <- loadRData(opt$pa, 'pa') | 91 pa <- loadRData(opt$pa, "pa") |
| 93 | 92 |
| 94 | 93 |
| 95 print(pa@fileList) | 94 print(pa@fileList) |
| 96 | 95 |
| 97 # Missing list element causes failures (should be updated | 96 # Missing list element causes failures (should be updated |
| 98 # in msPurity R package for future releases) | 97 # in msPurity R package for future releases) |
| 99 if (!exists('allfrag', where=pa@filter_frag_params)){ | 98 if (!exists("allfrag", where = pa@filter_frag_params)) { |
| 100 pa@filter_frag_params$allfrag <- FALSE | 99 pa@filter_frag_params$allfrag <- FALSE |
| 101 } | 100 } |
| 102 | 101 |
| 103 if (opt$xcms_camera_option=='xcms'){ | 102 if (opt$xcms_camera_option == "xcms") { |
| 104 | 103 |
| 105 xset <- loadRData(opt$xset, c('xset','xdata')) | 104 xset <- loadRData(opt$xset, c("xset", "xdata")) |
| 106 xset <- getxcmsSetObject(xset) | 105 xset <- getxcmsSetObject(xset) |
| 107 fix <- xset_pa_filename_fix(opt, pa, xset) | 106 fix <- xset_pa_filename_fix(opt, pa, xset) |
| 108 pa <- fix[[1]] | 107 pa <- fix[[1]] |
| 109 xset <- fix[[2]] | 108 xset <- fix[[2]] |
| 110 xa <- NULL | 109 xa <- NULL |
| 111 }else{ | 110 }else{ |
| 112 | 111 |
| 113 xa <- loadRData(opt$xset, 'xa') | 112 xa <- loadRData(opt$xset, "xa") |
| 114 fix <- xset_pa_filename_fix(opt, pa, xa@xcmsSet) | 113 fix <- xset_pa_filename_fix(opt, pa, xa@xcmsSet) |
| 115 pa <- fix[[1]] | 114 pa <- fix[[1]] |
| 116 xa@xcmsSet <- fix[[2]] | 115 xa@xcmsSet <- fix[[2]] |
| 117 xset <- NULL | 116 xset <- NULL |
| 118 } | 117 } |
| 119 | 118 |
| 120 | 119 |
| 121 | 120 if (is.null(opt$grpPeaklist)) { |
| 122 if(is.null(opt$grpPeaklist)){ | 121 grpPeaklist <- NA |
| 123 grpPeaklist = NA | |
| 124 }else{ | 122 }else{ |
| 125 grpPeaklist = opt$grpPeaklist | 123 grpPeaklist <- opt$grpPeaklist |
| 126 } | 124 } |
| 127 | 125 |
| 128 | |
| 129 | |
| 130 dbPth <- msPurity::createDatabase(pa, | 126 dbPth <- msPurity::createDatabase(pa, |
| 131 xset=xset, | 127 xset = xset, |
| 132 xsa=xa, | 128 xsa = xa, |
| 133 outDir=opt$outDir, | 129 outDir = opt$outDir, |
| 134 grpPeaklist=grpPeaklist, | 130 grpPeaklist = grpPeaklist, |
| 135 dbName='createDatabase_output.sqlite' | 131 dbName = "createDatabase_output.sqlite" |
| 136 ) | 132 ) |
| 137 | 133 |
| 138 | 134 |
| 139 | 135 |
| 140 | 136 |
| 141 | 137 |
| 142 if (!is.null(opt$eic)){ | 138 if (!is.null(opt$eic)) { |
| 143 | 139 |
| 144 if (is.null(xset)){ | 140 if (is.null(xset)) { |
| 145 xset <- xa@xcmsSet | 141 xset <- xa@xcmsSet |
| 146 } | 142 } |
| 147 # previous check should have matched filelists together | 143 # previous check should have matched filelists together |
| 148 xset@filepaths <- unname(pa@fileList) | 144 xset@filepaths <- unname(pa@fileList) |
| 149 | 145 |
| 150 convert2Raw <- function(x, xset){ | 146 convert2Raw <- function(x, xset) { |
| 151 sid <- unique(x$sample) | 147 sid <- unique(x$sample) |
| 152 # for each file get list of peaks | 148 # for each file get list of peaks |
| 153 x$rt_raw <- xset@rt$raw[[sid]][match(x$rt, xset@rt$corrected[[sid]])] | 149 x$rt_raw <- xset@rt$raw[[sid]][match(x$rt, xset@rt$corrected[[sid]])] |
| 154 x$rtmin_raw <- xset@rt$raw[[sid]][match(x$rtmin, xset@rt$corrected[[sid]])] | 150 x$rtmin_raw <- xset@rt$raw[[sid]][match(x$rtmin, xset@rt$corrected[[sid]])] |
| 155 x$rtmax_raw <- xset@rt$raw[[sid]][match(x$rtmax, xset@rt$corrected[[sid]])] | 151 x$rtmax_raw <- xset@rt$raw[[sid]][match(x$rtmax, xset@rt$corrected[[sid]])] |
| 156 return(x) | 152 return(x) |
| 157 | 153 |
| 158 } | 154 } |
| 159 | 155 |
| 160 xset@peaks <- as.matrix(plyr::ddply(data.frame(xset@peaks), ~ sample, convert2Raw, xset=xset)) | 156 xset@peaks <- as.matrix( |
| 157 plyr::ddply(data.frame(xset@peaks), ~ sample, convert2Raw, xset = xset)) | |
| 161 | 158 |
| 162 # Saves the EICS into the previously created database | 159 # Saves the EICS into the previously created database |
| 163 px <- msPurity::purityX(xset, | 160 px <- msPurity::purityX(xset, |
| 164 saveEIC = TRUE, | 161 saveEIC = TRUE, |
| 165 cores=1, | 162 cores = 1, |
| 166 sqlitePth=dbPth, | 163 sqlitePth = dbPth, |
| 167 rtrawColumns = TRUE) | 164 rtrawColumns = TRUE) |
| 168 | 165 |
| 169 } | 166 } |
| 170 | 167 |
| 171 closeAllConnections() | 168 closeAllConnections() |
