comparison dimsPredictPuritySingle.R @ 8:b16952cc06d2 draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 7e1748612a9f9dce11a9e54ff36752b600e7aea3
author computational-metabolomics
date Wed, 12 Jun 2024 16:02:20 +0000
parents 6b9a83e08467
children
comparison
equal deleted inserted replaced
7:942e676515fe 8:b16952cc06d2
1 library(msPurity) 1 library(msPurity)
2 library(optparse) 2 library(optparse)
3 print(sessionInfo()) 3 print(sessionInfo())
4 4
5 option_list <- list( 5 option_list <- list(
6 make_option(c("--mzML_file"), type = "character"), 6 make_option(c("--mzML_file"), type = "character"),
7 make_option(c("--mzML_files"), type = "character"), 7 make_option(c("--mzML_files"), type = "character"),
8 make_option(c("--mzML_filename"), type = "character", default = ""), 8 make_option(c("--mzML_filename"), type = "character", default = ""),
9 make_option(c("--mzML_galaxy_names"), type = "character", default = ""), 9 make_option(c("--mzML_galaxy_names"), type = "character", default = ""),
10 make_option(c("--peaks_file"), type = "character"), 10 make_option(c("--peaks_file"), type = "character"),
11 make_option(c("-o", "--out_dir"), type = "character"), 11 make_option(c("-o", "--out_dir"), type = "character"),
12 make_option("--minoffset", default = 0.5), 12 make_option("--minoffset", default = 0.5),
13 make_option("--maxoffset", default = 0.5), 13 make_option("--maxoffset", default = 0.5),
14 make_option("--ilim", default = 0.05), 14 make_option("--ilim", default = 0.05),
15 make_option("--ppm", default = 4), 15 make_option("--ppm", default = 4),
16 make_option("--dimspy", action = "store_true"), 16 make_option("--dimspy", action = "store_true"),
17 make_option("--sim", action = "store_true"), 17 make_option("--sim", action = "store_true"),
18 make_option("--remove_nas", action = "store_true"), 18 make_option("--remove_nas", action = "store_true"),
19 make_option("--iwNorm", default = "none", type = "character"), 19 make_option("--iwNorm", default = "none", type = "character"),
20 make_option("--file_num_dimspy", default = 1), 20 make_option("--file_num_dimspy", default = 1),
21 make_option("--exclude_isotopes", action = "store_true"), 21 make_option("--exclude_isotopes", action = "store_true"),
22 make_option("--isotope_matrix", type = "character") 22 make_option("--isotope_matrix", type = "character")
23 ) 23 )
24 24
25 # store options 25 # store options
26 opt <- parse_args(OptionParser(option_list = option_list)) 26 opt <- parse_args(OptionParser(option_list = option_list))
27 27
41 mzML_filename <- trimws(mzML_filename) 41 mzML_filename <- trimws(mzML_filename)
42 mzML_files <- str_to_vec(mzML_files) 42 mzML_files <- str_to_vec(mzML_files)
43 galaxy_names <- str_to_vec(galaxy_names) 43 galaxy_names <- str_to_vec(galaxy_names)
44 if (mzML_filename %in% galaxy_names) { 44 if (mzML_filename %in% galaxy_names) {
45 return(mzML_files[galaxy_names == mzML_filename]) 45 return(mzML_files[galaxy_names == mzML_filename])
46 }else{ 46 } else {
47 stop(paste("mzML file not found - ", mzML_filename)) 47 stop(paste("mzML file not found - ", mzML_filename))
48 } 48 }
49 } 49 }
50 50
51 51
52 if (is.null(opt$dimspy)) { 52 if (is.null(opt$dimspy)) {
53 df <- read.table(opt$peaks_file, header = TRUE, sep = "\t") 53 df <- read.table(opt$peaks_file, header = TRUE, sep = "\t")
54 if (file.exists(opt$mzML_file)) { 54 if (file.exists(opt$mzML_file)) {
55 mzML_file <- opt$mzML_file 55 mzML_file <- opt$mzML_file
56 }else if (!is.null(opt$mzML_files)) { 56 } else if (!is.null(opt$mzML_files)) {
57 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, 57 mzML_file <- find_mzml_file(
58 opt$mzML_filename) 58 opt$mzML_files, opt$mzML_galaxy_names,
59 }else{ 59 opt$mzML_filename
60 )
61 } else {
60 mzML_file <- file.path(opt$mzML_file, filename) 62 mzML_file <- file.path(opt$mzML_file, filename)
61 } 63 }
62 }else{ 64 } else {
63 indf <- read.table(opt$peaks_file, 65 indf <- read.table(opt$peaks_file,
64 header = TRUE, sep = "\t", stringsAsFactors = FALSE) 66 header = TRUE, sep = "\t", stringsAsFactors = FALSE
67 )
65 68
66 filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] 69 filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy]
67 print(filename) 70 print(filename)
68 # check if the data file is mzML or RAW (can only use mzML currently) so 71 # check if the data file is mzML or RAW (can only use mzML currently) so
69 # we expect an mzML file of the same name in the same folder 72 # we expect an mzML file of the same name in the same folder
73 filename <- sub("raw", "mzML", filename, ignore.case = TRUE) 76 filename <- sub("raw", "mzML", filename, ignore.case = TRUE)
74 print(filename) 77 print(filename)
75 78
76 if (file.exists(opt$mzML_file)) { 79 if (file.exists(opt$mzML_file)) {
77 mzML_file <- opt$mzML_file 80 mzML_file <- opt$mzML_file
78 }else if (!is.null(opt$mzML_files)) { 81 } else if (!is.null(opt$mzML_files)) {
79 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename) 82 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename)
80 }else{ 83 } else {
81 mzML_file <- file.path(opt$mzML_file, filename) 84 mzML_file <- file.path(opt$mzML_file, filename)
82 } 85 }
83 86
84 # Update the dimspy output with the correct information 87 # Update the dimspy output with the correct information
85 df <- indf[4:nrow(indf), ] 88 df <- indf[4:nrow(indf), ]
93 } 96 }
94 df$mz <- as.numeric(df$mz) 97 df$mz <- as.numeric(df$mz)
95 } 98 }
96 99
97 if (!is.null(opt$remove_nas)) { 100 if (!is.null(opt$remove_nas)) {
98 df <- df[!is.na(df$mz), ] 101 df <- df[!is.na(df$mz), ]
99 } 102 }
100 103
101 if (is.null(opt$isotope_matrix)) { 104 if (is.null(opt$isotope_matrix)) {
102 im <- NULL 105 im <- NULL
103 }else{ 106 } else {
104 im <- read.table(opt$isotope_matrix, 107 im <- read.table(opt$isotope_matrix,
105 header = TRUE, sep = "\t", stringsAsFactors = FALSE) 108 header = TRUE, sep = "\t", stringsAsFactors = FALSE
109 )
106 } 110 }
107 111
108 if (is.null(opt$exclude_isotopes)) { 112 if (is.null(opt$exclude_isotopes)) {
109 isotopes <- FALSE 113 isotopes <- FALSE
110 }else{ 114 } else {
111 isotopes <- TRUE 115 isotopes <- TRUE
112 } 116 }
113 117
114 if (is.null(opt$sim)) { 118 if (is.null(opt$sim)) {
115 sim <- FALSE 119 sim <- FALSE
116 }else{ 120 } else {
117 sim <- TRUE 121 sim <- TRUE
118 } 122 }
119 123
120 minOffset <- as.numeric(opt$minoffset) 124 minOffset <- as.numeric(opt$minoffset)
121 maxOffset <- as.numeric(opt$maxoffset) 125 maxOffset <- as.numeric(opt$maxoffset)
122 126
123 if (opt$iwNorm == "none") { 127 if (opt$iwNorm == "none") {
124 iwNorm <- FALSE 128 iwNorm <- FALSE
125 iwNormFun <- NULL 129 iwNormFun <- NULL
126 }else if (opt$iwNorm == "gauss") { 130 } else if (opt$iwNorm == "gauss") {
127 iwNorm <- TRUE 131 iwNorm <- TRUE
128 iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) 132 iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
129 }else if (opt$iwNorm == "rcosine") { 133 } else if (opt$iwNorm == "rcosine") {
130 iwNorm <- TRUE 134 iwNorm <- TRUE
131 iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) 135 iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
132 }else if (opt$iwNorm == "QE5") { 136 } else if (opt$iwNorm == "QE5") {
133 iwNorm <- TRUE 137 iwNorm <- TRUE
134 iwNormFun <- msPurity::iwNormQE.5() 138 iwNormFun <- msPurity::iwNormQE.5()
135 } 139 }
136 140
137 print("FIRST ROWS OF PEAK FILE") 141 print("FIRST ROWS OF PEAK FILE")
138 print(head(df)) 142 print(head(df))
139 print(mzML_file) 143 print(mzML_file)
140 predicted <- msPurity::dimsPredictPuritySingle(df$mz, 144 predicted <- msPurity::dimsPredictPuritySingle(df$mz,
141 filepth = mzML_file, 145 filepth = mzML_file,
142 minOffset = minOffset, 146 minOffset = minOffset,
143 maxOffset = maxOffset, 147 maxOffset = maxOffset,
144 ppm = opt$ppm, 148 ppm = opt$ppm,
145 mzML = TRUE, 149 mzML = TRUE,
146 sim = sim, 150 sim = sim,
147 ilim = opt$ilim, 151 ilim = opt$ilim,
148 isotopes = isotopes, 152 isotopes = isotopes,
149 im = im, 153 im = im,
150 iwNorm = iwNorm, 154 iwNorm = iwNorm,
151 iwNormFun = iwNormFun 155 iwNormFun = iwNormFun
152 ) 156 )
153 predicted <- cbind(df, predicted) 157 predicted <- cbind(df, predicted)
154 158
155 print(head(predicted)) 159 print(head(predicted))
156 print(file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv")) 160 print(file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"))
157 161
158 write.table(predicted, 162 write.table(predicted,
159 file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"), 163 file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"),
160 row.names = FALSE, sep = "\t") 164 row.names = FALSE, sep = "\t"
165 )