comparison flagRemove.R @ 6:d25273689e04 draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author computational-metabolomics
date Thu, 04 Mar 2021 12:31:13 +0000
parents d1d73007a255
children b91b9492a4bf
comparison
equal deleted inserted replaced
5:50409aeeeac5 6:d25273689e04
1 library(msPurity) 1 library(msPurity)
2 library(optparse) 2 library(optparse)
3 print(sessionInfo()) 3 print(sessionInfo())
4 option_list <- list( 4 option_list <- list(
5 make_option(c("-o", "--out_dir"), type="character", default=getwd(), 5 make_option(c("-o", "--out_dir"), type = "character", default = getwd(),
6 help="Output folder for resulting files [default = %default]" 6 help = "Output folder for resulting files [default = %default]"
7 ), 7 ),
8 make_option(c("-x", "--xset_path"), type="character", default=file.path(getwd(),"xset.rds"), 8 make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"),
9 help="The path to the xcmsSet object [default = %default]" 9 help = "The path to the xcmsSet object [default = %default]"
10 ), 10 ),
11 make_option("--polarity", default=NA, 11 make_option("--polarity", default = NA,
12 help="polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" 12 help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]"
13 ), 13 ),
14 make_option("--rsd_i_blank", default=100, 14 make_option("--rsd_i_blank", default = 100,
15 help="RSD threshold for the blank [default = %default]" 15 help = "RSD threshold for the blank [default = %default]"
16 ), 16 ),
17 make_option("--minfrac_blank", default=0.5, 17 make_option("--minfrac_blank", default = 0.5,
18 help="minimum fraction of files for features needed for the blank [default = %default]" 18 help = "minimum fraction of files for features needed for the blank [default = %default]"
19 ), 19 ),
20 make_option("--rsd_rt_blank", default=100, 20 make_option("--rsd_rt_blank", default = 100,
21 help="RSD threshold for the RT of the blank [default = %default]" 21 help = "RSD threshold for the RT of the blank [default = %default]"
22 ), 22 ),
23 23
24 make_option("--ithres_blank", default=0, 24 make_option("--ithres_blank", default = 0,
25 help="Intensity threshold for the blank [default = %default]" 25 help = "Intensity threshold for the blank [default = %default]"
26 ), 26 ),
27 make_option("--s2b", default=10, 27 make_option("--s2b", default = 10,
28 help="fold change (sample/blank) needed for sample peak to be allowed. e.g. 28 help = "fold change (sample/blank) needed for sample peak to be allowed. e.g.
29 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. 29 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10.
30 1000/10 = 100, so sample has fold change higher than the threshold and the peak 30 1000/10 = 100, so sample has fold change higher than the threshold and the peak
31 is not considered a blank [default = %default]" 31 is not considered a blank [default = %default]"
32 ), 32 ),
33 make_option("--blank_class", default='blank', type="character", 33 make_option("--blank_class", default = "blank", type = "character",
34 help="A string representing the class that will be used for the blank.[default = %default]" 34 help = "A string representing the class that will be used for the blank.[default = %default]"
35 ), 35 ),
36 make_option("--egauss_thr", default=NA, 36 make_option("--egauss_thr", default = NA,
37 help="Threshold for filtering out non gaussian shaped peaks. Note this only works 37 help = "Threshold for filtering out non gaussian shaped peaks. Note this only works
38 if the 'verbose columns' and 'fit gauss' was used with xcms 38 if the 'verbose columns' and 'fit gauss' was used with xcms
39 [default = %default]" 39 [default = %default]"
40 ), 40 ),
41 make_option("--rsd_i_sample", default=100, 41 make_option("--rsd_i_sample", default = 100,
42 help="RSD threshold for the samples [default = %default]" 42 help = "RSD threshold for the samples [default = %default]"
43 ), 43 ),
44 make_option("--minfrac_sample", default=0.8, 44 make_option("--minfrac_sample", default = 0.8,
45 help="minimum fraction of files for features needed for the samples [default = %default]" 45 help = "minimum fraction of files for features needed for the samples [default = %default]"
46 ), 46 ),
47 make_option("--rsd_rt_sample", default=100, 47 make_option("--rsd_rt_sample", default = 100,
48 help="RSD threshold for the RT of the samples [default = %default]" 48 help = "RSD threshold for the RT of the samples [default = %default]"
49 ), 49 ),
50 make_option("--ithres_sample", default=5000, 50 make_option("--ithres_sample", default = 5000,
51 help="Intensity threshold for the sample [default = %default]" 51 help = "Intensity threshold for the sample [default = %default]"
52 ), 52 ),
53 make_option("--grp_rm_ids", default=NA, 53 make_option("--grp_rm_ids", default = NA,
54 help="vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) 54 help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output)
55 [default = %default]" 55 [default = %default]"
56 ), 56 ),
57 make_option("--remove_spectra", action="store_true", 57 make_option("--remove_spectra", action = "store_true",
58 help=" TRUE if flagged spectra is to be removed [default = %default]" 58 help = "TRUE if flagged spectra is to be removed [default = %default]"
59 ), 59 ),
60 make_option("--minfrac_xcms", default=0.5, 60 make_option("--minfrac_xcms", default = 0.5,
61 help="minfrac for xcms grouping [default = %default]" 61 help = "minfrac for xcms grouping [default = %default]"
62 ), 62 ),
63 make_option("--mzwid", default=0.001, 63 make_option("--mzwid", default = 0.001,
64 help="mzwid for xcms grouping [default = %default]" 64 help = "mzwid for xcms grouping [default = %default]"
65 ), 65 ),
66 make_option("--bw", default=5, 66 make_option("--bw", default = 5,
67 help="bw for xcms grouping [default = %default]" 67 help = "bw for xcms grouping [default = %default]"
68 ), 68 ),
69 69
70 make_option("--temp_save", action="store_true", 70 make_option("--temp_save", action = "store_true",
71 help="Assign True if files for each step saved (for testing purposes) [default = %default]" 71 help = "Assign True if files for each step saved (for testing purposes) [default = %default]"
72 ), 72 ),
73 73
74 make_option("--samplelist", type="character", help="Sample list to determine the blank class") 74 make_option("--samplelist", type = "character", help = "Sample list to determine the blank class")
75
76
77
78
79 75
80 ) 76 )
81 77
82 #make_option("--multilist", action="store_true" 78 # nolint start
83 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" 79 # make_option("--multilist", action="store_true"
84 #), 80 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided"
81 # ),
82 # nolint end
85 83
86 # store options 84 # store options
87 opt<- parse_args(OptionParser(option_list=option_list)) 85 opt <- parse_args(OptionParser(option_list = option_list))
88 86
89 opt <- replace(opt, opt == "NA", NA) 87 opt <- replace(opt, opt == "NA", NA)
90 88
91 89 if (is.null(opt$temp_save)) {
92 90 temp_save <- FALSE
93
94 if (is.null(opt$temp_save)){
95 temp_save<-FALSE
96 }else{ 91 }else{
97 temp_save<-TRUE 92 temp_save <- TRUE
98 } 93 }
99 94
100 if (is.null(opt$remove_spectra)){ 95 if (is.null(opt$remove_spectra)) {
101 remove_spectra<-FALSE 96 remove_spectra <- FALSE
102 }else{ 97 }else{
103 remove_spectra<-TRUE 98 remove_spectra <- TRUE
104 } 99 }
105 100
106 101
107 print(opt) 102 print(opt)
108 103
109 getxcmsSetObject <- function(xobject) { 104 getxcmsSetObject <- function(xobject) {
110 # XCMS 1.x 105 # XCMS 1.x
111 if (class(xobject) == "xcmsSet") 106 if (class(xobject) == "xcmsSet")
112 return (xobject) 107 return(xobject)
113 # XCMS 3.x 108 # XCMS 3.x
114 if (class(xobject) == "XCMSnExp") { 109 if (class(xobject) == "XCMSnExp") {
115 # Get the legacy xcmsSet object 110 # Get the legacy xcmsSet object
116 suppressWarnings(xset <- as(xobject, 'xcmsSet')) 111 suppressWarnings(xset <- as(xobject, "xcmsSet"))
117 sampclass(xset) <- xset@phenoData$sample_group 112 xcms::sampclass(xset) <- xset@phenoData$sample_group
118 return (xset) 113 return(xset)
119 } 114 }
120 } 115 }
121 116
122 117
123 loadRData <- function(rdata_path, name){ 118 loadRData <- function(rdata_path, name) {
124 #loads an RData file, and returns the named xset object if it is there 119 #loads an RData file, and returns the named xset object if it is there
125 load(rdata_path) 120 load(rdata_path)
126 return(get(ls()[ls() %in% name])) 121 return(get(ls()[ls() %in% name]))
127 } 122 }
128 123
129 xset <- getxcmsSetObject(loadRData(opt$xset_path, c('xset','xdata'))) 124 xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata")))
130 125
131 print(xset) 126 print(xset)
132 if (is.null(opt$samplelist)){ 127 if (is.null(opt$samplelist)) {
133 blank_class <- opt$blank_class 128 blank_class <- opt$blank_class
134 }else{ 129 }else{
135 samplelist <- read.table(opt$samplelist, sep='\t', header=TRUE) 130 samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE)
136 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank=='yes']) 131 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"])
137 132
138 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] 133 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class]
139 if (length(chosen_blank)>1){ 134 if (length(chosen_blank) > 1) {
140 print('ERROR: only 1 blank is currently allowed to be used with this tool') 135 print("ERROR: only 1 blank is currently allowed to be used with this tool")
141 quit() 136 quit()
142 } 137 }
143 blank_class <- as.character(chosen_blank) 138 blank_class <- as.character(chosen_blank)
144 print(blank_class) 139 print(blank_class)
145 } 140 }
146 141
147 142
148 if (is.null(opt$multilist)){ 143 if (is.null(opt$multilist)) {
149 ffrm_out <- flag_remove(xset, 144 ffrm_out <- flag_remove(xset,
150 pol=opt$polarity, 145 pol = opt$polarity,
151 rsd_i_blank=opt$rsd_i_blank, 146 rsd_i_blank = opt$rsd_i_blank,
152 minfrac_blank=opt$minfrac_blank, 147 minfrac_blank = opt$minfrac_blank,
153 rsd_rt_blank=opt$rsd_rt_blank, 148 rsd_rt_blank = opt$rsd_rt_blank,
154 ithres_blank=opt$ithres_blank, 149 ithres_blank = opt$ithres_blank,
155 s2b=opt$s2b, 150 s2b = opt$s2b,
156 ref.class=blank_class, 151 ref.class = blank_class,
157 egauss_thr=opt$egauss_thr, 152 egauss_thr = opt$egauss_thr,
158 rsd_i_sample=opt$rsd_i_sample, 153 rsd_i_sample = opt$rsd_i_sample,
159 minfrac_sample=opt$minfrac_sample, 154 minfrac_sample = opt$minfrac_sample,
160 rsd_rt_sample=opt$rsd_rt_sample, 155 rsd_rt_sample = opt$rsd_rt_sample,
161 ithres_sample=opt$ithres_sample, 156 ithres_sample = opt$ithres_sample,
162 minfrac_xcms=opt$minfrac_xcms, 157 minfrac_xcms = opt$minfrac_xcms,
163 mzwid=opt$mzwid, 158 mzwid = opt$mzwid,
164 bw=opt$bw, 159 bw = opt$bw,
165 out_dir=opt$out_dir, 160 out_dir = opt$out_dir,
166 temp_save=temp_save, 161 temp_save = temp_save,
167 remove_spectra=remove_spectra, 162 remove_spectra = remove_spectra,
168 grp_rm_ids=unlist(strsplit(as.character(opt$grp_rm_ids), split=", "))[[1]]) 163 grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]])
169 print('flag remove finished') 164 print("flag remove finished")
170 xset <- ffrm_out[[1]] 165 xset <- ffrm_out[[1]]
171 grp_peaklist <- ffrm_out[[2]] 166 grp_peaklist <- ffrm_out[[2]]
172 removed_peaks <- ffrm_out[[3]] 167 removed_peaks <- ffrm_out[[3]]
173 168
174 save.image(file=file.path(opt$out_dir, 'xset_filtered.RData'), version=2) 169 save.image(file = file.path(opt$out_dir, "xset_filtered.RData"), version = 2)
175 170
176 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) 171 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up)
177 peak_pth <- file.path(opt$out_dir, 'peaklist_filtered.tsv') 172 peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv")
178 print(peak_pth) 173 print(peak_pth)
179 write.table(data.frame('grpid'=rownames(grp_peaklist), 'ID'=rownames(grp_peaklist), grp_peaklist), 174 write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist),
180 peak_pth, row.names=FALSE, sep='\t') 175 peak_pth, row.names = FALSE, sep = "\t")
181 176
182 removed_peaks <- data.frame(removed_peaks) 177 removed_peaks <- data.frame(removed_peaks)
183 write.table(data.frame('ID'=rownames(removed_peaks),removed_peaks), 178 write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks),
184 file.path(opt$out_dir, 'removed_peaks.tsv'), row.names=FALSE, sep='\t') 179 file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t")
185 180
186 }else{ 181 }else{
187 182
188 183 # nolint start
189 # TODO 184 # TODO
190 #xsets <- split(xset, multilist_df$multlist) 185 #xsets <- split(xset, multilist_df$multlist)
191 # 186 #
192 #mult_grps <- unique(multilist_df$multlist) 187 #mult_grps <- unique(multilist_df$multlist)
193 # 188 #
194 #for (mgrp in mult_grps){ 189 #for (mgrp in mult_grps){
195 # xset_i <- xsets[mgrp] 190 # xset_i <- xsets[mgrp]
196 # xcms::group(xset_i, 191 # xcms::group(xset_i,
197 # 192 #
198 # } 193 # }
199 194 # nolint end
200 195
201 196
202 } 197 }
203
204