comparison flagRemove.R @ 0:7f3183988100 draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc-dirty"
author computational-metabolomics
date Wed, 27 Nov 2019 13:51:34 -0500
parents
children cf62d2c0104e
comparison
equal deleted inserted replaced
-1:000000000000 0:7f3183988100
1 library(msPurity)
2 library(optparse)
3 print(sessionInfo())
4 option_list <- list(
5 make_option(c("-o", "--out_dir"), type="character", default=getwd(),
6 help="Output folder for resulting files [default = %default]"
7 ),
8 make_option(c("-x", "--xset_path"), type="character", default=file.path(getwd(),"xset.rds"),
9 help="The path to the xcmsSet object [default = %default]"
10 ),
11 make_option("--polarity", default=NA,
12 help="polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]"
13 ),
14 make_option("--rsd_i_blank", default=100,
15 help="RSD threshold for the blank [default = %default]"
16 ),
17 make_option("--minfrac_blank", default=0.5,
18 help="minimum fraction of files for features needed for the blank [default = %default]"
19 ),
20 make_option("--rsd_rt_blank", default=100,
21 help="RSD threshold for the RT of the blank [default = %default]"
22 ),
23
24 make_option("--ithres_blank", default=0,
25 help="Intensity threshold for the blank [default = %default]"
26 ),
27 make_option("--s2b", default=10,
28 help="fold change (sample/blank) needed for sample peak to be allowed. e.g.
29 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10.
30 1000/10 = 100, so sample has fold change higher than the threshold and the peak
31 is not considered a blank [default = %default]"
32 ),
33 make_option("--blank_class", default='blank', type="character",
34 help="A string representing the class that will be used for the blank.[default = %default]"
35 ),
36 make_option("--egauss_thr", default=NA,
37 help="Threshold for filtering out non gaussian shaped peaks. Note this only works
38 if the 'verbose columns' and 'fit gauss' was used with xcms
39 [default = %default]"
40 ),
41 make_option("--rsd_i_sample", default=100,
42 help="RSD threshold for the samples [default = %default]"
43 ),
44 make_option("--minfrac_sample", default=0.8,
45 help="minimum fraction of files for features needed for the samples [default = %default]"
46 ),
47 make_option("--rsd_rt_sample", default=100,
48 help="RSD threshold for the RT of the samples [default = %default]"
49 ),
50 make_option("--ithres_sample", default=5000,
51 help="Intensity threshold for the sample [default = %default]"
52 ),
53 make_option("--grp_rm_ids", default=NA,
54 help="vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output)
55 [default = %default]"
56 ),
57 make_option("--remove_spectra", action="store_true",
58 help=" TRUE if flagged spectra is to be removed [default = %default]"
59 ),
60 make_option("--minfrac_xcms", default=0.5,
61 help="minfrac for xcms grouping [default = %default]"
62 ),
63 make_option("--mzwid", default=0.001,
64 help="mzwid for xcms grouping [default = %default]"
65 ),
66 make_option("--bw", default=5,
67 help="bw for xcms grouping [default = %default]"
68 ),
69
70 make_option("--temp_save", action="store_true",
71 help="Assign True if files for each step saved (for testing purposes) [default = %default]"
72 ),
73
74 make_option("--samplelist", type="character", help="Sample list to determine the blank class")
75
76
77
78
79
80 )
81
82 #make_option("--multilist", action="store_true"
83 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided"
84 #),
85
86 # store options
87 opt<- parse_args(OptionParser(option_list=option_list))
88
89 opt <- replace(opt, opt == "NA", NA)
90
91
92
93
94 if (is.null(opt$temp_save)){
95 temp_save<-FALSE
96 }else{
97 temp_save<-TRUE
98 }
99
100 if (is.null(opt$remove_spectra)){
101 remove_spectra<-FALSE
102 }else{
103 remove_spectra<-TRUE
104 }
105
106
107 print(opt)
108
109 getxcmsSetObject <- function(xobject) {
110 # XCMS 1.x
111 if (class(xobject) == "xcmsSet")
112 return (xobject)
113 # XCMS 3.x
114 if (class(xobject) == "XCMSnExp") {
115 # Get the legacy xcmsSet object
116 suppressWarnings(xset <- as(xobject, 'xcmsSet'))
117 sampclass(xset) <- xset@phenoData$sample_group
118 return (xset)
119 }
120 }
121
122
123 loadRData <- function(rdata_path, name){
124 #loads an RData file, and returns the named xset object if it is there
125 load(rdata_path)
126 return(get(ls()[ls() %in% name]))
127 }
128
129 xset <- getxcmsSetObject(loadRData(opt$xset_path, c('xset','xdata')))
130
131 print(xset)
132 if (is.null(opt$samplelist)){
133 blank_class <- opt$blank_class
134 }else{
135 samplelist <- read.table(opt$samplelist, sep='\t', header=TRUE)
136 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank=='yes'])
137
138 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class]
139 if (length(chosen_blank)>1){
140 print('ERROR: only 1 blank is currently allowed to be used with this tool')
141 exit()
142 }
143 blank_class <- as.character(chosen_blank)
144 print(blank_class)
145 }
146
147
148 if (is.null(opt$multilist)){
149 ffrm_out <- flag_remove(xset,
150 pol=opt$polarity,
151 rsd_i_blank=opt$rsd_i_blank,
152 minfrac_blank=opt$minfrac_blank,
153 rsd_rt_blank=opt$rsd_rt_blank,
154 ithres_blank=opt$ithres_blank,
155 s2b=opt$s2b,
156 ref.class=blank_class,
157 egauss_thr=opt$egauss_thr,
158 rsd_i_sample=opt$rsd_i_sample,
159 minfrac_sample=opt$minfrac_sample,
160 rsd_rt_sample=opt$rsd_rt_sample,
161 ithres_sample=opt$ithres_sample,
162 minfrac_xcms=opt$minfrac_xcms,
163 mzwid=opt$mzwid,
164 bw=opt$bw,
165 out_dir=opt$out_dir,
166 temp_save=temp_save,
167 remove_spectra=remove_spectra,
168 grp_rm_ids=unlist(strsplit(as.character(opt$grp_rm_ids), split=", "))[[1]])
169 print('flag remove finished')
170 xset <- ffrm_out[[1]]
171 grp_peaklist <- ffrm_out[[2]]
172 removed_peaks <- ffrm_out[[3]]
173
174 save.image(file=file.path(opt$out_dir, 'xset_filtered.RData'))
175
176 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up)
177 peak_pth <- file.path(opt$out_dir, 'peaklist_filtered.tsv')
178 print(peak_pth)
179 write.table(data.frame('grpid'=rownames(grp_peaklist), 'ID'=rownames(grp_peaklist), grp_peaklist),
180 peak_pth, row.names=FALSE, sep='\t')
181
182 removed_peaks <- data.frame(removed_peaks)
183 write.table(data.frame('ID'=rownames(removed_peaks),removed_peaks),
184 file.path(opt$out_dir, 'removed_peaks.tsv'), row.names=FALSE, sep='\t')
185
186 }else{
187
188
189 # TODO
190 #xsets <- split(xset, multilist_df$multlist)
191 #
192 #mult_grps <- unique(multilist_df$multlist)
193 #
194 #for (mgrp in mult_grps){
195 # xset_i <- xsets[mgrp]
196 # xcms::group(xset_i,
197 #
198 # }
199
200
201
202 }
203
204