Mercurial > repos > yguitton > metams_rungc
comparison metaMS_runGC.r @ 4:c10824185547 draft
planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit 174a1713024f246c1485cbd75218577e89353522
author | workflow4metabolomics |
---|---|
date | Wed, 03 Jul 2019 05:14:32 -0400 |
parents | |
children | b8d4129dd2a6 |
comparison
equal
deleted
inserted
replaced
3:c75532b75ba1 | 4:c10824185547 |
---|---|
1 #!/usr/bin/env Rscript | |
2 # metaMS_runGC.r version="3.0.0" | |
3 #created by Yann GUITTON and updated by Julien SAINT-VANNE | |
4 #use RI options + add try on plotUnknown add session Info | |
5 #use make.names in sampleMetadata to avoid issues with files names | |
6 | |
7 | |
8 # ----- LOG FILE ----- | |
9 #log_file=file("log.txt", open = "wt") | |
10 #sink(log_file) | |
11 #sink(log_file, type = "output") | |
12 | |
13 | |
14 # ----- PACKAGE ----- | |
15 cat("\tSESSION INFO\n\n") | |
16 | |
17 #Import the different functions and packages | |
18 source_local <- function(fname) { | |
19 argv <- commandArgs(trailingOnly = FALSE) | |
20 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) | |
21 source(paste(base_dir, fname, sep="/")) | |
22 } | |
23 source_local("lib_metams.r") | |
24 | |
25 pkgs <- c("metaMS","stringr","batch","CAMERA") #"batch" necessary for parseCommandArgs function | |
26 loadAndDisplayPackages(pkgs) | |
27 | |
28 cat("\n\n") | |
29 | |
30 modNamC <- "metaMS:runGC" ## module name | |
31 cat("\nStart of the '", modNamC, "' Galaxy module call: ", format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") | |
32 | |
33 | |
34 # ----- PROCESSING INFILE ----- | |
35 cat("\n\n\tARGUMENTS PROCESSING INFO\n\n") | |
36 args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects | |
37 #write.table(as.matrix(args), col.names=F, quote=F, sep='\t\t') | |
38 print(cbind(value = unlist(args))) | |
39 | |
40 # ----- PROCESSING INFILE ----- | |
41 cat("\n\n\tARGUMENTS PROCESSING INFO\n\n") | |
42 | |
43 # Saving the specific parameters | |
44 #RI parameter | |
45 if (args$ri!="NULL"){ | |
46 RIarg=read.table(args$ri) | |
47 if (ncol(RIarg) < 2) RIarg=read.table(args$ri, h=T, sep=";") | |
48 if (ncol(RIarg) < 2) RIarg=read.table(args$ri, h=T, sep="\t") | |
49 if (ncol(RIarg) < 2) RIarg=read.table(args$ri, h=T, sep=",") | |
50 if (ncol(RIarg) < 2) { | |
51 error_message="Your RI file seems not well formatted. The column separators accepted are ; , and tabulation" | |
52 print(error_message) | |
53 stop(error_message) | |
54 } | |
55 #to do check real column names | |
56 colnames(RIarg)<-c("rt","RI") | |
57 } else { | |
58 RIarg = NULL | |
59 } | |
60 | |
61 #RIshift parameter | |
62 if (args$rishift!="none"){ | |
63 RIshift=args$rishift | |
64 } else { | |
65 RIshift = "none" | |
66 } | |
67 | |
68 #Personal database parameter | |
69 if (args$db!="NULL"){ | |
70 DBgc=args$db | |
71 } else { | |
72 DBgc = NULL | |
73 } | |
74 | |
75 #settings process | |
76 if (args$settings=="default") { | |
77 cat("Using default parameters") | |
78 data(FEMsettings) | |
79 if (args$rtrange[1]!="NULL") { | |
80 rtrange=args$rtrange | |
81 } else { | |
82 rtrange=NULL | |
83 } | |
84 | |
85 if (!is.null(DBgc)){ | |
86 manual <- read.msp(DBgc) | |
87 DBgc <- createSTDdbGC(stdInfo = NULL, settings = TSQXLS.GC, manualDB = manual) | |
88 } | |
89 | |
90 #use RI instead of rt for time comparison vs DB | |
91 if (RIshift!="none"){ | |
92 TSQXLS.GC@match2DB.timeComparison<-"RI" | |
93 TSQXLS.GC@match2DB.RIdiff<-as.numeric(RIshift) | |
94 TSQXLS.GC@betweenSamples.timeComparison<-"RI" | |
95 TSQXLS.GC@betweenSamples.RIdiff<-as.numeric(RIshift) | |
96 } | |
97 nSlaves=args$nSlaves | |
98 } | |
99 | |
100 if (args$settings=="User_defined") { | |
101 cat("Using user's parameters\n") | |
102 fwhmparam=args$fwhm | |
103 rtdiffparam=args$rtdiff | |
104 minfeatparam=args$minfeat | |
105 simthreshparam=args$simthreshold | |
106 minclassfractionparam=args$minclassfraction | |
107 minclasssizeparam=args$minclasssize | |
108 | |
109 if (args$rtrange!="NULL") { | |
110 rtrange=args$rtrange | |
111 cat("rtrange= ",rtrange) | |
112 } else { | |
113 rtrange=NULL | |
114 cat("rtrange= ",rtrange) | |
115 } | |
116 | |
117 nSlaves=args$nSlaves | |
118 | |
119 GALAXY.GC <- | |
120 metaMSsettings("protocolName" = "GALAXY.GC", | |
121 "chrom" = "GC", | |
122 PeakPicking = list( | |
123 method = "matchedFilter", | |
124 step = 0.5, | |
125 steps = 2, | |
126 mzdiff = .5, | |
127 fwhm = fwhmparam, | |
128 snthresh = 2, | |
129 max = 500), | |
130 CAMERA = list(perfwhm = 1)) | |
131 | |
132 metaSetting(GALAXY.GC, "DBconstruction") <- list( | |
133 minintens = 0.0, | |
134 rttol = rtdiffparam, | |
135 intensityMeasure = "maxo", | |
136 DBthreshold = .80, | |
137 minfeat = minfeatparam) | |
138 metaSetting(GALAXY.GC, "match2DB") <- list( | |
139 simthresh = simthreshparam, | |
140 timeComparison = "rt", | |
141 rtdiff = rtdiffparam, | |
142 RIdiff = 5, | |
143 minfeat = minfeatparam) | |
144 | |
145 #to used if contaminant filter | |
146 | |
147 # metaSetting(GALAXY.GC, "matchIrrelevants") <- list( | |
148 # irrelevantClasses = c("Bleeding", "Plasticizers"), | |
149 # timeComparison = "RI", | |
150 # RIdiff = RIdiffparam, | |
151 # rtdiff = rtdiffparam, | |
152 # simthresh = simthreshparam) | |
153 | |
154 metaSetting(GALAXY.GC, "betweenSamples") <- list( | |
155 min.class.fraction = minclassfractionparam, | |
156 min.class.size = minclasssizeparam, | |
157 timeComparison = "rt", | |
158 rtdiff = rtdiffparam, | |
159 RIdiff = 2, | |
160 simthresh = simthreshparam) | |
161 | |
162 #ONLY use RI instead of rt for time comparison vs DB or samples | |
163 if (RIshift!="none"){ | |
164 GALAXY.GC@match2DB.timeComparison<-"RI" | |
165 GALAXY.GC@match2DB.RIdiff<-as.numeric(RIshift) | |
166 GALAXY.GC@betweenSamples.timeComparison<-"RI" | |
167 GALAXY.GC@betweenSamples.RIdiff<-as.numeric(RIshift) | |
168 } | |
169 | |
170 if (!is.null(DBgc)){ | |
171 manual <- read.msp(DBgc) | |
172 DBgc <- createSTDdbGC(stdInfo = NULL, settings = GALAXY.GC, manualDB = manual) | |
173 } | |
174 } | |
175 | |
176 | |
177 # ----- INFILE PROCESSING ----- | |
178 cat("\n\n\n\tINFILE PROCESSING INFO\n\n") | |
179 | |
180 # Handle infiles | |
181 if (!exists("singlefile")) singlefile <- NULL | |
182 if (!exists("zipfile")) zipfile <- NULL | |
183 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) | |
184 zipfile <- rawFilePath$zipfile | |
185 singlefile <- rawFilePath$singlefile | |
186 directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) | |
187 | |
188 | |
189 # ----- MAIN PROCESSING INFO ----- | |
190 cat("\n\tMAIN PROCESSING INFO\n") | |
191 | |
192 cat("\t\tCOMPUTE\n\n") | |
193 | |
194 #runGC accept either a list of files a zip folder or an xset object from xcms.xcmsSet tool | |
195 #From xset is an .RData file necessary to use the xcmsSet object generated by xcms.xcmsSet given by previous tools | |
196 if (!is.null(args$singlefile_galaxyPath)){ | |
197 cat("Loading datas from XCMS file(s)...\n") | |
198 load(args$singlefile_galaxyPath) | |
199 | |
200 #Transform XCMS object if needed | |
201 if(!exists("xset")) { | |
202 if(exists("xdata")) { | |
203 xset<-getxcmsSetObject(xdata) | |
204 } else { | |
205 error_message="no xset and no xdata... Probably a problem" | |
206 print(error_message) | |
207 stop(error_message) | |
208 } | |
209 } | |
210 | |
211 #xset from xcms.xcmsSet is not well formatted for metaMS this function do the formatting | |
212 if (class(xset)=="xcmsSet"){ | |
213 if (length(xset@rt$raw)>1){ | |
214 #create an exceptable list of xset for metaMS | |
215 xset.l<-vector("list",length(xset@rt$raw)) | |
216 for (i in 1:length(xset@rt$raw)){ | |
217 xset.l[[i]]<-new("xcmsSet") | |
218 xset.l[[i]]@peaks<-xset@peaks[which(xset@peaks[,"sample"]==i),] | |
219 df<-data.frame(class=xset@phenoData[i,]) | |
220 rownames(df)<-rownames(xset@phenoData)[i] | |
221 xset.l[[i]]@phenoData<-df | |
222 xset.l[[i]]@rt$raw<-xset@rt$raw[[i]] | |
223 xset.l[[i]]@rt$corrected<-xset@rt$corrected[[i]] | |
224 xset.l[[i]]@filepaths<-xset@filepaths[i] | |
225 xset.l[[i]]@profinfo<-xset@profinfo | |
226 } | |
227 } else { | |
228 xset.l<-xset | |
229 } | |
230 | |
231 #create sampleMetadata, get sampleMetadata and class | |
232 sampleMetadata<-xset@phenoData | |
233 colnames(sampleMetadata) <- c("sampleMetadata","sample_group","class") | |
234 sampleMetadata<-sampleMetadata[,-2] | |
235 row.names(sampleMetadata)<-NULL | |
236 samples<-xset@filepaths | |
237 } else { | |
238 xset<-NULL | |
239 } | |
240 if(args$settings == "default") { | |
241 settingslist=TSQXLS.GC | |
242 if (class(xset.l[[1]])!="xsAnnotate"){ | |
243 cat("Process xsAnnotate with CAMERA package...\n") | |
244 xsetCAM<-lapply(xset.l, | |
245 function(x) { | |
246 y <- xsAnnotate(x, sample = 1) | |
247 capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm), | |
248 file = NULL) | |
249 z}) | |
250 } else { | |
251 xsetCAM <- xset.l | |
252 } | |
253 | |
254 #default settings for GC from Wehrens et al | |
255 cat("Process runGC with metaMS package...\n\n") | |
256 print(str(TSQXLS.GC)) | |
257 resGC<-runGC(xset=xsetCAM,settings=TSQXLS.GC, rtrange=rtrange, DB= DBgc, removeArtefacts = TRUE, | |
258 findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) | |
259 } else { | |
260 if(args$settings == "User_defined") { | |
261 settingslist=GALAXY.GC | |
262 if (class(xset.l[[1]])!="xsAnnotate") { | |
263 cat("Process xsAnnotate with CAMERA package...\n") | |
264 xsetCAM<-lapply(xset.l, | |
265 function(x) { | |
266 y <- xsAnnotate(x, sample = 1) | |
267 capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm), | |
268 file = NULL) | |
269 z}) | |
270 } else { | |
271 xsetCAM <- xset.l | |
272 } | |
273 | |
274 #user settings for GC | |
275 cat("Process runGC with metaMS package...\n\n") | |
276 print(str(GALAXY.GC)) | |
277 resGC<-runGC(xset=xsetCAM,settings=GALAXY.GC,rtrange = rtrange, DB= DBgc, removeArtefacts = TRUE, | |
278 findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) | |
279 } else { | |
280 error_message <- "There is no xset" | |
281 print(error_message) | |
282 stop(error_message) | |
283 } | |
284 } | |
285 } else { | |
286 error_message <- "No galaxy path entered" | |
287 print(error_message) | |
288 stop(error_message) | |
289 } | |
290 | |
291 | |
292 # ----- EXPORT ----- | |
293 #peakTable ordered by rt | |
294 cat("\nGenerating peakTable file") | |
295 peaktable<-getCorrectFileName(resGC$PeakTable,sampleMetadata) | |
296 cat("\t.\t.") | |
297 write.table(peaktable, file="peaktable.tsv", sep="\t", row.names=FALSE) | |
298 cat("\t.\tOK") | |
299 | |
300 #peakTable for PCA | |
301 #dataMatrix | |
302 cat("\nGenerating dataMatrix file") | |
303 dataMatrix<-cbind(Name=peaktable[,"Name"],peaktable[,(colnames(peaktable) %in% sampleMetadata[,1])]) | |
304 rownames(dataMatrix)<-NULL | |
305 cat("\t.\t.") | |
306 write.table(dataMatrix, file="dataMatrix.tsv", sep="\t", row.names=FALSE, quote=FALSE) | |
307 cat("\t.\tOK") | |
308 | |
309 #variableMetadata | |
310 cat("\nGenerating variableMetadata file") | |
311 variableMetadata<-peaktable[,!(colnames(peaktable) %in% sampleMetadata[,1])] | |
312 rownames(variableMetadata)<-NULL | |
313 cat("\t.") | |
314 write.table(variableMetadata, file="variableMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE) | |
315 cat("\t.\tOK") | |
316 | |
317 #sampleMetadata | |
318 cat("\nGenerating sampleMetadata file") | |
319 cat("\t.\t.") | |
320 write.table(sampleMetadata, file="sampleMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE) | |
321 cat("\t.\tOK") | |
322 | |
323 #peak spectrum as MSP for DB search | |
324 cat("\nGenerating",length(resGC$PseudoSpectra),"peakspectra in peakspectra.msp file\n") | |
325 write.msp(resGC$PseudoSpectra, file="peakspectra.msp", newFile = TRUE) | |
326 | |
327 #saving R data in .Rdata file to save the variables used in the present tool | |
328 objects2save <- c("resGC", "xset", "singlefile", "zipfile", "DBgc") | |
329 save(list = objects2save[objects2save %in% ls()], file = "runGC.RData") | |
330 | |
331 cat("\nEnd of '", modNamC, "' Galaxy module call: ", as.character(Sys.time()), "\n", sep = "") |