comparison metaMS_runGC.r @ 4:c10824185547 draft

planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit 174a1713024f246c1485cbd75218577e89353522
author workflow4metabolomics
date Wed, 03 Jul 2019 05:14:32 -0400
parents
children b8d4129dd2a6
comparison
equal deleted inserted replaced
3:c75532b75ba1 4:c10824185547
1 #!/usr/bin/env Rscript
2 # metaMS_runGC.r version="3.0.0"
3 #created by Yann GUITTON and updated by Julien SAINT-VANNE
4 #use RI options + add try on plotUnknown add session Info
5 #use make.names in sampleMetadata to avoid issues with files names
6
7
8 # ----- LOG FILE -----
9 #log_file=file("log.txt", open = "wt")
10 #sink(log_file)
11 #sink(log_file, type = "output")
12
13
14 # ----- PACKAGE -----
15 cat("\tSESSION INFO\n\n")
16
17 #Import the different functions and packages
18 source_local <- function(fname) {
19 argv <- commandArgs(trailingOnly = FALSE)
20 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
21 source(paste(base_dir, fname, sep="/"))
22 }
23 source_local("lib_metams.r")
24
25 pkgs <- c("metaMS","stringr","batch","CAMERA") #"batch" necessary for parseCommandArgs function
26 loadAndDisplayPackages(pkgs)
27
28 cat("\n\n")
29
30 modNamC <- "metaMS:runGC" ## module name
31 cat("\nStart of the '", modNamC, "' Galaxy module call: ", format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
32
33
34 # ----- PROCESSING INFILE -----
35 cat("\n\n\tARGUMENTS PROCESSING INFO\n\n")
36 args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
37 #write.table(as.matrix(args), col.names=F, quote=F, sep='\t\t')
38 print(cbind(value = unlist(args)))
39
40 # ----- PROCESSING INFILE -----
41 cat("\n\n\tARGUMENTS PROCESSING INFO\n\n")
42
43 # Saving the specific parameters
44 #RI parameter
45 if (args$ri!="NULL"){
46 RIarg=read.table(args$ri)
47 if (ncol(RIarg) < 2) RIarg=read.table(args$ri, h=T, sep=";")
48 if (ncol(RIarg) < 2) RIarg=read.table(args$ri, h=T, sep="\t")
49 if (ncol(RIarg) < 2) RIarg=read.table(args$ri, h=T, sep=",")
50 if (ncol(RIarg) < 2) {
51 error_message="Your RI file seems not well formatted. The column separators accepted are ; , and tabulation"
52 print(error_message)
53 stop(error_message)
54 }
55 #to do check real column names
56 colnames(RIarg)<-c("rt","RI")
57 } else {
58 RIarg = NULL
59 }
60
61 #RIshift parameter
62 if (args$rishift!="none"){
63 RIshift=args$rishift
64 } else {
65 RIshift = "none"
66 }
67
68 #Personal database parameter
69 if (args$db!="NULL"){
70 DBgc=args$db
71 } else {
72 DBgc = NULL
73 }
74
75 #settings process
76 if (args$settings=="default") {
77 cat("Using default parameters")
78 data(FEMsettings)
79 if (args$rtrange[1]!="NULL") {
80 rtrange=args$rtrange
81 } else {
82 rtrange=NULL
83 }
84
85 if (!is.null(DBgc)){
86 manual <- read.msp(DBgc)
87 DBgc <- createSTDdbGC(stdInfo = NULL, settings = TSQXLS.GC, manualDB = manual)
88 }
89
90 #use RI instead of rt for time comparison vs DB
91 if (RIshift!="none"){
92 TSQXLS.GC@match2DB.timeComparison<-"RI"
93 TSQXLS.GC@match2DB.RIdiff<-as.numeric(RIshift)
94 TSQXLS.GC@betweenSamples.timeComparison<-"RI"
95 TSQXLS.GC@betweenSamples.RIdiff<-as.numeric(RIshift)
96 }
97 nSlaves=args$nSlaves
98 }
99
100 if (args$settings=="User_defined") {
101 cat("Using user's parameters\n")
102 fwhmparam=args$fwhm
103 rtdiffparam=args$rtdiff
104 minfeatparam=args$minfeat
105 simthreshparam=args$simthreshold
106 minclassfractionparam=args$minclassfraction
107 minclasssizeparam=args$minclasssize
108
109 if (args$rtrange!="NULL") {
110 rtrange=args$rtrange
111 cat("rtrange= ",rtrange)
112 } else {
113 rtrange=NULL
114 cat("rtrange= ",rtrange)
115 }
116
117 nSlaves=args$nSlaves
118
119 GALAXY.GC <-
120 metaMSsettings("protocolName" = "GALAXY.GC",
121 "chrom" = "GC",
122 PeakPicking = list(
123 method = "matchedFilter",
124 step = 0.5,
125 steps = 2,
126 mzdiff = .5,
127 fwhm = fwhmparam,
128 snthresh = 2,
129 max = 500),
130 CAMERA = list(perfwhm = 1))
131
132 metaSetting(GALAXY.GC, "DBconstruction") <- list(
133 minintens = 0.0,
134 rttol = rtdiffparam,
135 intensityMeasure = "maxo",
136 DBthreshold = .80,
137 minfeat = minfeatparam)
138 metaSetting(GALAXY.GC, "match2DB") <- list(
139 simthresh = simthreshparam,
140 timeComparison = "rt",
141 rtdiff = rtdiffparam,
142 RIdiff = 5,
143 minfeat = minfeatparam)
144
145 #to used if contaminant filter
146
147 # metaSetting(GALAXY.GC, "matchIrrelevants") <- list(
148 # irrelevantClasses = c("Bleeding", "Plasticizers"),
149 # timeComparison = "RI",
150 # RIdiff = RIdiffparam,
151 # rtdiff = rtdiffparam,
152 # simthresh = simthreshparam)
153
154 metaSetting(GALAXY.GC, "betweenSamples") <- list(
155 min.class.fraction = minclassfractionparam,
156 min.class.size = minclasssizeparam,
157 timeComparison = "rt",
158 rtdiff = rtdiffparam,
159 RIdiff = 2,
160 simthresh = simthreshparam)
161
162 #ONLY use RI instead of rt for time comparison vs DB or samples
163 if (RIshift!="none"){
164 GALAXY.GC@match2DB.timeComparison<-"RI"
165 GALAXY.GC@match2DB.RIdiff<-as.numeric(RIshift)
166 GALAXY.GC@betweenSamples.timeComparison<-"RI"
167 GALAXY.GC@betweenSamples.RIdiff<-as.numeric(RIshift)
168 }
169
170 if (!is.null(DBgc)){
171 manual <- read.msp(DBgc)
172 DBgc <- createSTDdbGC(stdInfo = NULL, settings = GALAXY.GC, manualDB = manual)
173 }
174 }
175
176
177 # ----- INFILE PROCESSING -----
178 cat("\n\n\n\tINFILE PROCESSING INFO\n\n")
179
180 # Handle infiles
181 if (!exists("singlefile")) singlefile <- NULL
182 if (!exists("zipfile")) zipfile <- NULL
183 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
184 zipfile <- rawFilePath$zipfile
185 singlefile <- rawFilePath$singlefile
186 directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)
187
188
189 # ----- MAIN PROCESSING INFO -----
190 cat("\n\tMAIN PROCESSING INFO\n")
191
192 cat("\t\tCOMPUTE\n\n")
193
194 #runGC accept either a list of files a zip folder or an xset object from xcms.xcmsSet tool
195 #From xset is an .RData file necessary to use the xcmsSet object generated by xcms.xcmsSet given by previous tools
196 if (!is.null(args$singlefile_galaxyPath)){
197 cat("Loading datas from XCMS file(s)...\n")
198 load(args$singlefile_galaxyPath)
199
200 #Transform XCMS object if needed
201 if(!exists("xset")) {
202 if(exists("xdata")) {
203 xset<-getxcmsSetObject(xdata)
204 } else {
205 error_message="no xset and no xdata... Probably a problem"
206 print(error_message)
207 stop(error_message)
208 }
209 }
210
211 #xset from xcms.xcmsSet is not well formatted for metaMS this function do the formatting
212 if (class(xset)=="xcmsSet"){
213 if (length(xset@rt$raw)>1){
214 #create an exceptable list of xset for metaMS
215 xset.l<-vector("list",length(xset@rt$raw))
216 for (i in 1:length(xset@rt$raw)){
217 xset.l[[i]]<-new("xcmsSet")
218 xset.l[[i]]@peaks<-xset@peaks[which(xset@peaks[,"sample"]==i),]
219 df<-data.frame(class=xset@phenoData[i,])
220 rownames(df)<-rownames(xset@phenoData)[i]
221 xset.l[[i]]@phenoData<-df
222 xset.l[[i]]@rt$raw<-xset@rt$raw[[i]]
223 xset.l[[i]]@rt$corrected<-xset@rt$corrected[[i]]
224 xset.l[[i]]@filepaths<-xset@filepaths[i]
225 xset.l[[i]]@profinfo<-xset@profinfo
226 }
227 } else {
228 xset.l<-xset
229 }
230
231 #create sampleMetadata, get sampleMetadata and class
232 sampleMetadata<-xset@phenoData
233 colnames(sampleMetadata) <- c("sampleMetadata","sample_group","class")
234 sampleMetadata<-sampleMetadata[,-2]
235 row.names(sampleMetadata)<-NULL
236 samples<-xset@filepaths
237 } else {
238 xset<-NULL
239 }
240 if(args$settings == "default") {
241 settingslist=TSQXLS.GC
242 if (class(xset.l[[1]])!="xsAnnotate"){
243 cat("Process xsAnnotate with CAMERA package...\n")
244 xsetCAM<-lapply(xset.l,
245 function(x) {
246 y <- xsAnnotate(x, sample = 1)
247 capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm),
248 file = NULL)
249 z})
250 } else {
251 xsetCAM <- xset.l
252 }
253
254 #default settings for GC from Wehrens et al
255 cat("Process runGC with metaMS package...\n\n")
256 print(str(TSQXLS.GC))
257 resGC<-runGC(xset=xsetCAM,settings=TSQXLS.GC, rtrange=rtrange, DB= DBgc, removeArtefacts = TRUE,
258 findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves)
259 } else {
260 if(args$settings == "User_defined") {
261 settingslist=GALAXY.GC
262 if (class(xset.l[[1]])!="xsAnnotate") {
263 cat("Process xsAnnotate with CAMERA package...\n")
264 xsetCAM<-lapply(xset.l,
265 function(x) {
266 y <- xsAnnotate(x, sample = 1)
267 capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm),
268 file = NULL)
269 z})
270 } else {
271 xsetCAM <- xset.l
272 }
273
274 #user settings for GC
275 cat("Process runGC with metaMS package...\n\n")
276 print(str(GALAXY.GC))
277 resGC<-runGC(xset=xsetCAM,settings=GALAXY.GC,rtrange = rtrange, DB= DBgc, removeArtefacts = TRUE,
278 findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves)
279 } else {
280 error_message <- "There is no xset"
281 print(error_message)
282 stop(error_message)
283 }
284 }
285 } else {
286 error_message <- "No galaxy path entered"
287 print(error_message)
288 stop(error_message)
289 }
290
291
292 # ----- EXPORT -----
293 #peakTable ordered by rt
294 cat("\nGenerating peakTable file")
295 peaktable<-getCorrectFileName(resGC$PeakTable,sampleMetadata)
296 cat("\t.\t.")
297 write.table(peaktable, file="peaktable.tsv", sep="\t", row.names=FALSE)
298 cat("\t.\tOK")
299
300 #peakTable for PCA
301 #dataMatrix
302 cat("\nGenerating dataMatrix file")
303 dataMatrix<-cbind(Name=peaktable[,"Name"],peaktable[,(colnames(peaktable) %in% sampleMetadata[,1])])
304 rownames(dataMatrix)<-NULL
305 cat("\t.\t.")
306 write.table(dataMatrix, file="dataMatrix.tsv", sep="\t", row.names=FALSE, quote=FALSE)
307 cat("\t.\tOK")
308
309 #variableMetadata
310 cat("\nGenerating variableMetadata file")
311 variableMetadata<-peaktable[,!(colnames(peaktable) %in% sampleMetadata[,1])]
312 rownames(variableMetadata)<-NULL
313 cat("\t.")
314 write.table(variableMetadata, file="variableMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE)
315 cat("\t.\tOK")
316
317 #sampleMetadata
318 cat("\nGenerating sampleMetadata file")
319 cat("\t.\t.")
320 write.table(sampleMetadata, file="sampleMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE)
321 cat("\t.\tOK")
322
323 #peak spectrum as MSP for DB search
324 cat("\nGenerating",length(resGC$PseudoSpectra),"peakspectra in peakspectra.msp file\n")
325 write.msp(resGC$PseudoSpectra, file="peakspectra.msp", newFile = TRUE)
326
327 #saving R data in .Rdata file to save the variables used in the present tool
328 objects2save <- c("resGC", "xset", "singlefile", "zipfile", "DBgc")
329 save(list = objects2save[objects2save %in% ls()], file = "runGC.RData")
330
331 cat("\nEnd of '", modNamC, "' Galaxy module call: ", as.character(Sys.time()), "\n", sep = "")