Mercurial > repos > yguitton > metams_rungc
view metams.r @ 1:142fbe102a9d draft
Uploaded version 2.0
author | yguitton |
---|---|
date | Wed, 24 May 2017 07:25:50 -0400 |
parents | 2066efbafd7c |
children | c75532b75ba1 |
line wrap: on
line source
#!/usr/local/public/bin/Rscript --vanilla --slave --no-site-file # metams.r version="2.0" #created by Yann GUITTON #use RI options #Redirect all stdout to the log file log_file=file("metams.log", open = "wt") sink(log_file) sink(log_file, type = "out") library(metaMS) library(batch) #necessary for parseCommandArgs function source_local <- function(fname) { argv <- commandArgs(trailingOnly = FALSE) base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) source(paste(base_dir, fname, sep="/")) } print("step1") listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects print("new version 2.0") print(listArguments) if (listArguments[["ri"]]!="NULL"){ RIarg=read.table(listArguments[["ri"]]) if (ncol(RIarg) < 2) RIarg=read.table(listArguments[["ri"]], h=T, sep=";") if (ncol(RIarg) < 2) RIarg=read.table(listArguments[["ri"]], h=T, sep="\t") if (ncol(RIarg) < 2) RIarg=read.table(listArguments[["ri"]], h=T, sep=",") if (ncol(RIarg) < 2) { error_message="Your RI file seems not well formatted. The column separators accepted are ; , and tabulation" print(error_message) stop(error_message) } #to do check real column names colnames(RIarg)<-c("rt","RI") # print(RIarg) } else { RIarg = NULL # cat("Ri= ",RIarg) } if (listArguments[["rishift"]]!="none"){ RIshift=listArguments[["rishift"]] cat("Rishift used= ",RIshift, "\n") } else { RIshift = "none" cat("Rishift NONE= ",RIshift, "\n") } DBarg=listArguments[["db"]] # if (listArguments[["use_db"]]!="NULL"){ if (DBarg!="NULL"){ DBarg=listArguments[["db"]] cat("Db= ",DBarg, "\n") } else { DBarg = NULL cat("NO Db : ",DBarg, "\n") } #for unknown EIC printing if (listArguments[["unkn"]][1]!="NULL") { unknarg<-listArguments[["unkn"]] } else { unknarg<-"" } print(paste("Unkn:",unknarg)) #remove unused arguments listArguments[["unkn"]]<-NULL listArguments[["db"]] <- NULL listArguments[["ri"]] <- NULL listArguments[["rishift"]] <- NULL print(" step2") #runGC accept either a list of files a zip folder or an xset object from xcms.xcmsSet tool #CASE 2 from zip file #necessary to unzip .zip file uploaded to Galaxy #thanks to .zip file it's possible to upload many file as the same time conserving the tree hierarchy of directories if (!is.null(listArguments[["zipfile"]])){ print("CASE 2 from zip file") directory=unzip(listArguments[["zipfile"]]) listArguments=append(list(directory), listArguments) metams_zip_file= listArguments[["zipfile"]] listArguments[["zipfile"]]=NULL filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") #samples<-list.files(path=directory, pattern=filepattern, all.files=FALSE,recursive=TRUE,full.names=TRUE,ignore.case=FALSE) samples<-directory[grep(filepattern, directory)] # cat(samples) #debugg #create sampleMetadata, get sampleMetadata and class sampleMetadata<-xcms:::phenoDataFromPaths(samples) sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata) row.names(sampleMetadata)<-NULL } else { metams_zip_file="" } #CASE 3 from xset is an .RData file necessary to use the xcmsSet object generated by xcms.xcmsSet given by previous tools if (!is.null(listArguments[["xset"]])){ print("CASE 3 from xset") require(CAMERA, quietly = TRUE) load(listArguments[["xset"]]) cat(class(xset)) xsetparam=listArguments[["xset"]] listArguments[["xset"]]=NULL #remove xset from arguments #xset from xcms.xcmsSet is not well formatted for metaMS this function do the formatting if (class(xset)=="xcmsSet"){ #treat case where Rdata came from xcmsSet with zip file entry if(exists("zip_file")){ if (zip_file!=""){ directory=unzip(zip_file) print("CASE 3 from xset and with ZIP input") } else { print("CASE 3 from xset and with LIBRARY input") } } #end zip file case if (length(xset@rt$raw)>1){ #create an exceptable list of xset for metaMS xset.l<-vector("list",length(xset@rt$raw)) for (i in 1:length(xset@rt$raw)){ xset.l[[i]]<-new("xcmsSet") xset.l[[i]]@peaks<-xset@peaks[which(xset@peaks[,"sample"]==i),] df<-data.frame(class=xset@phenoData[i,]) rownames(df)<-rownames(xset@phenoData)[i] xset.l[[i]]@phenoData<-df xset.l[[i]]@rt$raw<-xset@rt$raw[[i]] xset.l[[i]]@rt$corrected<-xset@rt$corrected[[i]] xset.l[[i]]@filepaths<-xset@filepaths[i] xset.l[[i]]@profinfo<-xset@profinfo } } else { xset.l<-xset } } #create sampleMetadata, get sampleMetadata and class sampleMetadata<-xset@phenoData sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata) row.names(sampleMetadata)<-NULL samples<-xset@filepaths } else { xsetparam<-NULL } #Import the different functions source_local("lib_metams.r") #load function for ploting EICs and pspectra # source_local("plotUnknown.r") #settings process if (listArguments[["settings"]]=="default") { data(FEMsettings) if (listArguments[["rtrange"]][1]!="NULL") { rtrange=listArguments[["rtrange"]] } else { rtrange=NULL } if (!is.null(DBarg)){ manual <- read.msp(DBarg) DBarg <- createSTDdbGC(stdInfo = NULL, settings = TSQXLS.GC, manualDB = manual) } #use RI instead of rt for time comparison vs DB if (RIshift!="none"){ TSQXLS.GC@match2DB.timeComparison<-"RI" TSQXLS.GC@match2DB.RIdiff<-as.numeric(RIshift) TSQXLS.GC@betweenSamples.timeComparison<-"RI" TSQXLS.GC@betweenSamples.RIdiff<-as.numeric(RIshift) } nSlaves=listArguments[["nSlaves"]] if(!metams_zip_file=="") { resGC<-runGC(files=samples,settings=TSQXLS.GC, rtrange=rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) #default settings for GC from Wehrens et al } if(!is.null(xsetparam)){ settingslist=TSQXLS.GC if (class(xset.l[[1]])!="xsAnnotate"){ cat("Process xsAnnotate") xset<-lapply(xset.l, function(x) { y <- xsAnnotate(x, sample = 1) capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm), file = NULL) z}) } resGC<-runGC(xset=xset,settings=TSQXLS.GC, rtrange=rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) #default settings for GC from Wehrens et al } } if (listArguments[["settings"]]=="User_defined") { listArguments[["settings"]]=NULL #delete from the list of arguments fwhmparam=listArguments[["fwhm"]] rtdiffparam=listArguments[["rtdiff"]] minfeatparam=listArguments[["minfeat"]] simthreshparam=listArguments[["simthreshold"]] minclassfractionparam=listArguments[["minclassfraction"]] minclasssizeparam=listArguments[["minclasssize"]] if (listArguments[["rtrange"]]!="NULL") { rtrange=listArguments[["rtrange"]] cat("rtrange= ",rtrange) } else { rtrange=NULL cat("rtrange= ",rtrange) } nSlaves=listArguments[["nSlaves"]] GALAXY.GC <- metaMSsettings("protocolName" = "GALAXY.GC", "chrom" = "GC", PeakPicking = list( method = "matchedFilter", step = 0.5, steps = 2, mzdiff = .5, fwhm = fwhmparam, snthresh = 2, max = 500), CAMERA = list(perfwhm = 1)) metaSetting(GALAXY.GC, "DBconstruction") <- list( minintens = 0.0, rttol = rtdiffparam, intensityMeasure = "maxo", DBthreshold = .80, minfeat = minfeatparam) metaSetting(GALAXY.GC, "match2DB") <- list( simthresh = simthreshparam, timeComparison = "rt", rtdiff = rtdiffparam, RIdiff = 5, minfeat = minfeatparam) #to used if contaminant filter # metaSetting(GALAXY.GC, "matchIrrelevants") <- list( # irrelevantClasses = c("Bleeding", "Plasticizers"), # timeComparison = "RI", # RIdiff = RIdiffparam, # rtdiff = rtdiffparam, # simthresh = simthreshparam) metaSetting(GALAXY.GC, "betweenSamples") <- list( min.class.fraction = minclassfractionparam, min.class.size = minclasssizeparam, timeComparison = "rt", rtdiff = rtdiffparam, RIdiff = 2, simthresh = simthreshparam) #ONLY use RI instead of rt for time comparison vs DB or samples if (RIshift!="none"){ GALAXY.GC@match2DB.timeComparison<-"RI" GALAXY.GC@match2DB.RIdiff<-as.numeric(RIshift) GALAXY.GC@betweenSamples.timeComparison<-"RI" GALAXY.GC@betweenSamples.RIdiff<-as.numeric(RIshift) } # files, xset, settings, rtrange = NULL, DB = NULL, # removeArtefacts = TRUE, findUnknowns = nexp > 1, # returnXset = FALSE, RIstandards = NULL, nSlaves = 0 if (!is.null(DBarg)){ manual <- read.msp(DBarg) DBarg <- createSTDdbGC(stdInfo = NULL, settings = GALAXY.GC, manualDB = manual) } if (!metams_zip_file=="") { resGC<-runGC(files=samples,settings=GALAXY.GC,rtrange = rtrange, DB= DBarg , removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) } if(!is.null(xsetparam)) { settingslist=GALAXY.GC if (class(xset.l[[1]])!="xsAnnotate") { print("Process xsAnnotate") xset<-lapply(xset.l, function(x) { y <- xsAnnotate(x, sample = 1) capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm), file = NULL) z}) } resGC<-runGC(xset=xset,settings=GALAXY.GC,rtrange = rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) } } #peakTable ordered by rt peaktable<-resGC$PeakTable<-resGC$PeakTable[order(resGC$PeakTable[,"rt"]),] write.table(peaktable, file="peaktable.tsv", sep="\t", row.names=FALSE) #peakTable for PCA #dataMatrix dataMatrix<-cbind(Name=resGC$PeakTable[,"Name"],resGC$PeakTable[,(colnames(resGC$PeakTable) %in% sampleMetadata[,1])]) rownames(dataMatrix)<-NULL write.table(dataMatrix, file="dataMatrix.tsv", sep="\t", row.names=FALSE, quote=FALSE) #variableMetadata variableMetadata<-resGC$PeakTable[,!(colnames(resGC$PeakTable) %in% sampleMetadata[,1])] rownames(variableMetadata)<-NULL write.table(variableMetadata, file="variableMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE) #sampleMetadata write.table(sampleMetadata, file="sampleMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE) #peak spectrum as MSP for DB search write.msp(resGC$PseudoSpectra, file="peakspectra.msp", newFile = TRUE) #TIC/BPC picture generation # use files as entry not xset that do not exist print("TICs") #Use getTIC2s and getBPC2s because getTICs and getBPCs can exists due to transfert of function in Rdata b<-getTIC2s(files = samples, pdfname="TICs_raw.pdf", rt="raw") print("BPCs") c<-getBPC2s(files=samples, rt="raw", pdfname="BPCs_raw.pdf") print("Step QC plot") #to do check if no peaks found #Quality controls plots but only working in R (don't know why) a<-plotUnknowns(resGC=resGC, unkn=unknarg) #use unknparam value # create a mergpdf #test system(paste('gs -o TICsBPCs_merged.pdf -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress *Cs_raw.pdf')) system(paste('gs -o GCMS_EIC.pdf -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress Unknown_*')) ############################################TEST FUNCTION START################################################################# ############################################TEST FUNCTION END################################################################# #zip files of all runGC outputs system(paste('ls . | grep -e "tsv$" -e "msp$" -e "GCMS_" | zip -r -@ "rungc.zip"')) #delete the parameters to avoid the passage to the next tool in .RData image rm(listArguments) #saving R data in .Rdata file to save the variables used in the present tool save.image(paste("runGC","RData",sep="."))