view metams.r @ 1:142fbe102a9d draft

Uploaded version 2.0
author yguitton
date Wed, 24 May 2017 07:25:50 -0400
parents 2066efbafd7c
children c75532b75ba1
line wrap: on
line source

#!/usr/local/public/bin/Rscript --vanilla --slave --no-site-file
# metams.r version="2.0"
#created by Yann GUITTON 
#use RI options

#Redirect all stdout to the log file
log_file=file("metams.log", open = "wt")
sink(log_file)
sink(log_file, type = "out")

library(metaMS)
library(batch) #necessary for parseCommandArgs function

source_local <- function(fname) {
    argv <- commandArgs(trailingOnly = FALSE)
    base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
    source(paste(base_dir, fname, sep="/"))
}
print("step1")


listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
print("new version 2.0")


print(listArguments)


if (listArguments[["ri"]]!="NULL"){
    RIarg=read.table(listArguments[["ri"]])
	if (ncol(RIarg) < 2) RIarg=read.table(listArguments[["ri"]], h=T, sep=";")
	if (ncol(RIarg) < 2) RIarg=read.table(listArguments[["ri"]], h=T, sep="\t")
	if (ncol(RIarg) < 2) RIarg=read.table(listArguments[["ri"]], h=T, sep=",")
	if (ncol(RIarg) < 2) {
		error_message="Your RI file seems not well formatted. The column separators accepted are ; , and tabulation"
		print(error_message)
		stop(error_message)
	}
#to do check real column names
    colnames(RIarg)<-c("rt","RI")
    # print(RIarg)
} else {
    RIarg = NULL
    # cat("Ri= ",RIarg)
}

if (listArguments[["rishift"]]!="none"){
    RIshift=listArguments[["rishift"]]
   cat("Rishift used= ",RIshift, "\n")
} else {
    RIshift = "none"
    cat("Rishift NONE= ",RIshift, "\n")
}

DBarg=listArguments[["db"]]
# if (listArguments[["use_db"]]!="NULL"){
if (DBarg!="NULL"){
    DBarg=listArguments[["db"]]
    cat("Db= ",DBarg, "\n")
} else {
    DBarg = NULL
    cat("NO Db : ",DBarg, "\n")
}



#for unknown EIC printing

if (listArguments[["unkn"]][1]!="NULL") {
    unknarg<-listArguments[["unkn"]]
} else { 
    unknarg<-""
}

print(paste("Unkn:",unknarg))

#remove unused arguments
listArguments[["unkn"]]<-NULL
listArguments[["db"]] <- NULL
listArguments[["ri"]] <- NULL
listArguments[["rishift"]] <- NULL

print(" step2")

#runGC accept either a list of files a zip folder or an xset object from xcms.xcmsSet tool

#CASE 2  from zip file
#necessary to unzip .zip file uploaded to Galaxy
#thanks to .zip file it's possible to upload many file as the same time conserving the tree hierarchy of directories

if (!is.null(listArguments[["zipfile"]])){
    print("CASE 2 from zip file")
    directory=unzip(listArguments[["zipfile"]])
    listArguments=append(list(directory), listArguments)

    metams_zip_file= listArguments[["zipfile"]]
    listArguments[["zipfile"]]=NULL

    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
    filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") 
    #samples<-list.files(path=directory, pattern=filepattern, all.files=FALSE,recursive=TRUE,full.names=TRUE,ignore.case=FALSE)
    samples<-directory[grep(filepattern, directory)]
    # cat(samples) #debugg
    #create sampleMetadata, get sampleMetadata and class
    sampleMetadata<-xcms:::phenoDataFromPaths(samples)
    sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata)
    row.names(sampleMetadata)<-NULL
} else {
	metams_zip_file=""
}

#CASE 3 from xset is an .RData file necessary to use the xcmsSet object generated by xcms.xcmsSet given by previous tools
if (!is.null(listArguments[["xset"]])){
    print("CASE 3 from xset")
    require(CAMERA, quietly = TRUE)
    load(listArguments[["xset"]])
    cat(class(xset))
    xsetparam=listArguments[["xset"]]
    listArguments[["xset"]]=NULL #remove xset from arguments
    
    #xset from xcms.xcmsSet is not well formatted for metaMS this function do the formatting
    if (class(xset)=="xcmsSet"){
    
        #treat case where Rdata came from xcmsSet with zip file entry
    	if(exists("zip_file")){
            if (zip_file!=""){
                directory=unzip(zip_file)
                print("CASE 3 from xset and with ZIP input")
                
            } else {
                print("CASE 3 from xset and with LIBRARY input")
            }
        }
        #end zip file case
        if (length(xset@rt$raw)>1){
            #create an exceptable list of xset for metaMS
            xset.l<-vector("list",length(xset@rt$raw))
            for (i in 1:length(xset@rt$raw)){
                xset.l[[i]]<-new("xcmsSet")
                xset.l[[i]]@peaks<-xset@peaks[which(xset@peaks[,"sample"]==i),]
                df<-data.frame(class=xset@phenoData[i,])
                rownames(df)<-rownames(xset@phenoData)[i]
                xset.l[[i]]@phenoData<-df
                xset.l[[i]]@rt$raw<-xset@rt$raw[[i]]
                xset.l[[i]]@rt$corrected<-xset@rt$corrected[[i]]
                xset.l[[i]]@filepaths<-xset@filepaths[i]
                xset.l[[i]]@profinfo<-xset@profinfo
             }
        } else {
            xset.l<-xset
        }
           
    }
    #create sampleMetadata, get sampleMetadata and class
    sampleMetadata<-xset@phenoData
    sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata)
    row.names(sampleMetadata)<-NULL
    samples<-xset@filepaths
} else {
   xsetparam<-NULL
}     



#Import the different functions
source_local("lib_metams.r")

#load function for ploting EICs and pspectra
# source_local("plotUnknown.r")

#settings process
if (listArguments[["settings"]]=="default") {
	data(FEMsettings) 
    if (listArguments[["rtrange"]][1]!="NULL") {
        rtrange=listArguments[["rtrange"]]
    } else {
        rtrange=NULL
    }
	
	if (!is.null(DBarg)){
		manual <- read.msp(DBarg)
		DBarg <- createSTDdbGC(stdInfo = NULL, settings = TSQXLS.GC, manualDB = manual)
	}
	
	#use RI instead of rt for time comparison vs DB
	if (RIshift!="none"){
		TSQXLS.GC@match2DB.timeComparison<-"RI"
		TSQXLS.GC@match2DB.RIdiff<-as.numeric(RIshift)
		TSQXLS.GC@betweenSamples.timeComparison<-"RI"
		TSQXLS.GC@betweenSamples.RIdiff<-as.numeric(RIshift)
	}
	
   nSlaves=listArguments[["nSlaves"]]
	
	
    if(!metams_zip_file=="") {
        resGC<-runGC(files=samples,settings=TSQXLS.GC, rtrange=rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) #default settings for GC from Wehrens et al
    }
    if(!is.null(xsetparam)){
        settingslist=TSQXLS.GC
        if (class(xset.l[[1]])!="xsAnnotate"){
            cat("Process xsAnnotate")
            xset<-lapply(xset.l,
                 function(x) {
                   y <- xsAnnotate(x, sample = 1)
                   capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm),
                                  file = NULL)
                   z})
           
        }
             
        resGC<-runGC(xset=xset,settings=TSQXLS.GC, rtrange=rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) #default settings for GC from Wehrens et al
    }
}

if (listArguments[["settings"]]=="User_defined") {
	listArguments[["settings"]]=NULL #delete from the list of arguments
	fwhmparam=listArguments[["fwhm"]]
	rtdiffparam=listArguments[["rtdiff"]]
	minfeatparam=listArguments[["minfeat"]]
	simthreshparam=listArguments[["simthreshold"]]
    minclassfractionparam=listArguments[["minclassfraction"]]
    minclasssizeparam=listArguments[["minclasssize"]]
	
    if (listArguments[["rtrange"]]!="NULL") {
        rtrange=listArguments[["rtrange"]]
        cat("rtrange= ",rtrange)
    } else {
        rtrange=NULL
        cat("rtrange= ",rtrange)
    }
	
	
    nSlaves=listArguments[["nSlaves"]]
	
	GALAXY.GC <- metaMSsettings("protocolName" = "GALAXY.GC",
								"chrom" = "GC",
								PeakPicking = list(
								  method = "matchedFilter",
								  step = 0.5,
								  steps = 2,
								  mzdiff = .5,
								  fwhm = fwhmparam,
								  snthresh = 2,
								  max = 500),
							   CAMERA = list(perfwhm = 1))
   
	metaSetting(GALAXY.GC, "DBconstruction") <- list(
				minintens = 0.0,
				rttol = rtdiffparam,
				intensityMeasure = "maxo",
				DBthreshold = .80, 
				minfeat = minfeatparam)
	metaSetting(GALAXY.GC, "match2DB") <- list(
				simthresh = simthreshparam,
				timeComparison = "rt",
				rtdiff = rtdiffparam,
				RIdiff = 5,
				minfeat = minfeatparam)
				
    #to used if contaminant filter
	
		# metaSetting(GALAXY.GC, "matchIrrelevants") <- list(
					# irrelevantClasses = c("Bleeding", "Plasticizers"),
					# timeComparison = "RI",
					# RIdiff = RIdiffparam,    
					# rtdiff = rtdiffparam,
					# simthresh = simthreshparam)
	
	metaSetting(GALAXY.GC, "betweenSamples") <- list(
				min.class.fraction = minclassfractionparam,
				min.class.size = minclasssizeparam,
				timeComparison = "rt",
				rtdiff = rtdiffparam,
				RIdiff = 2,    
				simthresh = simthreshparam)

	#ONLY use RI instead of rt for time comparison vs DB or samples
	if (RIshift!="none"){
		GALAXY.GC@match2DB.timeComparison<-"RI"
		GALAXY.GC@match2DB.RIdiff<-as.numeric(RIshift)
		GALAXY.GC@betweenSamples.timeComparison<-"RI"
		GALAXY.GC@betweenSamples.RIdiff<-as.numeric(RIshift)
	}
        # files, xset, settings, rtrange = NULL, DB = NULL,
       # removeArtefacts = TRUE, findUnknowns = nexp > 1,
       # returnXset = FALSE, RIstandards = NULL, nSlaves = 0
	if (!is.null(DBarg)){
		manual <- read.msp(DBarg)
		DBarg <- createSTDdbGC(stdInfo = NULL, settings = GALAXY.GC, manualDB = manual)
	}
    if (!metams_zip_file=="") {
        resGC<-runGC(files=samples,settings=GALAXY.GC,rtrange = rtrange, DB= DBarg , removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves)
    }
    if(!is.null(xsetparam)) {
        settingslist=GALAXY.GC
        if (class(xset.l[[1]])!="xsAnnotate") {
            print("Process xsAnnotate")
            xset<-lapply(xset.l,
                 function(x) {
                   y <- xsAnnotate(x, sample = 1)
                   capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm),
                                  file = NULL)
                   z})
           
        }
        resGC<-runGC(xset=xset,settings=GALAXY.GC,rtrange = rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves)
    }
}

#peakTable ordered by rt
peaktable<-resGC$PeakTable<-resGC$PeakTable[order(resGC$PeakTable[,"rt"]),]
write.table(peaktable, file="peaktable.tsv", sep="\t", row.names=FALSE)

#peakTable for PCA 
#dataMatrix
dataMatrix<-cbind(Name=resGC$PeakTable[,"Name"],resGC$PeakTable[,(colnames(resGC$PeakTable) %in% sampleMetadata[,1])])
rownames(dataMatrix)<-NULL
write.table(dataMatrix, file="dataMatrix.tsv", sep="\t", row.names=FALSE, quote=FALSE)

#variableMetadata
variableMetadata<-resGC$PeakTable[,!(colnames(resGC$PeakTable) %in% sampleMetadata[,1])]
rownames(variableMetadata)<-NULL
write.table(variableMetadata, file="variableMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE)

#sampleMetadata
write.table(sampleMetadata, file="sampleMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE)

#peak spectrum as MSP for DB search
write.msp(resGC$PseudoSpectra, file="peakspectra.msp", newFile = TRUE)

#TIC/BPC picture generation
 # use files as entry not xset that do not exist   

print("TICs")
#Use getTIC2s and getBPC2s because getTICs and getBPCs can exists due to transfert of function in Rdata
b<-getTIC2s(files = samples, pdfname="TICs_raw.pdf", rt="raw")

print("BPCs")
c<-getBPC2s(files=samples, rt="raw", pdfname="BPCs_raw.pdf")  
       
print("Step QC plot")

#to do check if no peaks found
#Quality controls plots but only working in R (don't know why)
a<-plotUnknowns(resGC=resGC, unkn=unknarg) #use unknparam value

# create a mergpdf

#test
system(paste('gs  -o TICsBPCs_merged.pdf  -sDEVICE=pdfwrite  -dPDFSETTINGS=/prepress  *Cs_raw.pdf'))
system(paste('gs  -o GCMS_EIC.pdf  -sDEVICE=pdfwrite  -dPDFSETTINGS=/prepress  Unknown_*'))


############################################TEST FUNCTION START#################################################################

############################################TEST FUNCTION END#################################################################




#zip files of all runGC outputs

system(paste('ls . | grep -e "tsv$"  -e "msp$"  -e "GCMS_" | zip -r -@ "rungc.zip"'))


#delete the parameters to avoid the passage to the next tool in .RData image
rm(listArguments)

#saving R data in .Rdata file to save the variables used in the present tool
save.image(paste("runGC","RData",sep="."))