Mercurial > repos > lecorguille > camera_annotate
view lib.r @ 22:01bbba6d57ff draft
"planemo upload commit e363c270e2f9d72cb0ede1a7f3bc61418b72c4c4"
author | workflow4metabolomics |
---|---|
date | Mon, 30 Mar 2020 09:27:57 -0400 |
parents | b979ba5888f7 |
children | abf1775ac14d |
line wrap: on
line source
# lib.r #@author G. Le Corguille # solve an issue with batch if arguments are logical TRUE/FALSE parseCommandArgs <- function(...) { args <- batch::parseCommandArgs(...) for (key in names(args)) { if (args[key] %in% c("TRUE","FALSE")) args[key] = as.logical(args[key]) } return(args) } #@author G. Le Corguille # This function will # - load the packages # - display the sessionInfo loadAndDisplayPackages <- function(pkgs) { for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) sessioninfo = sessionInfo() cat(sessioninfo$R.version$version.string,"\n") cat("Main packages:\n") for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") cat("Other loaded packages:\n") for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") } # This function retrieve a xset like object #@author Gildas Le Corguille lecorguille@sb-roscoff.fr getxcmsSetObject <- function(xobject) { # XCMS 1.x if (class(xobject) == "xcmsSet") return (xobject) # XCMS 3.x if (class(xobject) == "XCMSnExp") { # Get the legacy xcmsSet object suppressWarnings(xset <- as(xobject, 'xcmsSet')) if (is.null(xset@phenoData$sample_group)) sampclass(xset) = "." else sampclass(xset) <- xset@phenoData$sample_group if (!is.null(xset@phenoData$sample_name)) rownames(xset@phenoData) = xset@phenoData$sample_name return (xset) } } #@author G. Le Corguille #The function create a pdf from the different png generated by diffreport diffreport_png2pdf <- function(filebase) { dir.create("pdf") pdfEicOutput = paste0("pdf/",filebase,"-eic_pdf.pdf") pdfBoxOutput = paste0("pdf/",filebase,"-box_pdf.pdf") system(paste0("gm convert ",filebase,"_eic/*.png ",pdfEicOutput)) system(paste0("gm convert ",filebase,"_box/*.png ",pdfBoxOutput)) } #@author G. Le Corguille #The function create a zip archive from the different png generated by diffreport diffreport_png2zip <- function() { zip("eic.zip", dir(pattern="_eic"), zip=Sys.which("zip")) zip("box.zip", dir(pattern="_box"), zip=Sys.which("zip")) } #The function create a zip archive from the different tabular generated by diffreport diffreport_tabular2zip <- function() { zip("tabular.zip", dir(pattern="tabular/*"), zip=Sys.which("zip")) } #@author G. Le Corguille #This function convert if it is required the Retention Time in minutes RTSecondToMinute <- function(variableMetadata, convertRTMinute) { if (convertRTMinute){ #converting the retention times (seconds) into minutes print("converting the retention times into minutes in the variableMetadata") variableMetadata[,"rt"]=variableMetadata[,"rt"]/60 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60 } return (variableMetadata) } #@author G. Le Corguille #This function format ions identifiers formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { splitDeco = strsplit(as.character(variableMetadata$name),"_") idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) return(variableMetadata) } #The function annotateDiffreport without the corr function which bugs annotatediff <- function(xset=xset, args=args, variableMetadataOutput="variableMetadata.tsv") { # Resolve the bug with x11, with the function png options(bitmapType='cairo') #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. res=try(is.null(xset@filled)) # ------ annot ------- args$calcCiS=as.logical(args$calcCiS) args$calcIso=as.logical(args$calcIso) args$calcCaS=as.logical(args$calcCaS) # common parameters args4annotate = list(object=xset, nSlaves=args$nSlaves,sigma=args$sigma,perfwhm=args$perfwhm, maxcharge=args$maxcharge,maxiso=args$maxiso,minfrac=args$minfrac, ppm=args$ppm,mzabs=args$mzabs,quick=args$quick, polarity=args$polarity,max_peaks=args$max_peaks,intval=args$intval) # quick == FALSE if(args$quick==FALSE) { args4annotate = append(args4annotate, list(graphMethod=args$graphMethod,cor_eic_th=args$cor_eic_th,pval=args$pval, calcCiS=args$calcCiS,calcIso=args$calcIso,calcCaS=args$calcCaS)) # no ruleset if (!is.null(args$multiplier)) { args4annotate = append(args4annotate, list(multiplier=args$multiplier)) } # ruleset else { rulset=read.table(args$rules, h=T, sep=";") if (ncol(rulset) < 4) rulset=read.table(args$rules, h=T, sep="\t") if (ncol(rulset) < 4) rulset=read.table(args$rules, h=T, sep=",") if (ncol(rulset) < 4) { error_message="Your ruleset file seems not well formatted. The column separators accepted are ; , and tabulation" print(error_message) stop(error_message) } args4annotate = append(args4annotate, list(rules=rulset)) } } # launch annotate xa = do.call("annotate", args4annotate) peakList=getPeaklist(xa,intval=args$intval) peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); # --- Multi condition : diffreport --- diffrepOri=NULL if (!is.null(args$runDiffreport) & nlevels(sampclass(xset))>=2) { #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. res=try(is.null(xset@filled)) classes=levels(sampclass(xset)) x=1:(length(classes)-1) for (i in seq(along=x) ) { y=1:(length(classes)) for (n in seq(along=y)){ if(i+n <= length(classes)){ filebase=paste(classes[i],class2=classes[i+n],sep="-vs-") diffrep=diffreport( object=xset,class1=classes[i],class2=classes[i+n], filebase=filebase,eicmax=args$eicmax,eicwidth=args$eicwidth, sortpval=TRUE,value=args$value,h=args$h,w=args$w,mzdec=args$mzdec,missing=0) diffrepOri = diffrep # renamming of the column rtmed to rt to fit with camera peaklist function output colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt" colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz" # combines results and reorder columns diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F) diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))]) diffrep = RTSecondToMinute(diffrep, args$convertRTMinute) diffrep = formatIonIdentifiers(diffrep, numDigitsRT=args$numDigitsRT, numDigitsMZ=args$numDigitsMZ) if(args$sortpval){ diffrep=diffrep[order(diffrep$pvalue), ] } dir.create("tabular", showWarnings = FALSE) write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep="")) if (args$eicmax != 0) { if (args$png2 == "pdf") diffreport_png2pdf(filebase) if (args$png2 == "zip") diffreport_png2zip() } } } } if (args$tabular2 == "zip") diffreport_tabular2zip() } # --- variableMetadata --- variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] variableMetadata = RTSecondToMinute(variableMetadata, args$convertRTMinute) variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=args$numDigitsRT, numDigitsMZ=args$numDigitsMZ) # if we have 2 conditions, we keep stat of diffrep if (!is.null(args$runDiffreport) & nlevels(sampclass(xset))==2) { variableMetadata = merge(variableMetadata, diffrep[,c("name","fold","tstat","pvalue")],by.x="name", by.y="name", sort=F) if(exists("args[[\"sortpval\"]]")){ variableMetadata=variableMetadata[order(variableMetadata$pvalue), ] } } variableMetadataOri=variableMetadata write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput) return(list("xa"=xa,"diffrep"=diffrepOri,"variableMetadata"=variableMetadataOri)); } combinexsAnnos_function <- function(xaP, xaN, diffrepP=NULL,diffrepN=NULL, pos=TRUE,tol=2,ruleset=NULL,keep_meta=TRUE, convertRTMinute=F, numDigitsMZ=0, numDigitsRT=0, variableMetadataOutput="variableMetadata.tsv"){ #Load the two Rdata to extract the xset objects from positive and negative mode cat("\tObject xset from positive mode\n") print(xaP) cat("\n") cat("\tObject xset from negative mode\n") print(xaN) cat("\n") cat("\n") cat("\tCombining...\n") #Convert the string to numeric for creating matrix row=as.numeric(strsplit(ruleset,",")[[1]][1]) column=as.numeric(strsplit(ruleset,",")[[1]][2]) ruleset=cbind(row,column) #Test if the file comes from an older version tool if ((!is.null(xaP)) & (!is.null(xaN))) { #Launch the combinexsannos function from CAMERA cAnnot=combinexsAnnos(xaP, xaN,pos=pos,tol=tol,ruleset=ruleset) } else { stop("You must relauch the CAMERA.annotate step with the lastest version.") } if(pos){ xa=xaP mode="neg. Mode" } else { xa=xaN mode="pos. Mode" } peakList=getPeaklist(xa) peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); variableMetadata=cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup",mode)]); variableMetadata=variableMetadata[,!(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))] #Test if there are more than two classes (conditions) if ( nlevels(sampclass(xaP@xcmsSet))==2 & (!is.null(diffrepN)) & (!is.null(diffrepP))) { diffrepP = diffrepP[,c("name","fold","tstat","pvalue")]; colnames(diffrepP) = paste("P.",colnames(diffrepP),sep="") diffrepN = diffrepN[,c("name","fold","tstat","pvalue")]; colnames(diffrepN) = paste("N.",colnames(diffrepN),sep="") variableMetadata = merge(variableMetadata, diffrepP, by.x="name", by.y="P.name") variableMetadata = merge(variableMetadata, diffrepN, by.x="name", by.y="N.name") } rownames(variableMetadata) = NULL #TODO: checker #colnames(variableMetadata)[1:2] = c("name","mz/rt"); variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute) variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) #If the user want to keep only the metabolites which match a difference if(keep_meta){ variableMetadata=variableMetadata[variableMetadata[,c(mode)]!="",] } #Write the output into a tsv file write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput) return(variableMetadata); } # This function get the raw file path from the arguments getRawfilePathFromArguments <- function(singlefile, zipfile, args) { if (!is.null(args$zipfile)) zipfile = args$zipfile if (!is.null(args$zipfilePositive)) zipfile = args$zipfilePositive if (!is.null(args$zipfileNegative)) zipfile = args$zipfileNegative if (!is.null(args$singlefile_galaxyPath)) { singlefile_galaxyPaths = args$singlefile_galaxyPath; singlefile_sampleNames = args$singlefile_sampleName } if (!is.null(args$singlefile_galaxyPathPositive)) { singlefile_galaxyPaths = args$singlefile_galaxyPathPositive; singlefile_sampleNames = args$singlefile_sampleNamePositive } if (!is.null(args$singlefile_galaxyPathNegative)) { singlefile_galaxyPaths = args$singlefile_galaxyPathNegative; singlefile_sampleNames = args$singlefile_sampleNameNegative } if (exists("singlefile_galaxyPaths")){ singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,",")) singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,",")) singlefile=NULL for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] singlefile[[singlefile_sampleName]] = singlefile_galaxyPath } } for (argument in c("zipfile", "zipfilePositive", "zipfileNegative", "singlefile_galaxyPath", "singlefile_sampleName", "singlefile_galaxyPathPositive", "singlefile_sampleNamePositive", "singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { args[[argument]]=NULL } return(list(zipfile=zipfile, singlefile=singlefile, args=args)) } # This function retrieve the raw file in the working directory # - if zipfile: unzip the file with its directory tree # - if singlefiles: set symlink with the good filename retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { if(!is.null(singlefile) && (length("singlefile")>0)) { for (singlefile_sampleName in names(singlefile)) { singlefile_galaxyPath = singlefile[[singlefile_sampleName]] if(!file.exists(singlefile_galaxyPath)){ error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") print(error_message); stop(error_message) } file.symlink(singlefile_galaxyPath,singlefile_sampleName) } directory = "." } if(!is.null(zipfile) && (zipfile!="")) { if(!file.exists(zipfile)){ error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") print(error_message) stop(error_message) } #list all file in the zip file #zip_files=unzip(zipfile,list=T)[,"Name"] #unzip suppressWarnings(unzip(zipfile, unzip="unzip")) #get the directory name filesInZip=unzip(zipfile, list=T); directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] directory = "." if (length(directories) == 1) directory = directories cat("files_root_directory\t",directory,"\n") } return (directory) }