diff lib.r @ 9:7da9252dd983 draft

planemo upload commit 9d47e3b467dbbe0af0d90a937c5e9f2c4b958c4b
author lecorguille
date Mon, 25 Apr 2016 11:06:25 -0400
parents
children 1c30ff90f3ae
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib.r	Mon Apr 25 11:06:25 2016 -0400
@@ -0,0 +1,188 @@
+# lib.r version="2.2.1"
+
+#The function create a pdf from the different png generated by diffreport
+diffreport_png2pdf <- function(filebase, new_file_path) {
+
+  pdfEicOutput = paste(new_file_path,filebase,"-eic_visible_pdf",sep="")
+  pdfBoxOutput = paste(new_file_path,filebase,"-box_visible_pdf",sep="")
+
+  system(paste("gm convert ",filebase,"_eic/*.png ",filebase,"_eic.pdf",sep=""))
+  system(paste("gm convert ",filebase,"_box/*.png ",filebase,"_box.pdf",sep=""))
+
+  file.copy(paste(filebase,"_eic.pdf",sep=""), pdfEicOutput)
+  file.copy(paste(filebase,"_box.pdf",sep=""), pdfBoxOutput)
+}
+
+#The function annotateDiffreport without the corr function which bugs
+annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv", dataMatrixOutput="dataMatrix.tsv",new_file_path=NULL) {
+  # Resolve the bug with x11, with the function png
+  options(bitmapType='cairo')
+
+  #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
+  res=try(is.null(xset@filled))
+
+  # ------ annot -------
+  listArguments[["calcCiS"]]=as.logical(listArguments[["calcCiS"]])
+  listArguments[["calcIso"]]=as.logical(listArguments[["calcIso"]])
+  listArguments[["calcCaS"]]=as.logical(listArguments[["calcCaS"]])
+
+  #graphMethod parameter bugs where this parameter is not defined in quick=true
+  if(listArguments[["quick"]]==TRUE) {
+    xa= annotate(object=xset,nSlaves=1,sigma=listArguments[["sigma"]],perfwhm=listArguments[["perfwhm"]],maxcharge=listArguments[["maxcharge"]],maxiso=listArguments[["maxiso"]],minfrac=listArguments[["minfrac"]],ppm=listArguments[["ppm"]],mzabs=listArguments[["mzabs"]],quick=listArguments[["quick"]],polarity=listArguments[["polarity"]],max_peaks=listArguments[["max_peaks"]],intval=listArguments[["intval"]])
+  }
+  else {
+    xa= annotate(object=xset,nSlaves=1,sigma=listArguments[["sigma"]],perfwhm=listArguments[["perfwhm"]],graphMethod=listArguments[["graphMethod"]],cor_eic_th=listArguments[["cor_eic_th"]],pval=listArguments[["pval"]],calcCiS=listArguments[["calcCiS"]],calcIso=listArguments[["calcIso"]],calcCaS=listArguments[["calcCaS"]],multiplier=listArguments[["multiplier"]],maxcharge=listArguments[["maxcharge"]],maxiso=listArguments[["maxiso"]],minfrac=listArguments[["minfrac"]],ppm=listArguments[["ppm"]],mzabs=listArguments[["mzabs"]],quick=listArguments[["quick"]],polarity=listArguments[["polarity"]],max_peaks=listArguments[["max_peaks"]],intval=listArguments[["intval"]])
+
+  }  
+  peakList=getPeaklist(xa,intval=listArguments[["intval"]])
+  peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
+
+
+  # --- Multi condition : diffreport --- 
+  diffrep=NULL
+  if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) {
+    #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
+    res=try(is.null(xset@filled))
+    classes=levels(sampclass(xset))
+    x=1:(length(classes)-1)
+    for (i in seq(along=x) ) {
+      y=1:(length(classes))
+      for (n in seq(along=y)){
+        if(i+n <= length(classes)){
+          filebase=paste(classes[i],class2=classes[i+n],sep="-vs-")
+
+          diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]])
+          #combines results
+          diffreportTSV=merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F)
+          diffreportTSV=cbind(diffreportTSV[,!(colnames(diffreportTSV) %in% c(sampnames(xa@xcmsSet)))],diffreportTSV[,(colnames(diffreportTSV) %in% c(sampnames(xa@xcmsSet)))])
+
+          if(listArguments[["sortpval"]]){
+            diffreportTSV=diffreportTSV[order(diffreportTSV$pvalue), ]
+          }
+    
+          if (listArguments[["convert_param"]]){
+              #converting the retention times (seconds) into minutes
+              diffreportTSV$rt=diffreportTSV$rt/60;diffreportTSV$rtmin=diffreportTSV$rtmin/60; diffreportTSV$rtmax=diffreportTSV$rtmax/60;
+          }
+          write.table(diffreportTSV, sep="\t", quote=FALSE, row.names=FALSE, file=paste(new_file_path,filebase,"-tabular_visible_tabular",sep=""))
+
+          if (listArguments[["eicmax"]] != 0) {
+              diffreport_png2pdf(filebase, new_file_path)
+          }
+        }
+      }
+    }
+  }
+
+
+
+
+  # --- variableMetadata ---
+  variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
+  # if we have 2 conditions, we keep stat of diffrep
+  if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))==2) {
+    variableMetadata = merge(variableMetadata, diffrep[,c("name","fold","tstat","pvalue")],by.x="name", by.y="name", sort=F)
+    if(exists("listArguments[[\"sortpval\"]]")){
+      variableMetadata=variableMetadata[order(variableMetadata$pvalue), ]
+    }
+  }
+
+  variableMetadataOri=variableMetadata
+  if (listArguments[["convert_param"]]){
+    #converting the retention times (seconds) into minutes
+    print("converting the retention times into minutes in the variableMetadata")
+    variableMetadata$rt=variableMetadata$rt/60;variableMetadata$rtmin=variableMetadata$rtmin/60; variableMetadata$rtmax=variableMetadata$rtmax/60;
+  }
+  #Transform metabolites name
+  variableMetadata$name= paste("M",round(variableMetadata$mz,digits=listArguments[["num_digits"]]),"T",round(variableMetadata$rt),sep="")    
+  write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
+
+  # --- dataMatrix ---
+  dataMatrix = peakList[,(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
+  dataMatrix=cbind(peakList$name,dataMatrix); colnames(dataMatrix)[1] = c("name");
+
+  if (listArguments[["convert_param"]]){
+    #converting the retention times (seconds) into minutes
+    print("converting the retention times into minutes in the dataMatrix ids")
+    peakList$rt=peakList$rt/60
+  }
+  dataMatrix$name= paste("M",round(peakList$mz,digits=listArguments[["num_digits"]]),"T",round(peakList$rt),sep="")
+  write.table(dataMatrix, sep="\t", quote=FALSE, row.names=FALSE, file=dataMatrixOutput)
+  
+  return(list("xa"=xa,"diffrep"=diffrep,"variableMetadata"=variableMetadataOri));
+
+}
+
+
+combinexsAnnos_function <- function(xaP, xaN, listOFlistArgumentsP,listOFlistArgumentsN, diffrepP=NULL,diffrepN=NULL,convert_param=FALSE,pos=TRUE,tol=2,ruleset=NULL,keep_meta=TRUE, variableMetadataOutput="variableMetadata.tsv"){
+
+  #Load the two Rdata to extract the xset objects from positive and negative mode
+  cat("\tObject xset from positive mode\n")
+  print(xaP)
+  cat("\n")  
+
+  cat("\tObject xset from negative mode\n")
+  print(xaN)
+  cat("\n")
+    
+  cat("\n")
+  cat("\tCombining...\n")
+  #Convert the string to numeric for creating matrix
+  row=as.numeric(strsplit(ruleset,",")[[1]][1])
+  column=as.numeric(strsplit(ruleset,",")[[1]][2])
+  ruleset=cbind(row,column)
+  #Test if the file comes from an older version tool
+  if ((!is.null(xaP)) & (!is.null(xaN))) {
+    #Launch the combinexsannos function from CAMERA
+    cAnnot=combinexsAnnos(xaP, xaN,pos=pos,tol=tol,ruleset=ruleset)
+  } else {
+    stop("You must relauch the CAMERA.annotate step with the lastest version.")
+  }
+
+  
+
+  if(pos){
+    xa=xaP
+    listOFlistArgumentsP=listOFlistArguments
+    mode="neg. Mode"
+  } else {
+    xa=xaN
+    listOFlistArgumentsN=listOFlistArguments
+    mode="pos. Mode"
+  }
+  intval = "into"; for (steps in names(listOFlistArguments)) { if (!is.null(listOFlistArguments[[steps]]$intval)) intval = listOFlistArguments[[steps]]$intval }
+  peakList=getPeaklist(xa,intval=intval)
+  peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
+  variableMetadata=cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup",mode)]);
+  variableMetadata=variableMetadata[,!(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))]
+
+  #Test if there are more than two classes (conditions)
+  if ( nlevels(sampclass(xaP@xcmsSet))==2 & (!is.null(diffrepN)) & (!is.null(diffrepP))) {
+    diffrepP = diffrepP[,c("name","fold","tstat","pvalue")]; colnames(diffrepP) = paste("P.",colnames(diffrepP),sep="")
+    diffrepN = diffrepN[,c("name","fold","tstat","pvalue")]; colnames(diffrepN) = paste("N.",colnames(diffrepN),sep="")
+    
+    variableMetadata = merge(variableMetadata, diffrepP, by.x="name", by.y="P.name")
+    variableMetadata = merge(variableMetadata, diffrepN, by.x="name", by.y="N.name")
+  } 
+    
+  rownames(variableMetadata) = NULL
+  #TODO: checker
+  #colnames(variableMetadata)[1:2] = c("name","mz/rt");
+
+  #If the user want to convert the retention times (seconds) into minutes.
+  if (listArguments[["convert_param"]]){
+    #converting the retention times (seconds) into minutes
+    cat("\tConverting the retention times into minutes\n")
+    variableMetadata$rtmed=cAnnot$rt/60; variableMetadata$rtmin=cAnnot$rtmin/60; variableMetadata$rtmax=cAnnot$rtmax/60;
+  }
+
+  #If the user want to keep only the metabolites which match a difference
+  if(keep_meta){
+    variableMetadata=variableMetadata[variableMetadata[,c(mode)]!="",]
+  }
+      
+  #Write the output into a tsv file
+  write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
+  return(variableMetadata);
+
+}