diff NmrNormalization_script.R @ 0:e1b29d705286 draft

planemo upload for repository https://github.com/workflow4metabolomics/nmr_normalization commit 0a2ec9e30fbf7690a80695c751e6ea432b10a759-dirty
author marie-tremblay-metatoul
date Mon, 18 Apr 2016 11:29:30 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NmrNormalization_script.R	Mon Apr 18 11:29:30 2016 -0400
@@ -0,0 +1,185 @@
+#################################################################################################################
+# SPECTRA NORMALIZATION FROM BUCKETED AND INTEGRATED SPECTRA                                                    #
+# User : Galaxy                                                                                                 #
+# Original data : --                                                                                            #
+# Starting date : 20-10-2014                                                                                    #
+# Version 1 : 27-01-2015                                                                                        #
+# Version 2 : 27-02-2015                                                                                        #
+#                                                                                                               #
+# Input files:                                                                                                  #
+#   - Data matrix containing bucketed and integrated spectra to normalize                                       #
+#   - Sample metadata matrix containing at least biological factor of interest                                  #
+#   - Scaling method: Total intensity/Probabilistic Quotient Normalization                                      #
+#   - Control group: name of control to compute median reference spectra                                        #
+#   - Graph: normalization result representation                                                                #
+#################################################################################################################
+NmrNormalization <- function(dataMatrix,scalingMethod=c("None","Total","PQN","BioFactor"),sampleMetadata=NULL,
+                             bioFactor=NULL,ControlGroup=NULL,graph=c("None","Overlay","One_per_individual"),
+                             nomFichier=NULL,savLog.txtC=NULL)
+{
+
+  ## Option
+  ##---------------
+  strAsFacL <- options()$stringsAsFactors
+  options(stingsAsFactors = FALSE)
+  options(warn = -1)
+  
+  
+  ## Constants
+  ##---------------
+  topEnvC <- environment()
+  flgC <- "\n"
+  
+  ## Log file (in case of integration into Galaxy)
+  ##----------------------------------------------
+  if(!is.null(savLog.txtC))
+    sink(savLog.txtC, append = TRUE)
+  
+  ## Functions definition
+  ##---------------------  
+  #################################################################################################################
+  # Total intensity normalization
+  # Input parameters
+  #   - data : bucketed spectra (rows=buckets; columns=samples)
+  #################################################################################################################  
+  NmrBrucker_total <- function(data)
+  {
+    # Total intensity normalization
+    data.total <- apply(data,2,sum)
+    data.normalized <- data[,1]/data.total[1]
+    for (i in 2:ncol(data))
+      data.normalized <- cbind(data.normalized,data[,i]/data.total[i]) 
+    colnames(data.normalized) <- colnames(data)
+    rownames(data.normalized) <- rownames(data)
+    return(data.normalized)
+  }
+
+  
+  #################################################################################################################
+  # Biological factor normalization
+  # Input parameters
+  #   - data : bucketed spectra (rows=buckets; columns=samples)
+  #   - sampleMetadata : dataframe with biological factor of interest measured for each invidual
+  #   - bioFactor : name of the column cotaining the biological factor of interest
+  #################################################################################################################
+  NmrBrucker_bioFact <- function(data,sampleMetadata,bioFactor)
+  {
+    # Total intensity normalization
+    data.normalized <- data[,1]/bioFactor[1]
+    for (i in 2:ncol(data))
+      data.normalized <- cbind(data.normalized,data[,i]/bioFactor[i]) 
+    colnames(data.normalized) <- colnames(data)
+    rownames(data.normalized) <- rownames(data)
+    return(data.normalized)
+  }
+  
+  
+  #################################################################################################################
+  # Probabilistic quotient normalization (PQN)
+  # Input parameters
+  #   - data : bucketed spectra (rows=buckets; columns=samples)
+  #   - sampleMetadata : dataframe with treatment group of inviduals
+  #   - pqnFactor : number of the column cotaining the biological facor of interest
+  #   - nomControl : name of the treatment group
+  #################################################################################################################
+  NmrBrucker_pqn <- function(data,sampleMetadata,pqnFactor,nomControl)
+  {
+    # Total intensity normalization
+    data.total <- apply(data,2,sum)
+    data.normalized <- data[,1]/data.total[1]
+    for (i in 2:ncol(data))
+      data.normalized <- cbind(data.normalized,data[,i]/data.total[i]) 
+    colnames(data.normalized) <- colnames(data)
+    rownames(data.normalized) <- rownames(data)
+    
+    # Reference spectrum
+    # Recuperation spectres individus controle
+    control.spectra <- data.normalized[,sampleMetadata[,pqnFactor]==nomControl]
+    spectrum.ref <- apply(control.spectra,1,median)
+    
+    # Ratio between normalized and reference spectra
+    data.normalized.ref <- data.normalized/spectrum.ref
+    
+    # Median ratio
+    data.normalized.ref.median <- apply(data.normalized.ref,1,median)
+    
+    # Normalization
+    data.normalizedPQN <- data.normalized[,1]/data.normalized.ref.median
+    for (i in 2:ncol(data))
+      data.normalizedPQN <- cbind(data.normalizedPQN,data.normalized[,i]/data.normalized.ref.median)
+    colnames(data.normalizedPQN) <- colnames(data)
+    rownames(data.normalizedPQN) <- rownames(data)
+    
+    return(data.normalizedPQN)
+  }
+  
+  
+  ## Tests
+  if (scalingMethod=="QuantitativeVariable")
+  {
+    if(mode(sampleMetadata[,bioFactor]) == "character")
+       bioFact <- factor(sampleMetadata[,bioFactor])
+    else
+       bioFact <- sampleMetadata[,bioFactor]
+  }
+  
+  ## Spectra scaling depending on the user choice
+  if (scalingMethod == "None")
+  {
+    NormalizedBucketedSpectra <- dataMatrix
+  }
+  else if (scalingMethod == "Total")
+  {
+    NormalizedBucketedSpectra <- NmrBrucker_total(dataMatrix)    
+  }
+  else if (scalingMethod == "PQN")
+  {
+    NormalizedBucketedSpectra <- NmrBrucker_pqn(dataMatrix,sampleMetadata,bioFactor,ControlGroup)
+  }
+  else if (scalingMethod == "QuantitativeVariable")
+  {
+    NormalizedBucketedSpectra <- NmrBrucker_bioFact(dataMatrix,sampleMetadata,bioFact)
+  }
+  
+  # Graphic
+  if (graph != "None")
+  {
+    # Graphic Device opening
+    pdf(nomFichier,onefile=TRUE)
+    if (graph == "Overlay")
+    {
+      ymax <- max(NormalizedBucketedSpectra)
+      plot(1:length(NormalizedBucketedSpectra[,1]),NormalizedBucketedSpectra[,1],ylim=c(0,ymax),
+            type='l',col=1,xlab="Chemical shift",ylab="Intensity")
+      for (i in 2:ncol(NormalizedBucketedSpectra))
+      {
+        spectre <- NormalizedBucketedSpectra[,i]
+        lines(spectre,col=i)
+      }
+    }
+    else
+    {
+      for (i in 1:ncol(NormalizedBucketedSpectra))
+      {
+        plot(1:length(NormalizedBucketedSpectra[,i]),NormalizedBucketedSpectra[,i],type='l',col=1,
+             xlab="Chemical shift",ylab="Intensity")
+      }
+    }
+    dev.off()
+  }
+  
+  # Output datasets creation
+  if (scalingMethod == "None" || scalingMethod == "Total")
+  {
+      sampleMetadata <- data.frame(1:ncol(NormalizedBucketedSpectra))
+      rownames(sampleMetadata) <- colnames(NormalizedBucketedSpectra)
+      colnames(sampleMetadata) <- "SampleOrder"
+  }
+   
+  variableMetadata <- data.frame(1:nrow(NormalizedBucketedSpectra))
+  rownames(variableMetadata) <- rownames(NormalizedBucketedSpectra)
+  colnames(variableMetadata) <- "VariableOrder"
+
+  return(list(NormalizedBucketedSpectra,sampleMetadata,variableMetadata))
+
+}