Mercurial > repos > marie-tremblay-metatoul > spectral_normalization
diff NmrNormalization_script.R @ 0:a5e6499f1b4d draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/spectral_normalization commit 559844c168f0450b9657346ba890d9c028d7537a
author | marie-tremblay-metatoul |
---|---|
date | Tue, 22 Nov 2016 06:42:44 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/NmrNormalization_script.R Tue Nov 22 06:42:44 2016 -0500 @@ -0,0 +1,185 @@ +################################################################################################################# +# SPECTRA NORMALIZATION FROM SPECTRAL DATA # +# User : Galaxy # +# Original data : -- # +# Starting date : 20-10-2014 # +# Version 1 : 27-01-2015 # +# Version 2 : 27-02-2015 # +# # +# Input files: # +# - Data matrix containing bucketed and integrated spectra to normalize # +# - Sample metadata matrix containing at least biological factor of interest # +# - Scaling method: Total intensity/Probabilistic Quotient Normalization # +# - Control group: name of control to compute median reference spectra # +# - Graph: normalization result representation # +################################################################################################################# +NmrNormalization <- function(dataMatrix,scalingMethod=c("None","Total","PQN","BioFactor"),sampleMetadata=NULL, + bioFactor=NULL,ControlGroup=NULL,graph=c("None","Overlay","One_per_individual"), + nomFichier=NULL,savLog.txtC=NULL) +{ + + ## Option + ##--------------- + strAsFacL <- options()$stringsAsFactors + options(stingsAsFactors = FALSE) + options(warn = -1) + + + ## Constants + ##--------------- + topEnvC <- environment() + flgC <- "\n" + + ## Log file (in case of integration into Galaxy) + ##---------------------------------------------- + if(!is.null(savLog.txtC)) + sink(savLog.txtC, append = TRUE) + + ## Functions definition + ##--------------------- + ################################################################################################################# + # Total intensity normalization + # Input parameters + # - data : bucketed spectra (rows=buckets; columns=samples) + ################################################################################################################# + NmrBrucker_total <- function(data) + { + # Total intensity normalization + data.total <- apply(data,2,sum) + data.normalized <- data[,1]/data.total[1] + for (i in 2:ncol(data)) + data.normalized <- cbind(data.normalized,data[,i]/data.total[i]) + colnames(data.normalized) <- colnames(data) + rownames(data.normalized) <- rownames(data) + return(data.normalized) + } + + + ################################################################################################################# + # Biological factor normalization + # Input parameters + # - data : bucketed spectra (rows=buckets; columns=samples) + # - sampleMetadata : dataframe with biological factor of interest measured for each invidual + # - bioFactor : name of the column cotaining the biological factor of interest + ################################################################################################################# + NmrBrucker_bioFact <- function(data,sampleMetadata,bioFactor) + { + # Total intensity normalization + data.normalized <- data[,1]/bioFactor[1] + for (i in 2:ncol(data)) + data.normalized <- cbind(data.normalized,data[,i]/bioFactor[i]) + colnames(data.normalized) <- colnames(data) + rownames(data.normalized) <- rownames(data) + return(data.normalized) + } + + + ################################################################################################################# + # Probabilistic quotient normalization (PQN) + # Input parameters + # - data : bucketed spectra (rows=buckets; columns=samples) + # - sampleMetadata : dataframe with treatment group of inviduals + # - pqnFactor : number of the column cotaining the biological facor of interest + # - nomControl : name of the treatment group + ################################################################################################################# + NmrBrucker_pqn <- function(data,sampleMetadata,pqnFactor,nomControl) + { + # Total intensity normalization + data.total <- apply(data,2,sum) + data.normalized <- data[,1]/data.total[1] + for (i in 2:ncol(data)) + data.normalized <- cbind(data.normalized,data[,i]/data.total[i]) + colnames(data.normalized) <- colnames(data) + rownames(data.normalized) <- rownames(data) + + # Reference spectrum + # Recuperation spectres individus controle + control.spectra <- data.normalized[,sampleMetadata[,pqnFactor]==nomControl] + spectrum.ref <- apply(control.spectra,1,median) + + # Ratio between normalized and reference spectra + data.normalized.ref <- data.normalized/spectrum.ref + + # Median ratio + data.normalized.ref.median <- apply(data.normalized.ref,1,median) + + # Normalization + data.normalizedPQN <- data.normalized[,1]/data.normalized.ref.median + for (i in 2:ncol(data)) + data.normalizedPQN <- cbind(data.normalizedPQN,data.normalized[,i]/data.normalized.ref.median) + colnames(data.normalizedPQN) <- colnames(data) + rownames(data.normalizedPQN) <- rownames(data) + + return(data.normalizedPQN) + } + + + ## Tests + if (scalingMethod=="QuantitativeVariable") + { + if(mode(sampleMetadata[,bioFactor]) == "character") + bioFact <- factor(sampleMetadata[,bioFactor]) + else + bioFact <- sampleMetadata[,bioFactor] + } + + ## Spectra scaling depending on the user choice + if (scalingMethod == "None") + { + NormalizedBucketedSpectra <- dataMatrix + } + else if (scalingMethod == "Total") + { + NormalizedBucketedSpectra <- NmrBrucker_total(dataMatrix) + } + else if (scalingMethod == "PQN") + { + NormalizedBucketedSpectra <- NmrBrucker_pqn(dataMatrix,sampleMetadata,bioFactor,ControlGroup) + } + else if (scalingMethod == "QuantitativeVariable") + { + NormalizedBucketedSpectra <- NmrBrucker_bioFact(dataMatrix,sampleMetadata,bioFact) + } + + # Graphic + if (graph != "None") + { + # Graphic Device opening + pdf(nomFichier,onefile=TRUE) + if (graph == "Overlay") + { + ymax <- max(NormalizedBucketedSpectra) + plot(1:length(NormalizedBucketedSpectra[,1]),NormalizedBucketedSpectra[,1],ylim=c(0,ymax), + type='l',col=1,xlab="Chemical shift",ylab="Intensity") + for (i in 2:ncol(NormalizedBucketedSpectra)) + { + spectre <- NormalizedBucketedSpectra[,i] + lines(spectre,col=i) + } + } + else + { + for (i in 1:ncol(NormalizedBucketedSpectra)) + { + plot(1:length(NormalizedBucketedSpectra[,i]),NormalizedBucketedSpectra[,i],type='l',col=1, + xlab="Chemical shift",ylab="Intensity") + } + } + dev.off() + } + + # Output datasets creation + if (scalingMethod == "None" || scalingMethod == "Total") + { + sampleMetadata <- data.frame(1:ncol(NormalizedBucketedSpectra)) + rownames(sampleMetadata) <- colnames(NormalizedBucketedSpectra) + colnames(sampleMetadata) <- "SampleOrder" + } + + variableMetadata <- data.frame(1:nrow(NormalizedBucketedSpectra)) + rownames(variableMetadata) <- rownames(NormalizedBucketedSpectra) + colnames(variableMetadata) <- "VariableOrder" + + return(list(NormalizedBucketedSpectra,sampleMetadata,variableMetadata)) + +}