Mercurial > repos > marie-tremblay-metatoul > spectral_normalization
comparison NmrNormalization_script.R @ 0:a5e6499f1b4d draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/spectral_normalization commit 559844c168f0450b9657346ba890d9c028d7537a
| author | marie-tremblay-metatoul |
|---|---|
| date | Tue, 22 Nov 2016 06:42:44 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a5e6499f1b4d |
|---|---|
| 1 ################################################################################################################# | |
| 2 # SPECTRA NORMALIZATION FROM SPECTRAL DATA # | |
| 3 # User : Galaxy # | |
| 4 # Original data : -- # | |
| 5 # Starting date : 20-10-2014 # | |
| 6 # Version 1 : 27-01-2015 # | |
| 7 # Version 2 : 27-02-2015 # | |
| 8 # # | |
| 9 # Input files: # | |
| 10 # - Data matrix containing bucketed and integrated spectra to normalize # | |
| 11 # - Sample metadata matrix containing at least biological factor of interest # | |
| 12 # - Scaling method: Total intensity/Probabilistic Quotient Normalization # | |
| 13 # - Control group: name of control to compute median reference spectra # | |
| 14 # - Graph: normalization result representation # | |
| 15 ################################################################################################################# | |
| 16 NmrNormalization <- function(dataMatrix,scalingMethod=c("None","Total","PQN","BioFactor"),sampleMetadata=NULL, | |
| 17 bioFactor=NULL,ControlGroup=NULL,graph=c("None","Overlay","One_per_individual"), | |
| 18 nomFichier=NULL,savLog.txtC=NULL) | |
| 19 { | |
| 20 | |
| 21 ## Option | |
| 22 ##--------------- | |
| 23 strAsFacL <- options()$stringsAsFactors | |
| 24 options(stingsAsFactors = FALSE) | |
| 25 options(warn = -1) | |
| 26 | |
| 27 | |
| 28 ## Constants | |
| 29 ##--------------- | |
| 30 topEnvC <- environment() | |
| 31 flgC <- "\n" | |
| 32 | |
| 33 ## Log file (in case of integration into Galaxy) | |
| 34 ##---------------------------------------------- | |
| 35 if(!is.null(savLog.txtC)) | |
| 36 sink(savLog.txtC, append = TRUE) | |
| 37 | |
| 38 ## Functions definition | |
| 39 ##--------------------- | |
| 40 ################################################################################################################# | |
| 41 # Total intensity normalization | |
| 42 # Input parameters | |
| 43 # - data : bucketed spectra (rows=buckets; columns=samples) | |
| 44 ################################################################################################################# | |
| 45 NmrBrucker_total <- function(data) | |
| 46 { | |
| 47 # Total intensity normalization | |
| 48 data.total <- apply(data,2,sum) | |
| 49 data.normalized <- data[,1]/data.total[1] | |
| 50 for (i in 2:ncol(data)) | |
| 51 data.normalized <- cbind(data.normalized,data[,i]/data.total[i]) | |
| 52 colnames(data.normalized) <- colnames(data) | |
| 53 rownames(data.normalized) <- rownames(data) | |
| 54 return(data.normalized) | |
| 55 } | |
| 56 | |
| 57 | |
| 58 ################################################################################################################# | |
| 59 # Biological factor normalization | |
| 60 # Input parameters | |
| 61 # - data : bucketed spectra (rows=buckets; columns=samples) | |
| 62 # - sampleMetadata : dataframe with biological factor of interest measured for each invidual | |
| 63 # - bioFactor : name of the column cotaining the biological factor of interest | |
| 64 ################################################################################################################# | |
| 65 NmrBrucker_bioFact <- function(data,sampleMetadata,bioFactor) | |
| 66 { | |
| 67 # Total intensity normalization | |
| 68 data.normalized <- data[,1]/bioFactor[1] | |
| 69 for (i in 2:ncol(data)) | |
| 70 data.normalized <- cbind(data.normalized,data[,i]/bioFactor[i]) | |
| 71 colnames(data.normalized) <- colnames(data) | |
| 72 rownames(data.normalized) <- rownames(data) | |
| 73 return(data.normalized) | |
| 74 } | |
| 75 | |
| 76 | |
| 77 ################################################################################################################# | |
| 78 # Probabilistic quotient normalization (PQN) | |
| 79 # Input parameters | |
| 80 # - data : bucketed spectra (rows=buckets; columns=samples) | |
| 81 # - sampleMetadata : dataframe with treatment group of inviduals | |
| 82 # - pqnFactor : number of the column cotaining the biological facor of interest | |
| 83 # - nomControl : name of the treatment group | |
| 84 ################################################################################################################# | |
| 85 NmrBrucker_pqn <- function(data,sampleMetadata,pqnFactor,nomControl) | |
| 86 { | |
| 87 # Total intensity normalization | |
| 88 data.total <- apply(data,2,sum) | |
| 89 data.normalized <- data[,1]/data.total[1] | |
| 90 for (i in 2:ncol(data)) | |
| 91 data.normalized <- cbind(data.normalized,data[,i]/data.total[i]) | |
| 92 colnames(data.normalized) <- colnames(data) | |
| 93 rownames(data.normalized) <- rownames(data) | |
| 94 | |
| 95 # Reference spectrum | |
| 96 # Recuperation spectres individus controle | |
| 97 control.spectra <- data.normalized[,sampleMetadata[,pqnFactor]==nomControl] | |
| 98 spectrum.ref <- apply(control.spectra,1,median) | |
| 99 | |
| 100 # Ratio between normalized and reference spectra | |
| 101 data.normalized.ref <- data.normalized/spectrum.ref | |
| 102 | |
| 103 # Median ratio | |
| 104 data.normalized.ref.median <- apply(data.normalized.ref,1,median) | |
| 105 | |
| 106 # Normalization | |
| 107 data.normalizedPQN <- data.normalized[,1]/data.normalized.ref.median | |
| 108 for (i in 2:ncol(data)) | |
| 109 data.normalizedPQN <- cbind(data.normalizedPQN,data.normalized[,i]/data.normalized.ref.median) | |
| 110 colnames(data.normalizedPQN) <- colnames(data) | |
| 111 rownames(data.normalizedPQN) <- rownames(data) | |
| 112 | |
| 113 return(data.normalizedPQN) | |
| 114 } | |
| 115 | |
| 116 | |
| 117 ## Tests | |
| 118 if (scalingMethod=="QuantitativeVariable") | |
| 119 { | |
| 120 if(mode(sampleMetadata[,bioFactor]) == "character") | |
| 121 bioFact <- factor(sampleMetadata[,bioFactor]) | |
| 122 else | |
| 123 bioFact <- sampleMetadata[,bioFactor] | |
| 124 } | |
| 125 | |
| 126 ## Spectra scaling depending on the user choice | |
| 127 if (scalingMethod == "None") | |
| 128 { | |
| 129 NormalizedBucketedSpectra <- dataMatrix | |
| 130 } | |
| 131 else if (scalingMethod == "Total") | |
| 132 { | |
| 133 NormalizedBucketedSpectra <- NmrBrucker_total(dataMatrix) | |
| 134 } | |
| 135 else if (scalingMethod == "PQN") | |
| 136 { | |
| 137 NormalizedBucketedSpectra <- NmrBrucker_pqn(dataMatrix,sampleMetadata,bioFactor,ControlGroup) | |
| 138 } | |
| 139 else if (scalingMethod == "QuantitativeVariable") | |
| 140 { | |
| 141 NormalizedBucketedSpectra <- NmrBrucker_bioFact(dataMatrix,sampleMetadata,bioFact) | |
| 142 } | |
| 143 | |
| 144 # Graphic | |
| 145 if (graph != "None") | |
| 146 { | |
| 147 # Graphic Device opening | |
| 148 pdf(nomFichier,onefile=TRUE) | |
| 149 if (graph == "Overlay") | |
| 150 { | |
| 151 ymax <- max(NormalizedBucketedSpectra) | |
| 152 plot(1:length(NormalizedBucketedSpectra[,1]),NormalizedBucketedSpectra[,1],ylim=c(0,ymax), | |
| 153 type='l',col=1,xlab="Chemical shift",ylab="Intensity") | |
| 154 for (i in 2:ncol(NormalizedBucketedSpectra)) | |
| 155 { | |
| 156 spectre <- NormalizedBucketedSpectra[,i] | |
| 157 lines(spectre,col=i) | |
| 158 } | |
| 159 } | |
| 160 else | |
| 161 { | |
| 162 for (i in 1:ncol(NormalizedBucketedSpectra)) | |
| 163 { | |
| 164 plot(1:length(NormalizedBucketedSpectra[,i]),NormalizedBucketedSpectra[,i],type='l',col=1, | |
| 165 xlab="Chemical shift",ylab="Intensity") | |
| 166 } | |
| 167 } | |
| 168 dev.off() | |
| 169 } | |
| 170 | |
| 171 # Output datasets creation | |
| 172 if (scalingMethod == "None" || scalingMethod == "Total") | |
| 173 { | |
| 174 sampleMetadata <- data.frame(1:ncol(NormalizedBucketedSpectra)) | |
| 175 rownames(sampleMetadata) <- colnames(NormalizedBucketedSpectra) | |
| 176 colnames(sampleMetadata) <- "SampleOrder" | |
| 177 } | |
| 178 | |
| 179 variableMetadata <- data.frame(1:nrow(NormalizedBucketedSpectra)) | |
| 180 rownames(variableMetadata) <- rownames(NormalizedBucketedSpectra) | |
| 181 colnames(variableMetadata) <- "VariableOrder" | |
| 182 | |
| 183 return(list(NormalizedBucketedSpectra,sampleMetadata,variableMetadata)) | |
| 184 | |
| 185 } |
