comparison NmrNormalization_script.R @ 0:e1b29d705286 draft

planemo upload for repository https://github.com/workflow4metabolomics/nmr_normalization commit 0a2ec9e30fbf7690a80695c751e6ea432b10a759-dirty
author marie-tremblay-metatoul
date Mon, 18 Apr 2016 11:29:30 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e1b29d705286
1 #################################################################################################################
2 # SPECTRA NORMALIZATION FROM BUCKETED AND INTEGRATED SPECTRA #
3 # User : Galaxy #
4 # Original data : -- #
5 # Starting date : 20-10-2014 #
6 # Version 1 : 27-01-2015 #
7 # Version 2 : 27-02-2015 #
8 # #
9 # Input files: #
10 # - Data matrix containing bucketed and integrated spectra to normalize #
11 # - Sample metadata matrix containing at least biological factor of interest #
12 # - Scaling method: Total intensity/Probabilistic Quotient Normalization #
13 # - Control group: name of control to compute median reference spectra #
14 # - Graph: normalization result representation #
15 #################################################################################################################
16 NmrNormalization <- function(dataMatrix,scalingMethod=c("None","Total","PQN","BioFactor"),sampleMetadata=NULL,
17 bioFactor=NULL,ControlGroup=NULL,graph=c("None","Overlay","One_per_individual"),
18 nomFichier=NULL,savLog.txtC=NULL)
19 {
20
21 ## Option
22 ##---------------
23 strAsFacL <- options()$stringsAsFactors
24 options(stingsAsFactors = FALSE)
25 options(warn = -1)
26
27
28 ## Constants
29 ##---------------
30 topEnvC <- environment()
31 flgC <- "\n"
32
33 ## Log file (in case of integration into Galaxy)
34 ##----------------------------------------------
35 if(!is.null(savLog.txtC))
36 sink(savLog.txtC, append = TRUE)
37
38 ## Functions definition
39 ##---------------------
40 #################################################################################################################
41 # Total intensity normalization
42 # Input parameters
43 # - data : bucketed spectra (rows=buckets; columns=samples)
44 #################################################################################################################
45 NmrBrucker_total <- function(data)
46 {
47 # Total intensity normalization
48 data.total <- apply(data,2,sum)
49 data.normalized <- data[,1]/data.total[1]
50 for (i in 2:ncol(data))
51 data.normalized <- cbind(data.normalized,data[,i]/data.total[i])
52 colnames(data.normalized) <- colnames(data)
53 rownames(data.normalized) <- rownames(data)
54 return(data.normalized)
55 }
56
57
58 #################################################################################################################
59 # Biological factor normalization
60 # Input parameters
61 # - data : bucketed spectra (rows=buckets; columns=samples)
62 # - sampleMetadata : dataframe with biological factor of interest measured for each invidual
63 # - bioFactor : name of the column cotaining the biological factor of interest
64 #################################################################################################################
65 NmrBrucker_bioFact <- function(data,sampleMetadata,bioFactor)
66 {
67 # Total intensity normalization
68 data.normalized <- data[,1]/bioFactor[1]
69 for (i in 2:ncol(data))
70 data.normalized <- cbind(data.normalized,data[,i]/bioFactor[i])
71 colnames(data.normalized) <- colnames(data)
72 rownames(data.normalized) <- rownames(data)
73 return(data.normalized)
74 }
75
76
77 #################################################################################################################
78 # Probabilistic quotient normalization (PQN)
79 # Input parameters
80 # - data : bucketed spectra (rows=buckets; columns=samples)
81 # - sampleMetadata : dataframe with treatment group of inviduals
82 # - pqnFactor : number of the column cotaining the biological facor of interest
83 # - nomControl : name of the treatment group
84 #################################################################################################################
85 NmrBrucker_pqn <- function(data,sampleMetadata,pqnFactor,nomControl)
86 {
87 # Total intensity normalization
88 data.total <- apply(data,2,sum)
89 data.normalized <- data[,1]/data.total[1]
90 for (i in 2:ncol(data))
91 data.normalized <- cbind(data.normalized,data[,i]/data.total[i])
92 colnames(data.normalized) <- colnames(data)
93 rownames(data.normalized) <- rownames(data)
94
95 # Reference spectrum
96 # Recuperation spectres individus controle
97 control.spectra <- data.normalized[,sampleMetadata[,pqnFactor]==nomControl]
98 spectrum.ref <- apply(control.spectra,1,median)
99
100 # Ratio between normalized and reference spectra
101 data.normalized.ref <- data.normalized/spectrum.ref
102
103 # Median ratio
104 data.normalized.ref.median <- apply(data.normalized.ref,1,median)
105
106 # Normalization
107 data.normalizedPQN <- data.normalized[,1]/data.normalized.ref.median
108 for (i in 2:ncol(data))
109 data.normalizedPQN <- cbind(data.normalizedPQN,data.normalized[,i]/data.normalized.ref.median)
110 colnames(data.normalizedPQN) <- colnames(data)
111 rownames(data.normalizedPQN) <- rownames(data)
112
113 return(data.normalizedPQN)
114 }
115
116
117 ## Tests
118 if (scalingMethod=="QuantitativeVariable")
119 {
120 if(mode(sampleMetadata[,bioFactor]) == "character")
121 bioFact <- factor(sampleMetadata[,bioFactor])
122 else
123 bioFact <- sampleMetadata[,bioFactor]
124 }
125
126 ## Spectra scaling depending on the user choice
127 if (scalingMethod == "None")
128 {
129 NormalizedBucketedSpectra <- dataMatrix
130 }
131 else if (scalingMethod == "Total")
132 {
133 NormalizedBucketedSpectra <- NmrBrucker_total(dataMatrix)
134 }
135 else if (scalingMethod == "PQN")
136 {
137 NormalizedBucketedSpectra <- NmrBrucker_pqn(dataMatrix,sampleMetadata,bioFactor,ControlGroup)
138 }
139 else if (scalingMethod == "QuantitativeVariable")
140 {
141 NormalizedBucketedSpectra <- NmrBrucker_bioFact(dataMatrix,sampleMetadata,bioFact)
142 }
143
144 # Graphic
145 if (graph != "None")
146 {
147 # Graphic Device opening
148 pdf(nomFichier,onefile=TRUE)
149 if (graph == "Overlay")
150 {
151 ymax <- max(NormalizedBucketedSpectra)
152 plot(1:length(NormalizedBucketedSpectra[,1]),NormalizedBucketedSpectra[,1],ylim=c(0,ymax),
153 type='l',col=1,xlab="Chemical shift",ylab="Intensity")
154 for (i in 2:ncol(NormalizedBucketedSpectra))
155 {
156 spectre <- NormalizedBucketedSpectra[,i]
157 lines(spectre,col=i)
158 }
159 }
160 else
161 {
162 for (i in 1:ncol(NormalizedBucketedSpectra))
163 {
164 plot(1:length(NormalizedBucketedSpectra[,i]),NormalizedBucketedSpectra[,i],type='l',col=1,
165 xlab="Chemical shift",ylab="Intensity")
166 }
167 }
168 dev.off()
169 }
170
171 # Output datasets creation
172 if (scalingMethod == "None" || scalingMethod == "Total")
173 {
174 sampleMetadata <- data.frame(1:ncol(NormalizedBucketedSpectra))
175 rownames(sampleMetadata) <- colnames(NormalizedBucketedSpectra)
176 colnames(sampleMetadata) <- "SampleOrder"
177 }
178
179 variableMetadata <- data.frame(1:nrow(NormalizedBucketedSpectra))
180 rownames(variableMetadata) <- rownames(NormalizedBucketedSpectra)
181 colnames(variableMetadata) <- "VariableOrder"
182
183 return(list(NormalizedBucketedSpectra,sampleMetadata,variableMetadata))
184
185 }