Mercurial > repos > marie-tremblay-metatoul > spectral_normalization
comparison NmrNormalization_script.R @ 0:a5e6499f1b4d draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/spectral_normalization commit 559844c168f0450b9657346ba890d9c028d7537a
author | marie-tremblay-metatoul |
---|---|
date | Tue, 22 Nov 2016 06:42:44 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a5e6499f1b4d |
---|---|
1 ################################################################################################################# | |
2 # SPECTRA NORMALIZATION FROM SPECTRAL DATA # | |
3 # User : Galaxy # | |
4 # Original data : -- # | |
5 # Starting date : 20-10-2014 # | |
6 # Version 1 : 27-01-2015 # | |
7 # Version 2 : 27-02-2015 # | |
8 # # | |
9 # Input files: # | |
10 # - Data matrix containing bucketed and integrated spectra to normalize # | |
11 # - Sample metadata matrix containing at least biological factor of interest # | |
12 # - Scaling method: Total intensity/Probabilistic Quotient Normalization # | |
13 # - Control group: name of control to compute median reference spectra # | |
14 # - Graph: normalization result representation # | |
15 ################################################################################################################# | |
16 NmrNormalization <- function(dataMatrix,scalingMethod=c("None","Total","PQN","BioFactor"),sampleMetadata=NULL, | |
17 bioFactor=NULL,ControlGroup=NULL,graph=c("None","Overlay","One_per_individual"), | |
18 nomFichier=NULL,savLog.txtC=NULL) | |
19 { | |
20 | |
21 ## Option | |
22 ##--------------- | |
23 strAsFacL <- options()$stringsAsFactors | |
24 options(stingsAsFactors = FALSE) | |
25 options(warn = -1) | |
26 | |
27 | |
28 ## Constants | |
29 ##--------------- | |
30 topEnvC <- environment() | |
31 flgC <- "\n" | |
32 | |
33 ## Log file (in case of integration into Galaxy) | |
34 ##---------------------------------------------- | |
35 if(!is.null(savLog.txtC)) | |
36 sink(savLog.txtC, append = TRUE) | |
37 | |
38 ## Functions definition | |
39 ##--------------------- | |
40 ################################################################################################################# | |
41 # Total intensity normalization | |
42 # Input parameters | |
43 # - data : bucketed spectra (rows=buckets; columns=samples) | |
44 ################################################################################################################# | |
45 NmrBrucker_total <- function(data) | |
46 { | |
47 # Total intensity normalization | |
48 data.total <- apply(data,2,sum) | |
49 data.normalized <- data[,1]/data.total[1] | |
50 for (i in 2:ncol(data)) | |
51 data.normalized <- cbind(data.normalized,data[,i]/data.total[i]) | |
52 colnames(data.normalized) <- colnames(data) | |
53 rownames(data.normalized) <- rownames(data) | |
54 return(data.normalized) | |
55 } | |
56 | |
57 | |
58 ################################################################################################################# | |
59 # Biological factor normalization | |
60 # Input parameters | |
61 # - data : bucketed spectra (rows=buckets; columns=samples) | |
62 # - sampleMetadata : dataframe with biological factor of interest measured for each invidual | |
63 # - bioFactor : name of the column cotaining the biological factor of interest | |
64 ################################################################################################################# | |
65 NmrBrucker_bioFact <- function(data,sampleMetadata,bioFactor) | |
66 { | |
67 # Total intensity normalization | |
68 data.normalized <- data[,1]/bioFactor[1] | |
69 for (i in 2:ncol(data)) | |
70 data.normalized <- cbind(data.normalized,data[,i]/bioFactor[i]) | |
71 colnames(data.normalized) <- colnames(data) | |
72 rownames(data.normalized) <- rownames(data) | |
73 return(data.normalized) | |
74 } | |
75 | |
76 | |
77 ################################################################################################################# | |
78 # Probabilistic quotient normalization (PQN) | |
79 # Input parameters | |
80 # - data : bucketed spectra (rows=buckets; columns=samples) | |
81 # - sampleMetadata : dataframe with treatment group of inviduals | |
82 # - pqnFactor : number of the column cotaining the biological facor of interest | |
83 # - nomControl : name of the treatment group | |
84 ################################################################################################################# | |
85 NmrBrucker_pqn <- function(data,sampleMetadata,pqnFactor,nomControl) | |
86 { | |
87 # Total intensity normalization | |
88 data.total <- apply(data,2,sum) | |
89 data.normalized <- data[,1]/data.total[1] | |
90 for (i in 2:ncol(data)) | |
91 data.normalized <- cbind(data.normalized,data[,i]/data.total[i]) | |
92 colnames(data.normalized) <- colnames(data) | |
93 rownames(data.normalized) <- rownames(data) | |
94 | |
95 # Reference spectrum | |
96 # Recuperation spectres individus controle | |
97 control.spectra <- data.normalized[,sampleMetadata[,pqnFactor]==nomControl] | |
98 spectrum.ref <- apply(control.spectra,1,median) | |
99 | |
100 # Ratio between normalized and reference spectra | |
101 data.normalized.ref <- data.normalized/spectrum.ref | |
102 | |
103 # Median ratio | |
104 data.normalized.ref.median <- apply(data.normalized.ref,1,median) | |
105 | |
106 # Normalization | |
107 data.normalizedPQN <- data.normalized[,1]/data.normalized.ref.median | |
108 for (i in 2:ncol(data)) | |
109 data.normalizedPQN <- cbind(data.normalizedPQN,data.normalized[,i]/data.normalized.ref.median) | |
110 colnames(data.normalizedPQN) <- colnames(data) | |
111 rownames(data.normalizedPQN) <- rownames(data) | |
112 | |
113 return(data.normalizedPQN) | |
114 } | |
115 | |
116 | |
117 ## Tests | |
118 if (scalingMethod=="QuantitativeVariable") | |
119 { | |
120 if(mode(sampleMetadata[,bioFactor]) == "character") | |
121 bioFact <- factor(sampleMetadata[,bioFactor]) | |
122 else | |
123 bioFact <- sampleMetadata[,bioFactor] | |
124 } | |
125 | |
126 ## Spectra scaling depending on the user choice | |
127 if (scalingMethod == "None") | |
128 { | |
129 NormalizedBucketedSpectra <- dataMatrix | |
130 } | |
131 else if (scalingMethod == "Total") | |
132 { | |
133 NormalizedBucketedSpectra <- NmrBrucker_total(dataMatrix) | |
134 } | |
135 else if (scalingMethod == "PQN") | |
136 { | |
137 NormalizedBucketedSpectra <- NmrBrucker_pqn(dataMatrix,sampleMetadata,bioFactor,ControlGroup) | |
138 } | |
139 else if (scalingMethod == "QuantitativeVariable") | |
140 { | |
141 NormalizedBucketedSpectra <- NmrBrucker_bioFact(dataMatrix,sampleMetadata,bioFact) | |
142 } | |
143 | |
144 # Graphic | |
145 if (graph != "None") | |
146 { | |
147 # Graphic Device opening | |
148 pdf(nomFichier,onefile=TRUE) | |
149 if (graph == "Overlay") | |
150 { | |
151 ymax <- max(NormalizedBucketedSpectra) | |
152 plot(1:length(NormalizedBucketedSpectra[,1]),NormalizedBucketedSpectra[,1],ylim=c(0,ymax), | |
153 type='l',col=1,xlab="Chemical shift",ylab="Intensity") | |
154 for (i in 2:ncol(NormalizedBucketedSpectra)) | |
155 { | |
156 spectre <- NormalizedBucketedSpectra[,i] | |
157 lines(spectre,col=i) | |
158 } | |
159 } | |
160 else | |
161 { | |
162 for (i in 1:ncol(NormalizedBucketedSpectra)) | |
163 { | |
164 plot(1:length(NormalizedBucketedSpectra[,i]),NormalizedBucketedSpectra[,i],type='l',col=1, | |
165 xlab="Chemical shift",ylab="Intensity") | |
166 } | |
167 } | |
168 dev.off() | |
169 } | |
170 | |
171 # Output datasets creation | |
172 if (scalingMethod == "None" || scalingMethod == "Total") | |
173 { | |
174 sampleMetadata <- data.frame(1:ncol(NormalizedBucketedSpectra)) | |
175 rownames(sampleMetadata) <- colnames(NormalizedBucketedSpectra) | |
176 colnames(sampleMetadata) <- "SampleOrder" | |
177 } | |
178 | |
179 variableMetadata <- data.frame(1:nrow(NormalizedBucketedSpectra)) | |
180 rownames(variableMetadata) <- rownames(NormalizedBucketedSpectra) | |
181 colnames(variableMetadata) <- "VariableOrder" | |
182 | |
183 return(list(NormalizedBucketedSpectra,sampleMetadata,variableMetadata)) | |
184 | |
185 } |