Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/FStatistics.hpp @ 1:420b57c3c185 draft
Uploaded
author | dereeper |
---|---|
date | Fri, 10 Jul 2015 04:39:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:3e19d0dfcf3e | 1:420b57c3c185 |
---|---|
1 /* | |
2 Copyright 2009 Stéphane De Mita, Mathieu Siol | |
3 | |
4 This file is part of the EggLib library. | |
5 | |
6 EggLib is free software: you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation, either version 3 of the License, or | |
9 (at your option) any later version. | |
10 | |
11 EggLib is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
18 */ | |
19 | |
20 #ifndef EGGLIB_FSTATISTICS_HPP | |
21 #define EGGLIB_FSTATISTICS_HPP | |
22 | |
23 | |
24 | |
25 namespace egglib { | |
26 | |
27 | |
28 /** \brief Computes Fis, Fst and Fit from diploid data | |
29 * | |
30 * The class requires loading data. Data are loaded by individual | |
31 * (two genotypes per individual). The analyses are cached: they are | |
32 * performed upon the first call to statistics accessors. The cache | |
33 * is emptied whenever a datum is loaded. | |
34 * | |
35 * The computations are performed after Weir and Cockerham. The | |
36 * statistics F, theta and f are generalized for multiple alleles. | |
37 * To allow computation of multi-locus statistics, variance | |
38 * components are also available. The three components of the | |
39 * variance are Vpopulation (between-population), Vindividual | |
40 * (within-population, between-individual) and Vallele (within- | |
41 * individual). The formulas to compute the F-statistics are as | |
42 * follows: | |
43 * - 1-F = Vallele/(Vpopulation+Vindividual+Vallele) | |
44 * - theta = Vpopulation/(Vpopulation+Vindividual+Vallele) | |
45 * - 1-f = Vallele/(Vindividual+Vallele). | |
46 * | |
47 * \ingroup polymorphism | |
48 * | |
49 */ | |
50 class FStatistics { | |
51 | |
52 public: | |
53 | |
54 /** \brief Constructor | |
55 * | |
56 */ | |
57 FStatistics(); | |
58 | |
59 | |
60 /** \brief Destructor | |
61 * | |
62 */ | |
63 virtual ~FStatistics(); | |
64 | |
65 | |
66 /** \brief Reserve sufficient memory for a given number of | |
67 * individuals. | |
68 * | |
69 * This method makes the load function faster by allocating | |
70 * all required memory at once. | |
71 * | |
72 * \param numberOfIndividuals a strictly positive integer. | |
73 * | |
74 */ | |
75 void reserve(unsigned int numberOfIndividuals); | |
76 | |
77 | |
78 /** \brief Loads the data for one individual | |
79 * | |
80 * \param genotype1 an integer giving the first allele. | |
81 * \param genotype2 an integer giving the second allele. | |
82 * \param populationLabel an integer indicating membership in | |
83 * a population. | |
84 * | |
85 * Genotypes and population labels are not required to be | |
86 * consecutive (both are labels, not indices). They are | |
87 * internally mapped to indices (the mapping can be obtained | |
88 * by accessors populationLabel and alleleValue). | |
89 * | |
90 * All genotypes are considered to be valid (no missing data). | |
91 * If statistics were computed prior to calling this | |
92 * function, all data will be erased. | |
93 * | |
94 */ | |
95 void loadIndividual(unsigned int genotype1, | |
96 unsigned int genotype2, unsigned int populationLabel); | |
97 | |
98 | |
99 /** \brief Label of a population | |
100 * | |
101 * The index corresponds to the local mapping of populations | |
102 * regardless of the ranking of population labels. (No | |
103 * out-of-bounds checking.) | |
104 * | |
105 */ | |
106 unsigned int populationLabel(unsigned int populationIndex); | |
107 | |
108 | |
109 /** \brief Value of an allele | |
110 * | |
111 * The index corresponds to the local mapping of alleles | |
112 * regardless of the ranking of allele values. (No | |
113 * out-of-bounds checking.) | |
114 * | |
115 */ | |
116 unsigned int alleleValue(unsigned int alleleIndex); | |
117 | |
118 | |
119 /// First allele of a given individual (no bounds checking) | |
120 unsigned int firstAllele(unsigned int individualIndex) const; | |
121 | |
122 /// Second allele of a given individual (no bounds checking) | |
123 unsigned int secondAllele(unsigned int individualIndex) const; | |
124 | |
125 /// Population label of a given individual (no bounds checking) | |
126 unsigned int individualLabel(unsigned int individualIndex) const; | |
127 | |
128 | |
129 /** \brief Number of alleles | |
130 * | |
131 */ | |
132 unsigned int numberOfAlleles(); | |
133 | |
134 | |
135 /** \brief Number of populations | |
136 * | |
137 */ | |
138 unsigned int numberOfPopulations(); | |
139 | |
140 | |
141 /** \brief Number of loaded genotypes | |
142 * NOTE(review): this header does not say whether the count is per individual or per allele; confirm against the implementation. | |
143 */ | |
144 unsigned int numberOfGenotypes() const; | |
145 | |
146 | |
147 /** \brief Absolute total allele frequency | |
148 * | |
149 */ | |
150 unsigned int alleleFrequencyTotal(unsigned int alleleIndex); | |
151 | |
152 | |
153 /** \brief Absolute allele frequency in a population | |
154 * | |
155 */ | |
156 unsigned int alleleFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex); | |
157 | |
158 | |
159 /** \brief Absolute genotype frequency | |
160 * | |
161 * Note that genotype AB is considered different from BA (this | |
162 * means that values can be accessed on both sides of the | |
163 * diagonal). | |
164 * | |
165 */ | |
166 unsigned int genotypeFrequencyTotal(unsigned int alleleIndex1, unsigned int alleleIndex2); | |
167 | |
168 | |
169 /** \brief Absolute genotype frequency in a population | |
170 * | |
171 * Note that genotype AB is considered different from BA (this | |
172 * means that values can be accessed on both sides of the | |
173 * diagonal). | |
174 * | |
175 */ | |
176 unsigned int genotypeFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex1, unsigned int alleleIndex2); | |
177 | |
178 | |
179 /** \brief Sample size of a population | |
180 * | |
181 */ | |
182 unsigned int populationFrequency(unsigned int populationIndex); | |
183 | |
184 | |
185 /** \brief Weir-Cockerham F-statistic | |
186 * | |
187 * Note: equivalent to Fit. | |
188 * | |
189 */ | |
190 double F(); | |
191 | |
192 | |
193 /** \brief Weir-Cockerham theta-statistic | |
194 * | |
195 * Note: equivalent to Fst. | |
196 * | |
197 */ | |
198 double theta(); | |
199 | |
200 | |
201 /** \brief Weir-Cockerham f-statistic | |
202 * | |
203 * Note: equivalent to Fis. | |
204 * | |
205 */ | |
206 double f(); | |
207 | |
208 | |
209 /** \brief Between-population component of variance | |
210 * | |
211 */ | |
212 double Vpopulation(); | |
213 | |
214 | |
215 /** \brief Within-population, between-individual component of variance | |
216 * | |
217 */ | |
218 double Vindividual(); | |
219 | |
220 | |
221 /** \brief Within-individual component of variance | |
222 * | |
223 */ | |
224 double Vallele(); | |
225 | |
226 | |
227 protected: | |
228 /* d_ members: presumably the raw loaded data (genotypes and population labels) and its bookkeeping; confirm against the implementation. */ | |
229 bool d_flag; | |
230 void d_init(); | |
231 void d_clear(); | |
232 unsigned int d_reserved; | |
233 unsigned int d_numberOfGenotypes; | |
234 unsigned int *d_genotypes; | |
235 unsigned int *d_populationLabels; | |
236 /* s_ members: presumably the label-to-index mappings and the allele, genotype and population frequency tables; confirm against the implementation. */ | |
237 bool s_flag; | |
238 void s_init(); | |
239 void s_clear(); | |
240 void s_compute(); | |
241 void processPopulations(); | |
242 void processAlleles(); | |
243 unsigned int getPopulationIndex(unsigned int) const; | |
244 unsigned int getAlleleIndex(unsigned int) const; | |
245 unsigned int s_numberOfAlleles; | |
246 unsigned int *s_alleleValueMapping; | |
247 unsigned int s_numberOfPopulations; | |
248 unsigned int *s_populationLabelMapping; | |
249 unsigned int *s_populationFrequencies; | |
250 unsigned int *s_alleleFrequenciesTotal; | |
251 unsigned int **s_alleleFrequenciesPerPopulation; | |
252 unsigned int **s_genotypeFrequenciesTotal; | |
253 unsigned int ***s_genotypeFrequenciesPerPopulation; | |
254 /* w_ members: presumably the Weir-Cockerham results (F, theta, f) and their intermediate terms and sums; confirm against the implementation. */ | |
255 bool w_flag; | |
256 void w_init(); | |
257 void w_clear(); | |
258 void w_compute(); | |
259 double w_F; | |
260 double w_T; | |
261 double w_f; | |
262 double *w_a; | |
263 double *w_b; | |
264 double *w_c; | |
265 double w_nbar; | |
266 double w_nc; | |
267 double *w_pbar; | |
268 double *w_ssquare; | |
269 double *w_hbar; | |
270 double w_sum_a; | |
271 double w_sum_b; | |
272 double w_sum_c; | |
273 double w_sum_abc; | |
274 double w_sum_bc; | |
275 | |
276 | |
277 private: | |
278 /* Copy construction and copy assignment are private, so they are inaccessible outside the class; both bodies below copy no members. */ | |
279 FStatistics(const FStatistics& source) { } | |
280 | |
281 FStatistics& operator=(const FStatistics& source) { | |
282 return *this; | |
283 } | |
284 | |
285 }; | |
286 } | |
287 | |
288 #endif |