1
|
1 /*
|
|
2 Copyright 2010 Stéphane De Mita, Mathieu Siol
|
|
3
|
|
4 This file is part of the EggLib library.
|
|
5
|
|
6 EggLib is free software: you can redistribute it and/or modify
|
|
7 it under the terms of the GNU General Public License as published by
|
|
8 the Free Software Foundation, either version 3 of the License, or
|
|
9 (at your option) any later version.
|
|
10
|
|
11 EggLib is distributed in the hope that it will be useful,
|
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 GNU General Public License for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
|
|
18 */
|
|
19
|
|
20 #ifndef EGGLIB_HFSTATISTICS_HPP
|
|
21 #define EGGLIB_HFSTATISTICS_HPP
|
|
22
|
|
23
|
|
24
|
|
25 namespace egglib {
|
|
26
|
|
27
|
|
28 /** \brief Computes Fst and Fit from haploid data
|
|
29 *
|
|
30 * The class requires loading data. Data are loaded by haploid
|
|
31 * (one genotype per individual). The analyses are cached: they are
|
|
32 * performed upon the first call to statistics accessors. The cache
|
|
33 * is emptied whenever a datum is loaded.
|
|
34 *
|
|
35 * The computations are performed after Weir and Cockerham. The
|
|
36 * statistic theta is generalized for multiple alleles. To allow
|
|
37 * computation of multi-locus statistics, variance components are
|
|
38 * also available. The two components of the variance are T1 and T2
|
|
39 * and theta is T1/T2 (from Weir 1996 "Genetic Data Analysis II",
|
|
40 * Sinauer associates, Sunderland MA).
|
|
41 *
|
|
42 * \ingroup polymorphism
|
|
43 *
|
|
44 */
|
|
45 class HFStatistics {
|
|
46
|
|
47 public:
|
|
48
|
|
49 /** \brief Constructor
|
|
50 *
|
|
51 */
|
|
52 HFStatistics();
|
|
53
|
|
54
|
|
55 /** \brief Destructor
|
|
56 *
|
|
57 */
|
|
58 virtual ~HFStatistics();
|
|
59
|
|
60
|
|
61 /** \brief Reserve sufficient memory for a given number of
|
|
62 * individuals.
|
|
63 *
|
|
64 * This method makes the load function faster by allocating
|
|
65 * all required memory at once.
|
|
66 *
|
|
67 * \param numberOfIndividuals a strictly positive integer.
|
|
68 *
|
|
69 */
|
|
70 void reserve(unsigned int numberOfIndividuals);
|
|
71
|
|
72
|
|
73 /** \brief Loads the data for one individual
|
|
74 *
|
|
75 * \param genotype an integer giving the allele.
|
|
76 * \param populationLabel an integer indication belonging to
|
|
77 * a population.
|
|
78 *
|
|
79 * Genotypes and population labels are not required to be
|
|
80 * consecutive (both are labels, not indices). They are
|
|
81 * internally mapped to indices (the mapping can be obtained
|
|
82 * by accessors populationLabel and allele).
|
|
83 *
|
|
84 * All genotypes are considered to be valid (no missing data).
|
|
85 * If statistics were computed previous to call to this
|
|
86 * function, all data will be erased.
|
|
87 *
|
|
88 */
|
|
89 void loadIndividual(unsigned int genotype, unsigned int populationLabel);
|
|
90
|
|
91
|
|
92 /** \brief Label of a population
|
|
93 *
|
|
94 * The index corresponds to the local mapping of populations
|
|
95 * regardless of the ranking of population labels. (No out
|
|
96 * of bound checking.)
|
|
97 *
|
|
98 */
|
|
99 unsigned int populationLabel(unsigned int populationIndex);
|
|
100
|
|
101
|
|
102 /** \brief Value of an allele
|
|
103 *
|
|
104 * The index corresponds to the local mapping of alleles
|
|
105 * regardless of the ranking of allele values. (No out of
|
|
106 * bound checking.)
|
|
107 *
|
|
108 */
|
|
109 unsigned int alleleValue(unsigned int alleleIndex);
|
|
110
|
|
111
|
|
112 /// Allele of a given individual (no checking)
|
|
113 unsigned int allele(unsigned int individualIndex) const;
|
|
114
|
|
115 /// Population label of a given individual (no checking)
|
|
116 unsigned int individualLabel(unsigned int individualIndex) const;
|
|
117
|
|
118
|
|
119 /** \brief Number of alleles
|
|
120 *
|
|
121 */
|
|
122 unsigned int numberOfAlleles();
|
|
123
|
|
124
|
|
125 /** \brief Number of populations
|
|
126 *
|
|
127 */
|
|
128 unsigned int numberOfPopulations();
|
|
129
|
|
130
|
|
131 /** \brief Number of loaded genotypes
|
|
132 *
|
|
133 */
|
|
134 unsigned int numberOfGenotypes() const;
|
|
135
|
|
136
|
|
137 /** \brief Absolute total allele frequency
|
|
138 *
|
|
139 */
|
|
140 unsigned int alleleFrequencyTotal(unsigned int alleleIndex);
|
|
141
|
|
142
|
|
143 /** \brief Absolute allele frequency in a population
|
|
144 *
|
|
145 */
|
|
146 unsigned int alleleFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex);
|
|
147
|
|
148
|
|
149 /** \brief Sample size of a population
|
|
150 *
|
|
151 */
|
|
152 unsigned int populationFrequency(unsigned int populationIndex);
|
|
153
|
|
154
|
|
155 /** \brief Weir-Cockerham theta-statistic
|
|
156 *
|
|
157 * Note: equivalent to Fst.
|
|
158 *
|
|
159 */
|
|
160 double theta();
|
|
161
|
|
162
|
|
163 /** \brief Between-population component of variance
|
|
164 *
|
|
165 */
|
|
166 double T1();
|
|
167
|
|
168
|
|
169 /** \brief Total variance
|
|
170 *
|
|
171 */
|
|
172 double T2();
|
|
173
|
|
174
|
|
175 protected:
|
|
176
|
|
177 bool d_flag;
|
|
178 void d_init();
|
|
179 void d_clear();
|
|
180 unsigned int d_reserved;
|
|
181 unsigned int d_numberOfGenotypes;
|
|
182 unsigned int *d_genotypes;
|
|
183 unsigned int *d_populationLabels;
|
|
184
|
|
185 bool s_flag;
|
|
186 void s_init();
|
|
187 void s_clear();
|
|
188 void s_compute();
|
|
189 void processPopulations();
|
|
190 void processAlleles();
|
|
191 unsigned int getPopulationIndex(unsigned int) const;
|
|
192 unsigned int getAlleleIndex(unsigned int) const;
|
|
193 unsigned int s_numberOfAlleles;
|
|
194 unsigned int *s_alleleValueMapping;
|
|
195 unsigned int s_numberOfPopulations;
|
|
196 unsigned int *s_populationLabelMapping;
|
|
197 unsigned int *s_populationFrequencies;
|
|
198 unsigned int *s_alleleFrequenciesTotal;
|
|
199 unsigned int **s_alleleFrequenciesPerPopulation;
|
|
200
|
|
201 bool w_flag;
|
|
202 void w_init();
|
|
203 void w_clear();
|
|
204 void w_compute();
|
|
205 double w_T;
|
|
206 double *w_T1;
|
|
207 double *w_T2;
|
|
208 double w_nbar;
|
|
209 double w_nc;
|
|
210 double *w_pbar;
|
|
211 double *w_ssquare;
|
|
212 double w_sum_T1;
|
|
213 double w_sum_T2;
|
|
214
|
|
215
|
|
216 private:
|
|
217
|
|
218 HFStatistics(const HFStatistics& source) { }
|
|
219
|
|
220 HFStatistics& operator=(const HFStatistics& source) {
|
|
221 return *this;
|
|
222 }
|
|
223
|
|
224 };
|
|
225 }
|
|
226
|
|
227 #endif
|