annotate egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp @ 3:345f88a8f483 draft

Uploaded
author dereeper
date Fri, 10 Jul 2015 10:38:43 -0400
parents 420b57c3c185
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
1 /*
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
2 Copyright 2009 Stéphane De Mita, Mathieu Siol
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
4 This file is part of the EggLib library.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
5
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
6 EggLib is free software: you can redistribute it and/or modify
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
8 the Free Software Foundation, either version 3 of the License, or
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
9 (at your option) any later version.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
10
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
11 EggLib is distributed in the hope that it will be useful,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
14 GNU General Public License for more details.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
15
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
18 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
19
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
20 #ifndef EGGLIB_BASEDIVERSITY_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
21 #define EGGLIB_BASEDIVERSITY_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
22
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
23 #include "CharMatrix.hpp"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
24 #include "SitePolymorphism.hpp"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
25 #include <string>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
26
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
27 /** \defgroup polymorphism polymorphism
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
28 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
29 * \brief Diversity analyses
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
30 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
31 * Two classes are contained in this module: NucleotideDiversity, that
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
32 * performs site-centered polymorphism analyses, and HaplotypeDiversity,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
33 * that performs haplotype-centered analyses. The detection of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
34 * polymorphic sites is common to both, through the base class
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
35 * BaseDiversity. However this phase must be repeated when stats from
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
36 * the two classes are needed. To reduce the computational burden, the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
37 * function reserve() can be used, which directly allocates the needed memory
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
38 * when the eventual number of polymorphic sites is known prior to
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
39 * analysis (even if not precisely). For both classes, a set of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
40 * statistics are computed immediately upon load of a data set. For
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
41 * NucleotideDiversity, additional statistics are computed per group
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
42 * upon use of the corresponding accessors. The number of operations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
43 * performed several times is strictly limited. This is particularly
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
44 * useful when different statistics are needed for a given alignment.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
45 * However, this system allows not computing unnecessary statistics to
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
46 * a certain extent.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
47 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
48 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
49
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
50 namespace egglib {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
51
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
52 /** \brief Base class of diversity classes
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
53 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
54 * Centralizes the analysis of polymorphic sites through the method
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
55 * importSites() and related accessors.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
56 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
57 * \ingroup polymorphism
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
58 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
59 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
60 class BaseDiversity {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
61
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
62 public:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
63
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
64 /** \brief Constructor
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
65 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
66 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
67 BaseDiversity();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
68
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
69 /** \brief Destructor
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
70 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
71 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
72 virtual ~BaseDiversity();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
73
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
74 /** \brief Reserve sufficient memory for a given number of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
75 * polymorphic sites.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
76 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
77 * This method makes the importSites() function faster when you
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
78 * already know how many polymorphic sites to expect, since
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
79 * the necessary memory will be allocated prior to the screening
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
80 * of data. It is possible to use reserve() even with a
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
81 * number of sites that does not match what importSites()
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
82 * will find.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
83 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
84 * \param numberOfSites a strictly positive integer.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
85 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
86 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
87 virtual void reserve(unsigned int numberOfSites);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
88
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
89 /// Gets a site
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
90 const SitePolymorphism* get_site(unsigned int index) const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
91
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
92 /// Gets a site position
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
93 unsigned int get_position(unsigned int index) const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
94
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
95 /** \brief Predefined mapping string for DNA data
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
96 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
97 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
98 static const std::string dnaMapping;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
99
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
100
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
101 /** \brief Predefined mapping string for RNA data
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
102 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
103 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
104 static const std::string rnaMapping;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
105
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
106
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
107 /** \brief Predefined mapping string for amino acid data
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
108 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
109 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
110 static const std::string aaMapping;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
111
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
112
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
113 /// Clears and re-initializes object
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
114 virtual void reset();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
115
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
116
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
117 protected:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
118
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
119 virtual void init();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
120 virtual void clear();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
121
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
122 //
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
123 void importSites(CharMatrix& data, bool allowMultipleMutations,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
124 double minimumExploitableData, unsigned int ignoreFrequency,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
125 std::string characterMapping, bool useZeroAsAncestral,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
126 bool ignoreOutgroup);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
127
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
128 //
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
129 void analyzeSite(CharMatrix& data, unsigned int index, double maxMissingData, bool ignoreOutgroup); // analyzes a site, adds a Site to the Site container if the site is polymorphic
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
130 unsigned int getPopIndex(unsigned int label) const; // returns v_npop if not found
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
131
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
132 SitePolymorphism** v_sites; // holder of polymorphic site addresses
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
133 bool* v_orientables; // stores whether the sites are orientable or not
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
134 unsigned int* v_sitePositions; // stores position of sites
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
135
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
136 unsigned int v_reserved;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
137 unsigned int v_ns; // maximum number of sequences analyzed (max of sites' ns)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
138 unsigned int v_S; // number of polymorphic sites
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
139 unsigned int v_So; // number of orientable sites
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
140 unsigned int v_eta; // number of mutation (whatever multiple)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
141 double v_nseff; // average number of analyzed sequence
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
142 unsigned int v_lseff; // number of analyzed sites
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
143 double v_nseffo; // average number of analyzed sequences for analyzes with outgroup
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
144 unsigned int v_lseffo; // number of analyzed sites for analyzes with outgroup
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
145 unsigned int v_npop; // number of populations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
146 unsigned int *v_popLabel; // label of each pop
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
147
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
148 // options
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
149 bool p_allowMultipleMutations;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
150 double p_minimumExploitableData;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
151 std::string p_characterMapping;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
152 unsigned int p_pos_sep_mapping;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
153 bool p_useZeroAsAncestral;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
154 unsigned int p_ignoreFrequency;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
155
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
156
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
157
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
158 private:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
159
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
160 BaseDiversity(const BaseDiversity& source) { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
161
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
162 BaseDiversity& operator=(const BaseDiversity& source) {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
163 return *this;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
164 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
165
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
166 };
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
167 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
168
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
169 #endif