annotate egglib/egglib-2.1.5/include/egglib-cpp/Consensus.hpp @ 1:420b57c3c185 draft

Uploaded
author dereeper
date Fri, 10 Jul 2015 04:39:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
1 /*
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
2 Copyright 2008-2009 Stéphane De Mita, Mathieu Siol
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
4 This file is part of the EggLib library.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
5
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
6 EggLib is free software: you can redistribute it and/or modify
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
8 the Free Software Foundation, either version 3 of the License, or
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
9 (at your option) any later version.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
10
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
11 EggLib is distributed in the hope that it will be useful,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
14 GNU General Public License for more details.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
15
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
18 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
19
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
20 #ifndef EGGLIB_CONSENSUS_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
21 #define EGGLIB_CONSENSUS_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
22
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
23 #include "Align.hpp"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
24 #include <sstream>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
25 #include <string>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
26 #include <vector>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
27
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
28 namespace egglib {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
29
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
30 /** \brief Generates consensus sequences
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
31 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
32 * \ingroup polymorphism
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
33 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
34 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
35 * A consensus is generated when two sequences have the same name,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
36 * ignoring everything after the first separator character (by
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
37 * default, "_"). Hence, the names "foo", "foo_goo" and "foo_third"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
38 * will be treated as identical and the root will be "foo". The root
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
39 * will be used to name the resulting sequence. Note that the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
40 * class works only for DNA sequences.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
41 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
42 * Symbol convention:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
43 * - A: adenine
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
44 * - C: cytosine
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
45 * - G: guanine
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
46 * - T: thymine
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
47 * - M: A or C
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
48 * - R: A or G
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
49 * - W: A or T (weak)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
50 * - S: C or G (strong)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
51 * - Y: C or T
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
52 * - K: G or T
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
53 * - B: C or G or T (not A)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
54 * - D: A or G or T (not C)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
55 * - H: A or C or T (not G)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
56 * - V: A or C or G (not T)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
57 * - N: A or C or G or T
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
58 * - ?: nonsequenced position
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
59 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
60 * Other symbols will be treated as ? (lowercase are supported).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
61 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
62 * Rigorous (alias liberal or strong) mode:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
63 * - If two characters are the same, it is retained whatever it is
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
64 * (A + A = A)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
65 * - Otherwise:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
66 * - If one is the missing character (?) the other is retained
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
67 * whatever it is (A + ? = A).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
68 * - If characters are consistent, that is one contains
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
69 * more information, that one is retained (A + M = A).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
70 * - If characters are not consistent, the closest
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
71 * generic symbol is retained (A + C = M).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
72 * .
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
73 * Note that the feedback of inconsistent characters in the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
74 * outcome is not guaranteed.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
75 * In fact, (A + A + G) will result in R (as expected) but (A +
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
76 * G + A) will result in A, masking the problem.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
77 * However, the position will indeed be counted as inconsistent.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
78 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
79 * Not rigorous (conservative/weak) mode:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
80 * - If two characters are the same, it is retained whatever it
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
81 * is (A + A = A).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
82 * - Otherwise:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
83 * - If one is ? the other is retained whatever it is (A + ?
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
84 * = A).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
85 * - Otherwise an inconsistent character (by default, Z) is
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
86 * retained (A + C = Z).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
87 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
88 * Iterative process of consensus:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
89 * - Each sequence is taken in turn.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
90 * - Each pair involving the focus sequence is processed and a
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
91 * consensus is generated.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
92 * - When all pairs have been processed, the consensus sequences already
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
93 * generated are themselves iteratively processed until only one
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
94 * remains.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
95 * - Note that at each time the last two are taken first.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
96 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
97 * A transparent interface gives access to the data for all steps of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
98 * the consensus process, as vectors that cover all pairs (including
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
99 * intermediate steps of the iterative procedure described above) as
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
100 * well as singleton sequences. For the latter, the second name is
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
101 * not filled and all counts are set to 0. Note also that the name of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
102 * such singleton sequence is shortened to the separator as well.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
103 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
104 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
105 class Consensus {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
106 public:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
107 /** \brief Constructor
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
108 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
109 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
110 Consensus();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
111
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
112 /** \brief Destructor
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
113 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
114 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
115 virtual ~Consensus() {}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
116
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
117 /// Sets the character interpreted as missing (default: ?)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
118 void setMissing(char);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
119
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
120 /// Sets the character used to point to disagreements (default: Z)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
121 void setDisagreement(char);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
122
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
123 /// Checks all the characters
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
124 bool check_sequences(Align& align);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
125
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
126 /** \brief Reduces the sequence alignment by making consensus sequences
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
127 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
128 * \param align the original alignment.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
129 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
130 * \param separator the character used to separate the root
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
131 * name of sequences from the variable part, as in (for the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
132 * default value): "sequence_read1".
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
133 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
134 * \param rigorous consensus mode.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
135 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
136 * \return An Align instance with duplicated sequences consensed.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
137 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
138 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
139 Align consensus(Align& align, char separator='_', bool rigorous=true);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
140
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
141 /// First names of consensed pairs
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
142 const std::vector<std::string>& firstSequenceNames();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
143
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
144 /// Second names of consensed pairs
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
145 const std::vector<std::string>& secondSequenceNames();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
146
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
147 /// Root names of consensed pairs
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
148 const std::vector<std::string>& roots();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
149
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
150 /// Number of consistent positions for all consensed pairs
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
151 const std::vector<int>& consistentPositions();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
152
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
153 /// Number of complementary positions for all consensed pairs
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
154 const std::vector<int>& complementaryPositions();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
155
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
156 /// Number of uninformative positions for all consensed pairs
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
157 const std::vector<int>& uninformativePositions();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
158
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
159 /// Number of ambiguous positions for all consensed pairs
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
160 const std::vector<int>& ambiguousPositions();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
161
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
162 /// Number of at least partially resolved ambiguities for all consensed pairs
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
163 const std::vector<int>& atLeastPartiallyResolvedAmbiguities();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
164
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
165 /// Vector of inconsistent positions for all consensed pairs
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
166 const std::vector<std::vector<int> >& inconsistentPositions();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
167
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
168
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
169 private:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
170
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
171 /** \brief Copying this class is not allowed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
172 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
173 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
174 Consensus(const Consensus& source) { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
175
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
176 /** \brief Copying this class is not allowed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
177 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
178 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
179 Consensus& operator=(const Consensus& source) { return *this; }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
180
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
181 // A private helper
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
182 class PairwiseConsensus;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
183
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
184 // report data
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
185 std::vector<std::string> t_firstSequenceNames;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
186 std::vector<std::string> t_secondSequenceNames;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
187 std::vector<std::string> t_roots;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
188 std::vector<int> t_consistentPositions;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
189 std::vector<int> t_complementaryPositions;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
190 std::vector<int> t_uninformativePositions;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
191 std::vector<int> t_ambiguousPositions;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
192 std::vector<int> t_atLeastPartiallyResolvedAmbiguities;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
193 std::vector<std::vector<int> > t_inconsistentPositions;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
194
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
195 // Code for missing data (usually ?)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
196 char MISSING;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
197
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
198 // Code for disagreement
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
199 char DISAGREEMENT;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
200
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
201 // Helper class managing a single pair
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
202 class PairwiseConsensus {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
203 public:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
204 // Default object creation
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
205 PairwiseConsensus();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
206
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
207 // Object destruction
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
208 virtual ~PairwiseConsensus() {}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
209
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
210 // Usual object creation
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
211 PairwiseConsensus(std::string, std::string);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
212
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
213 // Fills an object created with the default constructor
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
214 void load(std::string,std::string);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
215
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
216 // Changes the MISSING character
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
217 void setUndeterminedCharacter(char);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
218
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
219 // Changes the DISAGREEMENT character
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
220 void setDisagreementCharacter(char);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
221
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
222 /* Uses the conservative mode of consensus
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
223 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
224 * Tries to avoid making decisions, and adds the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
225 * character set by DISAGREEMENT upon inconsistencies
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
226 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
227 * return The number of inconsistencies.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
228 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
229 int generateSoftConsensus();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
230
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
231 /* Strict mode of consensus
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
232 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
233 * return The number of inconsistencies.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
234 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
235 int generateHardConsensus();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
236
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
237 // Two fully resolved (including gap) and identical characters
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
238 int getConsistentPositions();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
239
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
240 // One informative (including gap) and one missing
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
241 int getComplementaryPositions();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
242
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
243 // None missing, but different and incompatible
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
244 int getInconsistentPositions();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
245
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
246 // Both are missing
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
247 int getUninformativePositions();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
248
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
249 // Both identical or one missing, but not fully resolved
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
250 int getAmbiguousPositions();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
251
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
252 // Different, not missing, complementary.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
253 int getAtLeastPartiallyResolvedAmbiguities();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
254
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
255 // Accessor
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
256 int getThisInconsistentPosition(unsigned int);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
257
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
258 // Generates the consensus sequence
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
259 std::string getConsensus();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
260
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
261 private:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
262
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
263 inline bool isValid(char c) {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
264 switch (c) {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
265 case 'A': case 'a':
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
266 case 'C': case 'c':
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
267 case 'G': case 'g':
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
268 case 'T': case 't':
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
269 return true;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
270 default:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
271 return false;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
272 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
273 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
274
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
275 // This initiates a series of embedded objects
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
276 void setCharacterContainers();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
277
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
278 // The first sequence
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
279 std::string seqA;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
280
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
281 // The second sequence
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
282 std::string seqB;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
283
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
284 // The resulting consensus
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
285 std::string cons;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
286
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
287 // The vector storing the inconsistent positions
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
288 std::vector<int> posIncons;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
289
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
290 // The length of the sequences
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
291 unsigned int ls;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
292
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
293 // Counter
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
294 int cntConsistentPositions;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
295
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
296 // Counter
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
297 int cntComplementaryPositions;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
298
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
299 // Counter
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
300 int cntAmbiguousPositions;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
301
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
302 // Counter
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
303 int cntInconsistentPositions;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
304
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
305 // Counter
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
306 int cntUninformativePositions;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
307
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
308 // Counter
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
309 int cntAtLeastPartiallyResolvedAmbiguities;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
310
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
311 // Code for missing data (usually ?)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
312 char MISSING;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
313
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
314 // Code for disagreement
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
315 char DISAGREEMENT;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
316
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
317 public:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
318 // This class manages relationships between different symbols
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
319 class CharacterContainer {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
320 public:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
321 // Default value: @
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
322 CharacterContainer();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
323
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
324 // Initiates to a given symbol
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
325 CharacterContainer(const char&);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
326
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
327 // Assignment operator
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
328 CharacterContainer& operator=(const char&);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
329
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
330 // Sets the symbol
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
331 void setValue(char);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
332
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
333 // Set the descendants
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
334 void setSons(std::vector<CharacterContainer>);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
335
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
336 // Tests whether the symbol is the same
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
337 bool is(CharacterContainer);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
338
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
339 // Tests if the query is contained amongst the sons
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
340 bool has(CharacterContainer);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
341
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
342 // Tests if the query is contained amongst the sons
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
343 bool has(char);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
344
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
345 /* Tests whether the left character has the right one
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
346 * Should be called on the N object only.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
347 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
348 char lhas(CharacterContainer,CharacterContainer);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
349
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
350 /* Creates the object with the proper sons
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
351 * Should be called on the N object only.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
352 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
353 CharacterContainer init(char);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
354
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
355 // The symbol
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
356 char value;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
357
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
358 // The descendants
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
359 std::vector<CharacterContainer> sons;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
360 };
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
361
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
362 private:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
363 // Symbol ?
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
364 CharacterContainer ccQ;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
365
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
366 // Symbol A
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
367 CharacterContainer ccA;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
368
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
369 // Symbol C
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
370 CharacterContainer ccC;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
371
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
372 // Symbol G
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
373 CharacterContainer ccG;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
374
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
375 // Symbol T
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
376 CharacterContainer ccT;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
377
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
378 // Symbol U
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
379 CharacterContainer ccU;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
380
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
381 // Symbol M
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
382 CharacterContainer ccM;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
383
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
384 // Symbol R
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
385 CharacterContainer ccR;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
386
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
387 // Symbol W
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
388 CharacterContainer ccW;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
389
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
390 // Symbol S
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
391 CharacterContainer ccS;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
392
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
393 // Symbol Y
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
394 CharacterContainer ccY;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
395
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
396 // Symbol K
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
397 CharacterContainer ccK;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
398
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
399 // Symbol B
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
400 CharacterContainer ccB;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
401
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
402 // Symbol D
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
403 CharacterContainer ccD;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
404
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
405 // Symbol H
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
406 CharacterContainer ccH;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
407
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
408 // Symbol V
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
409 CharacterContainer ccV;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
410
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
411 // Symbol N
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
412 CharacterContainer ccN;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
413
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
414 // Symbol -
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
415 CharacterContainer ccGAP;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
416 };
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
417 };
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
418 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
419
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
420 #endif
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
421