annotate egglib/egglib-2.1.5/include/egglib-cpp/Convert.hpp @ 1:420b57c3c185 draft

Uploaded
author dereeper
date Fri, 10 Jul 2015 04:39:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
1 /*
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
2 Copyright 2009 Stéphane De Mita, Mathieu Siol
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
4 This file is part of the EggLib library.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
5
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
6 EggLib is free software: you can redistribute it and/or modify
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
8 the Free Software Foundation, either version 3 of the License, or
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
9 (at your option) any later version.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
10
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
11 EggLib is distributed in the hope that it will be useful,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
14 GNU General Public License for more details.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
15
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
18 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
19
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
20
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
21 #ifndef EGGLIB_CONVERT_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
22 #define EGGLIB_CONVERT_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
23
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
24
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
25 #include "DataMatrix.hpp"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
26 #include "Align.hpp"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
27 #include "EggException.hpp"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
28 #include "Random.hpp"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
29 #include <string>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
30
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
31 #include "config.h"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
32
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
33 #ifdef HAVE_LIBBPP_SEQ
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
34 #include <Bpp/Seq/Alphabet.all>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
35 #include <Bpp/Seq/Sequence.h>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
36 #include <Bpp/Seq/Container.all>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
37 #endif
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
38
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
39
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
40
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
41 namespace egglib {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
42
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
43
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
44 /** \brief Performs conversion between sequence holder types
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
45 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
46 * \ingroup core
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
47 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
48 * Static methods of this class allow conversion between sequence
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
49 * holder types implying parametrizable modifications.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
50 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
51 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
52 class Convert {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
53
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
54 public:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
55
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
56 /** \brief DataMatrix to Align conversion
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
57 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
58 * By default, this method generates an Align instance
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
59 * containing only the polymorphic sites. The integers of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
60 * the DataMatrix will be converted as follows: 0 to A, 1 to
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
61 * C, 2 to G and 3 to T. This behaviour can be largely
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
62 * modified using options.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
63 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
64 * \param dataMatrix DataMatrix instance.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
65 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
66 * \param length length of the desired alignment. Non-varying
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
67 * stretches of data will be introduced to reach the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
68 * specified length. By default the positions of segregating
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
69 * sites will be determined from the positions given by the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
70 * DataMatrix object. Those positions are expressed in a
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
71 * continuous range, and will be discretized. Mutations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
72 * falling on the same site will be moved by one position
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
73 * left or right (always preserving the order of mutation
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
74 * sites). If positions are all zero (the default of the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
75 * DataMatrix class) and if length is larger than the number
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
76 * of segregating sites, then all segregating sites will
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
77 * cluster on the left-hand side of the alignment.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
78 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
79 * \param random the address of a Random object used to
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
80 * draw random numbers (for randomizing positions and/or
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
81 * non-varying states). If an address is provided but no
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
82 * random numbers are required, it is ignored. If no address
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
83 * is provided and random numbers are required, a Random
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
84 * instance is built internally.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
85 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
86 * \param randomizePositions if true, the positions specified
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
87 * in the DataMatrix objects are ignored and the positions of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
88 * mutations are drawn randomly along the interval (only if
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
89 * the specified length is larger than the number of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
90 * segregating sites). If randomizePositions is false and
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
91 * positions are not
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
92 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
93 * \param enforceLength specify whether a
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
94 * EggRuntimeError should be thrown when the number of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
95 * polymorphic sites is larger than the specified length. If
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
96 * false (the default) and in cases where the specified
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
97 * length is too short to harbor all polymorphic sites, the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
98 * alignment length will be increased as needed.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
99 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
100 * \param randomizeNonVaryingStates if true, the stretches of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
101 * conserved positions (between segregating sites) will be
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
102 * randomly drawn from the current symbol mapping. Otherwise,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
103 * the symbol given by nonVaryingState will be used.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
104 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
105 * \param randomizeAlleles if true, alleles will be drawn
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
106 * randomly from the mapped characters. Note that if a
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
107 * genotype value is larger than the size of the mapping, it
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
108 * will be replaced by the character given by unknown,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
109 * without randomization. In other words, with the mapping
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
110 * "ACGT", alleles 0, 1, 2 and 3 will be randomly assigned
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
111 * to these four characters, but larger and negative alleles
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
112 * will be assigned to the unknown character.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
113 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
114 * \param mapping a string giving the characters to assign to
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
115 * different character values read from the DataMatrix. If
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
116 * the read value is 0, the first character of the string
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
117 * will be used, if the value is 1, the second character will
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
118 * be used, and so on. If the integer read is out of range
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
119 * (in particular, for any negative value), then the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
120 * character given by unknown will be used. An empty string
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
121 * will always lead to alignments containing only the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
122 * character given by unknown. The string "01" is suitable
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
123 * for binary data.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
124 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
125 * \param unknown the character to use if an integer genotype
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
126 * value is not mapped in the mapping string (that is, if
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
127 * the mapping string is too short).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
128 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
129 * \param nonVaryingState character to use for conserved
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
130 * stretches of data. It doesn't have to be included in the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
131 * mapping. If randomizeNonVaryingStates is true, this
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
132 * argument is ignored.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
133 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
134 * \return The resulting Align object.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
135 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
136 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
137 static Align align(
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
138 DataMatrix& dataMatrix,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
139 unsigned int length=0,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
140 Random* random=NULL,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
141 bool randomizePositions=false,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
142 bool randomizeNonVaryingStates=false,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
143 bool randomizeAlleles=false,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
144 bool enforceLength=false,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
145 std::string mapping="ACGT",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
146 char unknown='?',
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
147 char nonVaryingState='A'
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
148 );
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
149
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
150
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
151 #ifdef HAVE_LIBBPP_SEQ
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
152
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
153 /** \brief Converts an alignment to the equivalent Bio++ type
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
154 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
155 * During conversion, name information is lost (arbitrary
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
156 * names are generated in order to prevent duplicate names).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
157 * The object is attached to an alphabet matching the passed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
158 * integer. The names are bare rank integers (starting at the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
159 * value given by *offset*).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
160 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
161 * \param align the source alignment object.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
162 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
163 * \param alphabetID an integer indicating which alphabet to
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
164 * use:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
165 * - 1 for DNA
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
166 * - 2 for RNA
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
167 * - 3 for proteins
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
168 * - 4 for standard codon
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
169 * - 5 for vertebrate mitochondrial codon
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
170 * - 6 for invertebrate mitochondrial codon
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
171 * - 7 for echinoderm mitochondrial codon
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
172 * .
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
173 * Other values will result in an exception.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
174 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
175 * \param outgroupFlag an integer indicating whether to
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
176 * include outgroup sequences:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
177 * - 0 use all sequences
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
178 * - 1 use only sequences without 999 label (ingroup)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
179 * - 2 use only sequences with 999 label (outgroup)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
180 * .
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
181 * Other values will result in an exception.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
182 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
183 * \param offset enter an integer to shift the names of the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
184 * resulting alignment (useful to merge alignments and ensure
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
185 * that names are not duplicated).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
186 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
187 * \return A Bio++ alignment.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
188 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
189 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
190 static bpp::AlignedSequenceContainer egglib2bpp(Align& align, unsigned int alphabetID, unsigned int outgroupFlag, unsigned int offset=0);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
191
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
192 #endif
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
193
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
194
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
195
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
196 protected:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
197
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
198 /** \brief This class cannot be instantiated
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
199 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
200 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
201 Convert() { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
202
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
203
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
204 /** \brief This class cannot be instantiated
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
205 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
206 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
207 Convert(const Convert& source) { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
208
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
209
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
210 /** \brief This class cannot be instantiated
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
211 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
212 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
213 Convert& operator=(const Convert& source) { return *this; }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
214
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
215
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
216 /** \brief This class cannot be instantiated
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
217 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
218 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
219 virtual ~Convert() { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
220
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
221 #ifdef HAVE_LIBBPP_SEQ
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
222 static bpp::DNA dnaAlphabet;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
223 static bpp::RNA rnaAlphabet;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
224 static bpp::ProteicAlphabet proteicAlphabet;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
225 static bpp::StandardCodonAlphabet standardCodonAlphabet;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
226 static bpp::VertebrateMitochondrialCodonAlphabet vertebrateMitochondrialCodonAlphabet;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
227 static bpp::InvertebrateMitochondrialCodonAlphabet invertebrateMitochondrialCodonAlphabet;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
228 static bpp::EchinodermMitochondrialCodonAlphabet echinodermMitochondrialCodonAlphabet;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
229 #endif
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
230
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
231 };
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
232 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
233
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
234 #endif