Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/LinkageDisequilibrium.hpp @ 1:420b57c3c185 draft
Uploaded
| author | dereeper |
|---|---|
| date | Fri, 10 Jul 2015 04:39:30 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:3e19d0dfcf3e | 1:420b57c3c185 |
|---|---|
| 1 /* | |
| 2 Copyright 2009 Stéphane De Mita, Mathieu Siol | |
| 3 | |
| 4 This file is part of the EggLib library. | |
| 5 | |
| 6 EggLib is free software: you can redistribute it and/or modify | |
| 7 it under the terms of the GNU General Public License as published by | |
| 8 the Free Software Foundation, either version 3 of the License, or | |
| 9 (at your option) any later version. | |
| 10 | |
| 11 EggLib is distributed in the hope that it will be useful, | |
| 12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 14 GNU General Public License for more details. | |
| 15 | |
| 16 You should have received a copy of the GNU General Public License | |
| 17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
| 18 */ | |
| 19 | |
| 20 #ifndef EGGLIB_LINKAGEDISEQUILIBRUM_HPP | |
| 21 #define EGGLIB_LINKAGEDISEQUILIBRUM_HPP | |
| 22 | |
| 23 | |
| 24 #include "BaseDiversity.hpp" | |
| 25 #include "EggException.hpp" | |
| 26 | |
| 27 | |
| 28 namespace egglib { | |
| 29 | |
| 30 /** \brief Analyzes linkage disequilibrium per pair of polymorphic sites | |
| 31 * | |
| 32 * \ingroup polymorphism | |
| 33 * | |
| 34 * The class considers an alignment and detects polymorphic sites | |
| 35 * using the BaseDiversity functionality (shared with other classes | |
| 36 * of the module). Only sites with exactly two alleles are | |
| 37 * considered. Statistics of pairwise linkage disequilibrium can | |
| 38 * be accessed by pair index (note that out-of-range errors are not | |
| 39 * checked). Population labels are ignored (but outgroups are | |
| 40 * excluded from the analysis). | |
| 41 * | |
| 42 */ | |
| 43 class LinkageDisequilibrium : public BaseDiversity { | |
| 44 | |
| 45 public: | |
| 46 | |
| 47 /// Default constructor | |
| 48 LinkageDisequilibrium(); | |
| 49 | |
| 50 /// Destructor | |
| 51 virtual ~LinkageDisequilibrium(); | |
| 52 | |
| 53 /** \brief Analyzes polymorphic sites of an alignment | |
| 54 * | |
| 55 * \param data an alignment object (subclass of CharMatrix). | |
| 56 * The presence of outgroup or of different populations will | |
| 57 * be detected based on the populationLabel members of the | |
| 58 * passed object. The populationLabel 999 will be interpreted | |
| 59 * as outgroups. If several outgroups are passed, sites were | |
| 60 * the outgroups are not consistent will be treated as "non- | |
| 61 * orientable". | |
| 62 * | |
| 63 * \param minimumExploitableData site where the non-missing | |
| 64 * data (as defined by characterMapping) are at a frequency | |
| 65 * larger than this value will be removed from the analysis. | |
| 66 * Use 1. to take only 'complete' sites into account and 0. | |
| 67 * to use all sites. | |
| 68 * | |
| 69 * \param ignoreFrequency removes sites that are polymorphic | |
| 70 * because of an allele at absolute frequency smaller than or | |
| 71 * equal to this value. If ignoreFrequency=1, no sites are | |
| 72 * removed, if ignoreFrequency=1, singleton sites are | |
| 73 * ignored. Such sites are completely removed from the | |
| 74 * analysis (not counted in lseff). Note that if more than | |
| 75 * one mutation is allowed, the site is removed only if all | |
| 76 * the alleles but one are smaller than or equal to this | |
| 77 * value. For example, an alignment column AAAAAAGAAT is | |
| 78 * ignored with an ignoreFrequency of 1, but AAAAAAGGAT is | |
| 79 * conserved (including the third allele T which is a | |
| 80 * singleton). | |
| 81 * | |
| 82 * \param characterMapping a string giving the list of | |
| 83 * characters that should be considered as valid data. If a | |
| 84 * space is present in the string, the characters left of the | |
| 85 * space will be treated as valid data and the characters | |
| 86 * right of the space will be treated as missing data, that | |
| 87 * is tolerated but ignored. All characters not in the string | |
| 88 * will cause an EggInvalidCharacterError to be raised. | |
| 89 */ | |
| 90 void load(CharMatrix& data, | |
| 91 double minimumExploitableData=1., | |
| 92 unsigned int ignoreFrequency=0, | |
| 93 std::string characterMapping=dnaMapping); | |
| 94 | |
| 95 | |
| 96 /// Number of pairs contained in the instance | |
| 97 unsigned int numberOfPairs() const; | |
| 98 | |
| 99 /// Alignment distance between a given pair | |
| 100 int d(unsigned int pair_index); | |
| 101 | |
| 102 /// D statistic for a given pair | |
| 103 double D(unsigned int pair_index); | |
| 104 | |
| 105 /// D' statistic for a given pair | |
| 106 double Dp(unsigned int pair_index); | |
| 107 | |
| 108 /// r statistic for a given pair | |
| 109 double r(unsigned int pair_index); | |
| 110 | |
| 111 /// r2 statistic for a given pair | |
| 112 double r2(unsigned int pair_index); | |
| 113 | |
| 114 /// position of the first site for a given pair | |
| 115 unsigned int site1(unsigned int pair_index); | |
| 116 | |
| 117 /// position of the second site for a given pair | |
| 118 unsigned int site2(unsigned int pair_index); | |
| 119 | |
| 120 /// correlation coefficient between r2 and distance | |
| 121 double correl() const; | |
| 122 | |
| 123 /** \brief Computes the minimal number of recombination events | |
| 124 * | |
| 125 * The computation is performed as described in Hudson, RR and | |
| 126 * NL Kaplan. 1985. Statistical properties of the number of | |
| 127 * recombination events in the history of a sample of DNA | |
| 128 * sequences. Genetics 111: 147-164. The returned parameter is | |
| 129 * the minimal number of recombination events, given by the | |
| 130 * number of non-overlapping pairs of segregating sites violating | |
| 131 * the rule of the four gamete. Only sites with two alleles are | |
| 132 * considered. Note that homoplasy (multiple mutations) mimicks | |
| 133 * recombination. The result of this function is not stored | |
| 134 * in this instance, and re-computed at each call. | |
| 135 * | |
| 136 * \param data the same CharMatrix instance as passed to the load | |
| 137 * method. The instance must not have been modified. | |
| 138 * | |
| 139 */ | |
| 140 unsigned int Rmin(CharMatrix& data) const; | |
| 141 | |
| 142 | |
| 143 | |
| 144 protected: | |
| 145 | |
| 146 // adds a pair of polymorphic sites | |
| 147 // assume position2>position1, | |
| 148 // sites are polymorphic with exactly 2 alleles | |
| 149 void add(CharMatrix& data, unsigned int position1, unsigned int position2); | |
| 150 | |
| 151 // Constructor help | |
| 152 void init(); | |
| 153 | |
| 154 // Destructor helper | |
| 155 void clear(); | |
| 156 | |
| 157 // Resizes arrays | |
| 158 void reset(); | |
| 159 | |
| 160 // Small helper | |
| 161 inline double min(double a, double b) { return (a>b)?a:b;} | |
| 162 | |
| 163 // Small helper | |
| 164 inline double max(double a, double b) { return (a>b)?b:a;} | |
| 165 | |
| 166 // Small helper | |
| 167 inline void check(unsigned int pos) { if (pos>=_n) throw EggRuntimeError("tried to access an invalid index"); } | |
| 168 | |
| 169 /* Performs correlation | |
| 170 * | |
| 171 * This function works independently from the rest of the class. | |
| 172 * | |
| 173 * \param n length of data arrays. | |
| 174 * \param x first data vector. | |
| 175 * \param y second data vector. | |
| 176 * \param r variable to receive the correlation coefficient. | |
| 177 * \param a variable to receive the regression slope. | |
| 178 */ | |
| 179 static void _correl(unsigned int n, const int* x, const double* y, double& r, double& a); | |
| 180 | |
| 181 // Distance between pairs | |
| 182 int* _d; | |
| 183 | |
| 184 // D (classical) measure of LD | |
| 185 double *_D; | |
| 186 | |
| 187 // D' | |
| 188 double *_Dp; | |
| 189 | |
| 190 // r, correlation coefficient | |
| 191 double *_r; | |
| 192 | |
| 193 // square r | |
| 194 double *_r2; | |
| 195 | |
| 196 // Data array (not managed by the instance) | |
| 197 unsigned int *_site1; | |
| 198 | |
| 199 // Data array (not managed by the instance) | |
| 200 unsigned int *_site2; | |
| 201 | |
| 202 // Number of pairs | |
| 203 unsigned int _n; | |
| 204 | |
| 205 private: | |
| 206 | |
| 207 /// Copy constructor not available | |
| 208 LinkageDisequilibrium(const LinkageDisequilibrium&) { } | |
| 209 | |
| 210 /// Assignment operator not available | |
| 211 LinkageDisequilibrium& operator=(const LinkageDisequilibrium&) { | |
| 212 return *this; | |
| 213 } | |
| 214 | |
| 215 | |
| 216 class Interval { | |
| 217 public: | |
| 218 Interval(unsigned int, unsigned int); | |
| 219 unsigned int a() const; | |
| 220 unsigned int b() const; | |
| 221 bool good() const; | |
| 222 void set_false(); | |
| 223 private: | |
| 224 unsigned int _a; | |
| 225 unsigned int _b; | |
| 226 unsigned int _good; | |
| 227 }; | |
| 228 | |
| 229 | |
| 230 }; | |
| 231 } | |
| 232 | |
| 233 #endif |
