diff egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp @ 1:420b57c3c185 draft

Uploaded
author dereeper
date Fri, 10 Jul 2015 04:39:30 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp	Fri Jul 10 04:39:30 2015 -0400
@@ -0,0 +1,227 @@
+/*
+    Copyright 2010 Stéphane De Mita, Mathieu Siol
+
+    This file is part of the EggLib library.
+
+    EggLib is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    EggLib is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with EggLib.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef EGGLIB_HFSTATISTICS_HPP
+#define EGGLIB_HFSTATISTICS_HPP
+
+
+
+namespace egglib {
+
+
+    /** \brief Computes Fst and Fit from haploid data
+    *
+    * The class requires loading data. Data are loaded by haploid
+    * (one genotype per individual). The analyses are cached: they are
+    * performed upon the first call to statistics accessors. The cache
+    * is emptied whenever a datum is loaded.
+    * 
+    * The computations are performed after Weir and Cockerham. The
+    * statistic theta is generalized for multiple alleles. To allow
+    * computation of multi-locus statistics, variance components are
+    * also available. The two components of the variance are T1 and T2
+    * and theta is T1/T2 (from Weir 1996 "Genetic Data Analysis II",
+    * Sinauer associates, Sunderland MA).
+    * 
+    * \ingroup polymorphism
+    *
+    */
+    class HFStatistics {
+    
+        public:
+    
+           /** \brief Constructor
+            * 
+            */ 
+            HFStatistics();
+
+            
+           /** \brief Destructor
+            * 
+            */ 
+            virtual ~HFStatistics();
+
+            
+           /** \brief Reserve sufficient memory for a given number of
+            * individuals.
+            * 
+            * This method makes the load function faster by allocating
+            * all required memory at once.
+            * 
+            * \param numberOfIndividuals a strictly positive integer.
+            * 
+            */
+            void reserve(unsigned int numberOfIndividuals);
+
+
+           /** \brief Loads the data for one individual
+            * 
+            * \param genotype an integer giving the allele.
+            * \param populationLabel an integer indication belonging to
+            * a population.
+            * 
+            * Genotypes and population labels are not required to be
+            * consecutive (both are labels, not indices). They are
+            * internally mapped to indices (the mapping can be obtained
+            * by accessors populationLabel and allele).
+            * 
+            * All genotypes are considered to be valid (no missing data).
+            * If statistics were computed previous to call to this
+            * function, all data will be erased.
+            * 
+            */
+            void loadIndividual(unsigned int genotype, unsigned int populationLabel);
+
+
+           /** \brief Label of a population
+            * 
+            * The index corresponds to the local mapping of populations
+            * regardless of the ranking of population labels. (No out
+            * of bound checking.)
+            * 
+            */
+            unsigned int populationLabel(unsigned int populationIndex);
+
+
+           /** \brief Value of an allele
+            * 
+            * The index corresponds to the local mapping of alleles
+            * regardless of the ranking of allele values. (No out of
+            * bound checking.)
+            * 
+            */
+            unsigned int alleleValue(unsigned int alleleIndex);
+
+
+            /// Allele of a given individual (no checking)
+            unsigned int allele(unsigned int individualIndex) const;
+
+            /// Population label of a given individual (no checking)
+            unsigned int individualLabel(unsigned int individualIndex) const;
+
+
+           /** \brief Number of alleles
+            * 
+            */
+            unsigned int numberOfAlleles();
+
+
+           /** \brief Number of populations
+            * 
+            */
+            unsigned int numberOfPopulations();
+
+
+           /** \brief Number of loaded genotypes
+            * 
+            */
+            unsigned int numberOfGenotypes() const;
+
+
+           /** \brief Absolute total allele frequency
+            * 
+            */
+            unsigned int alleleFrequencyTotal(unsigned int alleleIndex);
+            
+
+           /** \brief Absolute allele frequency in a population
+            * 
+            */
+            unsigned int alleleFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex);
+
+
+           /** \brief Sample size of a population
+            * 
+            */
+            unsigned int populationFrequency(unsigned int populationIndex);
+
+
+           /** \brief Weir-Cockerham theta-statistic
+            * 
+            * Note: equivalent to Fst.
+            * 
+            */
+            double theta();
+
+
+           /** \brief Between-population component of variance
+            * 
+            */
+            double T1();
+
+
+           /** \brief Total variance
+            * 
+            */
+            double T2();
+            
+
+        protected:
+    
+            bool d_flag;
+            void d_init();
+            void d_clear();
+            unsigned int  d_reserved;
+            unsigned int  d_numberOfGenotypes;
+            unsigned int *d_genotypes;
+            unsigned int *d_populationLabels;
+
+            bool s_flag;
+            void s_init();
+            void s_clear();
+            void s_compute();
+            void processPopulations();
+            void processAlleles();
+            unsigned int getPopulationIndex(unsigned int) const;
+            unsigned int getAlleleIndex(unsigned int) const;
+            unsigned int    s_numberOfAlleles;
+            unsigned int   *s_alleleValueMapping;
+            unsigned int    s_numberOfPopulations;
+            unsigned int   *s_populationLabelMapping;
+            unsigned int   *s_populationFrequencies;
+            unsigned int   *s_alleleFrequenciesTotal;
+            unsigned int  **s_alleleFrequenciesPerPopulation;
+
+            bool w_flag;
+            void w_init();
+            void w_clear();
+            void w_compute();
+            double  w_T;
+            double *w_T1;
+            double *w_T2;
+            double  w_nbar;
+            double  w_nc;
+            double *w_pbar;
+            double *w_ssquare;
+            double  w_sum_T1;
+            double  w_sum_T2;
+
+    
+        private:
+            
+            HFStatistics(const HFStatistics& source) { }
+            
+            HFStatistics& operator=(const HFStatistics& source) {
+                return *this;
+            }
+
+    };
+}
+
+#endif