diff egglib/egglib-2.1.5/include/egglib-cpp/Mutator.hpp @ 1:420b57c3c185 draft

Uploaded
author dereeper
date Fri, 10 Jul 2015 04:39:30 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/egglib/egglib-2.1.5/include/egglib-cpp/Mutator.hpp	Fri Jul 10 04:39:30 2015 -0400
@@ -0,0 +1,397 @@
+/*
+    Copyright 2009, 2010, 2012 Stéphane De Mita, Mathieu Siol
+
+    This file is part of the EggLib library.
+
+    EggLib is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    EggLib is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with EggLib.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef EGGLIB_MUTATOR_HPP
+#define EGGLIB_MUTATOR_HPP
+
+
+#include "DataMatrix.hpp"
+#include "Random.hpp"
+#include "Arg.hpp"
+#include "Mutation.hpp"
+
+
+namespace egglib {
+    
+
+   /** \brief Implements mutation models
+    *
+    * \ingroup coalesce
+    * 
+    * Works with a previously built Ancestral Reconbination Graph. The
+    * user must sets options using the setter-based interface. After
+    * that he or she can call the method mute() that will generates
+    * a DataMatrix object.
+    * 
+    * Genotype data are represented by integer numbers. Regardless of
+    * the mutation model, the ancestral state is always 0. The user can
+    * set the rate of mutation (or, alternatively, fix the number of
+    * mutations that occurred - which is the number of segregating sites
+    * only with an infinite site model).
+    * 
+    * Other options fall into two separate groups: the positions of the
+    * mutated sites and the process of mutation (how new alleles are
+    * generated).
+    * 
+    * Concerning allele generation, several mutation models are available
+    * (coded by single letters):
+    *   - F: fixed number of alleles. Among other markers, this model is
+    *        appropriate for simulating nucleotides. The user is able
+    *        to choose the number of alleles (where 2 is the standard
+    *        for an infinite site model and 4 for a finite site model).
+    *        Mutator allows assigning independent weights between all
+    *        different transition types and can draw randomly the
+    *        ancestral states, providing a way to emulate evolution of
+    *        nucleotides with multiple mutations at the same site and
+    *        reversion.
+    *   - I: infinite number of alleles. At a given site, each mutation
+    *        raises a new allele. The value of the alleles is therefore
+    *        irrelevant (it only denotes its order of appearance). This
+    *        model does not permit homoplasy.
+    *   - S: stepwise mutation model. In this model the value of the
+    *        alleles are interpreted as a size (typically for simulating
+    *        a microsatellite marker). Each mutation either increases
+    *        or decreases the allele size by an increment of one.
+    *   - T: two-phase mutation model. This model is a generalization
+    *        of the stepwise mutation model (S). For a mutation, the
+    *        increment (either increase or decrease) is 1 with the
+    *        probability given by the parameter (1-TPMproba). With
+    *        probability TPMproba, the increment is drawn from a
+    *        geometric distribution of parameter given by the other
+    *        parameter (TPMparam).
+    * 
+    * By default, the program will assume an infinite site model (ISM).
+    * Each mutation will occur to a new position drawn from the interval
+    * [0,1]. It is possible to set any mutation model with an ISM 
+    * (including microsatellite-like models I, S and T). Alternatively,
+    * the user can specify a finite number of sites available for
+    * mutation. For a microsatellite marker, the user will want to
+    * specify a single site. It is possible to set a finite number of
+    * sites for all mutation models. In all cases, the mutations will
+    * be forced to target these sites. It is possible to apply weights
+    * independently to all sites. The higher the weight value
+    * (comparatively to the other sites), the higher the probability
+    * that this site mutes. The weights needs not to be relative. In
+    * addition, the user can set the positions of the different sites.
+    * Nothings forces him or her to place them in order. Note that this
+    * does not affect the mutation process, but on the amount of
+    * recombination that will be allowed between sites.
+    *
+    */
+    class Mutator {
+
+        public:
+        
+           /** \brief Initializes with default values
+            * 
+            * List of default values:
+            *   - theta = 0
+            *   - fixedNumberOfMutations = 0
+            *   - model = F (fixed number of alleles)
+            *   - fixed number of alleles = 2
+            *   - infinite site model
+            *   - TPM parameters are both preset to 0.5
+            * 
+            */
+            Mutator();
+
+            
+           /** \brief Destroys the object
+            * 
+            */
+            ~Mutator();
+            
+            
+           /** \brief Copy constructor
+            * 
+            */
+            Mutator(const Mutator&);
+        
+        
+           /** \brief Assignement operator
+            * 
+            */
+            Mutator& operator=(const Mutator&);
+
+
+           /** \brief Restores default values of all parameters
+            * 
+            */
+            void reset();
+            
+            
+           /** \brief Gets the fixed number of mutations
+            * 
+            */
+            unsigned int fixedNumberOfMutations() const;
+        
+
+           /** \brief Sets the fixed number of mutations
+            * 
+            * The value can be 0. It is not allowed to set both the
+            * fixed number of mutations and the mutation rate at
+            * non-zero value
+            * 
+            */
+            void fixedNumberOfMutations(unsigned int);
+
+
+           /** \brief Gets the mutation rate
+            * 
+            */
+            double mutationRate() const;
+        
+        
+           /** \brief Sets the mutation rate
+            * 
+            * The value cannot be negative. The value can be 0.  It is
+            * not allowed to set both the fixed number of mutations and
+            * the mutation rate at non-zero value
+            * 
+            */
+            void mutationRate(double);
+
+
+           /** \brief Gets the mutation model
+            * 
+            * See the class documentation for the signification of the
+            * different one-letter codes.
+            * 
+            */
+            char mutationModel() const;
+            
+            
+           /** \brief Sets the mutation model
+            * 
+            * The passed character must be one of F, I, S and T. See the
+            * class documentation for their signification.
+            * 
+            */
+            void mutationModel(char);
+            
+            
+           /** \brief Gets the fixed number of possible alleles
+            * 
+            */
+            unsigned int numberOfAlleles() const;
+           
+            
+           /** \brief Sets the fixed number of possible alleles
+            * 
+            * The value must be larger than 1. This parameter is
+            * meaningful only for the fixed number allele model of
+            * mutation, and ignored otherwise.
+            *
+            */
+            void numberOfAlleles(unsigned int);
+           
+           
+           /** \brief Sets a transition weight
+            * 
+            * \param i row (previous allele index).
+            * \param j column (next allele index).
+            * \param value weight to apply.
+            * 
+            * Indices i and j must be different. Weights can be any
+            * strictly positive value.
+            * 
+            */
+            void transitionWeight(unsigned int i, unsigned int j, double value);
+
+
+           /** \brief Gets a transition weight
+            * 
+            * \param i row (previous allele index).
+            * \param j column (next allele index).
+            * 
+            * Indices i and j must be different.
+            * 
+            */
+            double transitionWeight(unsigned int i, unsigned int j);
+            
+            
+           /** \brief Set to true to draw ancestral alleles randomly
+            * 
+            * By default, the ancestral allele is always 0. If this
+            * variable is set to true, the ancestral allele will be
+            * randomly drawn from the defined number of alleles. This
+            * option is always ignored unless in combination with the
+            * Fixed Allele Number model.
+            * 
+            */
+            void randomAncestralAllele(bool flag);
+
+            
+            /** \brief true if ancestral alleles must be drawn randomly
+             * 
+             */
+            bool randomAncestralAllele() const;
+
+           
+           /** \brief Gets the TPM probability parameter
+            * 
+            */
+            double TPMproba() const;
+            
+            
+           /** \brief Sets the TPM probability parameter
+            * 
+            * This parameter is considered only if the mutation model
+            * is T (two-phase mutation model). It gives the probability
+            * that a mutation step is not fixed to be 1. If TPMproba is
+            * 0, the mutation model is SMM.
+            * 
+            * The value must be >=0. and <=1. 
+            * 
+            */
+            void TPMproba(double value);
+           
+            
+           /** \brief Gets the TPM distribution parameter
+            * 
+            */
+            double TPMparam() const;
+            
+            
+           /** \brief Sets the TPM distribution parameter
+            * 
+            * This parameter is considered only if the mutation model
+            * is T (two-phase mutation model). It gives the parameter
+            * of the geometric distribution which is used to generate
+            * the mutation step (if it is not one).
+            * 
+            * The value must be >=0. and <=1. 
+            * 
+            */
+            void TPMparam(double value);
+
+
+           /** \brief Gets the number of mutable sites
+            * 
+            * A value a zero must be interpreted as the infinite site
+            * model. Note that after all calls all data from the tables
+            * sitePositions and siteWeights will be reset.
+            * 
+            */
+            unsigned int numberOfSites() const;
+           
+            
+           /** \brief Sets the number of mutable sites
+            * 
+            * The value of zero is accepted and imposed the infinite
+            * site model.
+            * 
+            */
+            void numberOfSites(unsigned int);
+            
+            
+           /** \brief Gets the position of a given site
+            * 
+            */
+            double sitePosition(unsigned int siteIndex) const;
+
+            
+           /** \brief Set the position of a given site
+            * 
+            * The position must be >=0 and <=1
+            * 
+            */
+            void sitePosition(unsigned int siteIndex, double position);
+
+
+           /** \brief Gets the mutation weight of a given site
+            * 
+            */
+            double siteWeight(unsigned int siteIndex) const;
+
+            
+           /** \brief Set the site weight of a given site
+            * 
+            * The weight must be strictly positive.
+            * 
+            */
+            void siteWeight(unsigned int siteIndex, double weight);
+
+
+           /** \brief Performs mutation
+            * 
+            * \param arg Ancestral recombination graph instance. If the
+            * ARG is partially built or not a all, or improperly so,
+            * the behaviour of this method is not defined.
+            * 
+            * \param random The address of a Random instance to be
+            * used for generating random numbers.
+            * 
+            * \return A DataMatrix instance containing simulated data.
+            * 
+            */
+            DataMatrix mute(Arg* arg, Random* random);
+
+
+          /** \brief Gets the last number of mutations
+            *
+            * Returns the number of mutations of the last call of mute( ).
+            * By default, this method returns 0.
+            *
+            */
+            unsigned int numberOfMutations() const; 
+
+
+        private:
+        
+            void clear();
+            void init();
+            void copy(const Mutator&);
+
+            //int nextAllele(int allele, Random* random);
+            int TPMstep(double inTPMproba, Random* random);
+            void apply_mutation(unsigned int matrixIndex,
+                                unsigned int actualSite, DataMatrix& data,
+                                const Edge* edge, int allele,
+                                unsigned int segment, Random* random);
+
+        
+            char _model;
+            double _mutationRate;
+            unsigned int _fixedNumberOfMutations;
+            unsigned int _numberOfAlleles;
+            double** _transitionWeights;
+            bool _randomAncestralAllele;
+            unsigned int _numberOfSites;
+            double* _sitePositions;
+            double* _siteWeights;
+            double _TPMproba;
+            double _TPMparam;
+            int maxAllele;
+            unsigned int _numberOfMutations;
+            std::vector<Mutation> _cache_mutations;
+            unsigned int _cache_mutations_reserved;
+
+    };
+
+
+    bool compare(Mutation mutation1, Mutation mutation2); // returns True if mutation1 is older
+
+}
+
+
+
+
+#endif
+