view ngsutils/support/llh.py @ 2:7a68005de299 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
author iuc
date Sun, 27 Nov 2016 15:01:21 -0500
parents 4e4e4093d65d
children
line wrap: on
line source

'''
Methods for calculating log-likelihoods for nucleotide frequencies
'''
import collections
import math

from ngsutils.support import memoize

_default_background = {'A': 0.3, 'T': 0.3, 'C': 0.2, 'G': 0.2}

NucleotideLogLikelihood = collections.namedtuple('NucleotideLogLikelihood', 'A C G T pseudo')


@memoize
def pseudo_count(N, bg):
    '''
    >>> pseudo_count(100, _default_background['A'])
    3
    >>> pseudo_count(100, _default_background['C'])
    2
    '''

    return bg * math.sqrt(N)


def calc_llh(A, C, G, T, bg=_default_background, pseudo='auto'):
    if pseudo == 'auto':
        N = A + C + G + T
        Ap = float(A) + pseudo_count(N, bg['A'])
        Cp = float(C) + pseudo_count(N, bg['C'])
        Gp = float(G) + pseudo_count(N, bg['G'])
        Tp = float(T) + pseudo_count(N, bg['T'])
    elif pseudo:
        Ap = float(A) + pseudo
        Cp = float(C) + pseudo
        Gp = float(G) + pseudo
        Tp = float(T) + pseudo
    else:
        Ap = float(A)
        Cp = float(C)
        Gp = float(G)
        Tp = float(T)

    Np = Ap + Cp + Gp + Tp

    freqA = Ap / Np
    freqC = Cp / Np
    freqG = Gp / Np
    freqT = Tp / Np

    return NucleotideLogLikelihood(math.log(freqA / bg['A']), math.log(freqC / bg['C']), math.log(freqG / bg['G']), math.log(freqT / bg['T']), pseudo)


if __name__ == '__main__':
    import doctest
    doctest.testmod()