Mercurial > repos > davidmurphy > codonlogo

#!/usr/bin/env python

#  Copyright (c) 2006, The Regents of the University of California, through
#  Lawrence Berkeley National Laboratory (subject to receipt of any required
#  approvals from the U.S. Dept. of Energy).  All rights reserved.

#  This software is distributed under the new BSD Open Source License.
#  <http://www.opensource.org/licenses/bsd-license.html>
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are met:
#
#  (1) Redistributions of source code must retain the above copyright notice,
#  this list of conditions and the following disclaimer.
#
#  (2) Redistributions in binary form must reproduce the above copyright
#  notice, this list of conditions and the following disclaimer in the
#  documentation and or other materials provided with the distribution.
#
#  (3) Neither the name of the University of California, Lawrence Berkeley
#  National Laboratory, U.S. Dept. of Energy nor the names of its contributors
#  may be used to endorse or promote products derived from this software
#  without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.


import unittest

import weblogolib
from weblogolib import  *
from weblogolib import  parse_prior, GhostscriptAPI
from weblogolib.color import *
from weblogolib.colorscheme import *
from StringIO import StringIO
import sys

from numpy import array, asarray, float64, ones, zeros, int32,all,any, shape
import numpy as na

from corebio import seq_io
from corebio.seq import *

# python2.3 compatability
from corebio._future.subprocess import *
from corebio._future import resource_stream

from corebio.moremath import entropy
from math import log, sqrt
codon_alphabetU=['AAA', 'AAC', 'AAG', 'AAU', 'ACA', 'ACC', 'ACG', 'ACU', 'AGA', 'AGC', 'AGG', 'AGU', 'AUA', 'AUC', 'AUG', 'AUU', 'CAA', 'CAC', 'CAG', 'CAU', 'CCA', 'CCC', 'CCG', 'CCU', 'CGA', 'CGC', 'CGG', 'CGU', 'CUA', 'CUC', 'CUG', 'CUU', 'GAA', 'GAC', 'GAG', 'GAU', 'GCA', 'GCC', 'GCG', 'GCU', 'GGA', 'GGC', 'GGG', 'GGU', 'GUA', 'GUC', 'GUG', 'GUU', 'UAA', 'UAC', 'UAG', 'UAU', 'UCA', 'UCC', 'UCG', 'UCU', 'UGA', 'UGC', 'UGG', 'UGU', 'UUA', 'UUC', 'UUG', 'UUU']
codon_alphabetT=['AAA', 'AAC', 'AAG', 'AAT', 'ACA', 'ACC', 'ACG', 'ACT', 'AGA', 'AGC', 'AGG', 'AGT', 'ATA', 'ATC', 'ATG', 'ATT', 'CAA', 'CAC', 'CAG', 'CAT', 'CCA', 'CCC', 'CCG', 'CCT', 'CGA', 'CGC', 'CGG', 'CGT', 'CTA', 'CTC', 'CTG', 'CTT', 'GAA', 'GAC', 'GAG', 'GAT', 'GCA', 'GCC', 'GCG', 'GCT', 'GGA', 'GGC', 'GGG', 'GGT', 'GTA', 'GTC', 'GTG', 'GTT', 'TAA', 'TAC', 'TAG', 'TAT', 'TCA', 'TCC', 'TCG', 'TCT', 'TGA', 'TGC', 'TGG', 'TGT', 'TTA', 'TTC', 'TTG', 'TTT']


def testdata_stream( name ):
    return resource_stream(__name__, 'tests/data/'+name, __file__)

class test_logoformat(unittest.TestCase) :

    def test_options(self) :
        options = LogoOptions()


class test_ghostscript(unittest.TestCase) :
    def test_version(self) :
        version = GhostscriptAPI().version


class test_parse_prior(unittest.TestCase) :
    def assertTrue(self, bool) :
        self.assertEquals( bool, True)

    def test_parse_prior_none(self) :
        self.assertEquals( None,
            parse_prior(None, unambiguous_protein_alphabet ) )
        self.assertEquals( None,
            parse_prior( 'none', unambiguous_protein_alphabet ) )
        self.assertEquals( None,
            parse_prior( 'noNe', None) )

    def test_parse_prior_equiprobable(self) :
        self.assertTrue( all(20.*equiprobable_distribution(20)  ==
            parse_prior( 'equiprobable',  unambiguous_protein_alphabet ) ) )

        self.assertTrue(
            all( 1.2* equiprobable_distribution(3)
            == parse_prior( ' equiprobablE  ',  Alphabet('123'), 1.2 )  ) )

    def test_parse_prior_percentage(self) :
        #print       parse_prior( '50%', unambiguous_dna_alphabet, 1. )
        self.assertTrue( all( equiprobable_distribution(4)
            == parse_prior( '50%', unambiguous_dna_alphabet, 1. )  ) )

        self.assertTrue( all( equiprobable_distribution(4)
            == parse_prior( ' 50.0 % ', unambiguous_dna_alphabet, 1. )  ) )

        self.assertTrue( all( array( (0.3,0.2,0.2,0.3), float64)
            == parse_prior( ' 40.0 % ', unambiguous_dna_alphabet, 1. )  ) )

    def test_parse_prior_float(self) :
        self.assertTrue( all( equiprobable_distribution(4)
            == parse_prior( '0.5', unambiguous_dna_alphabet, 1. )  ) )

        self.assertTrue( all( equiprobable_distribution(4)
            == parse_prior( ' 0.500 ', unambiguous_dna_alphabet, 1. )  ) )

        self.assertTrue( all( array( (0.3,0.2,0.2,0.3), float64)
            == parse_prior( ' 0.40 ', unambiguous_dna_alphabet, 1. )  ) )

    def test_auto(self) :
        self.assertTrue( all(4.*equiprobable_distribution(4)  ==
            parse_prior( 'auto',  unambiguous_dna_alphabet ) ) )
        self.assertTrue( all(4.*equiprobable_distribution(4)  ==
            parse_prior( 'automatic',  unambiguous_dna_alphabet ) ) )

    def test_weight(self) :
        self.assertTrue( all(4.*equiprobable_distribution(4)  ==
            parse_prior( 'automatic',  unambiguous_dna_alphabet ) ) )
        self.assertTrue( all(123.123*equiprobable_distribution(4)  ==
            parse_prior( 'auto',  unambiguous_dna_alphabet , 123.123) ) )

    def test_explicit(self) :
        s = "{'A':10, 'C':40, 'G':40, 'T':10}"
        p = array( (10, 40, 40,10), float64)*4./100.
        self.assertTrue( all(
            p == parse_prior( s,  unambiguous_dna_alphabet ) ) )


class test_logooptions(unittest.TestCase) :
    def test_create(self) :
        opt = LogoOptions()
        opt.small_fontsize =10
        options = repr(opt)

        opt = LogoOptions(title="sometitle")
        assert opt.title == "sometitle"

class test_logosize(unittest.TestCase) :
    def test_create(self) :
        s = LogoSize(101.0,10.0)
        assert s.stack_width == 101.0
        r = repr(s)


class test_seqlogo(unittest.TestCase) :
    # FIXME: The version of python used by Popen may not be the
    # same as that used to run this test.
    def _exec(self,  args, outputtext, returncode =0, stdin=None) :
        if not stdin :
            stdin = testdata_stream("cap.fa")
        args = ["./weblogo"] + args
        p = Popen(args,stdin=stdin,stdout=PIPE, stderr=PIPE)
        (out, err) = p.communicate()
        if returncode ==0 and p.returncode >0 :
            print err
        self.assertEquals(returncode, p.returncode)
        if returncode == 0 : self.assertEquals( len(err), 0)

        for item in outputtext :
            self.failUnless(item in out)


    def test_malformed_options(self) :
        self._exec( ["--notarealoption"], [], 2)
        self._exec( ["extrajunk"], [], 2)
        self._exec( ["-I"], [], 2)

    def test_help_option(self) :
        self._exec( ["-h"], ["options"])
        self._exec( ["--help"], ["options"])

    def test_version_option(self) :
        self._exec( ['--version'], weblogolib.__version__)


    def test_default_build(self) :
        self._exec( [], ["%%Title:        Sequence Logo:"] )


    # Format options
    def test_width(self) :
        self._exec( ['-W','1234'], ["/stack_width         1234"] )
        self._exec( ['--stack-width','1234'], ["/stack_width         1234"] )

    def test_height(self) :
        self._exec( ['-H','1234'], ["/stack_height        1234"] )
        self._exec( ['--stack-height','1234'], ["/stack_height        1234"] )


    def test_stacks_per_line(self) :
        self._exec( ['-n','7'], ["/stacks_per_line     7 def"] )
        self._exec( ['--stacks-per-line','7'], ["/stacks_per_line     7 def"] )


    def test_title(self) :
        self._exec( ['-t', '3456'], ['/logo_title         (3456) def',
            '/show_title         True def'])
        self._exec( ['-t', ''], ['/logo_title         () def',
            '/show_title         False def'])
        self._exec( ['--title', '3456'], ['/logo_title         (3456) def',
            '/show_title         True def'])


class test_which(unittest.TestCase) :
    def test_which(self):
        tests = (
        (seq_io.read(testdata_stream('cap.fa')), codon_alphabetT),
        (seq_io.read(testdata_stream('capu.fa')), codon_alphabetU),

            #(seq_io.read(testdata_stream('cox2.msf')), unambiguous_protein_alphabet),
            #(seq_io.read(testdata_stream('Rv3829c.fasta')), unambiguous_protein_alphabet),
        )
        for t in tests :
            self.failUnlessEqual(which_alphabet(t[0]), t[1])


class test_colorscheme(unittest.TestCase) :

    def test_colorgroup(self) :
        cr = ColorGroup( "ABC", "black", "Because")
        self.assertEquals( cr.description, "Because")

    def test_colorscheme(self) :
        cs = ColorScheme([
                ColorGroup("G", "orange"),
                ColorGroup("TU", "red"),
                ColorGroup("C",  "blue"),
                ColorGroup("A",  "green")
            ],
            title = "title",
            description = "description",
            )

        self.assertEquals( cs.color('A'), Color.by_name("green"))
        self.assertEquals( cs.color('X'), cs.default_color)


class test_color(unittest.TestCase) :
    # 2.3 Python compatibility
    assertTrue  = unittest.TestCase.failUnless
    assertFalse = unittest.TestCase.failIf

    def test_color_names(self) :
        names = Color.names()
        self.failUnlessEqual( len(names), 147)

        for n in names:
            c = Color.by_name(n)
            self.assertTrue( c != None )


    def test_color_components(self) :
        white = Color.by_name("white")
        self.failUnlessEqual( 1.0, white.red)
        self.failUnlessEqual( 1.0, white.green)
        self.failUnlessEqual( 1.0, white.blue)


        c = Color(0.3, 0.4, 0.2)
        self.failUnlessEqual( 0.3, c.red)
        self.failUnlessEqual( 0.4, c.green)
        self.failUnlessEqual( 0.2, c.blue)

        c = Color(0,128,0)
        self.failUnlessEqual( 0.0, c.red)
        self.failUnlessEqual( 128./255., c.green)
        self.failUnlessEqual( 0.0, c.blue)


    def test_color_from_rgb(self) :
        white = Color.by_name("white")

        self.failUnlessEqual(white, Color(1.,1.,1.) )
        self.failUnlessEqual(white, Color(255,255,255) )
        self.failUnlessEqual(white, Color.from_rgb(1.,1.,1.) )
        self.failUnlessEqual(white, Color.from_rgb(255,255,255) )


    def test_color_from_hsl(self) :
        red = Color.by_name("red")
        lime = Color.by_name("lime")
        saddlebrown = Color.by_name("saddlebrown")
        darkgreen = Color.by_name("darkgreen")
        blue = Color.by_name("blue")
        green = Color.by_name("green")

        self.failUnlessEqual(red, Color.from_hsl(0, 1.0,0.5) )
        self.failUnlessEqual(lime, Color.from_hsl(120, 1.0, 0.5) )
        self.failUnlessEqual(blue, Color.from_hsl(240, 1.0, 0.5) )
        self.failUnlessEqual(Color.by_name("gray"), Color.from_hsl(0,0,0.5) )

        self.failUnlessEqual(saddlebrown, Color.from_hsl(25, 0.76, 0.31) )

        self.failUnlessEqual(darkgreen, Color.from_hsl(120, 1.0, 0.197) )


    def test_color_by_name(self):
        white = Color.by_name("white")
        self.failUnlessEqual(white, Color.by_name("white"))
        self.failUnlessEqual(white, Color.by_name("WHITE"))
        self.failUnlessEqual(white, Color.by_name(" wHiTe \t\n\t"))


        self.failUnlessEqual(Color(255,255,240), Color.by_name("ivory"))
        self.failUnlessEqual(Color(70,130,180), Color.by_name("steelblue"))

        self.failUnlessEqual(Color(0,128,0), Color.by_name("green"))


    def test_color_from_invalid_name(self):
        self.failUnlessRaises( ValueError, Color.by_name, "not_a_color")


    def test_color_clipping(self):
        red = Color.by_name("red")
        self.failUnlessEqual(red, Color(255,0,0) )
        self.failUnlessEqual(red, Color(260,-10,0) )
        self.failUnlessEqual(red, Color(1.1,-0.,-1.) )

        self.failUnlessEqual( Color(1.0001,   213.0,  1.2).red, 1.0 )
        self.failUnlessEqual( Color(-0.001, -2183.0, -1.0).red, 0.0 )
        self.failUnlessEqual( Color(1.0001,   213.0,  1.2).green, 1.0 )
        self.failUnlessEqual( Color(-0.001, -2183.0, -1.0).green, 0.0 )
        self.failUnlessEqual( Color(1.0001,   213.0,  1.2).blue, 1.0 )
        self.failUnlessEqual( Color(-0.001, -2183.0, -1.0).blue, 0.0 )


    def test_color_fail_on_mixed_type(self):
        self.failUnlessRaises( TypeError, Color.from_rgb, 1,1,1.0 )
        self.failUnlessRaises( TypeError, Color.from_rgb, 1.0,1,1.0 )

    def test_color_red(self) :
        # Check Usage comment in Color
        red = Color.by_name("red")
        self.failUnlessEqual( red , Color(255,0,0) )
        self.failUnlessEqual( red,  Color(1., 0., 0.) )

        self.failUnlessEqual( red , Color.from_rgb(1.,0.,0.) )
        self.failUnlessEqual( red , Color.from_rgb(255,0,0) )
        self.failUnlessEqual( red , Color.from_hsl(0.,1., 0.5) )

        self.failUnlessEqual( red , Color.from_string("red") )
        self.failUnlessEqual( red , Color.from_string("RED") )
        self.failUnlessEqual( red , Color.from_string("#F00") )
        self.failUnlessEqual( red , Color.from_string("#FF0000") )
        self.failUnlessEqual( red , Color.from_string("rgb(255, 0, 0)") )
        self.failUnlessEqual( red , Color.from_string("rgb(100%, 0%, 0%)") )
        self.failUnlessEqual( red , Color.from_string("hsl(0, 100%, 50%)") )


    def test_color_from_string(self) :
        purple = Color(128,0,128)
        red    = Color(255,0,0)
        skyblue = Color(135,206,235)

        red_strings = ("red",
                        "ReD",
                        "RED",
                        "   Red \t",
                        "#F00",
                        "#FF0000",
                        "rgb(255, 0, 0)",
                        "rgb(100%, 0%, 0%)",
                        "hsl(0, 100%, 50%)")

        for s in red_strings:
            self.failUnlessEqual( red, Color.from_string(s) )

        skyblue_strings = ("skyblue",
                        "SKYBLUE",
                        "  \t\n SkyBlue  \t",
                        "#87ceeb",
                        "rgb(135,206,235)"
                        )

        for s in skyblue_strings:
            self.failUnlessEqual( skyblue, Color.from_string(s) )


    def test_color_equality(self):
        c1 = Color(123,99,12)
        c2 = Color(123,99,12)

        self.failUnlessEqual(c1,c2)


class test_Dirichlet(unittest.TestCase) :
    # 2.3 Python compatibility
    assertTrue  = unittest.TestCase.failUnless
    assertFalse = unittest.TestCase.failIf


    def test_init(self) :
        d = Dirichlet( ( 1,1,1,1,) )


    def test_random(self) :


        def do_test( alpha, samples = 1000) :
            ent = zeros( (samples,), float64)
            #alpha = ones( ( K,), Float64 ) * A/K

            #pt = zeros( (len(alpha) ,), Float64)
            d = Dirichlet(alpha)
            for s in range(samples) :
                p = d.sample()
                #print p
                #pt +=p
                ent[s] = entropy(p)

            #print pt/samples

            m = mean(ent)
            v = var(ent)

            dm = d.mean_entropy()
            dv = d.variance_entropy()

           #print alpha, ':', m, v, dm, dv
            error = 4. * sqrt(v/samples)
            self.assertTrue( abs(m-dm) < error)
            self.assertTrue( abs(v-dv) < error) # dodgy error estimate


        do_test( (1., 1.) )
        do_test( (2., 1.) )
        do_test( (3., 1.) )
        do_test( (4., 1.) )
        do_test( (5., 1.) )
        do_test( (6., 1.) )

        do_test( (1., 1.) )
        do_test( (20., 20.) )
        do_test( (1., 1., 1., 1., 1., 1., 1., 1., 1., 1.) )
        do_test( (.1, .1, .1, .1, .1, .1, .1, .1, .1, .1) )
        do_test( (.01, .01, .01, .01, .01, .01, .01, .01, .01, .01) )
        do_test( (2.0, 6.0, 1.0, 1.0) )


    def test_mean(self) :
        alpha = ones( ( 10,), float64 ) * 23.
        d = Dirichlet(alpha)
        m = d.mean()
        self.assertAlmostEqual( m[2], 1./10)
        self.assertAlmostEqual( sum(m), 1.0)

    def test_covariance(self) :
        alpha = ones( ( 4,), float64 )
        d = Dirichlet(alpha)
        cv = d.covariance()
        self.assertEqual( cv.shape, (4,4)  )
        self.assertAlmostEqual( cv[0,0], 1.0 * (1.0 - 1./4.0)/ (4.0 * 5.0)  )
        self.assertAlmostEqual( cv[0,1],  - 1 / ( 4. * 4. * 5.) )

    def test_mean_x(self) :
        alpha = (1.0, 2.0, 3.0, 4.0)
        xx = (2.0, 2.0, 2.0, 2.0)
        m = Dirichlet(alpha).mean_x(xx)
        self.assertEquals( m, 2.0)

        alpha = (1.0, 1.0, 1.0, 1.0)
        xx = (2.0, 3.0, 4.0, 3.0)
        m = Dirichlet(alpha).mean_x(xx)
        self.assertEquals( m, 3.0)

    def test_variance_x(self) :
        alpha = (1.0, 1.0, 1.0, 1.0)
        xx = (2.0, 2.0, 2.0, 2.0)
        v = Dirichlet(alpha).variance_x(xx)
        self.assertAlmostEquals( v, 0.0)

        alpha = (1.0, 2.0, 3.0, 4.0)
        xx = (2.0, 0.0, 1.0, 10.0)
        v = Dirichlet(alpha).variance_x(xx)
        #print v
        # TODO: Don't actually know if this is correct

    def test_relative_entropy(self):
        alpha = (2.0, 10.0, 1.0, 1.0)
        d = Dirichlet(alpha)
        pvec = (0.1, 0.2, 0.3, 0.4)

        rent = d.mean_relative_entropy(pvec)
        vrent = d.variance_relative_entropy(pvec)
        low, high = d.interval_relative_entropy(pvec, 0.95)

        #print
        #print '> ', rent, vrent, low, high

        # This test can fail randomly, but the precision form a few
        # thousand samples is low. Increasing samples, 1000->2000
        samples = 2000
        sent = zeros( (samples,), float64)

        for s in range(samples) :
            post = d.sample()
            e = -entropy(post)
            for k in range(4) :
                e += - post[k] * log(pvec[k])
            sent[s] = e
        sent.sort()
        self.assertTrue( abs(sent.mean() - rent) < 4.*sqrt(vrent) )
        self.assertAlmostEqual( sent.std(), sqrt(vrent), 1 )
        self.assertTrue( abs(low-sent[ int( samples *0.025)])<0.2 )
        self.assertTrue( abs(high-sent[ int( samples *0.975)])<0.2 )

        #print '>>', mean(sent), var(sent), sent[ int( samples *0.025)] ,sent[ int( samples *0.975)]


def mean( a) :
    return sum(a)/ len(a)

def var(a) :
    return (sum(a*a) /len(a) ) - mean(a)**2


if __name__ == '__main__':
    unittest.main()
author	davidmurphy
date	Mon, 30 Jan 2012 21:17:50 -0500
parents	c55bdc2fb9fa
children