Mercurial > repos > davidmurphy > codonlogo
diff test_weblogo.py @ 0:c55bdc2fb9fa
Uploaded
author | davidmurphy |
---|---|
date | Thu, 27 Oct 2011 12:09:09 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_weblogo.py Thu Oct 27 12:09:09 2011 -0400 @@ -0,0 +1,544 @@ +#!/usr/bin/env python + +# Copyright (c) 2006, The Regents of the University of California, through +# Lawrence Berkeley National Laboratory (subject to receipt of any required +# approvals from the U.S. Dept. of Energy). All rights reserved. + +# This software is distributed under the new BSD Open Source License. +# <http://www.opensource.org/licenses/bsd-license.html> +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# (1) Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# (2) Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and or other materials provided with the distribution. +# +# (3) Neither the name of the University of California, Lawrence Berkeley +# National Laboratory, U.S. Dept. of Energy nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + + +import unittest + +import weblogolib +from weblogolib import * +from weblogolib import parse_prior, GhostscriptAPI +from weblogolib.color import * +from weblogolib.colorscheme import * +from StringIO import StringIO +import sys + +from numpy import array, asarray, float64, ones, zeros, int32,all,any, shape +import numpy as na + +from corebio import seq_io +from corebio.seq import * + +# python2.3 compatability +from corebio._future.subprocess import * +from corebio._future import resource_stream + +from corebio.moremath import entropy +from math import log, sqrt +codon_alphabetU=['AAA', 'AAC', 'AAG', 'AAU', 'ACA', 'ACC', 'ACG', 'ACU', 'AGA', 'AGC', 'AGG', 'AGU', 'AUA', 'AUC', 'AUG', 'AUU', 'CAA', 'CAC', 'CAG', 'CAU', 'CCA', 'CCC', 'CCG', 'CCU', 'CGA', 'CGC', 'CGG', 'CGU', 'CUA', 'CUC', 'CUG', 'CUU', 'GAA', 'GAC', 'GAG', 'GAU', 'GCA', 'GCC', 'GCG', 'GCU', 'GGA', 'GGC', 'GGG', 'GGU', 'GUA', 'GUC', 'GUG', 'GUU', 'UAA', 'UAC', 'UAG', 'UAU', 'UCA', 'UCC', 'UCG', 'UCU', 'UGA', 'UGC', 'UGG', 'UGU', 'UUA', 'UUC', 'UUG', 'UUU'] +codon_alphabetT=['AAA', 'AAC', 'AAG', 'AAT', 'ACA', 'ACC', 'ACG', 'ACT', 'AGA', 'AGC', 'AGG', 'AGT', 'ATA', 'ATC', 'ATG', 'ATT', 'CAA', 'CAC', 'CAG', 'CAT', 'CCA', 'CCC', 'CCG', 'CCT', 'CGA', 'CGC', 'CGG', 'CGT', 'CTA', 'CTC', 'CTG', 'CTT', 'GAA', 'GAC', 'GAG', 'GAT', 'GCA', 'GCC', 'GCG', 'GCT', 'GGA', 'GGC', 'GGG', 'GGT', 'GTA', 'GTC', 'GTG', 'GTT', 'TAA', 'TAC', 'TAG', 'TAT', 'TCA', 'TCC', 'TCG', 'TCT', 'TGA', 'TGC', 'TGG', 'TGT', 'TTA', 'TTC', 'TTG', 'TTT'] + + +def testdata_stream( name ): + return resource_stream(__name__, 'tests/data/'+name, __file__) + +class test_logoformat(unittest.TestCase) : + + def test_options(self) : + options = LogoOptions() + + +class test_ghostscript(unittest.TestCase) : + def test_version(self) : + version = GhostscriptAPI().version + + + +class test_parse_prior(unittest.TestCase) : + def assertTrue(self, bool) : + self.assertEquals( bool, True) + + def test_parse_prior_none(self) : + self.assertEquals( None, + parse_prior(None, unambiguous_protein_alphabet ) ) + self.assertEquals( None, + parse_prior( 'none', unambiguous_protein_alphabet ) ) + self.assertEquals( None, + parse_prior( 'noNe', None) ) + + def test_parse_prior_equiprobable(self) : + self.assertTrue( all(20.*equiprobable_distribution(20) == + parse_prior( 'equiprobable', unambiguous_protein_alphabet ) ) ) + + self.assertTrue( + all( 1.2* equiprobable_distribution(3) + == parse_prior( ' equiprobablE ', Alphabet('123'), 1.2 ) ) ) + + def test_parse_prior_percentage(self) : + #print parse_prior( '50%', unambiguous_dna_alphabet, 1. ) + self.assertTrue( all( equiprobable_distribution(4) + == parse_prior( '50%', unambiguous_dna_alphabet, 1. ) ) ) + + self.assertTrue( all( equiprobable_distribution(4) + == parse_prior( ' 50.0 % ', unambiguous_dna_alphabet, 1. ) ) ) + + self.assertTrue( all( array( (0.3,0.2,0.2,0.3), float64) + == parse_prior( ' 40.0 % ', unambiguous_dna_alphabet, 1. ) ) ) + + def test_parse_prior_float(self) : + self.assertTrue( all( equiprobable_distribution(4) + == parse_prior( '0.5', unambiguous_dna_alphabet, 1. ) ) ) + + self.assertTrue( all( equiprobable_distribution(4) + == parse_prior( ' 0.500 ', unambiguous_dna_alphabet, 1. ) ) ) + + self.assertTrue( all( array( (0.3,0.2,0.2,0.3), float64) + == parse_prior( ' 0.40 ', unambiguous_dna_alphabet, 1. ) ) ) + + def test_auto(self) : + self.assertTrue( all(4.*equiprobable_distribution(4) == + parse_prior( 'auto', unambiguous_dna_alphabet ) ) ) + self.assertTrue( all(4.*equiprobable_distribution(4) == + parse_prior( 'automatic', unambiguous_dna_alphabet ) ) ) + + def test_weight(self) : + self.assertTrue( all(4.*equiprobable_distribution(4) == + parse_prior( 'automatic', unambiguous_dna_alphabet ) ) ) + self.assertTrue( all(123.123*equiprobable_distribution(4) == + parse_prior( 'auto', unambiguous_dna_alphabet , 123.123) ) ) + + def test_explicit(self) : + s = "{'A':10, 'C':40, 'G':40, 'T':10}" + p = array( (10, 40, 40,10), float64)*4./100. + self.assertTrue( all( + p == parse_prior( s, unambiguous_dna_alphabet ) ) ) + + +class test_logooptions(unittest.TestCase) : + def test_create(self) : + opt = LogoOptions() + opt.small_fontsize =10 + options = repr(opt) + + opt = LogoOptions(title="sometitle") + assert opt.title == "sometitle" + +class test_logosize(unittest.TestCase) : + def test_create(self) : + s = LogoSize(101.0,10.0) + assert s.stack_width == 101.0 + r = repr(s) + + +class test_seqlogo(unittest.TestCase) : + # FIXME: The version of python used by Popen may not be the + # same as that used to run this test. + def _exec(self, args, outputtext, returncode =0, stdin=None) : + if not stdin : + stdin = testdata_stream("cap.fa") + args = ["./weblogo"] + args + p = Popen(args,stdin=stdin,stdout=PIPE, stderr=PIPE) + (out, err) = p.communicate() + if returncode ==0 and p.returncode >0 : + print err + self.assertEquals(returncode, p.returncode) + if returncode == 0 : self.assertEquals( len(err), 0) + + for item in outputtext : + self.failUnless(item in out) + + + + def test_malformed_options(self) : + self._exec( ["--notarealoption"], [], 2) + self._exec( ["extrajunk"], [], 2) + self._exec( ["-I"], [], 2) + + def test_help_option(self) : + self._exec( ["-h"], ["options"]) + self._exec( ["--help"], ["options"]) + + def test_version_option(self) : + self._exec( ['--version'], weblogolib.__version__) + + + def test_default_build(self) : + self._exec( [], ["%%Title: Sequence Logo:"] ) + + + # Format options + def test_width(self) : + self._exec( ['-W','1234'], ["/stack_width 1234"] ) + self._exec( ['--stack-width','1234'], ["/stack_width 1234"] ) + + def test_height(self) : + self._exec( ['-H','1234'], ["/stack_height 1234"] ) + self._exec( ['--stack-height','1234'], ["/stack_height 1234"] ) + + + def test_stacks_per_line(self) : + self._exec( ['-n','7'], ["/stacks_per_line 7 def"] ) + self._exec( ['--stacks-per-line','7'], ["/stacks_per_line 7 def"] ) + + + def test_title(self) : + self._exec( ['-t', '3456'], ['/logo_title (3456) def', + '/show_title True def']) + self._exec( ['-t', ''], ['/logo_title () def', + '/show_title False def']) + self._exec( ['--title', '3456'], ['/logo_title (3456) def', + '/show_title True def']) + + + + + +class test_which(unittest.TestCase) : + def test_which(self): + tests = ( + (seq_io.read(testdata_stream('cap.fa')), codon_alphabetT), + (seq_io.read(testdata_stream('capu.fa')), codon_alphabetU), + + #(seq_io.read(testdata_stream('cox2.msf')), unambiguous_protein_alphabet), + #(seq_io.read(testdata_stream('Rv3829c.fasta')), unambiguous_protein_alphabet), + ) + for t in tests : + self.failUnlessEqual(which_alphabet(t[0]), t[1]) + + + + +class test_colorscheme(unittest.TestCase) : + + def test_colorgroup(self) : + cr = ColorGroup( "ABC", "black", "Because") + self.assertEquals( cr.description, "Because") + + def test_colorscheme(self) : + cs = ColorScheme([ + ColorGroup("G", "orange"), + ColorGroup("TU", "red"), + ColorGroup("C", "blue"), + ColorGroup("A", "green") + ], + title = "title", + description = "description", + ) + + self.assertEquals( cs.color('A'), Color.by_name("green")) + self.assertEquals( cs.color('X'), cs.default_color) + + + +class test_color(unittest.TestCase) : + # 2.3 Python compatibility + assertTrue = unittest.TestCase.failUnless + assertFalse = unittest.TestCase.failIf + + def test_color_names(self) : + names = Color.names() + self.failUnlessEqual( len(names), 147) + + for n in names: + c = Color.by_name(n) + self.assertTrue( c != None ) + + + def test_color_components(self) : + white = Color.by_name("white") + self.failUnlessEqual( 1.0, white.red) + self.failUnlessEqual( 1.0, white.green) + self.failUnlessEqual( 1.0, white.blue) + + + c = Color(0.3, 0.4, 0.2) + self.failUnlessEqual( 0.3, c.red) + self.failUnlessEqual( 0.4, c.green) + self.failUnlessEqual( 0.2, c.blue) + + c = Color(0,128,0) + self.failUnlessEqual( 0.0, c.red) + self.failUnlessEqual( 128./255., c.green) + self.failUnlessEqual( 0.0, c.blue) + + + def test_color_from_rgb(self) : + white = Color.by_name("white") + + self.failUnlessEqual(white, Color(1.,1.,1.) ) + self.failUnlessEqual(white, Color(255,255,255) ) + self.failUnlessEqual(white, Color.from_rgb(1.,1.,1.) ) + self.failUnlessEqual(white, Color.from_rgb(255,255,255) ) + + + def test_color_from_hsl(self) : + red = Color.by_name("red") + lime = Color.by_name("lime") + saddlebrown = Color.by_name("saddlebrown") + darkgreen = Color.by_name("darkgreen") + blue = Color.by_name("blue") + green = Color.by_name("green") + + self.failUnlessEqual(red, Color.from_hsl(0, 1.0,0.5) ) + self.failUnlessEqual(lime, Color.from_hsl(120, 1.0, 0.5) ) + self.failUnlessEqual(blue, Color.from_hsl(240, 1.0, 0.5) ) + self.failUnlessEqual(Color.by_name("gray"), Color.from_hsl(0,0,0.5) ) + + self.failUnlessEqual(saddlebrown, Color.from_hsl(25, 0.76, 0.31) ) + + self.failUnlessEqual(darkgreen, Color.from_hsl(120, 1.0, 0.197) ) + + + def test_color_by_name(self): + white = Color.by_name("white") + self.failUnlessEqual(white, Color.by_name("white")) + self.failUnlessEqual(white, Color.by_name("WHITE")) + self.failUnlessEqual(white, Color.by_name(" wHiTe \t\n\t")) + + + self.failUnlessEqual(Color(255,255,240), Color.by_name("ivory")) + self.failUnlessEqual(Color(70,130,180), Color.by_name("steelblue")) + + self.failUnlessEqual(Color(0,128,0), Color.by_name("green")) + + + def test_color_from_invalid_name(self): + self.failUnlessRaises( ValueError, Color.by_name, "not_a_color") + + + def test_color_clipping(self): + red = Color.by_name("red") + self.failUnlessEqual(red, Color(255,0,0) ) + self.failUnlessEqual(red, Color(260,-10,0) ) + self.failUnlessEqual(red, Color(1.1,-0.,-1.) ) + + self.failUnlessEqual( Color(1.0001, 213.0, 1.2).red, 1.0 ) + self.failUnlessEqual( Color(-0.001, -2183.0, -1.0).red, 0.0 ) + self.failUnlessEqual( Color(1.0001, 213.0, 1.2).green, 1.0 ) + self.failUnlessEqual( Color(-0.001, -2183.0, -1.0).green, 0.0 ) + self.failUnlessEqual( Color(1.0001, 213.0, 1.2).blue, 1.0 ) + self.failUnlessEqual( Color(-0.001, -2183.0, -1.0).blue, 0.0 ) + + + def test_color_fail_on_mixed_type(self): + self.failUnlessRaises( TypeError, Color.from_rgb, 1,1,1.0 ) + self.failUnlessRaises( TypeError, Color.from_rgb, 1.0,1,1.0 ) + + def test_color_red(self) : + # Check Usage comment in Color + red = Color.by_name("red") + self.failUnlessEqual( red , Color(255,0,0) ) + self.failUnlessEqual( red, Color(1., 0., 0.) ) + + self.failUnlessEqual( red , Color.from_rgb(1.,0.,0.) ) + self.failUnlessEqual( red , Color.from_rgb(255,0,0) ) + self.failUnlessEqual( red , Color.from_hsl(0.,1., 0.5) ) + + self.failUnlessEqual( red , Color.from_string("red") ) + self.failUnlessEqual( red , Color.from_string("RED") ) + self.failUnlessEqual( red , Color.from_string("#F00") ) + self.failUnlessEqual( red , Color.from_string("#FF0000") ) + self.failUnlessEqual( red , Color.from_string("rgb(255, 0, 0)") ) + self.failUnlessEqual( red , Color.from_string("rgb(100%, 0%, 0%)") ) + self.failUnlessEqual( red , Color.from_string("hsl(0, 100%, 50%)") ) + + + def test_color_from_string(self) : + purple = Color(128,0,128) + red = Color(255,0,0) + skyblue = Color(135,206,235) + + red_strings = ("red", + "ReD", + "RED", + " Red \t", + "#F00", + "#FF0000", + "rgb(255, 0, 0)", + "rgb(100%, 0%, 0%)", + "hsl(0, 100%, 50%)") + + for s in red_strings: + self.failUnlessEqual( red, Color.from_string(s) ) + + skyblue_strings = ("skyblue", + "SKYBLUE", + " \t\n SkyBlue \t", + "#87ceeb", + "rgb(135,206,235)" + ) + + for s in skyblue_strings: + self.failUnlessEqual( skyblue, Color.from_string(s) ) + + + + def test_color_equality(self): + c1 = Color(123,99,12) + c2 = Color(123,99,12) + + self.failUnlessEqual(c1,c2) + + + + + + +class test_Dirichlet(unittest.TestCase) : + # 2.3 Python compatibility + assertTrue = unittest.TestCase.failUnless + assertFalse = unittest.TestCase.failIf + + + def test_init(self) : + d = Dirichlet( ( 1,1,1,1,) ) + + + def test_random(self) : + + + def do_test( alpha, samples = 1000) : + ent = zeros( (samples,), float64) + #alpha = ones( ( K,), Float64 ) * A/K + + #pt = zeros( (len(alpha) ,), Float64) + d = Dirichlet(alpha) + for s in range(samples) : + p = d.sample() + #print p + #pt +=p + ent[s] = entropy(p) + + #print pt/samples + + m = mean(ent) + v = var(ent) + + dm = d.mean_entropy() + dv = d.variance_entropy() + + #print alpha, ':', m, v, dm, dv + error = 4. * sqrt(v/samples) + self.assertTrue( abs(m-dm) < error) + self.assertTrue( abs(v-dv) < error) # dodgy error estimate + + + do_test( (1., 1.) ) + do_test( (2., 1.) ) + do_test( (3., 1.) ) + do_test( (4., 1.) ) + do_test( (5., 1.) ) + do_test( (6., 1.) ) + + do_test( (1., 1.) ) + do_test( (20., 20.) ) + do_test( (1., 1., 1., 1., 1., 1., 1., 1., 1., 1.) ) + do_test( (.1, .1, .1, .1, .1, .1, .1, .1, .1, .1) ) + do_test( (.01, .01, .01, .01, .01, .01, .01, .01, .01, .01) ) + do_test( (2.0, 6.0, 1.0, 1.0) ) + + + def test_mean(self) : + alpha = ones( ( 10,), float64 ) * 23. + d = Dirichlet(alpha) + m = d.mean() + self.assertAlmostEqual( m[2], 1./10) + self.assertAlmostEqual( sum(m), 1.0) + + def test_covariance(self) : + alpha = ones( ( 4,), float64 ) + d = Dirichlet(alpha) + cv = d.covariance() + self.assertEqual( cv.shape, (4,4) ) + self.assertAlmostEqual( cv[0,0], 1.0 * (1.0 - 1./4.0)/ (4.0 * 5.0) ) + self.assertAlmostEqual( cv[0,1], - 1 / ( 4. * 4. * 5.) ) + + def test_mean_x(self) : + alpha = (1.0, 2.0, 3.0, 4.0) + xx = (2.0, 2.0, 2.0, 2.0) + m = Dirichlet(alpha).mean_x(xx) + self.assertEquals( m, 2.0) + + alpha = (1.0, 1.0, 1.0, 1.0) + xx = (2.0, 3.0, 4.0, 3.0) + m = Dirichlet(alpha).mean_x(xx) + self.assertEquals( m, 3.0) + + def test_variance_x(self) : + alpha = (1.0, 1.0, 1.0, 1.0) + xx = (2.0, 2.0, 2.0, 2.0) + v = Dirichlet(alpha).variance_x(xx) + self.assertAlmostEquals( v, 0.0) + + alpha = (1.0, 2.0, 3.0, 4.0) + xx = (2.0, 0.0, 1.0, 10.0) + v = Dirichlet(alpha).variance_x(xx) + #print v + # TODO: Don't actually know if this is correct + + def test_relative_entropy(self): + alpha = (2.0, 10.0, 1.0, 1.0) + d = Dirichlet(alpha) + pvec = (0.1, 0.2, 0.3, 0.4) + + rent = d.mean_relative_entropy(pvec) + vrent = d.variance_relative_entropy(pvec) + low, high = d.interval_relative_entropy(pvec, 0.95) + + #print + #print '> ', rent, vrent, low, high + + # This test can fail randomly, but the precision form a few + # thousand samples is low. Increasing samples, 1000->2000 + samples = 2000 + sent = zeros( (samples,), float64) + + for s in range(samples) : + post = d.sample() + e = -entropy(post) + for k in range(4) : + e += - post[k] * log(pvec[k]) + sent[s] = e + sent.sort() + self.assertTrue( abs(sent.mean() - rent) < 4.*sqrt(vrent) ) + self.assertAlmostEqual( sent.std(), sqrt(vrent), 1 ) + self.assertTrue( abs(low-sent[ int( samples *0.025)])<0.2 ) + self.assertTrue( abs(high-sent[ int( samples *0.975)])<0.2 ) + + #print '>>', mean(sent), var(sent), sent[ int( samples *0.025)] ,sent[ int( samples *0.975)] + + + +def mean( a) : + return sum(a)/ len(a) + +def var(a) : + return (sum(a*a) /len(a) ) - mean(a)**2 + + + + +if __name__ == '__main__': + unittest.main()