view clipkit_repo/tests/unit/test_helpers.py @ 0:49b058e85902 draft

"planemo upload for repository https://github.com/jlsteenwyk/clipkit commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author padge
date Fri, 25 Mar 2022 13:04:31 +0000
parents
children
line wrap: on
line source

import pytest
import pytest_mock
from pathlib import Path


import numpy as np
from Bio import AlignIO
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment

from clipkit.helpers import count_characters_at_position
from clipkit.helpers import get_sequence_at_position_and_report_features
from clipkit.helpers import parsimony_informative_or_constant
from clipkit.helpers import populate_empty_keepD_and_trimD
from clipkit.helpers import join_keepD_and_trimD
from clipkit.helpers import write_trimD
from clipkit.helpers import write_keepD
from clipkit.files import FileFormat

# Path of this test module; tests below read their FASTA fixtures from
# the ``examples`` directory next to it (``here.parent``).
here = Path(__file__)


@pytest.fixture
def sample_msa():
    """Return a five-record ``MultipleSeqAlignment`` for the write tests.

    The records have ids ``"1"`` through ``"5"`` and all carry the literal
    sequence text ``['A']``. The write tests use this object as the return
    value of the patched ``MultipleSeqAlignment`` constructor.
    """
    # NOTE(review): the sequence text looks like str(["A"]), i.e. the
    # stringified list values the write tests pass in -- confirm against
    # clipkit.helpers.
    records = [
        SeqRecord(
            seq=Seq("['A']"),
            id=str(record_number),
            name="<unknown name>",
            description="",
            dbxrefs=[],
        )
        for record_number in range(1, 6)
    ]
    return MultipleSeqAlignment(records)


class TestCountCharactersAtPosition:
    """Checks on the mapping returned by ``count_characters_at_position``."""

    def test_gives_count_for_each_char(self):
        """Each distinct input character is a key, and there are no extra keys."""
        ## setup
        column = "ACTTTGGG"

        ## execution
        counts = count_characters_at_position(column)

        ## check results
        # every character of the input must show up among the keys
        observed_keys = counts.keys()
        assert all(char in observed_keys for char in column)

        # one entry per unique character of the input, nothing more
        unique_chars = set(column)
        assert len(counts) == len(unique_chars)


class TestGetSequenceAtPositionAndReportFeatures:
    """Checks on ``get_sequence_at_position_and_report_features``."""

    def test_gets_sequence_and_gappyness(self):
        """The helper returns a ``str`` and a ``float`` for column index 5."""
        ## setup
        fasta_path = f"{here.parent}/examples/simple.fa"
        msa = AlignIO.read(fasta_path, "fasta")
        position = 5

        ## execution
        column, gappyness_value = get_sequence_at_position_and_report_features(
            msa, position
        )

        ## check results
        # only the output types are verified here, not the values
        for value, expected_type in ((column, str), (gappyness_value, float)):
            assert isinstance(value, expected_type)


class TestParsimonyInformativeOrConstant:
    """Checks the pair of flags returned by ``parsimony_informative_or_constant``."""

    def test_parsimony_informative_or_constant(self):
        """Run four character-count tables and compare each returned flag pair."""
        ## set up
        # each case pairs a character-count table with the expected
        # (parsimony informative, constant) result
        cases = [
            # pi = parsimony informative: result is True and False
            ({"A": 5, "T": 10, "G": 2, "C": 4}, (True, False)),
            # npi = not parsimony informative: result is False and False
            ({"A": 1, "T": 10, "G": 1}, (False, False)),
            # Const = constant: result is False and True
            ({"A": 10}, (False, True)),
            # nConst = not constant: result is False and False
            ({"A": 1}, (False, False)),
        ]

        ## execution
        # every case is evaluated before any result is checked
        outcomes = [
            parsimony_informative_or_constant(occurrences)
            for occurrences, _ in cases
        ]

        ## check results
        for (_, expected_flags), outcome in zip(cases, outcomes):
            is_informative, is_constant = outcome
            assert (is_informative, is_constant) == expected_flags


class TestPopulateEmptyKeepDAndTrimD:
    """Checks on the dictionaries built by ``populate_empty_keepD_and_trimD``."""

    def test_populate_empty_keepD_and_trimD(self):
        """Both dicts map ids "1"-"5" to six-element zero-filled bytes arrays."""
        ## set up
        msa = AlignIO.read(f"{here.parent}/examples/simple.fa", "fasta")

        ## execution
        keepD, trimD = populate_empty_keepD_and_trimD(msa)

        ## check results
        record_ids = ("1", "2", "3", "4", "5")
        expected_keepD = {
            record_id: np.zeros([6], dtype=bytes) for record_id in record_ids
        }
        expected_trimD = {
            record_id: np.zeros([6], dtype=bytes) for record_id in record_ids
        }

        # keepD is checked first, then trimD: same keys, then equal arrays
        for expected, observed in (
            (expected_keepD, keepD),
            (expected_trimD, trimD),
        ):
            assert expected.keys() == observed.keys()
            assert all(
                np.array_equal(expected[key], observed[key]) for key in expected
            )


class TestJoinKeepDAndTrimD:
    """Checks on ``join_keepD_and_trimD``."""

    def test_join_keepD_and_trimD(self):
        """Per-record single-byte arrays come back as one string per record."""
        ## set up
        # one byte string per record; each byte becomes one '|S1' array element
        kept_rows = {
            "1": b"A-GTAT",
            "2": b"A-G-AT",
            "3": b"A-G-TA",
            "4": b"AGA-TA",
            "5": b"ACa-T-",
        }
        keepD = {
            record_id: np.array([bytes([code]) for code in row], dtype="|S1")
            for record_id, row in kept_rows.items()
        }

        # nothing was trimmed: six empty byte strings for every record
        trimD = {
            record_id: np.array([b""] * 6, dtype="|S1") for record_id in kept_rows
        }

        ## execution
        joined_keepD, joined_trimD = join_keepD_and_trimD(keepD, trimD)

        ## check results
        expected_keepD = {
            "1": "A-GTAT",
            "2": "A-G-AT",
            "3": "A-G-TA",
            "4": "AGA-TA",
            "5": "ACa-T-",
        }
        expected_trimD = {"1": "", "2": "", "3": "", "4": "", "5": ""}

        assert expected_keepD == joined_keepD
        assert expected_trimD == joined_trimD


class TestWriteKeepD:
    """Checks how ``write_keepD`` hands the kept alignment to ``SeqIO.write``."""

    def test_write_keepD_writes_file(self, mocker, sample_msa):
        """``write_keepD`` calls ``SeqIO.write`` once with the alignment, path and format."""
        ## set up
        keepD = {record_id: ["A"] for record_id in ("1", "2", "3", "4", "5")}
        out_file = "output_file_name.fa"
        out_file_format = FileFormat.fasta
        # the alignment constructor is replaced so it always yields the fixture
        mocker.patch(
            "clipkit.helpers.MultipleSeqAlignment", return_value=sample_msa
        )
        # the writer is replaced so no file is produced and the call is recorded
        write_spy = mocker.patch("clipkit.helpers.SeqIO.write")

        ## execution
        write_keepD(keepD, out_file, out_file_format)

        ## check results
        write_spy.assert_called_once_with(
            sample_msa, out_file, out_file_format.value
        )


class TestWriteTrimD(object):
    """Checks how ``write_trimD`` hands the trimmed alignment to ``SeqIO.write``."""

    def test_write_trimD_calls_seqio_write(self, mocker, sample_msa):
        """``write_trimD`` writes exactly once, to ``<out_file>.complement``.

        ``MultipleSeqAlignment`` and ``SeqIO.write`` are both patched through
        ``clipkit.helpers``, so no alignment is really built and no file is
        written; only the arguments of the write call are verified.
        """
        ## set up
        trimD = {"1": ["A"], "2": ["A"], "3": ["A"], "4": ["A"], "5": ["A"]}
        out_file = "output_file_name.fa"
        out_file_format = FileFormat.fasta
        mock_msa = mocker.patch("clipkit.helpers.MultipleSeqAlignment")
        mock_msa.return_value = sample_msa
        # Patch the writer via the module under test, using the same target
        # as the write_keepD test, rather than reaching into Bio directly.
        mock_write = mocker.patch("clipkit.helpers.SeqIO.write")

        ## execution
        write_trimD(trimD, out_file, out_file_format)

        ## check results
        # the trimmed sites are expected next to the main output file
        expected_complement_out = f"{out_file}.complement"
        mock_write.assert_called_once_with(
            sample_msa, expected_complement_out, out_file_format.value
        )