Mercurial > repos > padge > clipkit
view clipkit_repo/tests/unit/test_helpers.py @ 0:49b058e85902 draft
"planemo upload for repository https://github.com/jlsteenwyk/clipkit commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author | padge |
---|---|
date | Fri, 25 Mar 2022 13:04:31 +0000 |
parents | |
children |
line wrap: on
line source
import pytest import pytest_mock from pathlib import Path import numpy as np from Bio import AlignIO from Bio import SeqIO from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.Align import MultipleSeqAlignment from clipkit.helpers import count_characters_at_position from clipkit.helpers import get_sequence_at_position_and_report_features from clipkit.helpers import parsimony_informative_or_constant from clipkit.helpers import populate_empty_keepD_and_trimD from clipkit.helpers import join_keepD_and_trimD from clipkit.helpers import write_trimD from clipkit.helpers import write_keepD from clipkit.files import FileFormat here = Path(__file__) @pytest.fixture def sample_msa(): return MultipleSeqAlignment( [ SeqRecord( seq=Seq("['A']"), id="1", name="<unknown name>", description="", dbxrefs=[], ), SeqRecord( seq=Seq("['A']"), id="2", name="<unknown name>", description="", dbxrefs=[], ), SeqRecord( seq=Seq("['A']"), id="3", name="<unknown name>", description="", dbxrefs=[], ), SeqRecord( seq=Seq("['A']"), id="4", name="<unknown name>", description="", dbxrefs=[], ), SeqRecord( seq=Seq("['A']"), id="5", name="<unknown name>", description="", dbxrefs=[], ), ] ) class TestCountCharactersAtPosition(object): def test_gives_count_for_each_char(self): ## setup s = "ACTTTGGG" ## execution res = count_characters_at_position(s) ## check results # test that each character has an associated key for char in s: assert char in res.keys() # test that the len of the res is equal to the # number of unique string characters assert len(res) == len(set(s)) class TestGetSequenceAtPositionAndReportFeatures(object): def test_gets_sequence_and_gappyness(self): ## setup alignment = AlignIO.read(f"{here.parent}/examples/simple.fa", "fasta") i = int(5) ## execution seq, gappyness = get_sequence_at_position_and_report_features(alignment, i) ## check results # test output types assert isinstance(seq, str) assert isinstance(gappyness, float) class TestParsimonyInformativeOrConstant(object): def test_parsimony_informative_or_constant(self): ## set up # pi = parsimony informative num_occurences_pi = {"A": 5, "T": 10, "G": 2, "C": 4} # npi = not parsimony informative num_occurences_npi = {"A": 1, "T": 10, "G": 1} # Const = constant num_occurences_const = {"A": 10} # nConst = not constant num_occurences_nconst = {"A": 1} ## execution # result is True and False ( is_parsimony_informative, constant_site_holder_is_pi, ) = parsimony_informative_or_constant(num_occurences_pi) # result is False and False ( is_not_parsimony_informative, constant_site_holder_is_npi, ) = parsimony_informative_or_constant(num_occurences_npi) # result is False and True is_not_pi_0, is_constant_site = parsimony_informative_or_constant( num_occurences_const ) # result is False and False is_not_pi_1, is_not_constant_site = parsimony_informative_or_constant( num_occurences_nconst ) ## check results assert is_parsimony_informative == True and constant_site_holder_is_pi == False assert ( is_not_parsimony_informative == False and constant_site_holder_is_npi == False ) assert is_not_pi_0 == False and is_constant_site == True assert is_not_pi_1 == False and is_not_constant_site == False class TestPopulateEmptyKeepDAndTrimD(object): def test_populate_empty_keepD_and_trimD(self): ## set up alignment = AlignIO.read(f"{here.parent}/examples/simple.fa", "fasta") ## execution keepD, trimD = populate_empty_keepD_and_trimD(alignment) ## check results expected_keepD = { "1": np.zeros([6], dtype=bytes), "2": np.zeros([6], dtype=bytes), "3": np.zeros([6], dtype=bytes), "4": np.zeros([6], dtype=bytes), "5": np.zeros([6], dtype=bytes), } expected_trimD = { "1": np.zeros([6], dtype=bytes), "2": np.zeros([6], dtype=bytes), "3": np.zeros([6], dtype=bytes), "4": np.zeros([6], dtype=bytes), "5": np.zeros([6], dtype=bytes), } assert expected_keepD.keys() == keepD.keys() assert all( np.array_equal(expected_keepD[key], keepD[key]) for key in expected_keepD ) assert expected_trimD.keys() == trimD.keys() assert all( np.array_equal(expected_trimD[key], trimD[key]) for key in expected_trimD ) class TestJoinKeepDAndTrimD(object): def test_join_keepD_and_trimD(self): ## set up keepD = { '1': np.array([b'A', b'-', b'G', b'T', b'A', b'T'], dtype='|S1'), '2': np.array([b'A', b'-', b'G', b'-', b'A', b'T'], dtype='|S1'), '3': np.array([b'A', b'-', b'G', b'-', b'T', b'A'], dtype='|S1'), '4': np.array([b'A', b'G', b'A', b'-', b'T', b'A'], dtype='|S1'), '5': np.array([b'A', b'C', b'a', b'-', b'T', b'-'], dtype='|S1') } trimD = { '1': np.array([b'', b'', b'', b'', b'', b''], dtype='|S1'), '2': np.array([b'', b'', b'', b'', b'', b''], dtype='|S1'), '3': np.array([b'', b'', b'', b'', b'', b''], dtype='|S1'), '4': np.array([b'', b'', b'', b'', b'', b''], dtype='|S1'), '5': np.array([b'', b'', b'', b'', b'', b''], dtype='|S1') } ## execution keepD, trimD = join_keepD_and_trimD(keepD, trimD) ## check results expected_keepD = { "1": "A-GTAT", "2": "A-G-AT", "3": "A-G-TA", "4": "AGA-TA", "5": "ACa-T-", } expected_trimD = {"1": "", "2": "", "3": "", "4": "", "5": ""} assert expected_keepD == keepD assert expected_trimD == trimD class TestWriteKeepD(object): def test_write_keepD_writes_file(self, mocker, sample_msa): ## set up keepD = {"1": ["A"], "2": ["A"], "3": ["A"], "4": ["A"], "5": ["A"]} out_file = "output_file_name.fa" out_file_format = FileFormat.fasta mock_msa = mocker.patch("clipkit.helpers.MultipleSeqAlignment") mock_msa.return_value = sample_msa mock_write = mocker.patch("clipkit.helpers.SeqIO.write") ## execution write_keepD(keepD, out_file, out_file_format) ## check results mock_write.assert_called_once_with(sample_msa, out_file, out_file_format.value) class TestWriteTrimD(object): def test_write_trimD_calls_seqio_write(self, mocker, sample_msa): ## set up trimD = {"1": ["A"], "2": ["A"], "3": ["A"], "4": ["A"], "5": ["A"]} out_file = "output_file_name.fa" out_file_format = FileFormat.fasta mock_msa = mocker.patch("clipkit.helpers.MultipleSeqAlignment") mock_msa.return_value = sample_msa mock_write = mocker.patch("Bio.SeqIO.write") ## execution write_trimD(trimD, out_file, out_file_format) ## check results expected_completmentOut = f"{out_file}.complement" mock_write.assert_called_once_with( sample_msa, expected_completmentOut, out_file_format.value )