annotate tests/test_cdhit_analysis.py @ 3:c6981ea453ae draft default tip

planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit ef31054ae26e19eff2f1b1f6c7979e39c47c0d5b-dirty
author onnodg
date Fri, 24 Oct 2025 09:38:24 +0000
parents ff68835adb2b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
1 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
2 Test suite for CD-HIT cluster analysis processor.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
3 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
4
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
5 import pytest
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
6 from pathlib import Path
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
7 import pandas as pd
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
8 import os
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
9 import sys
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
10
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
11 # Add module path
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
12 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
13 from Stage_1_translated.NLOOR_scripts.process_clusters_tool.cdhit_analysis import (
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
14 parse_cluster_file,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
15 process_cluster_data,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
16 calculate_cluster_taxa,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
17 write_similarity_output,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
18 write_evalue_output,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
19 write_count_output,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
20 write_taxa_clusters_output,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
21 write_taxa_processed_output,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
22 )
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
23
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
24 class TestCDHitAnalysis:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
25 """Test class for CD-HIT cluster analysis processor using real XLSX test data."""
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
26
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
27 @pytest.fixture(scope="class")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
28 def test_data_dir(self):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
29 """Return path to the test-data directory with real XLSX files."""
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
30 base_dir = Path("Stage_1_translated/NLOOR_scripts/process_clusters_tool/test-data")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
31 assert base_dir.exists(), f"Test data directory does not exist: {base_dir}"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
32 return base_dir
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
33
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
34 @pytest.fixture(scope="class")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
35 def sample_cluster_file(self, test_data_dir):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
36 """Return path to the sample cluster XLSX file."""
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
37 cluster_file = test_data_dir / "29-test.clstr.txt"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
38 assert cluster_file.exists(), f"Sample cluster file not found: {cluster_file}"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
39 return str(cluster_file)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
40
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
41 @pytest.fixture(scope="class")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
42 def sample_annotation_file(self, test_data_dir):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
43 """Return path to the sample annotation XLSX file."""
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
44 annotation_file = test_data_dir / "header_anno_29_test.xlsx"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
45 assert annotation_file.exists(), f"Sample annotation file not found: {annotation_file}"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
46 return str(annotation_file)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
47
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
48 @pytest.fixture(scope="class")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
49 def parsed_clusters(self, sample_cluster_file, sample_annotation_file):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
50 """Parse the sample cluster file with annotations."""
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
51 return parse_cluster_file(sample_cluster_file, sample_annotation_file)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
52
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
53 def test_cluster_parsing_structure(self, parsed_clusters):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
54 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
55 Test 1: Cluster File Parsing Structure
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
56
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
57 Verifies that cluster files are correctly parsed into the expected data structure
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
58 with proper extraction of headers, counts, similarities, and cluster groupings.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
59 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
60 # Should have 4 clusters based on sample data
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
61 # for x in parsed_clusters: print(x);
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
62 assert len(parsed_clusters) == 24, f"Expected 24 clusters, got {len(parsed_clusters)}"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
63
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
64 # Test Cluster 0 structure (3 members)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
65 cluster_0 = parsed_clusters[0]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
66 assert len(cluster_0) == 41, "Cluster 0 should have 41 members"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
67 cluster_3 = parsed_clusters[3]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
68 assert len(cluster_3) == 4, "Cluster 3 should have 4 members"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
69
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
70 # Check specific member data
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
71 assert 'M01687:476:000000000-LL5F5:1:2119:23468:21624_CONS' in cluster_0, "this read should be in cluster 0"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
72 read1_data = cluster_0['M01687:476:000000000-LL5F5:1:2119:23468:21624_CONS']
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
73 assert read1_data['count'] == 1, "read1 count should be 1"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
74 assert read1_data['similarity'] == 97.78, "read1 should be representative (100% similarity)"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
75 assert 'Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Uncertain taxa / Uncertain taxa' in read1_data['taxa'], "read1 should have this taxa"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
76
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
77 # Check non-representative member
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
78 assert 'M01687:476:000000000-LL5F5:1:1107:11168:7701_CONS' in cluster_0, "this read should be in cluster 0"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
79 read2_data = cluster_0['M01687:476:000000000-LL5F5:1:1107:11168:7701_CONS']
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
80 assert read2_data['count'] == 1, "read2 count should be 50"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
81 assert read2_data['similarity'] == 100, "read2 similarity should be 100%"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
82 assert read2_data['taxa'] == "Unannotated read"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
83
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
84 # Test single-member cluster (Cluster 2)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
85 cluster_2 = parsed_clusters[2]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
86 assert len(cluster_2) == 1, "Cluster 2 should have 1 member"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
87 assert 'M01687:476:000000000-LL5F5:1:2108:17627:10678_CONS' in cluster_2, "this read should be in cluster 2"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
88
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
89 print("✓ Test 1 PASSED: Cluster file parsing structure correct")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
90
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
91 def test_annotation_integration(self, parsed_clusters):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
92 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
93 Test 2: Annotation Integration
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
94
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
95 Verifies that annotations from the separate annotation file are correctly
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
96 matched to cluster members based on header names.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
97 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
98 # Check that annotations were properly integrated
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
99 cluster_0 = parsed_clusters[0]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
100
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
101 # Verify e-values are correctly assigned
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
102 assert cluster_0['M01687:476:000000000-LL5F5:1:1102:8813:1648_CONS']['evalue'] == 1.41e-39, "read1 e-value incorrect"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
103 assert cluster_0['M01687:476:000000000-LL5F5:1:1102:23329:6743_CONS']['evalue'] == 2.32e-37, "read2 e-value incorrect"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
104 assert cluster_0['M01687:476:000000000-LL5F5:1:1102:22397:8283_CONS']['evalue'] == 2.32e-37, "read3 e-value incorrect"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
105
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
106 # Verify taxa assignments
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
107 assert 'Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Uncertain taxa / Uncertain taxa' in cluster_0['M01687:476:000000000-LL5F5:1:1102:8813:1648_CONS']['taxa'], "read1 taxa incorrect"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
108 assert 'Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Uncertain taxa / Uncertain taxa' in cluster_0['M01687:476:000000000-LL5F5:1:1102:23329:6743_CONS']['taxa'], "read2 taxa incorrect"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
109 assert 'Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Uncertain taxa / Uncertain taxa' in cluster_0['M01687:476:000000000-LL5F5:1:1102:22397:8283_CONS']['taxa'], "read3 taxa incorrect"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
110
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
111 # Test missing annotation handling (if any reads lack annotations)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
112 # All our test reads have annotations, so this tests the default case
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
113 for cluster in parsed_clusters:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
114 for header, data in cluster.items():
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
115 if data['evalue'] == 'Unannotated read':
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
116 assert data['taxa'] == 'Unannotated read', "Unannotated handling incorrect"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
117
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
118 print("✓ Test 2 PASSED: Annotations correctly integrated with cluster data")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
119
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
120 def test_cluster_data_processing(self, parsed_clusters):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
121 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
122 Test 3: Cluster Data Processing
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
123
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
124 Tests the processing of individual clusters to extract evaluation lists,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
125 similarity lists, and taxa dictionaries with correct count aggregation.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
126 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
127 # Test processing of Cluster 0 (mixed taxa)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
128 cluster_0 = parsed_clusters[0]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
129 eval_list, simi_list, taxa_dict = process_cluster_data(cluster_0)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
130
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
131 # Check eval_list structure
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
132 # for x in eval_list: print(x)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
133 assert eval_list[0] == 2, "Two unannotated reads in this cluster, should be 2"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
134 assert len(eval_list) == 409, "Should have 409 annotated reads + 2 unnanotated reads (counted as 1)"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
135
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
136 # Check that e-values are correctly converted and repeated by count
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
137 eval_values = eval_list[1:] # Skip unannotated count
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
138 read1_evals = [e for e in eval_values if e == 1.41e-39]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
139 assert len(read1_evals) == 365, "Should have 100 instances of read1's e-value"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
140
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
141 # # Check similarity list
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
142 # for x in simi_list: print(x)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
143 assert len(simi_list) == 410, "Should have 410 similarity values"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
144 read1_similarities = [s for s in simi_list if s == 100.0]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
145 assert len(read1_similarities) == 2, "Should have 2 instances of 100% similarity"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
146
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
147 assert taxa_dict['Unannotated read'] == 2, "Unannotated reads should be 2"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
148 assert taxa_dict['Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Uncertain taxa / Uncertain taxa'] == 406, "taxa should be 406"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
149 assert taxa_dict['Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Uncertain taxa / Uncertain taxa / Uncertain taxa'] == 1, "taxa should be 1"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
150 assert taxa_dict['Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Actinidia / Actinidia kolomikta'] == 1, "taxa should be 1"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
151 print("✓ Test 3 PASSED: Cluster data processing produces correct aggregated data")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
152
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
153 def test_taxa_calculation_simple_case(self, parsed_clusters):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
154 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
155 Test 4: Taxa Calculation - Simple Case
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
156
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
157 Tests taxonomic resolution for clusters with clear dominant taxa
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
158 (single taxa or overwhelming majority).
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
159 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
160
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
161 # Create test arguments
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
162 class TestArgs:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
163 uncertain_taxa_use_ratio = 0.5
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
164 min_to_split = 0.45
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
165 min_count_to_split = 10
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
166
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
167 args = TestArgs()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
168
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
169 # Test Cluster 1 (should be clear Archaea)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
170 cluster_5 = parsed_clusters[5]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
171 _, _, taxa_dict_5 = process_cluster_data(cluster_5)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
172
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
173 result_5 = calculate_cluster_taxa(taxa_dict_5, args)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
174 # Should return single taxa group for Archaea
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
175 assert len(result_5) == 1, "Single dominant taxa should not split"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
176 dominant_taxa = list(result_5[0].keys())[0]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
177 assert 'Viridiplantae / Streptophyta / Magnoliopsida / Fagales / Juglandaceae / ' \
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
178 'Uncertain taxa / Uncertain taxa' in dominant_taxa, "Should identify Juglandaceae as dominant"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
179
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
180 # Test single-member cluster (Cluster 2)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
181 cluster_2 = parsed_clusters[2]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
182 _, _, taxa_dict_2 = process_cluster_data(cluster_2)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
183
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
184 result_2 = calculate_cluster_taxa(taxa_dict_2, args)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
185 total = sum(value for d in result_2 for value in d.values())
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
186 assert total == 1, "Single member cluster should not split"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
187
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
188 print("✓ Test 4 PASSED: Simple taxa calculation cases work correctly")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
189
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
190 def test_taxa_calculation_complex_splitting(self, parsed_clusters):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
191 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
192 Test 5: Taxa Calculation - Complex Splitting
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
193
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
194 Tests the recursive taxonomic resolution algorithm for clusters with
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
195 multiple competing taxa that should be split based on thresholds.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
196 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
197
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
198 class TestArgs:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
199 uncertain_taxa_use_ratio = 0.5
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
200 min_to_split = 0.30 # Lower threshold to encourage splitting
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
201 min_count_to_split = 5 # Lower threshold to encourage splitting
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
202
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
203 args = TestArgs()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
204
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
205 # Test Cluster 3 (mixed Firmicutes and Proteobacteria)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
206 cluster_3 = parsed_clusters[3]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
207 _, _, taxa_dict_3 = process_cluster_data(cluster_3)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
208
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
209 # Manual check of expected taxa distribution
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
210 expected_taxa = {}
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
211 for header, data in cluster_3.items():
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
212 taxa = data['taxa']
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
213 count = data['count']
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
214 expected_taxa[taxa] = expected_taxa.get(taxa, 0) + count
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
215
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
216 result_3 = calculate_cluster_taxa(taxa_dict_3, args)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
217
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
218 # With mixed taxa and low thresholds, should potentially split
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
219 # The exact behavior depends on the algorithm implementation
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
220 total_result_count = sum(sum(group.values()) for group in result_3)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
221 expected_total = sum(expected_taxa.values())
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
222
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
223 assert total_result_count == expected_total, "Total counts should be preserved after splitting"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
224
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
225 print("✓ Test 5 PASSED: Complex taxa splitting preserves counts and follows thresholds")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
226
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
227 def test_statistical_calculations(self, parsed_clusters):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
228 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
229 Test 6: Statistical Calculations
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
230
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
231 Verifies that similarity and e-value statistics are calculated correctly
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
232 including averages, standard deviations, and distributions.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
233 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
234 # Process all clusters to get combined data
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
235
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
236 eval_list, simi_list, _ = process_cluster_data(parsed_clusters[5])
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
237 # Test similarity statistics
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
238 if eval_list:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
239 expected_avg = sum(simi_list) / len(simi_list)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
240
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
241 # Manual verification of a few key values
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
242 # From our test data: read1=100% (100 times), read2=96.67% (50 times), etc.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
243 total_similarity_sum = (100.0 * 166) + (98.88 * 9) + 98.86
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
244 total_count = 176
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
245 manual_avg = total_similarity_sum / total_count
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
246
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
247 assert abs(
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
248 expected_avg - manual_avg) < 0.01, f"Similarity average mismatch: expected ~{manual_avg}, got {expected_avg}"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
249
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
250 # Test e-value data structure
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
251 annotated_evals = eval_list[1:]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
252 assert all(isinstance(e, (int, float)) for e in annotated_evals), "All e-values should be numeric"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
253 assert all(e > 0 for e in annotated_evals), "All e-values should be positive"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
254
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
255 print("✓ Test 6 PASSED: Statistical calculations are mathematically correct")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
256
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
257 def test_output_file_formats(self, test_data_dir, sample_cluster_file, sample_annotation_file):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
258 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
259 Test 7: Output File Formats
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
260
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
261 Tests that all output files are created with correct structure and content,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
262 including text files, Excel files with multiple sheets, and plot files.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
263 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
264 output_dir = test_data_dir
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
265
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
266 # Parse data
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
267 clusters = parse_cluster_file(sample_cluster_file, sample_annotation_file)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
268
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
269 # Process all clusters
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
270 cluster_data_list = []
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
271 all_eval_data = [0]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
272 all_simi_data = []
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
273
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
274 for cluster in clusters:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
275 eval_list, simi_list, taxa_dict = process_cluster_data(cluster)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
276 cluster_data_list.append((eval_list, simi_list, taxa_dict))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
277 all_eval_data[0] += eval_list[0]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
278 all_eval_data.extend(eval_list[1:])
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
279 all_simi_data.extend(simi_list)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
280
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
281 # Test similarity output
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
282 simi_output = output_dir / "test_similarity.txt"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
283 write_similarity_output(all_simi_data, str(simi_output))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
284
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
285 assert simi_output.exists(), "Similarity output file not created"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
286 with open(simi_output, 'r') as f:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
287 content = f.read()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
288 assert "# Average similarity:" in content, "Missing average similarity in output"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
289 assert "# Standard deviation:" in content, "Missing standard deviation in output"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
290 assert "similarity\tcount" in content, "Missing header in similarity output"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
291
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
292 # Test e-value output
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
293 eval_output = output_dir / "test_evalue.txt"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
294 write_evalue_output(all_eval_data, str(eval_output))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
295
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
296 assert eval_output.exists(), "E-value output file not created"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
297 with open(eval_output, 'r') as f:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
298 content = f.read()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
299 assert "evalue\tcount" in content, "Missing header in e-value output"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
300
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
301 # Test count output
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
302 count_output = output_dir / "test_count.txt"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
303 write_count_output(all_eval_data, cluster_data_list, str(count_output))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
304
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
305 assert count_output.exists(), "Count output file not created"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
306 with open(count_output, 'r') as f:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
307 content = f.read()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
308 assert "cluster\tunannotated\tannotated" in content, "Missing header in count output"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
309 assert "TOTAL\t" in content, "Missing total row in count output"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
310
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
311 # Test taxa clusters Excel output
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
312 taxa_clusters_output = output_dir / "test_taxa_clusters.xlsx"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
313 write_taxa_clusters_output(cluster_data_list, str(taxa_clusters_output))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
314
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
315 assert taxa_clusters_output.exists(), "Taxa clusters Excel file not created"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
316 df = pd.read_excel(taxa_clusters_output, sheet_name='Raw_Taxa_Clusters')
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
317 expected_columns = ['cluster', 'count', 'taxa_full', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus',
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
318 'species']
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
319 assert all(col in df.columns for col in expected_columns), "Missing columns in taxa clusters output"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
320
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
321 print("✓ Test 7 PASSED: All output file formats are correct and complete")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
322
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
323 def test_taxa_processed_output_structure(self, test_data_dir, sample_cluster_file, sample_annotation_file):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
324 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
325 Test 8: Processed Taxa Output Structure
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
326
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
327 Tests the complex processed taxa Excel output with multiple sheets
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
328 and parameter tracking.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
329 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
330 output_dir = test_data_dir
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
331
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
332 class TestArgs:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
333 uncertain_taxa_use_ratio = 0.6
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
334 min_to_split = 0.35
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
335 min_count_to_split = 15
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
336 show_unannotated_clusters = True
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
337
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
338 args = TestArgs()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
339
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
340 # Parse and process data
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
341 clusters = parse_cluster_file(sample_cluster_file, sample_annotation_file)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
342 cluster_data_list = []
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
343
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
344 for cluster in clusters:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
345 eval_list, simi_list, taxa_dict = process_cluster_data(cluster)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
346 cluster_data_list.append((eval_list, simi_list, taxa_dict))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
347
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
348 # Test processed taxa output
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
349 processed_output = output_dir / "test_processed_taxa.xlsx"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
350 write_taxa_processed_output(cluster_data_list, args, str(processed_output))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
351
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
352 assert processed_output.exists(), "Processed taxa Excel file not created"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
353
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
354 # Check multiple sheets exist
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
355 xl_file = pd.ExcelFile(processed_output)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
356 expected_sheets = ['Processed_Taxa_Clusters', 'Settings']
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
357 assert all(sheet in xl_file.sheet_names for sheet in expected_sheets), "Missing sheets in processed taxa output"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
358
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
359 # Check main data sheet
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
360 df_main = pd.read_excel(processed_output, sheet_name='Processed_Taxa_Clusters')
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
361 expected_columns = ['cluster', 'count', 'taxa_full', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus',
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
362 'species']
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
363 assert all(col in df_main.columns for col in expected_columns), "Missing columns in processed taxa sheet"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
364
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
365 # Check settings sheet
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
366 df_settings = pd.read_excel(processed_output, sheet_name='Settings')
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
367 assert 'Parameter' in df_settings.columns, "Missing Parameter column in settings"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
368 assert 'Value' in df_settings.columns, "Missing Value column in settings"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
369
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
370 # Verify settings values are recorded
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
371 settings_dict = dict(zip(df_settings['Parameter'], df_settings['Value']))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
372 assert settings_dict['uncertain_taxa_use_ratio'] == 0.6, "Settings not correctly recorded"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
373 assert settings_dict['min_to_split'] == 0.35, "Settings not correctly recorded"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
374
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
375 print("✓ Test 8 PASSED: Processed taxa output has correct structure and settings tracking")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
376
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
377 def test_edge_cases(self, test_data_dir):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
378 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
379 Test 9: Edge Cases and Error Handling
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
380
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
381 Tests handling of edge cases like empty files, missing annotations,
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
382 single-member clusters, and malformed input data.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
383 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
384 input_dir = test_data_dir
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
385
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
386 # Test empty cluster file
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
387 empty_cluster = input_dir / "empty_cluster.clstr"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
388 with open(empty_cluster, 'w') as f:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
389 f.write("")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
390
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
391 clusters_empty = parse_cluster_file(str(empty_cluster))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
392 assert len(clusters_empty) == 0, "Empty cluster file should produce no clusters"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
393
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
394 # Test cluster file with no annotations
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
395 simple_cluster = input_dir / "simple_cluster.clstr"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
396 simple_cluster_content = """>Cluster 0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
397 0 100nt, >read_no_anno:50... *
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
398 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
399 with open(simple_cluster, 'w') as f:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
400 f.write(simple_cluster_content)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
401
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
402 with pytest.raises(UnboundLocalError):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
403 parse_cluster_file(str(simple_cluster), raise_on_error=True)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
404
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
405 # Test malformed cluster entries (missing parts)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
406 malformed_cluster = input_dir / "malformed_cluster.clstr"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
407 malformed_content = """>Cluster 0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
408 0 100nt, >read1:50..._CONS(50) *
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
409 invalid_line_without_proper_format
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
410 1 90nt, >read2:25..._CONS(25) at /+/95%
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
411 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
412 annotations_malformed = input_dir / "test_pytest.xlsx"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
413 with open(malformed_cluster, 'w') as f:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
414 f.write(malformed_content)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
415
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
416 clusters_malformed = parse_cluster_file(str(malformed_cluster), str(annotations_malformed))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
417 # Should still parse valid entries and skip invalid ones
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
418 assert len(clusters_malformed) == 1, "Should parse valid entries from malformed file"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
419 assert len(clusters_malformed[0]) == 2, "Should have 2 valid read"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
420 assert clusters_malformed[0]['read1:50..._CONS']['evalue'] == 1.0e-50
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
421 assert clusters_malformed[0]['read2:25..._CONS']['count'] == 25
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
422
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
423 print("✓ Test 9 PASSED: Edge cases handled gracefully without crashes")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
424
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
425 def test_count_preservation_across_processing(self, parsed_clusters):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
426 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
427 Test 10: Count Preservation Across Processing Pipeline
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
428
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
429 Verifies that read counts are preserved throughout the entire processing
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
430 pipeline from cluster parsing through taxa calculation to final output.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
431 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
432 # Calculate expected total counts from original data
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
433 expected_total = 0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
434 for cluster in parsed_clusters:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
435 for header, data in cluster.items():
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
436 expected_total += data['count']
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
437
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
438 # Process through pipeline and verify counts at each stage
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
439 total_from_processing = 0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
440 taxa_processing_totals = []
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
441
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
442 class TestArgs:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
443 uncertain_taxa_use_ratio = 0.5
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
444 min_to_split = 0.45
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
445 min_count_to_split = 10
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
446
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
447 args = TestArgs()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
448
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
449 for cluster in parsed_clusters:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
450 eval_list, simi_list, taxa_dict = process_cluster_data(cluster)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
451
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
452 # Check that cluster processing preserves counts
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
453 cluster_total = eval_list[0] + len(eval_list[1:]) # unannotated + annotated
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
454 cluster_expected = sum(data['count'] for data in cluster.values())
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
455 assert cluster_total == cluster_expected, f"Count mismatch in cluster processing: expected {cluster_expected}, got {cluster_total}"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
456
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
457 total_from_processing += cluster_total
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
458
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
459 # Check taxa calculation preserves counts
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
460 taxa_results = calculate_cluster_taxa(taxa_dict, args)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
461 taxa_total = sum(sum(group.values()) for group in taxa_results)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
462 taxa_processing_totals.append(taxa_total)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
463
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
464 # Verify taxa dict total matches
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
465 taxa_dict_total = sum(taxa_dict.values())
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
466 assert taxa_total <= taxa_dict_total, f"Count mismatch in taxa calculation: expected {taxa_dict_total}, got {taxa_total}"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
467
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
468 # Final verification
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
469 assert total_from_processing == expected_total, f"Total count preservation failed: expected {expected_total}, got {total_from_processing}"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
470
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
471 # Verify sum of all taxa processing equals original
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
472 total_taxa_processed = sum(taxa_processing_totals)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
473 assert total_taxa_processed <= expected_total, f"Taxa processing total mismatch: expected {expected_total}, got {total_taxa_processed}"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
474
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
475 print("✓ Test 10 PASSED: Read counts preserved throughout entire processing pipeline")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
476
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
477 def test_11_parse_arguments_all_flags(self, tmp_path):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
478 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
479 Test 11: Argument Parsing with All Flags
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
480
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
481 Ensures parse_arguments correctly handles all optional flags and values.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
482 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
483 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
484 args = ca.parse_arguments([
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
485 '--input_cluster', str(tmp_path / "dummy.clstr"),
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
486 '--simi_plot_y_min', '90',
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
487 '--simi_plot_y_max', '99',
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
488 '--uncertain_taxa_use_ratio', '0.3',
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
489 '--min_to_split', '0.2',
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
490 '--min_count_to_split', '5',
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
491 '--show_unannotated_clusters',
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
492 '--make_taxa_in_cluster_split',
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
493 '--print_empty_files'
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
494 ])
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
495 assert args.simi_plot_y_min == 90
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
496 assert args.print_empty_files is True
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
497
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
498 def test_12_process_cluster_data_valueerror(self):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
499 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
500 Test 12: Process Cluster Data with Bad E-value
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
501
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
502 Ensures ValueError branches are handled and unannotated counts increase.
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
503 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
504 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
505 cluster = {
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
506 "seq1": {"count": 1, "similarity": 95.0, "taxa": "taxonA", "evalue": "not_a_number"}
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
507 }
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
508 eval_list, simi_list, taxa_dict = ca.process_cluster_data(cluster)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
509 assert eval_list[0] == 1 # unannotated read
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
510
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
511 def test_13_write_similarity_and_evalue_empty(self, tmp_path):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
512 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
513 Test 13: Output Writers with Empty Data
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
514 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
515 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
516 sim_file = tmp_path / "sim.txt"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
517 eval_file = tmp_path / "eval.txt"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
518
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
519 ca.write_similarity_output([], str(sim_file))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
520 assert not sim_file.exists() or sim_file.read_text() == ""
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
521
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
522 ca.write_evalue_output([5], str(eval_file))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
523 assert "unannotated" in eval_file.read_text()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
524
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
525 def test_14_write_count_zero_and_taxa_clusters_incomplete(self, tmp_path):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
526 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
527 Test 14: Count Writer with Zero Data and Taxa Clusters with Incomplete Taxa
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
528 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
529 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
530 count_file = tmp_path / "count.txt"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
531 taxa_file = tmp_path / "taxa.xlsx"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
532
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
533 ca.write_count_output([0], [], str(count_file))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
534 assert "TOTAL" in count_file.read_text()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
535
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
536 cluster_data = [([0], [], {"bad": 1})]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
537 ca.write_taxa_clusters_output(cluster_data, str(taxa_file))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
538 assert taxa_file.exists()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
539
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
540 def test_15_write_taxa_processed_uncertain_and_settings(self, tmp_path):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
541 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
542 Test 15: Processed Taxa Output with Settings
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
543 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
544 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
545
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
546 class Args:
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
547 uncertain_taxa_use_ratio = 0.5
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
548 min_to_split = 0.2
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
549 min_count_to_split = 2
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
550 show_unannotated_clusters = True
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
551
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
552 out_file = tmp_path / "processed.xlsx"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
553 cluster_data = [([0], [], {"Unannotated read": 2})]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
554 ca.write_taxa_processed_output(cluster_data, Args(), str(out_file))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
555 assert out_file.exists()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
556
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
557 def test_16_create_evalue_plot_edge_cases(self, tmp_path):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
558 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
559 Test 16: E-value Plot Edge Cases
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
560 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
561 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
562 out = tmp_path / "plot.png"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
563
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
564 # Only unannotated
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
565 ca.create_evalue_plot([0], [0], str(out))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
566 assert not out.exists() or out.stat().st_size == 0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
567
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
568 # Empty after filtering
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
569 ca.create_evalue_plot([0, ], [], str(out))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
570 assert not out.exists() or out.stat().st_size == 0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
571
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
572 # With valid values
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
573 ca.create_evalue_plot([0, 1e-5, 1e-3], [2], str(out))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
574 assert out.exists()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
575
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
576 def test_17_main_runs_and_prints(self, tmp_path, capsys):
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
577 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
578 Test 17: Main Entry Point
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
579 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
580 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
581 clstr = tmp_path / "simple.clstr"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
582 clstr.write_text(">Cluster 0\n0 100nt, >seq1... *\n")
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
583
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
584 out = tmp_path / "sim.txt"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
585 args = [
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
586 '--input_cluster', str(clstr),
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
587 '--output_similarity_txt', str(out)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
588 ]
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
589 ca.main(args)
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
590 captured = capsys.readouterr()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
591 assert "Processing complete" in captured.out
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
592
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
593
1
ff68835adb2b planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit d771f9fbfd42bcdeda1623d954550882a0863847-dirty
onnodg
parents: 0
diff changeset
594 def test_18a_prepare_evalue_histogram_valid_data(self):
0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
595 """
1
ff68835adb2b planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit d771f9fbfd42bcdeda1623d954550882a0863847-dirty
onnodg
parents: 0
diff changeset
596 Test 18a: prepare_evalue_histogram returns correct counts/bins.
0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
597 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
598 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
599 counts, bins = ca.prepare_evalue_histogram([1e-5, 1e-3, 0.5], [])
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
600 assert counts.sum() == 3 # 3 entries counted
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
601 assert len(bins) == 51 # 50 bins => 51 edges
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
602
1
ff68835adb2b planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit d771f9fbfd42bcdeda1623d954550882a0863847-dirty
onnodg
parents: 0
diff changeset
603 def test_18b_prepare_evalue_histogram_empty(self):
0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
604 """
1
ff68835adb2b planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit d771f9fbfd42bcdeda1623d954550882a0863847-dirty
onnodg
parents: 0
diff changeset
605 Test 18b: prepare_evalue_histogram with empty/invalid data returns (None, None).
0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
606 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
607 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
608 counts, bins = ca.prepare_evalue_histogram([0, None, "bad"], [])
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
609 assert counts is None
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
610 assert bins is None
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
611
1
ff68835adb2b planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit d771f9fbfd42bcdeda1623d954550882a0863847-dirty
onnodg
parents: 0
diff changeset
612 def test_18c_create_evalue_plot_creates_file_and_returns_data(self, tmp_path):
0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
613 """
1
ff68835adb2b planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit d771f9fbfd42bcdeda1623d954550882a0863847-dirty
onnodg
parents: 0
diff changeset
614 Test 18c: create_evalue_plot saves a PNG and returns numeric data.
0
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
615 """
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
616 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
617 out = tmp_path / "eval.png"
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
618 counts, bins = ca.create_evalue_plot_test([1e-5, 1e-3, 0.5], [], str(out))
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
619 assert out.exists()
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
620 assert counts.sum() == 3
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
621 assert len(bins) == 51
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
622
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
623
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
624 if __name__ == "__main__":
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
625 # Run all tests in this file
00d56396b32a planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff changeset
626 pytest.main([__file__])