Mercurial > repos > onnodg > cdhit_analysis
annotate tests/test_cdhit_analysis.py @ 0:00d56396b32a draft
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
| author | onnodg |
|---|---|
| date | Tue, 14 Oct 2025 09:09:46 +0000 |
| parents | |
| children | ff68835adb2b |
| rev | line source |
|---|---|
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
1 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
2 Test suite for CD-HIT cluster analysis processor. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
3 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
4 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
5 import pytest |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
6 from pathlib import Path |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
7 import pandas as pd |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
8 import os |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
9 import sys |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
10 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
11 # Add module path |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
12 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
13 from Stage_1_translated.NLOOR_scripts.process_clusters_tool.cdhit_analysis import ( |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
14 parse_cluster_file, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
15 process_cluster_data, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
16 calculate_cluster_taxa, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
17 write_similarity_output, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
18 write_evalue_output, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
19 write_count_output, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
20 write_taxa_clusters_output, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
21 write_taxa_processed_output, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
22 ) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
23 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
24 class TestCDHitAnalysis: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
25 """Test class for CD-HIT cluster analysis processor using real XLSX test data.""" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
26 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
27 @pytest.fixture(scope="class") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
28 def test_data_dir(self): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
29 """Return path to the test-data directory with real XLSX files.""" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
30 base_dir = Path("Stage_1_translated/NLOOR_scripts/process_clusters_tool/test-data") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
31 assert base_dir.exists(), f"Test data directory does not exist: {base_dir}" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
32 return base_dir |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
33 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
34 @pytest.fixture(scope="class") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
35 def sample_cluster_file(self, test_data_dir): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
36 """Return path to the sample cluster XLSX file.""" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
37 cluster_file = test_data_dir / "29-test.clstr.txt" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
38 assert cluster_file.exists(), f"Sample cluster file not found: {cluster_file}" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
39 return str(cluster_file) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
40 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
41 @pytest.fixture(scope="class") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
42 def sample_annotation_file(self, test_data_dir): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
43 """Return path to the sample annotation XLSX file.""" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
44 annotation_file = test_data_dir / "header_anno_29_test.xlsx" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
45 assert annotation_file.exists(), f"Sample annotation file not found: {annotation_file}" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
46 return str(annotation_file) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
47 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
48 @pytest.fixture(scope="class") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
49 def parsed_clusters(self, sample_cluster_file, sample_annotation_file): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
50 """Parse the sample cluster file with annotations.""" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
51 return parse_cluster_file(sample_cluster_file, sample_annotation_file) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
52 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
53 def test_cluster_parsing_structure(self, parsed_clusters): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
54 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
55 Test 1: Cluster File Parsing Structure |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
56 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
57 Verifies that cluster files are correctly parsed into the expected data structure |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
58 with proper extraction of headers, counts, similarities, and cluster groupings. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
59 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
60 # Should have 4 clusters based on sample data |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
61 # for x in parsed_clusters: print(x); |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
62 assert len(parsed_clusters) == 24, f"Expected 24 clusters, got {len(parsed_clusters)}" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
63 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
64 # Test Cluster 0 structure (3 members) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
65 cluster_0 = parsed_clusters[0] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
66 assert len(cluster_0) == 41, "Cluster 0 should have 41 members" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
67 cluster_3 = parsed_clusters[3] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
68 assert len(cluster_3) == 4, "Cluster 3 should have 4 members" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
69 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
70 # Check specific member data |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
71 assert 'M01687:476:000000000-LL5F5:1:2119:23468:21624_CONS' in cluster_0, "this read should be in cluster 0" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
72 read1_data = cluster_0['M01687:476:000000000-LL5F5:1:2119:23468:21624_CONS'] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
73 assert read1_data['count'] == 1, "read1 count should be 1" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
74 assert read1_data['similarity'] == 97.78, "read1 should be representative (100% similarity)" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
75 assert 'Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Uncertain taxa / Uncertain taxa' in read1_data['taxa'], "read1 should have this taxa" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
76 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
77 # Check non-representative member |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
78 assert 'M01687:476:000000000-LL5F5:1:1107:11168:7701_CONS' in cluster_0, "this read should be in cluster 0" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
79 read2_data = cluster_0['M01687:476:000000000-LL5F5:1:1107:11168:7701_CONS'] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
80 assert read2_data['count'] == 1, "read2 count should be 50" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
81 assert read2_data['similarity'] == 100, "read2 similarity should be 100%" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
82 assert read2_data['taxa'] == "Unannotated read" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
83 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
84 # Test single-member cluster (Cluster 2) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
85 cluster_2 = parsed_clusters[2] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
86 assert len(cluster_2) == 1, "Cluster 2 should have 1 member" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
87 assert 'M01687:476:000000000-LL5F5:1:2108:17627:10678_CONS' in cluster_2, "this read should be in cluster 2" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
88 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
89 print("✓ Test 1 PASSED: Cluster file parsing structure correct") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
90 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
91 def test_annotation_integration(self, parsed_clusters): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
92 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
93 Test 2: Annotation Integration |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
94 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
95 Verifies that annotations from the separate annotation file are correctly |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
96 matched to cluster members based on header names. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
97 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
98 # Check that annotations were properly integrated |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
99 cluster_0 = parsed_clusters[0] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
100 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
101 # Verify e-values are correctly assigned |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
102 assert cluster_0['M01687:476:000000000-LL5F5:1:1102:8813:1648_CONS']['evalue'] == 1.41e-39, "read1 e-value incorrect" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
103 assert cluster_0['M01687:476:000000000-LL5F5:1:1102:23329:6743_CONS']['evalue'] == 2.32e-37, "read2 e-value incorrect" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
104 assert cluster_0['M01687:476:000000000-LL5F5:1:1102:22397:8283_CONS']['evalue'] == 2.32e-37, "read3 e-value incorrect" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
105 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
106 # Verify taxa assignments |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
107 assert 'Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Uncertain taxa / Uncertain taxa' in cluster_0['M01687:476:000000000-LL5F5:1:1102:8813:1648_CONS']['taxa'], "read1 taxa incorrect" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
108 assert 'Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Uncertain taxa / Uncertain taxa' in cluster_0['M01687:476:000000000-LL5F5:1:1102:23329:6743_CONS']['taxa'], "read2 taxa incorrect" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
109 assert 'Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Uncertain taxa / Uncertain taxa' in cluster_0['M01687:476:000000000-LL5F5:1:1102:22397:8283_CONS']['taxa'], "read3 taxa incorrect" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
110 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
111 # Test missing annotation handling (if any reads lack annotations) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
112 # All our test reads have annotations, so this tests the default case |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
113 for cluster in parsed_clusters: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
114 for header, data in cluster.items(): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
115 if data['evalue'] == 'Unannotated read': |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
116 assert data['taxa'] == 'Unannotated read', "Unannotated handling incorrect" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
117 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
118 print("✓ Test 2 PASSED: Annotations correctly integrated with cluster data") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
119 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
120 def test_cluster_data_processing(self, parsed_clusters): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
121 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
122 Test 3: Cluster Data Processing |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
123 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
124 Tests the processing of individual clusters to extract evaluation lists, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
125 similarity lists, and taxa dictionaries with correct count aggregation. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
126 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
127 # Test processing of Cluster 0 (mixed taxa) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
128 cluster_0 = parsed_clusters[0] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
129 eval_list, simi_list, taxa_dict = process_cluster_data(cluster_0) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
130 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
131 # Check eval_list structure |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
132 # for x in eval_list: print(x) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
133 assert eval_list[0] == 2, "Two unannotated reads in this cluster, should be 2" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
134 assert len(eval_list) == 409, "Should have 409 annotated reads + 2 unnanotated reads (counted as 1)" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
135 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
136 # Check that e-values are correctly converted and repeated by count |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
137 eval_values = eval_list[1:] # Skip unannotated count |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
138 read1_evals = [e for e in eval_values if e == 1.41e-39] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
139 assert len(read1_evals) == 365, "Should have 100 instances of read1's e-value" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
140 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
141 # # Check similarity list |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
142 # for x in simi_list: print(x) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
143 assert len(simi_list) == 410, "Should have 410 similarity values" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
144 read1_similarities = [s for s in simi_list if s == 100.0] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
145 assert len(read1_similarities) == 2, "Should have 2 instances of 100% similarity" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
146 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
147 assert taxa_dict['Unannotated read'] == 2, "Unannotated reads should be 2" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
148 assert taxa_dict['Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Uncertain taxa / Uncertain taxa'] == 406, "taxa should be 406" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
149 assert taxa_dict['Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Uncertain taxa / Uncertain taxa / Uncertain taxa'] == 1, "taxa should be 1" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
150 assert taxa_dict['Viridiplantae / Streptophyta / Magnoliopsida / Ericales / Actinidiaceae / Actinidia / Actinidia kolomikta'] == 1, "taxa should be 1" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
151 print("✓ Test 3 PASSED: Cluster data processing produces correct aggregated data") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
152 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
153 def test_taxa_calculation_simple_case(self, parsed_clusters): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
154 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
155 Test 4: Taxa Calculation - Simple Case |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
156 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
157 Tests taxonomic resolution for clusters with clear dominant taxa |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
158 (single taxa or overwhelming majority). |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
159 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
160 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
161 # Create test arguments |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
162 class TestArgs: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
163 uncertain_taxa_use_ratio = 0.5 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
164 min_to_split = 0.45 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
165 min_count_to_split = 10 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
166 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
167 args = TestArgs() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
168 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
169 # Test Cluster 1 (should be clear Archaea) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
170 cluster_5 = parsed_clusters[5] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
171 _, _, taxa_dict_5 = process_cluster_data(cluster_5) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
172 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
173 result_5 = calculate_cluster_taxa(taxa_dict_5, args) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
174 # Should return single taxa group for Archaea |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
175 assert len(result_5) == 1, "Single dominant taxa should not split" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
176 dominant_taxa = list(result_5[0].keys())[0] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
177 assert 'Viridiplantae / Streptophyta / Magnoliopsida / Fagales / Juglandaceae / ' \ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
178 'Uncertain taxa / Uncertain taxa' in dominant_taxa, "Should identify Juglandaceae as dominant" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
179 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
180 # Test single-member cluster (Cluster 2) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
181 cluster_2 = parsed_clusters[2] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
182 _, _, taxa_dict_2 = process_cluster_data(cluster_2) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
183 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
184 result_2 = calculate_cluster_taxa(taxa_dict_2, args) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
185 total = sum(value for d in result_2 for value in d.values()) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
186 assert total == 1, "Single member cluster should not split" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
187 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
188 print("✓ Test 4 PASSED: Simple taxa calculation cases work correctly") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
189 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
190 def test_taxa_calculation_complex_splitting(self, parsed_clusters): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
191 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
192 Test 5: Taxa Calculation - Complex Splitting |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
193 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
194 Tests the recursive taxonomic resolution algorithm for clusters with |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
195 multiple competing taxa that should be split based on thresholds. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
196 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
197 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
198 class TestArgs: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
199 uncertain_taxa_use_ratio = 0.5 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
200 min_to_split = 0.30 # Lower threshold to encourage splitting |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
201 min_count_to_split = 5 # Lower threshold to encourage splitting |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
202 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
203 args = TestArgs() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
204 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
205 # Test Cluster 3 (mixed Firmicutes and Proteobacteria) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
206 cluster_3 = parsed_clusters[3] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
207 _, _, taxa_dict_3 = process_cluster_data(cluster_3) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
208 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
209 # Manual check of expected taxa distribution |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
210 expected_taxa = {} |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
211 for header, data in cluster_3.items(): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
212 taxa = data['taxa'] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
213 count = data['count'] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
214 expected_taxa[taxa] = expected_taxa.get(taxa, 0) + count |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
215 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
216 result_3 = calculate_cluster_taxa(taxa_dict_3, args) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
217 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
218 # With mixed taxa and low thresholds, should potentially split |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
219 # The exact behavior depends on the algorithm implementation |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
220 total_result_count = sum(sum(group.values()) for group in result_3) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
221 expected_total = sum(expected_taxa.values()) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
222 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
223 assert total_result_count == expected_total, "Total counts should be preserved after splitting" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
224 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
225 print("✓ Test 5 PASSED: Complex taxa splitting preserves counts and follows thresholds") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
226 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
227 def test_statistical_calculations(self, parsed_clusters): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
228 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
229 Test 6: Statistical Calculations |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
230 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
231 Verifies that similarity and e-value statistics are calculated correctly |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
232 including averages, standard deviations, and distributions. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
233 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
234 # Process all clusters to get combined data |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
235 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
236 eval_list, simi_list, _ = process_cluster_data(parsed_clusters[5]) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
237 # Test similarity statistics |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
238 if eval_list: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
239 expected_avg = sum(simi_list) / len(simi_list) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
240 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
241 # Manual verification of a few key values |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
242 # From our test data: read1=100% (100 times), read2=96.67% (50 times), etc. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
243 total_similarity_sum = (100.0 * 166) + (98.88 * 9) + 98.86 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
244 total_count = 176 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
245 manual_avg = total_similarity_sum / total_count |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
246 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
247 assert abs( |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
248 expected_avg - manual_avg) < 0.01, f"Similarity average mismatch: expected ~{manual_avg}, got {expected_avg}" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
249 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
250 # Test e-value data structure |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
251 annotated_evals = eval_list[1:] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
252 assert all(isinstance(e, (int, float)) for e in annotated_evals), "All e-values should be numeric" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
253 assert all(e > 0 for e in annotated_evals), "All e-values should be positive" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
254 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
255 print("✓ Test 6 PASSED: Statistical calculations are mathematically correct") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
256 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
257 def test_output_file_formats(self, test_data_dir, sample_cluster_file, sample_annotation_file): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
258 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
259 Test 7: Output File Formats |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
260 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
261 Tests that all output files are created with correct structure and content, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
262 including text files, Excel files with multiple sheets, and plot files. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
263 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
264 output_dir = test_data_dir |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
265 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
266 # Parse data |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
267 clusters = parse_cluster_file(sample_cluster_file, sample_annotation_file) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
268 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
269 # Process all clusters |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
270 cluster_data_list = [] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
271 all_eval_data = [0] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
272 all_simi_data = [] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
273 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
274 for cluster in clusters: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
275 eval_list, simi_list, taxa_dict = process_cluster_data(cluster) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
276 cluster_data_list.append((eval_list, simi_list, taxa_dict)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
277 all_eval_data[0] += eval_list[0] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
278 all_eval_data.extend(eval_list[1:]) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
279 all_simi_data.extend(simi_list) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
280 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
281 # Test similarity output |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
282 simi_output = output_dir / "test_similarity.txt" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
283 write_similarity_output(all_simi_data, str(simi_output)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
284 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
285 assert simi_output.exists(), "Similarity output file not created" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
286 with open(simi_output, 'r') as f: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
287 content = f.read() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
288 assert "# Average similarity:" in content, "Missing average similarity in output" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
289 assert "# Standard deviation:" in content, "Missing standard deviation in output" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
290 assert "similarity\tcount" in content, "Missing header in similarity output" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
291 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
292 # Test e-value output |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
293 eval_output = output_dir / "test_evalue.txt" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
294 write_evalue_output(all_eval_data, str(eval_output)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
295 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
296 assert eval_output.exists(), "E-value output file not created" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
297 with open(eval_output, 'r') as f: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
298 content = f.read() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
299 assert "evalue\tcount" in content, "Missing header in e-value output" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
300 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
301 # Test count output |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
302 count_output = output_dir / "test_count.txt" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
303 write_count_output(all_eval_data, cluster_data_list, str(count_output)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
304 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
305 assert count_output.exists(), "Count output file not created" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
306 with open(count_output, 'r') as f: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
307 content = f.read() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
308 assert "cluster\tunannotated\tannotated" in content, "Missing header in count output" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
309 assert "TOTAL\t" in content, "Missing total row in count output" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
310 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
311 # Test taxa clusters Excel output |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
312 taxa_clusters_output = output_dir / "test_taxa_clusters.xlsx" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
313 write_taxa_clusters_output(cluster_data_list, str(taxa_clusters_output)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
314 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
315 assert taxa_clusters_output.exists(), "Taxa clusters Excel file not created" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
316 df = pd.read_excel(taxa_clusters_output, sheet_name='Raw_Taxa_Clusters') |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
317 expected_columns = ['cluster', 'count', 'taxa_full', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
318 'species'] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
319 assert all(col in df.columns for col in expected_columns), "Missing columns in taxa clusters output" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
320 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
321 print("✓ Test 7 PASSED: All output file formats are correct and complete") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
322 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
323 def test_taxa_processed_output_structure(self, test_data_dir, sample_cluster_file, sample_annotation_file): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
324 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
325 Test 8: Processed Taxa Output Structure |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
326 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
327 Tests the complex processed taxa Excel output with multiple sheets |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
328 and parameter tracking. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
329 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
330 output_dir = test_data_dir |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
331 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
332 class TestArgs: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
333 uncertain_taxa_use_ratio = 0.6 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
334 min_to_split = 0.35 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
335 min_count_to_split = 15 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
336 show_unannotated_clusters = True |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
337 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
338 args = TestArgs() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
339 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
340 # Parse and process data |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
341 clusters = parse_cluster_file(sample_cluster_file, sample_annotation_file) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
342 cluster_data_list = [] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
343 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
344 for cluster in clusters: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
345 eval_list, simi_list, taxa_dict = process_cluster_data(cluster) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
346 cluster_data_list.append((eval_list, simi_list, taxa_dict)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
347 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
348 # Test processed taxa output |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
349 processed_output = output_dir / "test_processed_taxa.xlsx" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
350 write_taxa_processed_output(cluster_data_list, args, str(processed_output)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
351 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
352 assert processed_output.exists(), "Processed taxa Excel file not created" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
353 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
354 # Check multiple sheets exist |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
355 xl_file = pd.ExcelFile(processed_output) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
356 expected_sheets = ['Processed_Taxa_Clusters', 'Settings'] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
357 assert all(sheet in xl_file.sheet_names for sheet in expected_sheets), "Missing sheets in processed taxa output" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
358 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
359 # Check main data sheet |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
360 df_main = pd.read_excel(processed_output, sheet_name='Processed_Taxa_Clusters') |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
361 expected_columns = ['cluster', 'count', 'taxa_full', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
362 'species'] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
363 assert all(col in df_main.columns for col in expected_columns), "Missing columns in processed taxa sheet" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
364 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
365 # Check settings sheet |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
366 df_settings = pd.read_excel(processed_output, sheet_name='Settings') |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
367 assert 'Parameter' in df_settings.columns, "Missing Parameter column in settings" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
368 assert 'Value' in df_settings.columns, "Missing Value column in settings" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
369 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
370 # Verify settings values are recorded |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
371 settings_dict = dict(zip(df_settings['Parameter'], df_settings['Value'])) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
372 assert settings_dict['uncertain_taxa_use_ratio'] == 0.6, "Settings not correctly recorded" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
373 assert settings_dict['min_to_split'] == 0.35, "Settings not correctly recorded" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
374 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
375 print("✓ Test 8 PASSED: Processed taxa output has correct structure and settings tracking") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
376 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
377 def test_edge_cases(self, test_data_dir): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
378 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
379 Test 9: Edge Cases and Error Handling |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
380 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
381 Tests handling of edge cases like empty files, missing annotations, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
382 single-member clusters, and malformed input data. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
383 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
384 input_dir = test_data_dir |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
385 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
386 # Test empty cluster file |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
387 empty_cluster = input_dir / "empty_cluster.clstr" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
388 with open(empty_cluster, 'w') as f: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
389 f.write("") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
390 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
391 clusters_empty = parse_cluster_file(str(empty_cluster)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
392 assert len(clusters_empty) == 0, "Empty cluster file should produce no clusters" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
393 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
394 # Test cluster file with no annotations |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
395 simple_cluster = input_dir / "simple_cluster.clstr" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
396 simple_cluster_content = """>Cluster 0 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
397 0 100nt, >read_no_anno:50... * |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
398 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
399 with open(simple_cluster, 'w') as f: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
400 f.write(simple_cluster_content) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
401 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
402 with pytest.raises(UnboundLocalError): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
403 parse_cluster_file(str(simple_cluster), raise_on_error=True) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
404 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
405 # Test malformed cluster entries (missing parts) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
406 malformed_cluster = input_dir / "malformed_cluster.clstr" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
407 malformed_content = """>Cluster 0 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
408 0 100nt, >read1:50..._CONS(50) * |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
409 invalid_line_without_proper_format |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
410 1 90nt, >read2:25..._CONS(25) at /+/95% |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
411 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
412 annotations_malformed = input_dir / "test_pytest.xlsx" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
413 with open(malformed_cluster, 'w') as f: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
414 f.write(malformed_content) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
415 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
416 clusters_malformed = parse_cluster_file(str(malformed_cluster), str(annotations_malformed)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
417 # Should still parse valid entries and skip invalid ones |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
418 assert len(clusters_malformed) == 1, "Should parse valid entries from malformed file" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
419 assert len(clusters_malformed[0]) == 2, "Should have 2 valid read" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
420 assert clusters_malformed[0]['read1:50..._CONS']['evalue'] == 1.0e-50 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
421 assert clusters_malformed[0]['read2:25..._CONS']['count'] == 25 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
422 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
423 print("✓ Test 9 PASSED: Edge cases handled gracefully without crashes") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
424 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
425 def test_count_preservation_across_processing(self, parsed_clusters): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
426 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
427 Test 10: Count Preservation Across Processing Pipeline |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
428 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
429 Verifies that read counts are preserved throughout the entire processing |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
430 pipeline from cluster parsing through taxa calculation to final output. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
431 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
432 # Calculate expected total counts from original data |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
433 expected_total = 0 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
434 for cluster in parsed_clusters: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
435 for header, data in cluster.items(): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
436 expected_total += data['count'] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
437 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
438 # Process through pipeline and verify counts at each stage |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
439 total_from_processing = 0 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
440 taxa_processing_totals = [] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
441 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
442 class TestArgs: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
443 uncertain_taxa_use_ratio = 0.5 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
444 min_to_split = 0.45 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
445 min_count_to_split = 10 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
446 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
447 args = TestArgs() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
448 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
449 for cluster in parsed_clusters: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
450 eval_list, simi_list, taxa_dict = process_cluster_data(cluster) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
451 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
452 # Check that cluster processing preserves counts |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
453 cluster_total = eval_list[0] + len(eval_list[1:]) # unannotated + annotated |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
454 cluster_expected = sum(data['count'] for data in cluster.values()) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
455 assert cluster_total == cluster_expected, f"Count mismatch in cluster processing: expected {cluster_expected}, got {cluster_total}" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
456 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
457 total_from_processing += cluster_total |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
458 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
459 # Check taxa calculation preserves counts |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
460 taxa_results = calculate_cluster_taxa(taxa_dict, args) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
461 taxa_total = sum(sum(group.values()) for group in taxa_results) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
462 taxa_processing_totals.append(taxa_total) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
463 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
464 # Verify taxa dict total matches |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
465 taxa_dict_total = sum(taxa_dict.values()) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
466 assert taxa_total <= taxa_dict_total, f"Count mismatch in taxa calculation: expected {taxa_dict_total}, got {taxa_total}" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
467 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
468 # Final verification |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
469 assert total_from_processing == expected_total, f"Total count preservation failed: expected {expected_total}, got {total_from_processing}" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
470 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
471 # Verify sum of all taxa processing equals original |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
472 total_taxa_processed = sum(taxa_processing_totals) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
473 assert total_taxa_processed <= expected_total, f"Taxa processing total mismatch: expected {expected_total}, got {total_taxa_processed}" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
474 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
475 print("✓ Test 10 PASSED: Read counts preserved throughout entire processing pipeline") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
476 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
477 def test_11_parse_arguments_all_flags(self, tmp_path): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
478 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
479 Test 11: Argument Parsing with All Flags |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
480 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
481 Ensures parse_arguments correctly handles all optional flags and values. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
482 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
483 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
484 args = ca.parse_arguments([ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
485 '--input_cluster', str(tmp_path / "dummy.clstr"), |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
486 '--simi_plot_y_min', '90', |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
487 '--simi_plot_y_max', '99', |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
488 '--uncertain_taxa_use_ratio', '0.3', |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
489 '--min_to_split', '0.2', |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
490 '--min_count_to_split', '5', |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
491 '--show_unannotated_clusters', |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
492 '--make_taxa_in_cluster_split', |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
493 '--print_empty_files' |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
494 ]) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
495 assert args.simi_plot_y_min == 90 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
496 assert args.print_empty_files is True |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
497 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
498 def test_12_process_cluster_data_valueerror(self): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
499 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
500 Test 12: Process Cluster Data with Bad E-value |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
501 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
502 Ensures ValueError branches are handled and unannotated counts increase. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
503 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
504 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
505 cluster = { |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
506 "seq1": {"count": 1, "similarity": 95.0, "taxa": "taxonA", "evalue": "not_a_number"} |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
507 } |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
508 eval_list, simi_list, taxa_dict = ca.process_cluster_data(cluster) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
509 assert eval_list[0] == 1 # unannotated read |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
510 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
511 def test_13_write_similarity_and_evalue_empty(self, tmp_path): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
512 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
513 Test 13: Output Writers with Empty Data |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
514 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
515 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
516 sim_file = tmp_path / "sim.txt" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
517 eval_file = tmp_path / "eval.txt" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
518 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
519 ca.write_similarity_output([], str(sim_file)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
520 assert not sim_file.exists() or sim_file.read_text() == "" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
521 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
522 ca.write_evalue_output([5], str(eval_file)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
523 assert "unannotated" in eval_file.read_text() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
524 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
525 def test_14_write_count_zero_and_taxa_clusters_incomplete(self, tmp_path): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
526 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
527 Test 14: Count Writer with Zero Data and Taxa Clusters with Incomplete Taxa |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
528 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
529 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
530 count_file = tmp_path / "count.txt" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
531 taxa_file = tmp_path / "taxa.xlsx" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
532 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
533 ca.write_count_output([0], [], str(count_file)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
534 assert "TOTAL" in count_file.read_text() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
535 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
536 cluster_data = [([0], [], {"bad": 1})] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
537 ca.write_taxa_clusters_output(cluster_data, str(taxa_file)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
538 assert taxa_file.exists() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
539 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
540 def test_15_write_taxa_processed_uncertain_and_settings(self, tmp_path): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
541 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
542 Test 15: Processed Taxa Output with Settings |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
543 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
544 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
545 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
546 class Args: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
547 uncertain_taxa_use_ratio = 0.5 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
548 min_to_split = 0.2 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
549 min_count_to_split = 2 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
550 show_unannotated_clusters = True |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
551 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
552 out_file = tmp_path / "processed.xlsx" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
553 cluster_data = [([0], [], {"Unannotated read": 2})] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
554 ca.write_taxa_processed_output(cluster_data, Args(), str(out_file)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
555 assert out_file.exists() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
556 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
557 def test_16_create_evalue_plot_edge_cases(self, tmp_path): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
558 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
559 Test 16: E-value Plot Edge Cases |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
560 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
561 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
562 out = tmp_path / "plot.png" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
563 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
564 # Only unannotated |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
565 ca.create_evalue_plot([0], [0], str(out)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
566 assert not out.exists() or out.stat().st_size == 0 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
567 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
568 # Empty after filtering |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
569 ca.create_evalue_plot([0, ], [], str(out)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
570 assert not out.exists() or out.stat().st_size == 0 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
571 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
572 # With valid values |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
573 ca.create_evalue_plot([0, 1e-5, 1e-3], [2], str(out)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
574 assert out.exists() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
575 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
576 def test_17_main_runs_and_prints(self, tmp_path, capsys): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
577 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
578 Test 17: Main Entry Point |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
579 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
580 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
581 clstr = tmp_path / "simple.clstr" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
582 clstr.write_text(">Cluster 0\n0 100nt, >seq1... *\n") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
583 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
584 out = tmp_path / "sim.txt" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
585 args = [ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
586 '--input_cluster', str(clstr), |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
587 '--output_similarity_txt', str(out) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
588 ] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
589 ca.main(args) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
590 captured = capsys.readouterr() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
591 assert "Processing complete" in captured.out |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
592 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
593 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
594 def test_16a_prepare_evalue_histogram_valid_data(self): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
595 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
596 Test 16a: prepare_evalue_histogram returns correct counts/bins. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
597 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
598 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
599 counts, bins = ca.prepare_evalue_histogram([1e-5, 1e-3, 0.5], []) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
600 assert counts.sum() == 3 # 3 entries counted |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
601 assert len(bins) == 51 # 50 bins => 51 edges |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
602 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
603 def test_16b_prepare_evalue_histogram_empty(self): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
604 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
605 Test 16b: prepare_evalue_histogram with empty/invalid data returns (None, None). |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
606 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
607 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
608 counts, bins = ca.prepare_evalue_histogram([0, None, "bad"], []) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
609 assert counts is None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
610 assert bins is None |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
611 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
612 def test_16c_create_evalue_plot_creates_file_and_returns_data(self, tmp_path): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
613 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
614 Test 16c: create_evalue_plot saves a PNG and returns numeric data. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
615 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
616 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
617 out = tmp_path / "eval.png" |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
618 counts, bins = ca.create_evalue_plot_test([1e-5, 1e-3, 0.5], [], str(out)) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
619 assert out.exists() |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
620 assert counts.sum() == 3 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
621 assert len(bins) == 51 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
622 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
623 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
624 if __name__ == "__main__": |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
625 # Run all tests in this file |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
626 pytest.main([__file__]) |
