Mercurial > repos > onnodg > cdhit_analysis
annotate tests/test_cdhit_analysis.py @ 4:e64af72e1b8f draft default tip
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
| author | onnodg |
|---|---|
| date | Mon, 15 Dec 2025 16:44:40 +0000 |
| parents | ff68835adb2b |
| children |
| rev | line source |
|---|---|
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
1 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
2 Test suite for CD-HIT cluster analysis processor. |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
3 """ |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
4 import pytest |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
5 from pathlib import Path |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
6 import pandas as pd |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
7 import os |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
8 import sys |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
9 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
10 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
11 from Stage_1_translated.NLOOR_scripts.process_clusters_tool.cdhit_analysis import ( |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
12 parse_cluster_file, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
13 process_cluster_data, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
14 calculate_cluster_taxa, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
15 write_similarity_output, |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
16 write_count_output, |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
17 write_taxa_excel, |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
18 ) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
19 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
20 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
21 class TestCDHitAnalysis: |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
22 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
23 @pytest.fixture(scope="class") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
24 def test_data_dir(self): |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
25 base = Path("Stage_1_translated/NLOOR_scripts/process_clusters_tool/test-data") |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
26 assert base.exists() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
27 return base |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
28 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
29 @pytest.fixture(scope="class") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
30 def sample_cluster_file(self, test_data_dir): |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
31 f = test_data_dir / "prev_anno.txt" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
32 assert f.exists() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
33 return str(f) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
34 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
35 @pytest.fixture(scope="class") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
36 def sample_annotation_file(self, test_data_dir): |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
37 f = test_data_dir / "prev4.xlsx" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
38 assert f.exists() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
39 return str(f) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
40 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
41 @pytest.fixture(scope="class") |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
42 def parsed_clusters(self, sample_cluster_file, sample_annotation_file): |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
43 return parse_cluster_file(sample_cluster_file, sample_annotation_file) |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
44 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
45 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
46 def test_cluster_parsing_structure(self, parsed_clusters): |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
47 assert len(parsed_clusters) == 514 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
48 cluster_0 = parsed_clusters[0] |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
49 assert len(cluster_0) == 430 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
50 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
51 read = cluster_0["M01687:460:000000000-LGY9G:1:1101:8356:6156_CONS"] |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
52 assert read["count"] == 19 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
53 assert isinstance(read["similarity"], float) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
54 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
55 def test_annotation_integration_basic(self, parsed_clusters): |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
56 cluster_0 = parsed_clusters[0] |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
57 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
58 annotated_found = any( |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
59 data["taxa"] != "Unannotated read" for data in cluster_0.values() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
60 ) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
61 assert annotated_found, "At least one annotated read expected" |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
62 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
63 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
64 def test_process_cluster_data_counts_and_taxa_map(self, parsed_clusters): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
65 sim, taxa_map, annotated, unannotated = process_cluster_data(parsed_clusters[0]) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
66 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
67 assert isinstance(sim, list) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
68 assert annotated + unannotated == sum(d["count"] for d in parsed_clusters[0].values()) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
69 assert isinstance(taxa_map, dict) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
70 assert annotated == 47004 and unannotated == 9 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
71 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
72 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
73 def test_weighted_lca_splitting_on_uncertain_taxa(self): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
74 taxa_dict = { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
75 "K / P / C / O / F / G1 / S1": 60, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
76 "K / P / C / O / F / Uncertain taxa / Uncertain taxa": 60, |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
77 } |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
78 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
79 class ArgsLow: |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
80 uncertain_taxa_use_ratio = 0.5 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
81 min_to_split = 0.45 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
82 min_count_to_split = 10 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
83 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
84 class ArgsHigh: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
85 uncertain_taxa_use_ratio = 1.0 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
86 min_to_split = 0.45 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
87 min_count_to_split = 10 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
88 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
89 # LOW weight → uncertain counts half → G1 wins → no split |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
90 res_low = calculate_cluster_taxa(taxa_dict, ArgsLow()) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
91 assert len(res_low) == 1 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
92 assert sum(res_low[0].values()) == 60 # total preserved |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
93 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
94 # HIGH weight → uncertain = full weight → equal → split |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
95 res_high = calculate_cluster_taxa(taxa_dict, ArgsHigh()) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
96 assert len(res_high) == 2 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
97 total = sum(sum(g.values()) for g in res_high) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
98 assert total == 120 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
99 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
100 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
101 def test_calculate_cluster_taxa_preserves_counts_real_cluster(self, parsed_clusters): |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
102 sim, taxa_map, annotated, unannotated = process_cluster_data(parsed_clusters[3]) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
103 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
104 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
105 raw_total = annotated + unannotated |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
106 taxa_map_total = sum(info["count"] for info in taxa_map.values()) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
107 assert raw_total == taxa_map_total |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
108 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
109 class Args: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
110 uncertain_taxa_use_ratio = 0.5 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
111 min_to_split = 0.3 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
112 min_count_to_split = 5 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
113 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
114 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
115 results = calculate_cluster_taxa({t: i["count"] for t, i in taxa_map.items()}, Args()) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
116 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
117 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
118 resolved_total = sum(sum(group.values()) for group in results) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
119 assert resolved_total <= raw_total |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
120 assert resolved_total > 0 |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
121 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
122 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
def test_write_similarity_and_count_outputs(self, tmp_path, parsed_clusters):
    """Check that the similarity and count writers each create their file.

    Every parsed cluster is run through ``process_cluster_data`` and the four
    returned values are packed into the dict layout that is handed to
    ``write_similarity_output`` and ``write_count_output``.

    Args:
        tmp_path: pytest temporary directory used for both output files.
        parsed_clusters: fixture yielding the clusters to process.
    """
    out_simi = tmp_path / "simi.txt"
    out_count = tmp_path / "count.txt"

    cluster_data_list = []
    for c in parsed_clusters:
        sim, taxa_map, annotated, unannotated = process_cluster_data(c)
        cluster_data_list.append(
            {
                "similarities": sim,
                "taxa_map": taxa_map,
                "annotated": annotated,
                "unannotated": unannotated,
            }
        )

    # Only existence of the outputs is verified here, not their contents.
    write_similarity_output(cluster_data_list, str(out_simi))
    assert out_simi.exists()

    write_count_output(cluster_data_list, str(out_count))
    assert out_count.exists()
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
147 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
148 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
def test_write_taxa_excel_raw_and_processed(self, tmp_path, parsed_clusters):
    """Check that writing both raw and processed taxa yields all three sheets.

    Args:
        tmp_path: pytest temporary directory that receives ``taxa.xlsx``.
        parsed_clusters: fixture yielding the clusters to process.
    """

    class Args:
        # Minimal stand-in for the parsed command-line namespace that is
        # passed to write_taxa_excel.
        uncertain_taxa_use_ratio = 0.5
        min_to_split = 0.45
        min_count_to_split = 10
        min_cluster_support = 1
        make_taxa_in_cluster_split = False

    cluster_data_list = []
    for c in parsed_clusters:
        sim, taxa_map, annotated, unannotated = process_cluster_data(c)
        cluster_data_list.append(
            {
                "similarities": sim,
                "taxa_map": taxa_map,
                "annotated": annotated,
                "unannotated": unannotated,
            }
        )

    out = tmp_path / "taxa.xlsx"
    write_taxa_excel(
        cluster_data_list, Args(), str(out), write_raw=True, write_processed=True
    )

    # Read the sheet names inside a context manager so the workbook handle
    # is closed before pytest cleans up tmp_path.
    with pd.ExcelFile(out) as xl:
        sheet_names = xl.sheet_names

    assert "Raw_Taxa_Clusters" in sheet_names
    assert "Processed_Taxa_Clusters" in sheet_names
    assert "Settings" in sheet_names
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
179 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
def test_write_taxa_excel_only_raw_or_only_processed(self, tmp_path, parsed_clusters):
    """Check the sheets produced when only one of the two taxa outputs is requested.

    With ``write_raw=True, write_processed=False`` the raw sheet must be
    present and the processed sheet absent; with the flags reversed the
    processed sheet must be present.

    Args:
        tmp_path: pytest temporary directory that receives both workbooks.
        parsed_clusters: fixture yielding the clusters to process.
    """

    class Args:
        # Minimal stand-in for the parsed command-line namespace that is
        # passed to write_taxa_excel.
        uncertain_taxa_use_ratio = 0.5
        min_to_split = 0.45
        min_count_to_split = 10
        min_cluster_support = 1
        make_taxa_in_cluster_split = False

    cluster_data_list = []
    for c in parsed_clusters:
        sim, taxa_map, annotated, unannotated = process_cluster_data(c)
        cluster_data_list.append(
            {
                "similarities": sim,
                "taxa_map": taxa_map,
                "annotated": annotated,
                "unannotated": unannotated,
            }
        )

    out_raw = tmp_path / "raw.xlsx"
    write_taxa_excel(cluster_data_list, Args(), str(out_raw), write_raw=True, write_processed=False)
    # Context manager closes the workbook handle instead of leaking it.
    with pd.ExcelFile(out_raw) as xl_raw:
        raw_sheets = xl_raw.sheet_names
    assert "Raw_Taxa_Clusters" in raw_sheets
    assert "Processed_Taxa_Clusters" not in raw_sheets

    out_proc = tmp_path / "proc.xlsx"
    write_taxa_excel(cluster_data_list, Args(), str(out_proc), write_raw=False, write_processed=True)
    with pd.ExcelFile(out_proc) as xl_proc:
        proc_sheets = xl_proc.sheet_names
    assert "Processed_Taxa_Clusters" in proc_sheets
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
213 |
|
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
214 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
def test_parse_arguments_all_flags(self, tmp_path):
    """Parse a command line carrying plot and split flags; check the y-axis bounds.

    Only ``simi_plot_y_min`` and ``simi_plot_y_max`` are asserted; the other
    flags are supplied but their parsed values are not checked here.
    """
    from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca

    cluster_path = str(tmp_path / "dummy.clstr")
    excel_path = str(tmp_path / "report.xlsx")
    argv = [
        "--input_cluster", cluster_path,
        "--simi_plot_y_min", "90",
        "--simi_plot_y_max", "99",
        "--uncertain_taxa_use_ratio", "0.3",
        "--min_to_split", "0.2",
        "--min_count_to_split", "5",
        "--output_excel", excel_path,
    ]

    parsed = ca.parse_arguments(argv)

    assert parsed.simi_plot_y_min == 90
    assert parsed.simi_plot_y_max == 99
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
228 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
229 def test_main_runs_and_creates_outputs(self, tmp_path): |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
230 from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis as ca |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
231 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
232 clstr = tmp_path / "simple.clstr" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
233 clstr.write_text(">Cluster 0\n0\t88nt, >read1_CONS(3)... *\n") |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
234 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
235 anno = tmp_path / "anno.xlsx" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
236 df = pd.DataFrame([ |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
237 { |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
238 "header": "read1_CONS", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
239 "seq_id": "SEQ001", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
240 "source": "Genbank", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
241 "taxa": "K / P / C / O / F / G / S", |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
242 } |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
243 ]) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
244 with pd.ExcelWriter(anno) as w: |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
245 df.to_excel(w, sheet_name="Individual_Reads", index=False) |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
246 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
247 sim_file = tmp_path / "sim.txt" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
248 excel_file = tmp_path / "taxa.xlsx" |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
249 args = [ |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
250 "--input_cluster", str(clstr), |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
251 "--input_annotation", str(anno), |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
252 "--output_similarity_txt", str(sim_file), |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
253 "--output_excel", str(excel_file), |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
254 '--output_taxa_clusters', |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
255 '--output_taxa_processed', |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
256 '--log_file', 'test-data/new_logs.txt', |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
257 '--simi_plot_y_min', '95', |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
258 '--simi_plot_y_max', '100', |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
259 '--uncertain_taxa_use_ratio', '0.5', |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
260 '--min_to_split', '0.45', |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
261 '--min_count_to_split', '10', |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
262 '--min_cluster_support', '1' |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
263 ] |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
264 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
265 ca.main(args) |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
266 assert sim_file.exists() |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
267 assert excel_file.exists() |
|
0
00d56396b32a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
268 |
|
4
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
def test_parse_cluster_file_empty_and_no_annotation(self, tmp_path):
    """Parsing a zero-byte cluster file without annotations returns an empty list."""
    from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis2 as ca

    # Create a cluster file that contains nothing at all.
    cluster_path = tmp_path / "empty.clstr"
    cluster_path.write_text("")

    parsed = ca.parse_cluster_file(
        str(cluster_path),
        annotation_file=None,
        log_messages=[],
    )

    assert parsed == []
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
277 |
|
e64af72e1b8f
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_clusters_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
1
diff
changeset
|
def test_create_similarity_plot_creates_file(self, tmp_path, parsed_clusters):
    """Check that ``create_similarity_plot`` writes its output image.

    At most the first five entries of the ``parsed_clusters`` fixture are
    processed; their similarity values are pooled and handed to the plot
    function together with the per-cluster value counts. The output file is
    only required to exist when at least one similarity value was collected.
    """
    from Stage_1_translated.NLOOR_scripts.process_clusters_tool import cdhit_analysis2 as ca

    cluster_data_list = []
    all_simi = []
    lengths = []

    for cluster in parsed_clusters[:5]:
        # NOTE(review): process_cluster_data is called without the ``ca.``
        # prefix, so it has to be resolvable from the enclosing module
        # scope -- confirm the module-level import still exists.
        similarities, taxa_map, annotated, unannotated = process_cluster_data(cluster)
        record = {
            "similarities": similarities,
            "taxa_map": taxa_map,
            "annotated": annotated,
            "unannotated": unannotated,
        }
        cluster_data_list.append(record)

        # Clusters without similarity values contribute nothing to the plot input.
        if not similarities:
            continue
        all_simi.extend(similarities)
        lengths.append(len(similarities))

    class Args:
        # Minimal stand-in exposing only the two plot-range attributes set here.
        simi_plot_y_min = 95.0
        simi_plot_y_max = 100.0

    out_png = tmp_path / "sim.png"
    ca.create_similarity_plot(all_simi, lengths, Args(), str(out_png))

    # The file is only asserted when there was something to plot.
    if all_simi:
        assert out_png.exists()
