Mercurial > repos > onnodg > blast_annotations_processor
annotate tests/test_blast_annotations_processor.py @ 2:9ca209477dfd draft default tip
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
| author | onnodg |
|---|---|
| date | Mon, 15 Dec 2025 16:43:36 +0000 |
| parents | a3989edf0a4a |
| children |
| rev | line source |
|---|---|
|
2
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
1 """ |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
2 Test suite for BLAST annotation processor. |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
3 """ |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
4 import re |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
5 import ast |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
6 import pytest |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
7 import os |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
8 import sys |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
9 import json |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
10 import pandas as pd |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
11 from pathlib import Path |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
12 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
13 # Add the module to path for importing |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
14 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
15 from Stage_1_translated.NLOOR_scripts.process_annotations_tool.blast_annotations_processor import ( |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
16 process_single_file, |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
17 resolve_tax_majority, |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
18 TAXONOMIC_LEVELS, |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
19 check_header_string |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
20 ) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
21 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
22 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
23 class TestBlastAnnotationProcessor: |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
24 """Test class for BLAST annotation processor""" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
25 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
# Class-scoped fixture; defined as a method of TestBlastAnnotationProcessor.
@pytest.fixture(scope="class")
def test_data_dir(self):
    """Create the test-data directory tree and return its root.

    Ensures that ``test-data`` and its ``input``, ``expected`` and
    ``output`` subdirectories exist. Directories that are already
    present are left untouched.

    Returns:
        Path: the ``test-data`` root. The path is relative, so it
        resolves against the working directory the tests are run from.
    """
    base_dir = Path("test-data")

    for subdir in ("input", "expected", "output"):
        # parents=True also creates ``test-data`` itself on first use.
        (base_dir / subdir).mkdir(parents=True, exist_ok=True)

    return base_dir
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
36 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
# Class-scoped fixture; defined as a method of TestBlastAnnotationProcessor.
@pytest.fixture(scope="class")
def sample_files(self, test_data_dir):
    """Write the sample BLAST table and FASTA file used by the tests.

    Two files are (re)written under ``<test_data_dir>/input``:
    ``test_blast.tabular`` with annotated hits for read1..read4, and
    ``test_sequences.fasta`` with the four matching sequences.

    Args:
        test_data_dir: root directory returned by the ``test_data_dir``
            fixture.

    Returns:
        dict: ``{'blast': <path>, 'fasta': <path>}`` with both paths
        given as strings.
    """
    input_dir = test_data_dir / "input"

    # NOTE(review): in this view the columns are separated by single
    # spaces; the ``.tabular`` extension suggests the original literal
    # uses tab characters -- confirm against the repository before
    # relying on this text.
    blast_content = """#Query ID #Subject #Subject accession #Subject Taxonomy ID #Identity percentage #Coverage #evalue #bitscore #Source #Taxonomy
read1(100) subject2 id2 subject2 90.0 95 1e-45 180 database1 Bacteria / Firmicutes / Bacilli / Bacillales / Bacillaceae / Bacillus / Bacillus_subtilis
read1(100) subject1 id1 subject1 95.889 100 1e-50 200 database1 Bacteria / Firmicutes / Bacilli / Bacillales / Bacillaceae / Bacillus / Bacillus_subtilis
read2(50) subject3 id3 subject3 85.0 90 1e-40 160 database2 Bacteria / Proteobacteria / Gammaproteobacteria / Enterobacterales / Enterobacteriaceae / Escherichia / Escherichia_coli
read3(25) subject4 id4 subject4 80.0 85 1e-35 140 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_smithii
read4(25) subject4 id4 subject4 80.0 85 1e-35 140 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_blabla
read4(25) subject4 id4.1 subject4 80.0 85 1e-40 140 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_eclhi
read4(25) subject4 id4 subject4 80.0 85 1e-35 140 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_elchi
read4(25) subject4 id4.2 subject4 90.0 87 1e-50 160 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_smithii
"""

    # One record per read ID that appears in the BLAST table above.
    fasta_content = """>read1(100) count=100;
ATCGATCGATCGATCG
>read2(50) count=50;
GCTAGCTAGCTAGCTA
>read3(25) count=25;
TGACTGACTGACTGAC
>read4(25) count=25;
TGAAAAAAACACCAC
"""

    blast_file = input_dir / "test_blast.tabular"
    fasta_file = input_dir / "test_sequences.fasta"

    # Explicit encoding keeps the written bytes independent of the
    # platform's default locale.
    blast_file.write_text(blast_content, encoding="utf-8")
    fasta_file.write_text(fasta_content, encoding="utf-8")

    return {
        'blast': str(blast_file),
        'fasta': str(fasta_file),
    }
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
75 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
# Class-scoped fixture; defined as a method of TestBlastAnnotationProcessor.
@pytest.fixture(scope="class")
def processed_output(self, test_data_dir, sample_files):
    """Run the processor on the sample files and return the options used.

    Builds a plain options object whose attributes name the two input
    files, the output destinations under ``<test_data_dir>/output`` and
    the processing settings, then passes it to ``process_single_file``.

    Args:
        test_data_dir: root directory returned by the ``test_data_dir``
            fixture.
        sample_files: mapping with ``'blast'`` and ``'fasta'`` paths, as
            returned by the ``sample_files`` fixture.

    Returns:
        The options object, so tests can look up output paths such as
        ``header_anno`` on it.
    """
    output_dir = test_data_dir / "output"

    def _out(name):
        """Return the string path of ``name`` inside the output directory."""
        return str(output_dir / name)

    class Args:
        """Attribute container handed to ``process_single_file``."""

        def __init__(self):
            # Input files written by the ``sample_files`` fixture.
            self.input_anno = sample_files['blast']
            self.input_unanno = sample_files['fasta']

            # Output destinations, all inside ``output_dir``.
            self.eval_plot = _out("eval_plot.png")
            self.taxa_output = _out("taxa_output.txt")
            self.circle_data = _out("circle_data.json")
            self.header_anno = _out("header_anno.xlsx")
            self.anno_stats = _out("anno_stats.txt")
            self.filtered_fasta = _out("filtered_fasta.fasta")
            self.log = _out("log.txt")

            # Processing settings; how each one is interpreted is decided
            # by ``process_single_file``, which is not shown here.
            self.uncertain_threshold = 0.9
            self.eval_threshold = 1e-10
            self.min_bitscore = 60
            self.min_support = 1
            self.ignore_rank = 'unknown'
            self.ignore_taxonomy = 'environmental'
            self.bitscore_perc_cutoff = 10
            self.ignore_obiclean_type = 'singleton'
            self.ignore_illuminapairend_type = 'pairend'
            self.min_identity = 70
            self.min_coverage = 70
            self.ignore_seqids = ''
            self.use_counts = True

    args = Args()
    # Passed to the processor as its message list; this fixture does not
    # inspect it afterwards.
    log_messages = []
    process_single_file(args.input_anno, args.input_unanno, args, log_messages)

    return args
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
111 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
112 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
def test_read_count_consistency(self, processed_output):
    """
    Test 1: Read Count Consistency

    Verifies that the per-read counts listed in the ``Individual_Reads``
    sheet match the expected values and that the annotation statistics
    file reports the matching aggregated total.

    Args:
        processed_output: fixture object exposing ``header_anno`` (path of
            the Excel output) and ``anno_stats`` (path of the statistics
            text file).
    """
    df = pd.read_excel(processed_output.header_anno, sheet_name='Individual_Reads')
    expected_counts = {'read1': 100, 'read2': 50, 'read3': 25, 'read4': 25}

    for read_name, expected_count in expected_counts.items():
        subset = df.loc[df['header'] == read_name]
        if subset.empty:
            # A read that is absent from the sheet is skipped, not failed.
            # NOTE(review): presumably such reads were dropped by the tool's
            # filters - confirm this is intended for every expected read.
            continue
        # Only the first matching row is compared.
        actual_count = subset.iloc[0]['count']
        assert actual_count == expected_count, f"Count mismatch for {read_name}"

    with open(processed_output.anno_stats, 'r') as f:
        stats_content = f.read()

    # The total covers all four expected reads: 100 + 50 + 25 + 25 = 200.
    # Deriving it from the dict keeps the two from drifting apart.
    expected_total = sum(expected_counts.values())
    assert f'total_unique: {expected_total}' in stats_content, "Total unique count incorrect in stats"
    print("✓ Test 1 PASSED: Read counts consistent across all outputs")
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
140 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
def test_lowest_common_ancester(self, processed_output):
    """
    Test 2: Lowest Common Ancestor

    Exercises ``resolve_tax_majority`` with a 0.9 threshold on two
    hand-built tables mapping full lineages to counts: one in which a
    single species dominates and one in which no species reaches the
    threshold.

    ``processed_output`` is requested as a fixture but is not read here.
    """
    test_conflicts = {
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita a': 10,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita b': 1,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita c': 1,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita d': 1,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita e': 1,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia a': 450,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia b': 2,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia c': 2,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia d': 2,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia e': 2,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia f': 12,
        'Viridiplantae / Streptophyta / Bryopsida / Funariales / Funariaceae / Funaria / Uncertain taxa': 6
    }
    # 'Ciceronia a' accounts for 450 of the 490 summed counts (~0.92), which
    # is above the 0.9 threshold, so the short result should name it.
    majority_short, _ = resolve_tax_majority(test_conflicts, 0.9)
    assert 'Ciceronia a' in majority_short, "Dominant taxon not selected as majority"

    test_90_percent_conflicts = {
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita a': 90,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita b': 10,
        'Viridiplantae / Streptophyta / Bryopsida / Funariales / Funariaceae / Funaria / Uncertain taxa': 6
    }
    # 'Cicerbita a' accounts for only 90 of 106 (~0.85), below the threshold.
    # The long result is expected to be cut back to
    # 'Viridiplantae / Streptophyta' followed by 'Uncertain taxa'.
    # NOTE(review): the exact level chosen depends on resolve_tax_majority's
    # handling of the 'Uncertain taxa' entry - confirm against its docs.
    _, resolved_long = resolve_tax_majority(test_90_percent_conflicts, 0.9)
    assert 'Viridiplantae / Streptophyta / Uncertain taxa' in resolved_long, "Conflict not resolved to uncertain taxa"

    print("✓ Test 2 PASSED: Lowest common ancestor works correctly")
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
173 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
174 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
def test_taxonomic_conflict_resolution(self, processed_output):
    """
    Test 3: Taxonomic Conflict Resolution

    Checks the uncertainty threshold of ``resolve_tax_majority`` on two
    small hand-built tables: one where the leading lineage falls short of
    the threshold and one where it reaches it.

    ``processed_output`` is requested as a fixture but is not read here.
    """
    threshold = 0.9

    split_votes = {
        'Bacteria / Firmicutes / Bacilli': 2,
        'Bacteria / Proteobacteria / Gammaproteobacteria': 1
    }
    # The leading lineage has 2 of 3 counts (~0.67 < 0.9), so the short
    # result is expected to be marked as uncertain.
    short_label, _ = resolve_tax_majority(split_votes, threshold)
    assert 'Uncertain taxa' in short_label, "Conflict not resolved to uncertain taxa"

    dominant_votes = {
        'Bacteria / Firmicutes / Bacilli': 9,
        'Bacteria / Proteobacteria / Gammaproteobacteria': 1
    }
    # Here the leading lineage has 9 of 10 counts (0.9), and the short
    # result is expected to keep 'Firmicutes'.
    short_label, _ = resolve_tax_majority(dominant_votes, threshold)
    assert 'Firmicutes' in short_label, "High confidence case not resolved correctly"

    print("✓ Test 3 PASSED: Taxonomic conflict resolution working correctly")
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
201 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
def test_output_file_structures(self, processed_output):
    """
    Test 4: Output File Structure Validation

    Verifies the structure of each output produced for the fixture: the
    Excel workbook (sheets and columns), the taxa text output, the
    annotation statistics file, the circle-diagram JSON and the filtered
    FASTA file.

    Args:
        processed_output: fixture object exposing the output paths
            ``header_anno``, ``taxa_output``, ``anno_stats``,
            ``circle_data`` and ``filtered_fasta``.
    """
    excel_file = processed_output.header_anno
    assert os.path.exists(excel_file), "Excel output file not created"

    expected_sheets = ['Individual_Reads', 'Merged_by_Taxa']
    expected_cols = ['header', 'seq_id', 'source', 'count', 'taxa', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']
    # Open the workbook in a context manager so the file handle is released
    # even when an assertion fails.
    with pd.ExcelFile(excel_file) as xl_file:
        assert all(sheet in xl_file.sheet_names for sheet in expected_sheets), "Missing Excel sheets"
        df_individual = pd.read_excel(xl_file, sheet_name='Individual_Reads')
    assert all(col in df_individual.columns for col in expected_cols), "Missing columns in Individual_Reads"

    with open(processed_output.taxa_output, 'r') as f:
        taxa_lines = f.readlines()
    assert len(taxa_lines) == 2, "Taxa output should contain exactly 2 lines"
    # The column header is looked up on the second line of the file.
    assert 'percentage_rooted\tnumber_rooted' in taxa_lines[1], "Taxa output header incorrect"

    with open(processed_output.anno_stats, 'r') as f:
        anno_stats = f.readlines()
    assert 'FASTA: headers kept after filters and min_support=1: 4\n' in anno_stats, "Kept-header count missing or incorrect in annotation stats"
    # Same number as the "headers kept" statistic asserted above.
    kept_headers = 4

    with open(processed_output.circle_data, 'r') as f:
        circle_data = json.load(f)

    assert isinstance(circle_data, list), "Circle data should be a list"
    assert len(circle_data) == len(TAXONOMIC_LEVELS), "Circle data should have entry per taxonomic level"

    with open(processed_output.filtered_fasta, 'r') as f:
        filtered_fasta = f.readlines()
    # Two lines are expected per kept header.
    # NOTE(review): presumably one header line plus one sequence line - confirm.
    assert len(filtered_fasta) == kept_headers * 2, "Filtered FASTA should contain two lines per kept header"

    print("✓ Test 4 PASSED: All output files have correct structure")
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
241 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
242 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
243 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
def test_header_synchronization(self, test_data_dir):
    """
    Test 5: Header Synchronization Between Files

    Writes a BLAST table and a FASTA file whose read identifiers do not line
    up (the FASTA has ``read2(50)`` while the BLAST table only has a
    ``read2.1(50)`` row with placeholder fields), runs the processor on the
    pair, and checks which reads end up in the ``Individual_Reads`` sheet.
    """
    input_dir = test_data_dir / "input"
    output_dir = test_data_dir / "output"

    # NOTE(review): this literal is reproduced exactly as displayed; column
    # separators appear as single spaces here, but a ".tabular" file is
    # presumably tab-delimited -- confirm against the repository copy.
    blast_content = """#Query ID #Subject #Subject accession #Subject Taxonomy ID #Identity percentage #Coverage #evalue #bitscore #Source #Taxonomy
read1(100) source=NCBI sequenceID=KR738003 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Malvales suborder=NA infraorder=NA superfamily=NA family=Malvaceae genus=Hibiscus species=Hibiscus trionum markercode=trnL lat=0.304 lon=36.87 source=NCBI N/A 100.000 100 7.35e-30 54.7 Viridiplantae / Streptophyta / Magnoliopsida / Malvales / Malvaceae / Hibiscus / Hibiscus trionum
read1(100) source=NCBI sequenceID=KR738670 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Malvales suborder=NA infraorder=NA superfamily=NA family=Malvaceae genus=Hibiscus species=Hibiscus trionum markercode=trnL lat=0.304 lon=36.87 source=NCBI N/A 100.000 100 7.35e-14 54.7 Viridiplantae / Streptophyta / Magnoliopsida / Malvales / Malvaceae / Hibiscus / Hibiscus trionum
read2.1(50) 1 2 3 4 5 6 7 8 9
read3(25) source=NCBI sequenceID=KR737595 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Malvales suborder=NA infraorder=NA superfamily=NA family=Malvaceae genus=Hibiscus species=Hibiscus trionum markercode=trnL lat=0.304 lon=36.87 source=NCBI N/A 97.561 87 1.68e-14 71.3 Viridiplantae / Streptophyta / Magnoliopsida / Malvales / Malvaceae / Hibiscus / Hibiscus trionum
"""

    fasta_content = """>read1(100) count=100;
ATCG
>read2(50) merged_sample={}; count=1011; direction=right; seq_b_insertion=0; sminR=40.0; ali_length=53; seq_b_deletion=248; seq_a_deletion=248; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; seq_b_single=0;
gggcaatcctgagccaagtgactggagttcagataggtgcagagactcaatgg
>read3(25) merged_sample={}; count=179; direction=right; sminR=40.0; ali_length=49; seq_b_deletion=252; seq_a_deletion=252; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; seq_b_single=0;
gggcaatcctgagccaactggagttcagataggtgcagagactcaatgg
"""

    blast_file = input_dir / "test_sync.tabular"
    fasta_file = input_dir / "test_sync.fasta"

    # Materialise both fixtures on disk, BLAST table first, then FASTA.
    for fixture_path, fixture_text in (
        (blast_file, blast_content),
        (fasta_file, fasta_content),
    ):
        with open(fixture_path, 'w') as handle:
            handle.write(fixture_text)

    # Attribute name -> value, in the order they are set on the Args instance.
    settings = {
        'input_anno': blast_file,
        'input_unanno': fasta_file,
        'header_anno': "Stage_1_translated/NLOOR_scripts/process_annotations_tool/test-data/sync_test.xlsx",
        'eval_plot': None,
        'taxa_output': None,
        'circle_data': None,
        'filtered_fasta': str(output_dir / "filtered_fasta.fasta"),
        'anno_stats': str(output_dir / "sync_stats.txt"),
        'log': str(output_dir / "log.txt"),
        'uncertain_threshold': 0.9,
        'eval_threshold': 1e-10,
        'use_counts': True,
        'min_bitscore': 50,
        'min_support': 1,
        'ignore_rank': 'unknown',
        'ignore_taxonomy': 'environmental',
        'bitscore_perc_cutoff': 10,
        'ignore_obiclean_type': 'singleton',
        'ignore_illuminapairend_type': 'pairend',
        'min_identity': 30,
        'min_coverage': 30,
        'ignore_seqids': '',
    }

    class Args:
        """Plain attribute holder standing in for parsed CLI arguments."""

        def __init__(self):
            for attr_name, attr_value in settings.items():
                setattr(self, attr_name, attr_value)

    args = Args()
    process_single_file(args.input_anno, args.input_unanno, args, log_messages=[])

    # Reduce every header in the sheet to its leading "read<N>" token.
    sheet = pd.read_excel(args.header_anno, sheet_name='Individual_Reads')
    headers = sheet['header'].str.extract(r'(read\d+)')[0].tolist()

    # Should have read1 and read3, read2 should be skipped.
    # Each row: (identifier, expected membership in headers, failure message).
    expectations = (
        ('read1', True, "read1 should be present"),
        ('read2', False, "read2 should not be present"),
        ('read2.1', False, "read2 should not be present"),
        ('read3', True, "read3 should be present"),
    )
    for read_id, should_be_present, failure_message in expectations:
        assert (read_id in headers) is should_be_present, failure_message

    print("✓ Test 5 PASSED: Header synchronization handled correctly")
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
315 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
def test_check_header_string_all_behaviors(self):
    """
    Exercise ``check_header_string`` with one header per scenario.

    Each case passes a FASTA-style header plus the two ignore-string
    arguments and expects exactly ``True`` (header allowed) or ``False``
    (header blocked).
    """
    from Stage_1_translated.NLOOR_scripts.process_annotations_tool.blast_annotations_processor import (
        check_header_string,
    )

    # (scenario, header, first ignore argument, second ignore argument, expected)
    cases = (
        ("clean header — allowed", ">readA count=10;", "", "", True),
        ("blocks singleton", ">r obiclean_status={'XXX': 's'}", "singleton", "", False),
        ("blocks variant", ">r obiclean_status={'XXX': 'i'}", "variant", "", False),
        ("blocks head", ">r obiclean_status={'XXX': 'h'}", "head", "", False),
        ("blocks pairend", ">r PairEnd", "pairend", "", False),
        ("blocks consensus", ">r CONS", "consensus", "", False),
        ("blocks custom string", ">r FooBar", "FooBar", "", False),
        ("blocks when string is in second param", ">r blah", "", "blah", False),
        (
            "blocks when multiple ignore values contain it",
            ">r PairEnd obiclean_status={'XXX': 's'}",
            "pairend,singleton",
            "",
            False,
        ),
        ("allows when no match", ">r something", "pairend", "", True),
    )

    # Identity comparison keeps the strict "is True" / "is False" check:
    # a merely truthy or falsy return value does not pass.
    for scenario, header, first_ignore, second_ignore, expected in cases:
        assert check_header_string(header, first_ignore, second_ignore) is expected
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
340 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
def test_excel_merged_vs_individual(self, processed_output):
    """
    Test 6: Excel Merged vs Individual Sheet Consistency

    Loads the 'Individual_Reads' and 'Merged_by_Taxa' sheets from the
    workbook at ``processed_output.header_anno`` and checks two things:
    the merged sheet has one row per distinct value in the individual
    sheet's 'taxa' column, and every merged 'count' equals the sum of the
    'count' values of the individual rows sharing that taxa.
    """
    workbook = processed_output.header_anno
    reads_sheet = pd.read_excel(workbook, sheet_name='Individual_Reads')
    taxa_sheet = pd.read_excel(workbook, sheet_name='Merged_by_Taxa')

    distinct_taxa = reads_sheet['taxa'].nunique()
    assert len(taxa_sheet) == distinct_taxa, "Merged sheet doesn't match unique taxa count"

    # Walk the merged rows and re-derive each total from the per-read rows;
    # a taxa backed by several reads must carry the summed count.
    for _, row in taxa_sheet.iterrows():
        taxa = row['taxa']
        same_taxa = reads_sheet['taxa'] == taxa

        expected_count = reads_sheet.loc[same_taxa, 'count'].sum()
        actual_count = row['count']

        assert actual_count == expected_count, f"Count mismatch for taxa {taxa}: expected {expected_count}, got {actual_count}"

    print("✓ Test 6 PASSED: Excel merged sheet correctly aggregates individual data")
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
366 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
367 def test_annotation_statistics_accuracy(self, processed_output, sample_files): |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
368 """ |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
369 Test 7: Annotation Statistics Accuracy |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
370 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
371 Verifies that calculated annotation statistics match the actual data. |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
372 Adapted for the new plain-text log file instead of tab-separated output. |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
373 """ |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
374 stats = {} |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
375 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
376 with open(processed_output.anno_stats, 'r') as f: |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
377 lines = f.readlines() |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
378 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
379 for line in lines: |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
380 line = line.strip() |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
381 if not line or ":" not in line: |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
382 continue |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
383 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
384 key, value = line.split(":", 1) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
385 key = key.strip() |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
386 value = value.strip() |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
387 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
388 try: |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
389 stats[key] = float(value) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
390 except ValueError: |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
391 stats[key] = value |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
392 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
393 assert stats["total_sequences"] == 4.0, "Total sequences count incorrect" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
394 assert stats["annotated_sequences"] == 3.0, "Annotated sequence count incorrect" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
395 assert stats["total_unique"] == 200.0, "Total unique count incorrect" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
396 assert stats["unique_annotated"] == 150.0, "Unique annotated count incorrect" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
397 assert stats["percentage_annotated"] == 75.0, "Percentage annotated incorrect" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
398 assert stats["percentage_unique_annotated"] == 75.0, "Percentage unique annotated incorrect" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
399 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
400 print("✓ Test 7 PASSED: Annotation statistics are accurate") |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
401 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
402 def test_combined_all_filters(self, test_data_dir): |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
403 """ |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
404 Single integrated test that validates all FASTA + BLAST filter rules. |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
405 Every read is designed to fail exactly one filter, except readOK. |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
406 """ |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
407 input_dir = test_data_dir / "input" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
408 output_dir = test_data_dir / "output" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
409 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
410 fasta = input_dir / "combined_filters.fasta" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
411 blast = input_dir / "combined_filters.tabular" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
412 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
413 fasta.write_text( |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
414 ">lowSupport(1) count=1;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
415 ">obicleanFail(10) count=10; obiclean_status={'XXX': 's'};\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
416 ">pairendFail_PairEnd(10) count=10;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
417 ">identityFail(10) count=10;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
418 ">coverageFail(10) count=10;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
419 ">bitscoreFail(10) count=10;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
420 ">bscutoffHigh(10) count=10;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
421 ">envTaxFail(10) count=10;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
422 ">rankFail(10) count=10;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
423 ">seqidFail(10) count=10;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
424 ">readOK(10) count=10;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
425 ">readOK_multiple_id(10) count=10;\nACGT\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
426 ) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
427 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
428 blast.write_text( |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
429 # min_support (count=1 < 5) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
430 "lowSupport(1)\ts\tid1\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
431 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
432 # ignore_obiclean_type = singleton |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
433 "obicleanFail(10)\ts\tid2\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
434 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
435 # ignore_illuminapairedend_type = pairend |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
436 "pairendFail_PairEnd(10)\ts\tid3\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
437 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
438 # min_identity = 90 → identity = 50 fails |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
439 "identityFail(10)\ts\tid4\t123\t50\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
440 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
441 # min_coverage = 50 → coverage = 20 fails |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
442 "coverageFail(10)\ts\tid5\t123\t99\t20\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
443 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
444 # min_bitscore = 60 → bitscore = 10 fails |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
445 "bitscoreFail(10)\ts\tid6\t123\t99\t99\t1e-50\t10\tsrc\tA / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
446 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
447 # bitscore_perc_cutoff: best = 200 → cutoff = 180 → bitscore 150 fails |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
448 "bscutoffHigh(10)\ts\tid7.1\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G.1\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
449 "bscutoffHigh(10)\ts\tid7.2\t123\t99\t99\t1e-50\t150\tsrc\tA / B / C / D / E / F / G.2\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
450 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
451 # ignore_taxonomy = 'environmental' |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
452 "envTaxFail(10)\ts\tid8\t123\t99\t99\t1e-50\t200\tsrc\tEnvironmental / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
453 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
454 # ignore_rank = 'unknown' |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
455 "rankFail(10)\ts\tid9\t123\t99\t99\t1e-50\t200\tsrc\tUnknown / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
456 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
457 # ignore_seqids = BADSEQ |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
458 "seqidFail(10)\ts\tBADSEQ\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
459 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
460 # readOK (valid, full taxonomy) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
461 "readOK(10)\ts\tidGood\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
462 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
463 # readOK_multiple_id (valid, full taxonomy, multiple id's) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
464 "readOK_multiple_id(10)\ts\tidGood.1\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
465 "readOK_multiple_id(10)\ts\tidGood.2\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
466 ) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
467 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
468 class Args: |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
469 def __init__(self): |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
470 self.input_anno = str(blast) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
471 self.input_unanno = str(fasta) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
472 self.header_anno = str(output_dir / "combined.xlsx") |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
473 self.filtered_fasta = str(output_dir / "combined.fasta") |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
474 self.anno_stats = str(output_dir / "combined_stats.txt") |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
475 self.eval_plot = None |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
476 self.taxa_output = None |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
477 self.circle_data = None |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
478 self.log = str(output_dir / "log.txt") |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
479 self.uncertain_threshold = 0.9 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
480 self.eval_threshold = 1e-10 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
481 self.use_counts = True |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
482 self.min_bitscore = 60 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
483 self.min_support = 5 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
484 self.ignore_rank = 'unknown' |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
485 self.ignore_taxonomy = 'environmental' |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
486 self.bitscore_perc_cutoff = 10 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
487 self.ignore_obiclean_type = 'singleton' |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
488 self.ignore_illuminapairend_type = 'pairend' |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
489 self.min_identity = 90 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
490 self.min_coverage = 50 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
491 self.ignore_seqids = 'BADSEQ' |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
492 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
493 args = Args() |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
494 process_single_file(args.input_anno, args.input_unanno, args, log_messages=[]) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
495 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
496 with open(args.filtered_fasta) as f: |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
497 headers = [l.strip() for l in f if l.startswith(">")] |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
498 assert '>obicleanFail(10) count=10;' not in headers |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
499 assert '>pairendFail_PairEnd(10) count=10;' not in headers |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
500 assert len(headers) == 9, "FASTA filtering only applies to header-based rules" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
501 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
502 df = pd.read_excel(args.header_anno, sheet_name="Individual_Reads") |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
503 seq_ids = { |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
504 sid |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
505 for val in df["seq_id"] |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
506 for sid in (ast.literal_eval(val) if isinstance(val, str) else val) |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
507 } |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
508 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
509 expected = {'idGood.1', 'idGood', 'id7.1', 'idGood.2'} |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
510 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
511 assert seq_ids == expected, f"Expected surviving seq_ids {expected}, got {seq_ids}" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
512 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
513 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
def test_log_filters_count(self, processed_output):
    """
    Check that the statistics file written by the processor contains every
    expected section header, counter line and "written to" notice.

    The file is read from ``processed_output.anno_stats`` and each expected
    fragment is looked up as a plain substring, in the order listed below.
    """
    required_fragments = (
        # Parameter dump
        "=== PARAMETERS USED ===",
        "input_anno:",
        "input_unanno:",
        # FASTA header counters
        "FASTA: total headers: 4",
        "FASTA: headers kept after filters",
        # BLAST hit counters
        "BLAST: total hits read: 8",
        "BLAST: hits kept after quality filters: 7",
        # Annotation counters
        "ANNOTATION: total FASTA headers considered: 4",
        "ANNOTATION: reads with BLAST hits: 3",
        "ANNOTATION: reads without BLAST hits: 1",
        # Output notices
        "E-value plot written to:",
        "Taxa summary written to:",
        "Header annotations written to:",
        "Circle diagram JSON written to:",
        # Summary statistics
        "=== ANNOTATION STATISTICS ===",
        "percentage_annotated: 75.0",
        "unique_annotated: 150",
        "total_unique: 200",
    )

    with open(processed_output.anno_stats) as stats_file:
        log = stats_file.read()

    # Checked one by one so the first missing fragment is the one reported.
    for fragment in required_fragments:
        assert fragment in log
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
544 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
def test_missing_blast_file_graceful(self, test_data_dir):
    """
    Crash / robustness test.

    When the BLAST file does NOT exist, the processor should:
    - not crash
    - write an anno_stats log mentioning the error
    - return without creating header_anno

    Output files left behind by an earlier run are removed before the
    processor is invoked, so every assertion below reflects this run only.
    """
    from pathlib import Path

    input_dir = test_data_dir / "input"
    output_dir = test_data_dir / "output"

    # Minimal one-record FASTA so only the BLAST input is missing.
    fasta = input_dir / "missing_blast_test.fasta"
    fasta.write_text(">read1(10) count=10;\nACGT\n")

    missing_blast = input_dir / "this_file_does_not_exist.tabular"
    # Guard the precondition: the scenario is meaningless if the file exists.
    assert not Path(str(missing_blast)).exists(), \
        "Test precondition violated: the 'missing' BLAST file exists"

    class Args:
        def __init__(self):
            self.input_anno = str(missing_blast)
            self.input_unanno = str(fasta)
            self.header_anno = str(output_dir / "missing_blast_header.xlsx")
            self.filtered_fasta = str(output_dir / "missing_blast_filtered.fasta")
            self.anno_stats = str(output_dir / "missing_blast_stats.txt")
            self.eval_plot = None
            self.taxa_output = None
            self.circle_data = None
            self.log = str(output_dir / "log.txt")
            self.uncertain_threshold = 0.9
            self.eval_threshold = 1e-10
            self.use_counts = True
            self.min_bitscore = 0
            self.min_support = 1
            self.ignore_rank = 'unknown'
            self.ignore_taxonomy = 'environmental'
            self.bitscore_perc_cutoff = 10
            self.ignore_obiclean_type = 'singleton'
            self.ignore_illuminapairend_type = 'pairend'
            self.min_identity = 0
            self.min_coverage = 0
            self.ignore_seqids = ''

    args = Args()
    header_path = Path(args.header_anno)
    stats_path = Path(args.anno_stats)

    # A stale header file would make the "not created" check fail spuriously;
    # a stale stats file would let the "log written" checks pass spuriously.
    for stale in (header_path, stats_path):
        if stale.exists():
            stale.unlink()

    process_single_file(args.input_anno, args.input_unanno, args, log_messages=[])

    assert not header_path.exists(), "Header file should not be created when BLAST is missing"

    assert stats_path.exists(), "anno_stats log should be created on error"

    log_text = stats_path.read_text()
    assert "Error: Input file" in log_text, "Missing BLAST file error not logged"
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
598 assert "Starting processing for FASTA" in log_text, "FASTA processing log missing" |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
599 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
600 |
|
9ca209477dfd
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents:
0
diff
changeset
|
601 if __name__ == "__main__": |
|
0
a3989edf0a4a
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
602 pytest.main([__file__]) |
