annotate tests/test_blast_annotations_processor.py @ 2:9ca209477dfd draft default tip

planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
author onnodg
date Mon, 15 Dec 2025 16:43:36 +0000
parents a3989edf0a4a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
1 """
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
2 Test suite for BLAST annotation processor.
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
3 """
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
4 import re
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
5 import ast
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
6 import pytest
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
7 import os
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
8 import sys
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
9 import json
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
10 import pandas as pd
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
11 from pathlib import Path
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
12
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
13 # Add the module to path for importing
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
14 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
15 from Stage_1_translated.NLOOR_scripts.process_annotations_tool.blast_annotations_processor import (
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
16 process_single_file,
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
17 resolve_tax_majority,
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
18 TAXONOMIC_LEVELS,
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
19 check_header_string
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
20 )
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
21
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
22
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
23 class TestBlastAnnotationProcessor:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
24 """Test class for BLAST annotation processor"""
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
25
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
26 @pytest.fixture(scope="class")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
def test_data_dir(self):
    """Create the ``test-data`` directory tree and return its root.

    The root is the relative path ``test-data`` (resolved against the
    current working directory) and it receives the subdirectories
    ``input``, ``expected`` and ``output``. Directories that already
    exist are left as they are.

    Returns:
        Path: the ``test-data`` root directory.
    """
    root = Path("test-data")
    # The root comes first: the children are created without
    # ``parents=True`` and therefore need it to exist already.
    for folder in (root, root / "input", root / "expected", root / "output"):
        folder.mkdir(exist_ok=True)
    return root
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
36
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
37 @pytest.fixture(scope="class")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
def sample_files(self, test_data_dir):
    """Write the sample BLAST table and FASTA file used by the tests.

    Both files are created inside ``<test_data_dir>/input``.

    Args:
        test_data_dir: root of the test data tree; its ``input``
            subdirectory must already exist.

    Returns:
        dict: ``'blast'`` and ``'fasta'`` mapped to the written file
        paths, as strings.
    """
    # NOTE(review): the column separators below appear as single spaces in
    # this view of the file; a ``.tabular`` BLAST table is presumably
    # tab-delimited -- confirm against the original before relying on it.
    blast_table = """#Query ID #Subject #Subject accession #Subject Taxonomy ID #Identity percentage #Coverage #evalue #bitscore #Source #Taxonomy
read1(100) subject2 id2 subject2 90.0 95 1e-45 180 database1 Bacteria / Firmicutes / Bacilli / Bacillales / Bacillaceae / Bacillus / Bacillus_subtilis
read1(100) subject1 id1 subject1 95.889 100 1e-50 200 database1 Bacteria / Firmicutes / Bacilli / Bacillales / Bacillaceae / Bacillus / Bacillus_subtilis
read2(50) subject3 id3 subject3 85.0 90 1e-40 160 database2 Bacteria / Proteobacteria / Gammaproteobacteria / Enterobacterales / Enterobacteriaceae / Escherichia / Escherichia_coli
read3(25) subject4 id4 subject4 80.0 85 1e-35 140 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_smithii
read4(25) subject4 id4 subject4 80.0 85 1e-35 140 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_blabla
read4(25) subject4 id4.1 subject4 80.0 85 1e-40 140 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_eclhi
read4(25) subject4 id4 subject4 80.0 85 1e-35 140 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_elchi
read4(25) subject4 id4.2 subject4 90.0 87 1e-50 160 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_smithii
"""

    fasta_records = """>read1(100) count=100;
ATCGATCGATCGATCG
>read2(50) count=50;
GCTAGCTAGCTAGCTA
>read3(25) count=25;
TGACTGACTGACTGAC
>read4(25) count=25;
TGAAAAAAACACCAC
"""

    target_dir = test_data_dir / "input"
    blast_path = target_dir / "test_blast.tabular"
    fasta_path = target_dir / "test_sequences.fasta"

    # Write the BLAST table first, then the FASTA file.
    for destination, payload in ((blast_path, blast_table), (fasta_path, fasta_records)):
        with open(destination, 'w') as handle:
            handle.write(payload)

    return {'blast': str(blast_path), 'fasta': str(fasta_path)}
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
75
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
76 @pytest.fixture(scope="class")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
def processed_output(self, test_data_dir, sample_files):
    """Run the processor once on the sample files and return its options.

    Builds an options object that points at the sample inputs, places
    every output file under ``<test_data_dir>/output`` and carries the
    filter settings, then passes it to ``process_single_file``.

    Args:
        test_data_dir: root of the test data tree.
        sample_files: dict with the ``'blast'`` and ``'fasta'`` input paths.

    Returns:
        The options object handed to ``process_single_file``; its
        attributes hold the output file paths.
    """
    out_dir = test_data_dir / "output"

    # Option name -> file name of the corresponding output under out_dir.
    output_names = {
        "eval_plot": "eval_plot.png",
        "taxa_output": "taxa_output.txt",
        "circle_data": "circle_data.json",
        "header_anno": "header_anno.xlsx",
        "anno_stats": "anno_stats.txt",
        "filtered_fasta": "filtered_fasta.fasta",
        "log": "log.txt",
    }

    # Filter and threshold settings, passed through unchanged.
    settings = {
        "uncertain_threshold": 0.9,
        "eval_threshold": 1e-10,
        "min_bitscore": 60,
        "min_support": 1,
        "ignore_rank": 'unknown',
        "ignore_taxonomy": 'environmental',
        "bitscore_perc_cutoff": 10,
        "ignore_obiclean_type": 'singleton',
        "ignore_illuminapairend_type": 'pairend',
        "min_identity": 70,
        "min_coverage": 70,
        "ignore_seqids": '',
        "use_counts": True,
    }

    class Args:
        def __init__(self):
            self.input_anno = sample_files['blast']
            self.input_unanno = sample_files['fasta']
            for option, file_name in output_names.items():
                setattr(self, option, str(out_dir / file_name))
            for option, value in settings.items():
                setattr(self, option, value)

    args = Args()
    collected_log = []
    process_single_file(args.input_anno, args.input_unanno, args, collected_log)

    return args
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
111
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
112
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
113 def test_read_count_consistency(self, processed_output):
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
114 """
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
115 Test 1: Read Count Consistency
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
116
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
117 Verifies that read counts from FASTA headers are correctly preserved
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
118 and aggregated in all output files.
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
119 """
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
120 df = pd.read_excel(processed_output.header_anno, sheet_name='Individual_Reads')
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
121 expected_counts = {'read1': 100, 'read2': 50, 'read3': 25, 'read4':25}
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
122
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
123 skipped_reads = []
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
124
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
125 for read_name, expected_count in expected_counts.items():
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
126 subset = df.loc[df['header'] == read_name]
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
127 if subset.empty:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
128 skipped_reads.append(read_name) # remember we skip this read
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
129 continue
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
130 row = subset.iloc[0]
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
131 assert row['count'] == expected_count, f"Count mismatch for {read_name}"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
132
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
133 with open(processed_output.anno_stats, 'r') as f:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
134 stats_content = f.read()
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
135 # Total unique count should be 200 (100+50+25+25)
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
136 assert 'total_unique: 200' in stats_content, "Total unique count incorrect in stats"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
137 if skipped_reads:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
138 assert all(read not in df['header'].values for read in skipped_reads)
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
139 print("✓ Test 1 PASSED: Read counts consistent across all outputs")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
140
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
def test_lowest_common_ancester(self, processed_output):
    """
    Test 2: Lowest Common Ancestor Selection

    Passes two hand-built lineage -> hit-count tables to
    ``resolve_tax_majority`` with a threshold of 0.9 and checks the result:

    * a table in which one species ('Ciceronia a', 450 of 490 hits)
      dominates must yield that species in the short result;
    * a table in which the best single lineage holds only 90 of 106 hits
      must yield 'Viridiplantae / Streptophyta / Uncertain taxa' in the
      long result.

    The ``processed_output`` argument is not used by this test.
    """
    dominant_species_hits = {
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita a': 10,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita b': 1,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita c': 1,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita d': 1,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita e': 1,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia a': 450,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia b': 2,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia c': 2,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia d': 2,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia e': 2,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Ciceronia / Ciceronia f': 12,
        'Viridiplantae / Streptophyta / Bryopsida / Funariales / Funariaceae / Funaria / Uncertain taxa': 6,
    }
    # Only the short form is inspected for this case.
    resolved_short, _ = resolve_tax_majority(dominant_species_hits, 0.9)
    assert 'Ciceronia a' in resolved_short, "Dominant species 'Ciceronia a' not selected"

    below_threshold_hits = {
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita a': 90,
        'Viridiplantae / Streptophyta / Magnoliopsida / Asterales / Asteraceae / Cicerbita / Cicerbita b': 10,
        'Viridiplantae / Streptophyta / Bryopsida / Funariales / Funariaceae / Funaria / Uncertain taxa': 6,
    }
    # Only the long form is inspected for this case.
    _, resolved_long = resolve_tax_majority(below_threshold_hits, 0.9)
    assert 'Viridiplantae / Streptophyta / Uncertain taxa' in resolved_long, "Conflict not resolved to uncertain taxa"

    print("✓ Test 2 PASSED: Lowest common ancestor works correctly")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
173
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
174
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
def test_taxonomic_conflict_resolution(self, processed_output):
    """
    Test 3: Taxonomic Conflict Resolution

    Runs ``resolve_tax_majority`` with a 0.9 threshold on two small,
    hand-made hit tables that share the same two lineages but differ in
    how strongly one of them dominates, and checks the short result of each.
    """
    threshold = 0.9
    firmicutes_lineage = 'Bacteria / Firmicutes / Bacilli'
    proteobacteria_lineage = 'Bacteria / Proteobacteria / Gammaproteobacteria'

    # 2 of 3 hits (0.67) agree, which is below 0.9, so the short result
    # is expected to contain 'Uncertain taxa'.
    low_agreement = {firmicutes_lineage: 2, proteobacteria_lineage: 1}
    short_label, _long_label = resolve_tax_majority(low_agreement, threshold)
    assert 'Uncertain taxa' in short_label, "Conflict not resolved to uncertain taxa"

    # 9 of 10 hits agree; the short result is expected to name Firmicutes.
    high_agreement = {firmicutes_lineage: 9, proteobacteria_lineage: 1}
    short_label, _long_label = resolve_tax_majority(high_agreement, threshold)
    assert 'Firmicutes' in short_label, "High confidence case not resolved correctly"

    print("✓ Test 3 PASSED: Taxonomic conflict resolution working correctly")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
201
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
def test_output_file_structures(self, processed_output):
    """
    Test 4: Output File Structure Validation

    Verifies the structure of every output referenced by ``processed_output``:

    * ``header_anno``: Excel workbook exists, contains the sheets
      'Individual_Reads' and 'Merged_by_Taxa', and 'Individual_Reads'
      has all expected columns;
    * ``taxa_output``: exactly two lines, the second holding the
      'percentage_rooted\\tnumber_rooted' header fields;
    * ``anno_stats``: contains the line reporting 4 kept FASTA headers;
    * ``circle_data``: JSON list with one entry per ``TAXONOMIC_LEVELS`` item;
    * ``filtered_fasta``: two lines per kept header.
    """
    # Number of FASTA headers expected to survive filtering; used for both
    # the stats line and the filtered FASTA length so they cannot drift apart.
    kept_headers = 4

    excel_file = processed_output.header_anno
    assert os.path.exists(excel_file), "Excel output file not created"

    # Context manager closes the workbook handle even when an assertion fails.
    with pd.ExcelFile(excel_file) as xl_file:
        expected_sheets = ['Individual_Reads', 'Merged_by_Taxa']
        missing_sheets = [sheet for sheet in expected_sheets if sheet not in xl_file.sheet_names]
        assert not missing_sheets, f"Missing Excel sheets: {missing_sheets}"
        df_individual = pd.read_excel(xl_file, sheet_name='Individual_Reads')

    expected_cols = ['header', 'seq_id', 'source', 'count', 'taxa', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']
    missing_cols = [col for col in expected_cols if col not in df_individual.columns]
    assert not missing_cols, f"Missing columns in Individual_Reads: {missing_cols}"

    with open(processed_output.taxa_output, 'r') as f:
        taxa_lines = f.readlines()
    assert len(taxa_lines) == 2, f"Taxa output should have exactly 2 lines, found {len(taxa_lines)}"
    assert 'percentage_rooted\tnumber_rooted' in taxa_lines[1], "Taxa output header incorrect"

    with open(processed_output.anno_stats, 'r') as f:
        anno_stats = f.readlines()
    kept_line = f'FASTA: headers kept after filters and min_support=1: {kept_headers}\n'
    assert kept_line in anno_stats, "Kept-header count line missing from annotation stats"

    with open(processed_output.circle_data, 'r') as f:
        circle_data = json.load(f)

    assert isinstance(circle_data, list), "Circle data should be a list"
    assert len(circle_data) == len(TAXONOMIC_LEVELS), "Circle data should have entry per taxonomic level"

    with open(processed_output.filtered_fasta, 'r') as f:
        filtered_fasta = f.readlines()
    # Each kept record contributes a header line and a sequence line.
    assert len(filtered_fasta) == kept_headers * 2, (
        f"Filtered FASTA should have {kept_headers * 2} lines, found {len(filtered_fasta)}"
    )

    print("✓ Test 4 PASSED: All output files have correct structure")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
241
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
242
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
243
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
def test_header_synchronization(self, test_data_dir):
    """
    Test 5: Header Synchronization Between Files

    Tests that the processor correctly handles mismatched headers between
    annotated and unannotated files.

    The BLAST table written here names its second query ``read2.1(50)`` while
    the FASTA file names the corresponding record ``read2(50)``. After
    ``process_single_file`` has run, the ``Individual_Reads`` sheet is
    expected to list ``read1`` and ``read3`` and neither spelling of the
    second read.
    """
    input_dir = test_data_dir / "input"
    output_dir = test_data_dir / "output"

    # Create mismatched files
    # NOTE(review): a ".tabular" file is presumably tab-delimited; confirm the
    # column separators below against the repository copy of this test.
    blast_content = """#Query ID #Subject #Subject accession #Subject Taxonomy ID #Identity percentage #Coverage #evalue #bitscore #Source #Taxonomy
read1(100) source=NCBI sequenceID=KR738003 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Malvales suborder=NA infraorder=NA superfamily=NA family=Malvaceae genus=Hibiscus species=Hibiscus trionum markercode=trnL lat=0.304 lon=36.87 source=NCBI N/A 100.000 100 7.35e-30 54.7 Viridiplantae / Streptophyta / Magnoliopsida / Malvales / Malvaceae / Hibiscus / Hibiscus trionum
read1(100) source=NCBI sequenceID=KR738670 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Malvales suborder=NA infraorder=NA superfamily=NA family=Malvaceae genus=Hibiscus species=Hibiscus trionum markercode=trnL lat=0.304 lon=36.87 source=NCBI N/A 100.000 100 7.35e-14 54.7 Viridiplantae / Streptophyta / Magnoliopsida / Malvales / Malvaceae / Hibiscus / Hibiscus trionum
read2.1(50) 1 2 3 4 5 6 7 8 9
read3(25) source=NCBI sequenceID=KR737595 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Malvales suborder=NA infraorder=NA superfamily=NA family=Malvaceae genus=Hibiscus species=Hibiscus trionum markercode=trnL lat=0.304 lon=36.87 source=NCBI N/A 97.561 87 1.68e-14 71.3 Viridiplantae / Streptophyta / Magnoliopsida / Malvales / Malvaceae / Hibiscus / Hibiscus trionum
"""

    fasta_content = """>read1(100) count=100;
ATCG
>read2(50) merged_sample={}; count=1011; direction=right; seq_b_insertion=0; sminR=40.0; ali_length=53; seq_b_deletion=248; seq_a_deletion=248; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; seq_b_single=0;
gggcaatcctgagccaagtgactggagttcagataggtgcagagactcaatgg
>read3(25) merged_sample={}; count=179; direction=right; sminR=40.0; ali_length=49; seq_b_deletion=252; seq_a_deletion=252; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; seq_b_single=0;
gggcaatcctgagccaactggagttcagataggtgcagagactcaatgg
"""

    blast_file = input_dir / "test_sync.tabular"
    fasta_file = input_dir / "test_sync.fasta"

    # Explicit encoding keeps the fixture files identical across platforms.
    with open(blast_file, 'w', encoding='utf-8') as f:
        f.write(blast_content)
    with open(fasta_file, 'w', encoding='utf-8') as f:
        f.write(fasta_content)

    class Args:
        """Minimal stand-in for the parsed command-line arguments."""

        def __init__(self):
            self.input_anno = blast_file
            self.input_unanno = fasta_file
            # Written next to the other outputs so the test does not depend
            # on the directory pytest happens to be started from.
            self.header_anno = str(output_dir / "sync_test.xlsx")
            self.eval_plot = None
            self.taxa_output = None
            self.circle_data = None
            self.filtered_fasta = str(output_dir / "filtered_fasta.fasta")
            self.anno_stats = str(output_dir / "sync_stats.txt")
            self.log = str(output_dir / "log.txt")
            self.uncertain_threshold = 0.9
            self.eval_threshold = 1e-10
            self.use_counts = True
            self.min_bitscore = 50
            self.min_support = 1
            self.ignore_rank = 'unknown'
            self.ignore_taxonomy = 'environmental'
            self.bitscore_perc_cutoff = 10
            self.ignore_obiclean_type = 'singleton'
            self.ignore_illuminapairend_type = 'pairend'
            self.min_identity = 30
            self.min_coverage = 30
            self.ignore_seqids = ''

    args = Args()
    process_single_file(args.input_anno, args.input_unanno, args, log_messages=[])
    df = pd.read_excel(args.header_anno, sheet_name='Individual_Reads')
    # Capture an optional ".N" suffix as well: with a plain r'read\d+' the
    # identifier "read2.1" is truncated to "read2", which makes the
    # 'read2.1' assertion below impossible to fail.
    extracted = df['header'].str.extract(r'(read\d+(?:\.\d+)?)')
    headers = extracted[0].tolist()
    # Should have read1 and read3, read2 should be skipped
    assert 'read1' in headers, "read1 should be present"
    assert 'read2' not in headers, "read2 should not be present"
    assert 'read2.1' not in headers, "read2.1 should not be present"
    assert 'read3' in headers, "read3 should be present"

    print("✓ Test 5 PASSED: Header synchronization handled correctly")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
315
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
def test_check_header_string_all_behaviors(self):
    """
    Exercise check_header_string over accepted and rejected headers.

    Each case is a tuple of (header, first ignore argument, second ignore
    argument, expected result). The function must return exactly ``True``
    for headers that are allowed and exactly ``False`` for blocked ones.
    """
    from Stage_1_translated.NLOOR_scripts.process_annotations_tool.blast_annotations_processor import (
        check_header_string,
    )

    cases = (
        # clean header — allowed
        (">readA count=10;", "", "", True),
        # blocks singleton
        (">r obiclean_status={'XXX': 's'}", "singleton", "", False),
        # blocks variant
        (">r obiclean_status={'XXX': 'i'}", "variant", "", False),
        # blocks head
        (">r obiclean_status={'XXX': 'h'}", "head", "", False),
        # blocks pairend
        (">r PairEnd", "pairend", "", False),
        # blocks consensus
        (">r CONS", "consensus", "", False),
        # blocks custom string
        (">r FooBar", "FooBar", "", False),
        # blocks when string is in second param
        (">r blah", "", "blah", False),
        # blocks when multiple ignore values contain it
        (">r PairEnd obiclean_status={'XXX': 's'}", "pairend,singleton", "", False),
        # allows when no match
        (">r something", "pairend", "", True),
    )

    for header, first_ignore, second_ignore, expected in cases:
        assert check_header_string(header, first_ignore, second_ignore) is expected
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
340
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
341 def test_excel_merged_vs_individual(self, processed_output):
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
342 """
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
343 Test 6: Excel Merged vs Individual Sheet Consistency
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
344
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
345 Verifies that the merged sheet correctly aggregates data from the individual sheet.
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
346 """
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
347 df_individual = pd.read_excel(processed_output.header_anno, sheet_name='Individual_Reads')
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
348 df_merged = pd.read_excel(processed_output.header_anno, sheet_name='Merged_by_Taxa')
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
349
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
350 individual_taxa = df_individual['taxa'].nunique()
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
351
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
352 assert len(df_merged) == individual_taxa, "Merged sheet doesn't match unique taxa count"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
353
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
354 # Check that counts are properly aggregated
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
355 # For taxa with multiple reads, counts should be summed
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
356 for _, merged_row in df_merged.iterrows():
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
357 taxa = merged_row['taxa']
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
358 individual_rows = df_individual[df_individual['taxa'] == taxa]
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
359
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
360 expected_count = individual_rows['count'].sum()
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
361 actual_count = merged_row['count']
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
362
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
363 assert actual_count == expected_count, f"Count mismatch for taxa {taxa}: expected {expected_count}, got {actual_count}"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
364
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
365 print("✓ Test 6 PASSED: Excel merged sheet correctly aggregates individual data")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
366
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
367 def test_annotation_statistics_accuracy(self, processed_output, sample_files):
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
368 """
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
369 Test 7: Annotation Statistics Accuracy
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
370
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
371 Verifies that calculated annotation statistics match the actual data.
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
372 Adapted for the new plain-text log file instead of tab-separated output.
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
373 """
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
374 stats = {}
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
375
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
376 with open(processed_output.anno_stats, 'r') as f:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
377 lines = f.readlines()
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
378
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
379 for line in lines:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
380 line = line.strip()
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
381 if not line or ":" not in line:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
382 continue
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
383
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
384 key, value = line.split(":", 1)
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
385 key = key.strip()
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
386 value = value.strip()
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
387
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
388 try:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
389 stats[key] = float(value)
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
390 except ValueError:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
391 stats[key] = value
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
392
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
393 assert stats["total_sequences"] == 4.0, "Total sequences count incorrect"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
394 assert stats["annotated_sequences"] == 3.0, "Annotated sequence count incorrect"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
395 assert stats["total_unique"] == 200.0, "Total unique count incorrect"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
396 assert stats["unique_annotated"] == 150.0, "Unique annotated count incorrect"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
397 assert stats["percentage_annotated"] == 75.0, "Percentage annotated incorrect"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
398 assert stats["percentage_unique_annotated"] == 75.0, "Percentage unique annotated incorrect"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
399
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
400 print("✓ Test 7 PASSED: Annotation statistics are accurate")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
401
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
402 def test_combined_all_filters(self, test_data_dir):
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
403 """
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
404 Single integrated test that validates all FASTA + BLAST filter rules.
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
405 Every read is designed to fail exactly one filter, except readOK.
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
406 """
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
407 input_dir = test_data_dir / "input"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
408 output_dir = test_data_dir / "output"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
409
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
410 fasta = input_dir / "combined_filters.fasta"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
411 blast = input_dir / "combined_filters.tabular"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
412
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
413 fasta.write_text(
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
414 ">lowSupport(1) count=1;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
415 ">obicleanFail(10) count=10; obiclean_status={'XXX': 's'};\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
416 ">pairendFail_PairEnd(10) count=10;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
417 ">identityFail(10) count=10;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
418 ">coverageFail(10) count=10;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
419 ">bitscoreFail(10) count=10;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
420 ">bscutoffHigh(10) count=10;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
421 ">envTaxFail(10) count=10;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
422 ">rankFail(10) count=10;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
423 ">seqidFail(10) count=10;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
424 ">readOK(10) count=10;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
425 ">readOK_multiple_id(10) count=10;\nACGT\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
426 )
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
427
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
428 blast.write_text(
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
429 # min_support (count=1 < 5)
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
430 "lowSupport(1)\ts\tid1\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
431
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
432 # ignore_obiclean_type = singleton
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
433 "obicleanFail(10)\ts\tid2\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
434
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
435 # ignore_illuminapairedend_type = pairend
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
436 "pairendFail_PairEnd(10)\ts\tid3\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
437
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
438 # min_identity = 90 → identity = 50 fails
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
439 "identityFail(10)\ts\tid4\t123\t50\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
440
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
441 # min_coverage = 50 → coverage = 20 fails
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
442 "coverageFail(10)\ts\tid5\t123\t99\t20\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
443
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
444 # min_bitscore = 60 → bitscore = 10 fails
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
445 "bitscoreFail(10)\ts\tid6\t123\t99\t99\t1e-50\t10\tsrc\tA / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
446
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
447 # bitscore_perc_cutoff: best = 200 → cutoff = 180 → bitscore 150 fails
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
448 "bscutoffHigh(10)\ts\tid7.1\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G.1\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
449 "bscutoffHigh(10)\ts\tid7.2\t123\t99\t99\t1e-50\t150\tsrc\tA / B / C / D / E / F / G.2\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
450
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
451 # ignore_taxonomy = 'environmental'
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
452 "envTaxFail(10)\ts\tid8\t123\t99\t99\t1e-50\t200\tsrc\tEnvironmental / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
453
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
454 # ignore_rank = 'unknown'
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
455 "rankFail(10)\ts\tid9\t123\t99\t99\t1e-50\t200\tsrc\tUnknown / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
456
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
457 # ignore_seqids = BADSEQ
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
458 "seqidFail(10)\ts\tBADSEQ\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
459
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
460 # readOK (valid, full taxonomy)
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
461 "readOK(10)\ts\tidGood\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
462
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
463 # readOK_multiple_id (valid, full taxonomy, multiple id's)
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
464 "readOK_multiple_id(10)\ts\tidGood.1\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
465 "readOK_multiple_id(10)\ts\tidGood.2\t123\t99\t99\t1e-50\t200\tsrc\tA / B / C / D / E / F / G\n"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
466 )
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
467
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
468 class Args:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
469 def __init__(self):
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
470 self.input_anno = str(blast)
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
471 self.input_unanno = str(fasta)
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
472 self.header_anno = str(output_dir / "combined.xlsx")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
473 self.filtered_fasta = str(output_dir / "combined.fasta")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
474 self.anno_stats = str(output_dir / "combined_stats.txt")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
475 self.eval_plot = None
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
476 self.taxa_output = None
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
477 self.circle_data = None
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
478 self.log = str(output_dir / "log.txt")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
479 self.uncertain_threshold = 0.9
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
480 self.eval_threshold = 1e-10
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
481 self.use_counts = True
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
482 self.min_bitscore = 60
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
483 self.min_support = 5
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
484 self.ignore_rank = 'unknown'
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
485 self.ignore_taxonomy = 'environmental'
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
486 self.bitscore_perc_cutoff = 10
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
487 self.ignore_obiclean_type = 'singleton'
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
488 self.ignore_illuminapairend_type = 'pairend'
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
489 self.min_identity = 90
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
490 self.min_coverage = 50
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
491 self.ignore_seqids = 'BADSEQ'
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
492
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
493 args = Args()
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
494 process_single_file(args.input_anno, args.input_unanno, args, log_messages=[])
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
495
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
496 with open(args.filtered_fasta) as f:
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
497 headers = [l.strip() for l in f if l.startswith(">")]
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
498 assert '>obicleanFail(10) count=10;' not in headers
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
499 assert '>pairendFail_PairEnd(10) count=10;' not in headers
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
500 assert len(headers) == 9, "FASTA filtering only applies to header-based rules"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
501
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
502 df = pd.read_excel(args.header_anno, sheet_name="Individual_Reads")
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
503 seq_ids = {
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
504 sid
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
505 for val in df["seq_id"]
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
506 for sid in (ast.literal_eval(val) if isinstance(val, str) else val)
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
507 }
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
508
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
509 expected = {'idGood.1', 'idGood', 'id7.1', 'idGood.2'}
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
510
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
511 assert seq_ids == expected, f"Expected surviving seq_ids {expected}, got {seq_ids}"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
512
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
513
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
def test_log_filters_count(self, processed_output):
    """
    Check the contents of the annotation statistics report.

    The file at ``processed_output.anno_stats`` is read once and every
    expected fragment must occur somewhere in it: the parameter section,
    the FASTA / BLAST / annotation counters, the "written to" messages
    and the summary statistics.
    """
    expected_fragments = (
        # Parameter section
        "=== PARAMETERS USED ===",
        "input_anno:",
        "input_unanno:",
        # FASTA counters
        "FASTA: total headers: 4",
        "FASTA: headers kept after filters",
        # BLAST counters
        "BLAST: total hits read: 8",
        "BLAST: hits kept after quality filters: 7",
        # Annotation counters
        "ANNOTATION: total FASTA headers considered: 4",
        "ANNOTATION: reads with BLAST hits: 3",
        "ANNOTATION: reads without BLAST hits: 1",
        # Output-location messages
        "E-value plot written to:",
        "Taxa summary written to:",
        "Header annotations written to:",
        "Circle diagram JSON written to:",
        # Summary statistics
        "=== ANNOTATION STATISTICS ===",
        "percentage_annotated: 75.0",
        "unique_annotated: 150",
        "total_unique: 200",
    )

    with open(processed_output.anno_stats) as handle:
        report = handle.read()

    # Checked in the listed order; the first missing fragment fails the test.
    for fragment in expected_fragments:
        assert fragment in report
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
544
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
def test_missing_blast_file_graceful(self, test_data_dir):
    """
    Crash / robustness test.

    When the BLAST file does NOT exist, the processor should:
    - not crash
    - write an anno_stats log mentioning the error
    - return without creating header_anno

    The header and stats outputs of an earlier run are removed before the
    processor is called, so the existence and content checks below reflect
    this invocation only.
    """
    input_dir = test_data_dir / "input"
    output_dir = test_data_dir / "output"

    fasta = input_dir / "missing_blast_test.fasta"
    fasta.write_text(">read1(10) count=10;\nACGT\n")

    # This path is deliberately never created.
    missing_blast = input_dir / "this_file_does_not_exist.tabular"

    header_path = output_dir / "missing_blast_header.xlsx"
    stats_path = output_dir / "missing_blast_stats.txt"

    # A leftover stats file would let the assertions pass without the
    # processor writing anything; a leftover header would fail them wrongly.
    for stale in (header_path, stats_path):
        if stale.exists():
            stale.unlink()

    class Args:
        def __init__(self):
            self.input_anno = str(missing_blast)
            self.input_unanno = str(fasta)
            self.header_anno = str(header_path)
            self.filtered_fasta = str(output_dir / "missing_blast_filtered.fasta")
            self.anno_stats = str(stats_path)
            self.eval_plot = None
            self.taxa_output = None
            self.circle_data = None
            self.log = str(output_dir / "log.txt")
            self.uncertain_threshold = 0.9
            self.eval_threshold = 1e-10
            self.use_counts = True
            self.min_bitscore = 0
            self.min_support = 1
            self.ignore_rank = 'unknown'
            self.ignore_taxonomy = 'environmental'
            self.bitscore_perc_cutoff = 10
            self.ignore_obiclean_type = 'singleton'
            self.ignore_illuminapairend_type = 'pairend'
            self.min_identity = 0
            self.min_coverage = 0
            self.ignore_seqids = ''

    args = Args()

    # Must return normally even though input_anno does not exist.
    process_single_file(args.input_anno, args.input_unanno, args, log_messages=[])

    assert not header_path.exists(), "Header file should not be created when BLAST is missing"

    assert stats_path.exists(), "anno_stats log should be created on error"

    log_text = stats_path.read_text()
    assert "Error: Input file" in log_text, "Missing BLAST file error not logged"
    assert "Starting processing for FASTA" in log_text, "FASTA processing log missing"
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
599
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
600
9ca209477dfd planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
onnodg
parents: 0
diff changeset
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file as a script
    # reports test failures through the process exit status.
    raise SystemExit(pytest.main([__file__]))