Mercurial > repos > onnodg > add_taxonomic_labels
annotate tests/test_add_taxonomic_labels.py @ 1:5155c1c41198 draft default tip
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit d771f9fbfd42bcdeda1623d954550882a0863847-dirty
| author | onnodg |
|---|---|
| date | Mon, 20 Oct 2025 12:25:29 +0000 |
| parents | abd214795fa5 |
| children |
| rev | line source |
|---|---|
|
0
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
1 """ |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
2 Test suite for add taxonomic labels tool. |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
3 """ |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
4 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
5 import pytest |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
6 import tempfile |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
7 import os |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
8 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
9 from Stage_1_translated.NLOOR_scripts.add_header_tool.add_taxonomic_labels import add_labels |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
10 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
11 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
12 class TestTaxonomyProcessing: |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
13 """Test the main taxonomy processing functionality.""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
14 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
15 @pytest.fixture |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
16 def sample_input_data(self): |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
17 """Provide sample input data for testing.""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
18 return """#Query ID #Subject #Subject accession #Subject Taxonomy ID #Identity percentage #Coverage #evalue #bitscore #Source #Taxonomy |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
19 M01687:476:000000000-LL5F5:1:1101:20413:7447_PairEnd(1) source=NCBI sequenceID=HM590330 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Malpighiales suborder=NA infraorder=NA superfamily=NA family=Salicaceae genus=Populus species=Populus tremula markercode=trnL lat=50.47 lon=-104.37 source=NCBI N/A 100.000 91 8.47e-10 54.7 Genbank unknown kingdom / unknown phylum / unknown class / unknown order / unknown family / unknown genus / unknown species |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
20 M01687:476:000000000-LL5F5:1:2115:26447:7735_CONS(16) source=NCBI sequenceID=KC539736 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Rosales suborder=NA infraorder=NA superfamily=NA family=Ulmaceae genus=Ulmus species=Ulmus laevis markercode=trnL lat=NA lon=NA source=NCBI N/A 100.000 89 1.44e-38 152 Genbank unknown kingdom / unknown phylum / unknown class / unknown order / unknown family / unknown genus / unknown species""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
21 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
22 @pytest.fixture |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
23 def temp_files(self): |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
24 """Create temporary input and output files.""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
25 input_file = tempfile.NamedTemporaryFile(mode='w', delete=False) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
26 input_file.close() |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
27 output_file = tempfile.NamedTemporaryFile(mode='w', delete=False) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
28 output_file.close() |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
29 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
30 yield input_file.name, output_file.name |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
31 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
32 # Cleanup |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
33 os.unlink(input_file.name) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
34 os.unlink(output_file.name) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
35 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
36 def test_header_creation(self, temp_files): |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
37 """Test that the correct header is written to output file.""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
38 input_file, output_file = temp_files |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
39 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
40 # Create an input file that contains only a header line |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
41 with open(input_file, 'w') as f: |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
42 f.write("#Query ID header line\n") |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
43 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
44 add_labels(input_file, output_file, [1, 2, 4, 7, 11, 12, 13]) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
45 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
46 with open(output_file, 'r') as f: |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
47 header = f.readline().strip() |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
48 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
49 expected_header = '#Query ID\t#Subject\t#Subject accession\t#Subject Taxonomy ID\t#Identity percentage\t#Coverage\t#evalue\t#bitscore\t#Source\t#Taxonomy' |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
50 assert header == expected_header |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
51 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
52 def test_basic_taxonomy_processing(self, temp_files, sample_input_data): |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
53 """Test basic taxonomy line processing.""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
54 input_file, output_file = temp_files |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
55 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
56 with open(input_file, 'w') as f: |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
57 f.write(sample_input_data) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
58 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
59 add_labels(input_file, output_file, [1, 2, 4, 7, 11, 12, 13]) # kingdom, phylum, class, order, family, genus, species |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
60 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
61 with open(output_file, 'r') as f: |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
62 lines = f.readlines() |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
63 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
64 # Should have header + 2 data lines |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
65 assert len(lines) == 3 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
66 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
67 # Check first data line |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
68 first_data_line = lines[1].strip() |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
69 assert 'M01687:476:000000000-LL5F5:1:1101:20413:7447_PairEnd(1) source=NCBI sequenceID=HM590330 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Malpighiales suborder=NA infraorder=NA superfamily=NA family=Salicaceae genus=Populus species=Populus tremula markercode=trnL lat=50.47 lon=-104.37 source=NCBI N/A 100.000 91 8.47e-10 54.7 NCBI Viridiplantae / Streptophyta / Magnoliopsida / Malpighiales / Salicaceae / Populus / Populus tremula' in first_data_line |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
70 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
71 # Check second data line |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
72 second_data_line = lines[2].strip() |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
73 assert 'M01687:476:000000000-LL5F5:1:2115:26447:7735_CONS(16) source=NCBI sequenceID=KC539736 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Rosales suborder=NA infraorder=NA superfamily=NA family=Ulmaceae genus=Ulmus species=Ulmus laevis markercode=trnL lat=NA lon=NA source=NCBI N/A 100.000 89 1.44e-38 152 NCBI Viridiplantae / Streptophyta / Magnoliopsida / Rosales / Ulmaceae / Ulmus / Ulmus laevis' in second_data_line |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
74 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
75 def test_single_taxon_level(self, temp_files): |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
76 """Test processing with only one taxonomic level.""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
77 input_file, output_file = temp_files |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
78 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
79 test_data = """#Query ID #Subject #Subject accession #Subject Taxonomy ID #Identity percentage #Coverage #evalue #bitscore #Source #Taxonomy |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
80 M01687:476:000000000-LL5F5:1:1101:20413:7447_PairEnd(1) source=NCBI sequenceID=HM590330 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Malpighiales suborder=NA infraorder=NA superfamily=NA family=Salicaceae genus=Populus species=Populus tremula markercode=trnL lat=50.47 lon=-104.37 source=NCBI N/A 100.000 91 8.47e-10 54.7 Genbank unknown kingdom / unknown phylum / unknown class / unknown order / unknown family / unknown genus / unknown species""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
81 with open(input_file, 'w') as f: |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
82 f.write(test_data) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
83 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
84 add_labels(input_file, output_file, [13]) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
85 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
86 with open(output_file, 'r') as f: |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
87 lines = f.readlines() |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
88 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
89 data_line = lines[1].strip() |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
90 assert data_line.endswith('Populus tremula') # Should not have ' / ' since only one level (species) was requested |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
91 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
92 def test_default_taxon_levels(self, temp_files, sample_input_data): |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
93 """Test processing with default taxonomic levels.""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
94 input_file, output_file = temp_files |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
95 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
96 with open(input_file, 'w') as f: |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
97 f.write(sample_input_data) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
98 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
99 # Use default levels |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
100 add_labels(input_file, output_file, [1, 2, 4, 7, 11, 12, 13]) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
101 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
102 with open(output_file, 'r') as f: |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
103 lines = f.readlines() |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
104 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
105 # Check that the taxonomy string has the expected structure |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
106 first_data_line = lines[1] |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
107 taxonomy_part = first_data_line.split('\t')[-1].strip() |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
108 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
109 # Should have ' / ' separators between levels (except the last one) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
110 separator_count = taxonomy_part.count(' / ') |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
111 assert separator_count == 6 # 7 levels - 1 = 6 separators |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
112 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
113 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
114 class TestEdgeCases: |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
115 """Test edge cases and error conditions.""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
116 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
117 def test_different_input_file(self, tmp_path): |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
118 """Test processing an input file with unexpected data.""" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
119 input_file = tmp_path / "empty_input.txt" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
120 output_file = tmp_path / "output.txt" |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
121 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
122 input_file.write_text("""#Query ID #Subject #Subject accession #Subject Taxonomy ID #Identity percentage #Coverage #evalue #bitscore #Source #Taxonomy |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
123 M01687:476:000000000-LL5F5:1:1102:11130:1143 source=NCBI sequenceID=KP794848 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Rosales suborder=NA infraorder=NA superfamily=NA family=Rosaceae genus=Sorbus species=Sorbus aucuparia markercode=trnL lat=NA lon=NA source=NCBI N/A 100.000 100 5.00e-43 167 Viridiplantae / Streptophyta / Magnoliopsida / Rosales / Rosaceae / Sorbus / Sorbus aucuparia""") |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
124 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
125 with pytest.raises(ValueError, match="Line does not contain expected fields: superkingdom, markercode, or Genbank"): |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
126 add_labels(str(input_file), str(output_file), [1, 2, 4, 7, 11, 12, 13]) |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
127 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
128 |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
129 # Example of how to run these tests |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
130 if __name__ == "__main__": |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
131 # Run all tests in this file |
|
abd214795fa5
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/add_header_tool commit c944fd5685f295acba06679e85b67973c173b137
onnodg
parents:
diff
changeset
|
132 pytest.main([__file__]) |
