Mercurial > repos > onnodg > blast_annotations_processor
diff tests/test_blast_annotations_processor.py @ 3:ca2f07b71581 draft default tip
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 600e5a50a13a3a16a1970d6d4d31cb4f7bd549bf-dirty
| author | onnodg |
|---|---|
| date | Thu, 12 Feb 2026 13:52:07 +0000 |
| parents | 9ca209477dfd |
| children | |
line wrap: on
line diff
--- a/tests/test_blast_annotations_processor.py Mon Dec 15 16:43:36 2025 +0000 +++ b/tests/test_blast_annotations_processor.py Thu Feb 12 13:52:07 2026 +0000 @@ -50,13 +50,13 @@ read4(25) subject4 id4.2 subject4 90.0 87 1e-50 160 database1 Archaea / Euryarchaeota / Methanobacteria / Methanobacteriales / Methanobacteriaceae / Methanobrevibacter / Methanobrevibacter_smithii """ - fasta_content = """>read1(100) count=100; + fasta_content = """>read1(100) count=100; obiclean_count={'XXX': 100}; ATCGATCGATCGATCG ->read2(50) count=50; +>read2(50) count=50; obiclean_count={'XXX': 50} GCTAGCTAGCTAGCTA ->read3(25) count=25; +>read3(25) count=25; obiclean_count={'XXX': 25} TGACTGACTGACTGAC ->read4(25) count=25; +>read4(25) count=25; obiclean_count={'XXX': 25} TGAAAAAAACACCAC """ @@ -259,11 +259,11 @@ read3(25) source=NCBI sequenceID=KR737595 superkingdom=Eukaryota kingdom=Viridiplantae phylum=Streptophyta subphylum=Streptophytina class=Magnoliopsida subclass=NA infraclass=NA order=Malvales suborder=NA infraorder=NA superfamily=NA family=Malvaceae genus=Hibiscus species=Hibiscus trionum markercode=trnL lat=0.304 lon=36.87 source=NCBI N/A 97.561 87 1.68e-14 71.3 Viridiplantae / Streptophyta / Magnoliopsida / Malvales / Malvaceae / Hibiscus / Hibiscus trionum """ - fasta_content = """>read1(100) count=100; + fasta_content = """>read1(100) count=100; obiclean_count={'XXX': 100}; ATCG ->read2(50) merged_sample={}; count=1011; direction=right; seq_b_insertion=0; sminR=40.0; ali_length=53; seq_b_deletion=248; seq_a_deletion=248; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; seq_b_single=0; +>read2(50) merged_sample={}; count=1011; obiclean_count={'XXX': 1011}; direction=right; seq_b_insertion=0; sminR=40.0; ali_length=53; seq_b_deletion=248; seq_a_deletion=248; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; seq_b_single=0; gggcaatcctgagccaagtgactggagttcagataggtgcagagactcaatgg ->read3(25) merged_sample={}; count=179; direction=right; sminR=40.0; 
ali_length=49; seq_b_deletion=252; seq_a_deletion=252; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; seq_b_single=0; +>read3(25) merged_sample={}; count=179; obiclean_count={'XXX': 179}; direction=right; sminR=40.0; ali_length=49; seq_b_deletion=252; seq_a_deletion=252; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; seq_b_single=0; gggcaatcctgagccaactggagttcagataggtgcagagactcaatgg """ @@ -411,18 +411,18 @@ blast = input_dir / "combined_filters.tabular" fasta.write_text( - ">lowSupport(1) count=1;\nACGT\n" - ">obicleanFail(10) count=10; obiclean_status={'XXX': 's'};\nACGT\n" - ">pairendFail_PairEnd(10) count=10;\nACGT\n" - ">identityFail(10) count=10;\nACGT\n" - ">coverageFail(10) count=10;\nACGT\n" - ">bitscoreFail(10) count=10;\nACGT\n" - ">bscutoffHigh(10) count=10;\nACGT\n" - ">envTaxFail(10) count=10;\nACGT\n" - ">rankFail(10) count=10;\nACGT\n" - ">seqidFail(10) count=10;\nACGT\n" - ">readOK(10) count=10;\nACGT\n" - ">readOK_multiple_id(10) count=10;\nACGT\n" + ">lowSupport(1) obiclean_count={'XXX': 1}; count=1;\nACGT\n" + ">obicleanFail(10) count=10; obiclean_count={'XXX': 10}; obiclean_status={'XXX': 's'};\nACGT\n" + ">pairendFail_PairEnd(10) count=10; obiclean_count={'XXX': 10}; \nACGT\n" + ">identityFail(10) count=10; obiclean_count={'XXX': 10}; \nACGT\n" + ">coverageFail(10) count=10; obiclean_count={'XXX': 10}; \nACGT\n" + ">bitscoreFail(10) count=10; obiclean_count={'XXX': 10}; ACGT\n" + ">bscutoffHigh(10) count=10; obiclean_count={'XXX': 10}; \nACGT\n" + ">envTaxFail(10) count=10; obiclean_count={'XXX': 10}; \nACGT\n" + ">rankFail(10) count=10; obiclean_count={'XXX': 10}; \nACGT\n" + ">seqidFail(10) count=10; obiclean_count={'XXX': 10}; \nACGT\n" + ">readOK(10) count=10; obiclean_count={'XXX': 10}; \nACGT\n" + ">readOK_multiple_id(10) count=10; obiclean_count={'XXX': 10}; \nACGT\n" ) blast.write_text( @@ -495,8 +495,8 @@ with open(args.filtered_fasta) as f: headers = [l.strip() for 
l in f if l.startswith(">")] - assert '>obicleanFail(10) count=10;' not in headers - assert '>pairendFail_PairEnd(10) count=10;' not in headers + assert ">obicleanFail(10) count=10; obiclean_count={'XXX': 10};" not in headers + assert ">pairendFail_PairEnd(10) count=10; obiclean_count={'XXX': 10};" not in headers assert len(headers) == 9, "FASTA filtering only applies to header-based rules" df = pd.read_excel(args.header_anno, sheet_name="Individual_Reads")
