changeset 1:2acf82433aa4 draft default tip

planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit d771f9fbfd42bcdeda1623d954550882a0863847-dirty
author onnodg
date Mon, 20 Oct 2025 12:26:51 +0000
parents a3989edf0a4a
children
files __pycache__/__init__.cpython-313.pyc __pycache__/blast_annotations_processor.cpython-313.pyc blast_annotations_processor.xml test-data/sorted_test.fasta test-data/sorted_test.tabular test-data/sync_test.xlsx tests/__pycache__/test_blast_annotations_processor.cpython-313-pytest-8.4.2.pyc
diffstat 7 files changed, 49 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
Binary file __pycache__/__init__.cpython-313.pyc has changed
Binary file __pycache__/blast_annotations_processor.cpython-313.pyc has changed
--- a/blast_annotations_processor.xml	Tue Oct 14 09:08:30 2025 +0000
+++ b/blast_annotations_processor.xml	Mon Oct 20 12:26:51 2025 +0000
@@ -1,4 +1,4 @@
-<tool id="blast_annotation_processor" name="BLAST Annotation Processor" version="1.0.0">
+<tool id="blast_annotation_processor" name="BLAST Annotation Processor" version="1.0.1">
     <description>Process BLAST annotation results with taxonomic analysis</description>
 
     <requirements>
@@ -57,7 +57,7 @@
             <option value="eval_plot">E-value distribution plot</option>
             <option value="taxa_output">Taxonomic report (Kraken2-like format)</option>
             <option value="circle_data">Circular taxonomic datafile</option>
-            <option value="header_anno">Header annotations table</option>
+            <option value="header_anno">Annotations per header (in Excel)</option>
             <option value="anno_stats">Annotation statistics</option>
         </param>
 
@@ -165,38 +165,61 @@
 
 - **Circular taxonomic data**: JSON data to generate a circular sunburst-style diagram showing taxonomic composition across all taxonomic levels (Kingdom -> Species).
 
-- **Header annotations table**: Excel workbook listing each sequence header with its taxonomic assignment and E-value.
+- **Annotations per header**: Excel workbook listing each sequence header with its taxonomic assignment and E-value.
 
 - **Annotation statistics**: Summary statistics about annotation success rates and sequence counts.
 
 **Parameters:**
 
-- **Uncertain threshold**: When multiple conflicting taxonomic assignments exist for a sequence, this threshold determines whether to use the most common assignment (if it exceeds the threshold) or mark it as "Uncertain taxa".
+- **Uncertain threshold**: Threshold for the LCA (lowest common ancestor) step. When multiple conflicting taxonomic assignments exist for a sequence, this threshold determines whether to use the most common assignment (if it exceeds the threshold) or mark it as "Uncertain taxa".
 
 - **E-value threshold**: Sequences with E-values higher than this threshold are filtered out from the analysis.
 
 - **Use read counts**: Determines whether circular data reflects the abundance of reads (checked) or just counts unique taxonomic assignments (unchecked).
-#Query ID	#Subject	#Subject accession	#Subject Taxonomy ID	#Identity percentage
-	#Coverage	#evalue	#bitscore	#Source	#Taxonomy
+
 **Expected Input Format:**
 
 The annotated BLAST file should be in tabular format with at least 7 columns:
-1. Query ID
-2. Subject ID
-3. Subject accession
-4. Subject Taxonomy ID
-5. Identity percentage
-6. Coverage
-7. Evalue
-8. Bitscore
-9. Source
-10. Taxonomy
+
+- 1. Query ID
+
+- 2. Subject ID
+
+- 3. Subject accession
+
+- 4. Subject Taxonomy ID
+
+- 5. Identity percentage
+
+- 6. Coverage
+
+- 7. Evalue
+
+- 8. Bitscore
+
+- 9. Source
+
+- 10. Taxonomy
 
 **Note:** This tool processes files that have been deduplicated and contain read count information in the sequence headers in the format: `sequence_name(count_number)`.
 
+-------------
+
+.. class:: infomark
+
 **Credits**
-Authors = Onno de Gorter, 2025.
+
 Based on a script by Nick Kortleven, translated, modified and wrapped by Onno de Gorter.
-Developed for the New light on old remedies project, a PhD research by Anja Fischer
+Developed for the New Lights on Old Remedies project, a PhD research project by Anja Fischer.
+
+Link to the project website:
+
+* https://ahm.uva.nl/funded-research-projects/new-lights-on-old-remedies/new-lights-on-old-remedies.html
+
     ]]></help>
+    <creator>
+        <organization name="Naturalis Biodiversity Center" url="https://www.naturalis.nl/en/science" />
+        <person givenName="Onno" familyName="de Gorter" url="https://github.com/Onnodg"/>
+        <person givenName="Nick" familyName="Kortleven" url="https://github.com/tombkingsts" />
+    </creator>
 </tool>
\ No newline at end of file
--- a/test-data/sorted_test.fasta	Tue Oct 14 09:08:30 2025 +0000
+++ b/test-data/sorted_test.fasta	Mon Oct 20 12:26:51 2025 +0000
@@ -1,4 +1,4 @@
->read1(100) count=100;
-ATCG
->read2(50) count=50;
-GCTA
+>read1(100) count=100;
+ATCG
+>read2(50) count=50;
+GCTA
--- a/test-data/sorted_test.tabular	Tue Oct 14 09:08:30 2025 +0000
+++ b/test-data/sorted_test.tabular	Mon Oct 20 12:26:51 2025 +0000
@@ -1,4 +1,4 @@
-#Query ID	#Subject	#Subject accession	#Subject Taxonomy ID	#Identity percentage	#Coverage	#evalue	#bitscore	#Source	#Taxonomy
-        read1(100)	subject1	95.0	100	50	75	1e-50	200	database1	Viridiplantae / Streptophyta / Magnoliopsida / Fagales / Juglandaceae / Uncertain taxa / Uncertain taxa
-read1(100)	subject2	90.0	95	45	70	1e-5	180	database1	Viridiplantae / Streptophyta / Magnoliopsida / Rosales / Rosaceae / Sorbus / Sorbus aucuparia
-read2(50)	subject3	85.0	90	40	65	1e-3	160	database2	Viridiplantae / Streptophyta / Magnoliopsida / Solanales / Solanaceae / Uncertain taxa / Uncertain taxa
+#Query ID	#Subject	#Subject accession	#Subject Taxonomy ID	#Identity percentage	#Coverage	#evalue	#bitscore	#Source	#Taxonomy
+        read1(100)	subject1	95.0	100	50	75	1e-50	200	database1	Viridiplantae / Streptophyta / Magnoliopsida / Fagales / Juglandaceae / Uncertain taxa / Uncertain taxa
+read1(100)	subject2	90.0	95	45	70	1e-5	180	database1	Viridiplantae / Streptophyta / Magnoliopsida / Rosales / Rosaceae / Sorbus / Sorbus aucuparia
+read2(50)	subject3	85.0	90	40	65	1e-3	160	database2	Viridiplantae / Streptophyta / Magnoliopsida / Solanales / Solanaceae / Uncertain taxa / Uncertain taxa
Binary file test-data/sync_test.xlsx has changed
Binary file tests/__pycache__/test_blast_annotations_processor.cpython-313-pytest-8.4.2.pyc has changed