Mercurial > repos > onnodg > blast_annotations_processor
comparison blast_annotations_processor.xml @ 2:9ca209477dfd draft default tip
planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 4017d38cf327c48a6252e488ba792527dae97a70-dirty
| author | onnodg |
|---|---|
| date | Mon, 15 Dec 2025 16:43:36 +0000 |
| parents | 2acf82433aa4 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:2acf82433aa4 | 2:9ca209477dfd |
|---|---|
| 1 <tool id="blast_annotation_processor" name="BLAST Annotation Processor" version="1.0.1"> | 1 <tool id="blast_annotation_processor" name="BLAST Annotation Processor" version="2.0.0"> |
| 2 <description>Process BLAST annotation results with taxonomic analysis</description> | 2 <description>Process BLAST annotation results with taxonomic analysis</description> |
| 3 | 3 |
| 4 <requirements> | 4 <requirements> |
| 5 <requirement type="package" version="3.12.3">python</requirement> | 5 <requirement type="package" version="3.12.3">python</requirement> |
| 6 <requirement type="package" version="3.10.6">matplotlib</requirement> | 6 <requirement type="package" version="3.10.6">matplotlib</requirement> |
| 11 | 11 |
| 12 <command detect_errors="exit_code"><![CDATA[ | 12 <command detect_errors="exit_code"><![CDATA[ |
| 13 python '$__tool_directory__/blast_annotations_processor.py' | 13 python '$__tool_directory__/blast_annotations_processor.py' |
| 14 --input-anno '$input_anno' | 14 --input-anno '$input_anno' |
| 15 --input-unanno '$input_unanno' | 15 --input-unanno '$input_unanno' |
| 16 | 16 --filtered-fasta '$filtered_fasta' |
| 17 #if $outputs and 'eval_plot' in $outputs | 17 #if $outputs and 'eval_plot' in $outputs |
| 18 --eval-plot '$eval_plot' | 18 --eval-plot '$eval_plot' |
| 19 #end if | 19 #end if |
| 20 | 20 |
| 21 #if $outputs and 'taxa_output' in $outputs | 21 #if $outputs and 'taxa_output' in $outputs |
| 27 #end if | 27 #end if |
| 28 | 28 |
| 29 #if $outputs and 'header_anno' in $outputs | 29 #if $outputs and 'header_anno' in $outputs |
| 30 --header-anno '$header_anno' | 30 --header-anno '$header_anno' |
| 31 #end if | 31 #end if |
| 32 | 32 --log '$log' |
| 33 #if $outputs and 'anno_stats' in $outputs | |
| 34 --anno-stats '$anno_stats' | |
| 35 #end if | |
| 36 | 33 |
| 37 --uncertain-threshold $advanced.uncertain_threshold | 34 --uncertain-threshold $advanced.uncertain_threshold |
| 38 --eval-threshold $advanced.eval_threshold | 35 --eval-threshold $advanced.blast.eval_threshold |
| | 36 --min-identity $advanced.blast.min_identity |
| | 37 --min-coverage $advanced.blast.min_coverage |
| | 38 --min-bitscore $advanced.blast.min_bitscore |
| | 39 --bitscore-perc-cutoff $advanced.blast.bitscore_perc_cutoff |
| | 40 --min-support $advanced.fasta.min_support |
| | 41 #if $advanced.blast.ignore_seqids != "" |
| | 42 --ignore-seqids '$advanced.blast.ignore_seqids' |
| | 43 #end if |
| | 44 #if $advanced.blast.ignore_rank != "" |
| | 45 --ignore-rank '$advanced.blast.ignore_rank' |
| | 46 #end if |
| | 47 #if $advanced.blast.ignore_taxonomy != "" |
| | 48 --ignore-taxonomy '$advanced.blast.ignore_taxonomy' |
| | 49 #end if |
| | 50 #if $advanced.fasta.ignore_obiclean_type != "" |
| | 51 --ignore-obiclean-type '$advanced.fasta.ignore_obiclean_type' |
| | 52 #end if |
| | 53 #if $advanced.fasta.ignore_illuminapairend_type != "" |
| | 54 --ignore-illuminapairend-type '$advanced.fasta.ignore_illuminapairend_type' |
| | 55 #end if |
| 39 #if $advanced.use_counts | 56 #if $advanced.use_counts |
| 40 --use-counts | 57 --use-counts |
| 41 #end if | 58 #end if |
| 42 ]]></command> | 59 ]]></command> |
| 43 | 60 |
| 44 <inputs> | 61 <inputs> |
| 45 <!-- Required Input Files --> | |
| 46 <param name="input_anno" type="data" format="tabular" | 62 <param name="input_anno" type="data" format="tabular" |
| 47 label="Annotated BLAST output file" | 63 label="Annotated BLAST output file" |
| 48 help="Tabular BLAST output with taxonomic annotations"/> | 64 help="Tabular BLAST output with taxonomic annotations"/> |
| 49 | 65 |
| 50 <param name="input_unanno" type="data" format="fasta" | 66 <param name="input_unanno" type="data" format="fasta" |
| 51 label="Original unannotated sequences" | 67 label="Original unannotated sequences" |
| 52 help="FASTA file with original sequences before BLAST annotation"/> | 68 help="FASTA file with original sequences before BLAST annotation"/> |
| 53 | 69 |
| 54 <!-- Output Selection --> | |
| 55 <param name="outputs" type="select" multiple="true" display="checkboxes" | 70 <param name="outputs" type="select" multiple="true" display="checkboxes" |
| 56 label="Select outputs to generate" help="Choose which analysis outputs to create"> | 71 label="Select outputs to generate" help="Choose which analysis outputs to create"> |
| 57 <option value="eval_plot">E-value distribution plot</option> | 72 <option value="eval_plot">E-value distribution plot</option> |
| 58 <option value="taxa_output">Taxonomic report (Kraken2-like format)</option> | 73 <option value="taxa_output">Taxonomic report (Kraken2-like format)</option> |
| 59 <option value="circle_data">Circular taxonomic datafile</option> | 74 <option value="circle_data">Circular taxonomic datafile</option> |
| 60 <option value="header_anno">Annotations per header (in Excel)</option> | 75 <option value="header_anno">Annotations per header (in Excel)</option> |
| 61 <option value="anno_stats">Annotation statistics</option> | |
| 62 </param> | 76 </param> |
| 63 | 77 |
| 64 <!-- Processing Parameters --> | |
| 65 <section name="advanced" title="Advanced Parameters" expanded="false"> | 78 <section name="advanced" title="Advanced Parameters" expanded="false"> |
| 66 <param name="uncertain_threshold" type="float" value="0.9" min="0.0" max="1.0" | 79 <param name="uncertain_threshold" type="float" value="0.9" min="0.0" max="1.0" |
| 67 label="Uncertain threshold" | 80 label="Uncertainty threshold" |
| 68 help="Threshold for resolving taxonomic conflicts (0.0-1.0). If one taxon represents more than this fraction of reads, it will be used instead of 'Uncertain taxa'"/> | 81 help="Threshold for resolving taxonomic conflicts (0.0-1.0). If one taxon represents more than this fraction of reads, it will be used instead of 'Uncertain taxa'"/> |
| 69 | |
| 70 <param name="eval_threshold" type="float" value="1e-10" min="0" | |
| 71 label="E-value threshold" | |
| 72 help="Maximum E-value to consider for annotations. Results with higher E-values will be filtered out"/> | |
| 73 | |
| 74 <param name="use_counts" type="boolean" checked="true" | 82 <param name="use_counts" type="boolean" checked="true" |
| 75 label="Use read counts in circular diagrams" | 83 label="Use read counts in circular diagrams" |
| 76 help="If checked, circular diagrams will reflect read abundance. If unchecked, only unique taxa are counted"/> | 84 help="If checked, circular diagrams will reflect read abundance. If unchecked, only unique taxa are counted"/> |
| | 85 <section name="fasta" title="Fasta filters" expanded="false"> |
| | 86 <param name="min_support" type="integer" value="1" min="1" max="1000" label="Minimum support" |
| | 87 help="The minimum times a read should occur before dereplication"/> |
| | 88 <param name="ignore_obiclean_type" type="text" value="singleton" label="Ignore obiclean type" |
| | 89 help="The tool skips reads that are flagged as this obiclean type, options are: singleton,variant,head. Values must be comma separated"/> |
| | 90 <param name="ignore_illuminapairend_type" type="text" value="pairend" label="Ignore R1-R2 merge failure" |
| | 91 help="The tool skips reads that are flagged as this illuminapairend type, options are: pairend,consensus. Values must be comma separated"/> |
| | 92 </section> |
| | 93 <section name="blast" title="Blast filters" expanded="false"> |
| | 94 <param name="min_identity" type="integer" value="80" min="1" max="100" label="Minimum identity"/> |
| | 95 <param name="min_coverage" type="integer" value="70" min="1" max="100" label="Minimum coverage"/> |
| | 96 <param name="min_bitscore" type="integer" value="100" min="1" max="1000" label="Minimum bitscore"/> |
| | 97 <param name="bitscore_perc_cutoff" type="float" value="8" min="0" max="100" label="Top bitscore percentage cutoff" |
| | 98 help="The percentage that the bitscore can be lower than the top bitscore to still be considered. To disable this function put the value as 0"/> |
| | 99 <param name="eval_threshold" type="text" value="1e-10" label="E-value threshold"/> |
| | 100 <param name="ignore_seqids" type="text" value="" label="Ignore sequence identifiers" |
| | 101 help="The tool skips hits that have these sequence identifiers. Values must be comma separated"/> |
| | 102 <param name="ignore_rank" type="text" value="unknown" label="Ignore rank when containing:" |
| | 103 help="The tool skips hits that have this string in taxonomy ranks. Values must be comma separated"/> |
| | 104 <param name="ignore_taxonomy" type="text" value="environmental" label="Ignore taxonomy when containing:" |
| | 105 help="The tool skips hits that have this string as taxonomy. Values must be comma separated"/> |
| | 106 </section> |
| 77 </section> | 107 </section> |
| 78 </inputs> | 108 </inputs> |
| 79 | 109 |
| 80 <outputs> | 110 <outputs> |
| 81 <!-- E-value Plot --> | |
| 82 <data name="eval_plot" format="png" label="E-value distribution plot on ${on_string}"> | 111 <data name="eval_plot" format="png" label="E-value distribution plot on ${on_string}"> |
| 83 <filter>outputs and 'eval_plot' in outputs</filter> | 112 <filter>outputs and 'eval_plot' in outputs</filter> |
| 84 </data> | 113 </data> |
| 85 | |
| 86 <!-- Taxa Output Report --> | |
| 87 <data name="taxa_output" format="txt" label="Taxonomic report on ${on_string}"> | 114 <data name="taxa_output" format="txt" label="Taxonomic report on ${on_string}"> |
| 88 <filter>outputs and 'taxa_output' in outputs</filter> | 115 <filter>outputs and 'taxa_output' in outputs</filter> |
| 89 </data> | 116 </data> |
| 90 | |
| 91 <!-- Circular Taxonomy Diagram --> | |
| 92 <data name="circle_data" format="txt" label="Circular taxonomic data on ${on_string}"> | 117 <data name="circle_data" format="txt" label="Circular taxonomic data on ${on_string}"> |
| 93 <filter>outputs and 'circle_data' in outputs</filter> | 118 <filter>outputs and 'circle_data' in outputs</filter> |
| 94 </data> | 119 </data> |
| 95 | |
| 96 <!-- Header Annotations --> | |
| 97 <data name="header_anno" format="xlsx" label="Header annotations on ${on_string}"> | 120 <data name="header_anno" format="xlsx" label="Header annotations on ${on_string}"> |
| 98 <filter>outputs and 'header_anno' in outputs</filter> | 121 <filter>outputs and 'header_anno' in outputs</filter> |
| 99 </data> | 122 </data> |
| 100 | 123 <data name="log" format="txt" label="log on ${on_string}"/> |
| 101 <!-- Annotation Statistics --> | 124 <data name="filtered_fasta" format="fasta" label="Filtered fasta on ${on_string}"/> |
| 102 <data name="anno_stats" format="txt" label="Annotation statistics on ${on_string}"> | |
| 103 <filter>outputs and 'anno_stats' in outputs</filter> | |
| 104 </data> | |
| 105 </outputs> | 125 </outputs> |
| 106 | 126 |
| 107 <tests> | 127 <tests> |
| 108 <test expect_num_outputs="5"> | 128 <test expect_num_outputs="6"> |
| | 129 <param name="input_anno" value="test_curated_nov_blast_headers.tabular"/> |
| | 130 <param name="input_unanno" value="test_curated_nov.fasta"/> |
| | 131 <param name="outputs" value="eval_plot,taxa_output,circle_data,header_anno"/> |
| | 132 <output name="eval_plot" file="test_nov_eval.png" compare="sim_size"/> |
| | 133 <output name="taxa_output" file="test_curated_nova_taxa_output.txt"/> |
| | 134 <output name="circle_data" file="test_curated_nova.txt"/> |
| | 135 <output name="header_anno" file="test_curated_nova_header_anno_excel.xlsx" decompress="true"/> |
| | 136 <output name="log" file="test_curated_nova_anno_out.txt"/> |
| | 137 <output name="filtered_fasta" file="test_curated_nov_filtered.fasta"/> |
| | 138 <section name="advanced"> |
| | 139 <param name="uncertain_threshold" value="0.9"/> |
| | 140 <param name="use_counts" value="True"/> |
| | 141 <section name="fasta"> |
| | 142 <param name="min_support" value="1"/> |
| | 143 <param name="ignore_obiclean_type" value="singleton"/> |
| | 144 <param name="ignore_illuminapairend_type" value="pairend"/> |
| | 145 </section> |
| | 146 <section name="blast"> |
| | 147 <param name="min_identity" value="80"/> |
| | 148 <param name="min_coverage" value="70"/> |
| | 149 <param name="min_bitscore" value="100"/> |
| | 150 <param name="bitscore_perc_cutoff" value="8"/> |
| | 151 <param name="eval_threshold" value="1e-10"/> |
| | 152 <param name="ignore_seqids" value=""/> |
| | 153 <param name="ignore_rank" value="unknown"/> |
| | 154 <param name="ignore_taxonomy" value="environmental"/> |
| | 155 </section> |
| | 156 </section> |
| | 157 </test> |
| | 158 |
| | 159 <test expect_num_outputs="6"> |
| | 160 <param name="input_anno" value="test_genbank_nov_blast.tabular"/> |
| | 161 <param name="input_unanno" value="test_genbank_nov.fasta"/> |
| | 162 <param name="outputs" value="eval_plot,taxa_output,circle_data,header_anno"/> |
| | 163 <output name="eval_plot" file="output_genbank_eval.png" compare="sim_size"/> |
| | 164 <output name="taxa_output" file="output_genbank_taxa_output.txt"/> |
| | 165 <output name="circle_data" file="output_genbank_circle_data.txt"/> |
| | 166 <output name="header_anno" file="output_genbank_header_anno.xlsx" decompress="true"/> |
| | 167 <output name="log" file="output_genbank_anno_out.txt"/> |
| | 168 <output name="filtered_fasta" file="genbank_filtered.fasta"/> |
| | 169 <section name="advanced"> |
| | 170 <param name="uncertain_threshold" value="0.9"/> |
| | 171 <param name="use_counts" value="True"/> |
| | 172 <section name="fasta"> |
| | 173 <param name="min_support" value="1"/> |
| | 174 <param name="ignore_obiclean_type" value="singleton"/> |
| | 175 <param name="ignore_illuminapairend_type" value="pairend"/> |
| | 176 </section> |
| | 177 <section name="blast"> |
| | 178 <param name="min_identity" value="80"/> |
| | 179 <param name="min_coverage" value="70"/> |
| | 180 <param name="min_bitscore" value="100"/> |
| | 181 <param name="bitscore_perc_cutoff" value="8"/> |
| | 182 <param name="eval_threshold" value="1e-10"/> |
| | 183 <param name="ignore_seqids" value=""/> |
| | 184 <param name="ignore_rank" value="unknown"/> |
| | 185 <param name="ignore_taxonomy" value="environmental"/> |
| | 186 </section> |
| | 187 </section> |
| | 188 </test> |
| | 189 <test expect_num_outputs="4"> |
| | 190 <param name="input_anno" value="test_genbank_nov_blast.tabular"/> |
| | 191 <param name="input_unanno" value="test_genbank_nov.fasta"/> |
| | 192 <param name="outputs" value="circle_data,header_anno"/> |
| | 193 <output name="circle_data" file="advanced_circle_data.txt"/> |
| | 194 <output name="header_anno" file="output_advanced_header_anno.xlsx" decompress="true"/> |
| | 195 <output name="log" file="output_advanced_anno_out.txt"/> |
| | 196 <output name="filtered_fasta" file="advanced_filtered.fasta"/> |
| | 197 <section name="advanced"> |
| | 198 <param name="uncertain_threshold" value="0.8"/> |
| | 199 <param name="use_counts" value="False"/> |
| | 200 <section name="fasta"> |
| | 201 <param name="min_support" value="2"/> |
| | 202 <param name="ignore_obiclean_type" value="singleton,variant"/> |
| | 203 <param name="ignore_illuminapairend_type" value="pairend"/> |
| | 204 </section> |
| | 205 <section name="blast"> |
| | 206 <param name="min_identity" value="70"/> |
| | 207 <param name="min_coverage" value="60"/> |
| | 208 <param name="min_bitscore" value="80"/> |
| | 209 <param name="bitscore_perc_cutoff" value="0"/> |
| | 210 <param name="eval_threshold" value="1e-8"/> |
| | 211 <param name="ignore_seqids" value="NC_051949"/> |
| | 212 <param name="ignore_rank" value="unknown"/> |
| | 213 <param name="ignore_taxonomy" value="environmental"/> |
| | 214 </section> |
| | 215 </section> |
| | 216 </test> |
| | 217 <test expect_num_outputs="3"> |
| 109 <param name="input_anno" value="input_test_curated_labels.tabular"/> | 218 <param name="input_anno" value="input_test_curated_labels.tabular"/> |
| 110 <param name="input_unanno" value="input_test_curated.fasta"/> | 219 <param name="input_unanno" value="input_test_curated.fasta"/> |
| 111 <param name="outputs" value="eval_plot,taxa_output,circle_data,header_anno,anno_stats"/> | 220 <param name="outputs" value="header_anno"/> |
| 112 <output name="taxa_output" file="output_taxa_output.txt"/> | 221 <output name="header_anno" file="strict_header_anno.xlsx" decompress="true"/> |
| 113 <output name="eval_plot" file="output_eval.png" compare="sim_size"/> | 222 <output name="log" file="strict_anno_stats.txt" lines_diff="50"/> |
| 114 <output name="header_anno" file="header_anno_excel.xlsx" decompress="true"/> | 223 <output name="filtered_fasta" file="strict_filtered.fasta"/> |
| 115 <output name="anno_stats" file="output_anno_out.txt"/> | 224 |
| 116 <output name="circle_data" file="output_circle_data.txt"/> | |
| 117 <section name="advanced"> | 225 <section name="advanced"> |
| 118 <param name="uncertain_threshold" value="0.9"/> | 226 <param name="uncertain_threshold" value="0.95"/> |
| 119 <param name="eval_threshold" value="1e-10"/> | 227 <param name="use_counts" value="False"/> |
| 120 <param name="use_counts" value="True"/> | 228 |
| | 229 <section name="fasta"> |
| | 230 <param name="min_support" value="1"/> |
| | 231 <param name="ignore_obiclean_type" value=""/> |
| | 232 <param name="ignore_illuminapairend_type" value=""/> |
| | 233 </section> |
| | 234 |
| | 235 <section name="blast"> |
| | 236 <param name="min_identity" value="98"/> |
| | 237 <param name="min_coverage" value="95"/> |
| | 238 <param name="min_bitscore" value="150"/> |
| | 239 <param name="bitscore_perc_cutoff" value="0"/> |
| | 240 <param name="eval_threshold" value="1e-20"/> |
| | 241 <param name="ignore_seqids" value=""/> |
| | 242 <param name="ignore_rank" value=""/> |
| | 243 <param name="ignore_taxonomy" value=""/> |
| | 244 </section> |
| 121 </section> | 245 </section> |
| 122 </test> | 246 </test> |
| 123 <test expect_num_outputs="5"> | |
| 124 <param name="input_anno" value="galaxy_input_genbank.fa.tabular"/> | |
| 125 <param name="input_unanno" value="galaxy_input_pre.fasta"/> | |
| 126 <param name="outputs" value="eval_plot,taxa_output,circle_data,header_anno,anno_stats"/> | |
| 127 <output name="taxa_output" file="output_genbank_taxa_output.txt"/> | |
| 128 <output name="eval_plot" file="output_genbank_eval.png" compare="sim_size"/> | |
| 129 <output name="header_anno" file="output_genbank_header_anno.xlsx" decompress="true"/> | |
| 130 <output name="anno_stats" file="output_genbank_anno_out.txt"/> | |
| 131 <output name="circle_data" file="output_genbank_circle_data.txt"/> | |
| 132 </test> | |
| 133 <test expect_num_outputs="3"> | |
| 134 <param name="input_anno" value="galaxy_input_genbank.fa.tabular"/> | |
| 135 <param name="input_unanno" value="galaxy_input_pre.fasta"/> | |
| 136 <param name="outputs" value="circle_data,header_anno,anno_stats"/> | |
| 137 <output name="header_anno" file="output_advanced_header_anno.xlsx" decompress="true"/> | |
| 138 <output name="anno_stats" file="output_advanced_anno_out.txt"/> | |
| 139 <output name="circle_data" file="advanced_circle_data.txt"/> | |
| 140 <section name="advanced"> | |
| 141 <param name="uncertain_threshold" value="0.8"/> | |
| 142 <param name="eval_threshold" value="1e-8"/> | |
| 143 <param name="use_counts" value="True"/> | |
| 144 </section> | |
| 145 </test> | |
| 146 | |
| 147 </tests> | 247 </tests> |
| 148 | 248 |
| 149 <help><