Mercurial > repos > q2d2 > qiime2__rescript__evaluate_fit_classifier
changeset 0:4366a48a594a draft
planemo upload for repository https://github.com/qiime2/galaxy-tools/tree/main/tools/suite_qiime2__rescript commit 389df0134cd0763dcf02aac6e623fc15f8861c1e
author | q2d2 |
---|---|
date | Thu, 25 Apr 2024 21:22:03 +0000 |
parents | |
children | ce620c0559c3 |
files | qiime2__rescript__evaluate_fit_classifier.xml test-data/.gitkeep |
diffstat | 1 files changed, 96 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qiime2__rescript__evaluate_fit_classifier.xml	Thu Apr 25 21:22:03 2024 +0000
@@ -0,0 +1,96 @@
+<?xml version='1.0' encoding='utf-8'?>
+<!--
+Copyright (c) 2024, QIIME 2 development team.
+
+Distributed under the terms of the Modified BSD License. (SPDX: BSD-3-Clause)
+-->
+<!--
+This tool was automatically generated by:
+    q2galaxy (version: 2024.2.1)
+for:
+    qiime2 (version: 2024.2.0)
+-->
+<tool name="qiime2 rescript evaluate-fit-classifier" id="qiime2__rescript__evaluate_fit_classifier" version="2024.2.2+q2galaxy.2024.2.1" profile="22.05" license="BSD-3-Clause">
+    <description>Evaluate and train naive Bayes classifier on reference sequences.</description>
+    <requirements>
+        <container type="docker">quay.io/qiime2/amplicon:2024.2</container>
+    </requirements>
+    <version_command>q2galaxy version rescript</version_command>
+    <command detect_errors="exit_code">q2galaxy run rescript evaluate_fit_classifier '$inputs'</command>
+    <configfiles>
+        <inputs name="inputs" data_style="staging_path_and_source_path"/>
+    </configfiles>
+    <inputs>
+        <param name="sequences" type="data" format="qza" label="sequences: FeatureData[Sequence]" help="[required] Reference sequences to use for classifier training/testing.">
+            <options options_filter_attribute="metadata.semantic_type">
+                <filter type="add_value" value="FeatureData[Sequence]"/>
+            </options>
+            <validator type="expression" message="Incompatible type">hasattr(value.metadata, "semantic_type") and value.metadata.semantic_type in ['FeatureData[Sequence]']</validator>
+        </param>
+        <param name="taxonomy" type="data" format="qza" label="taxonomy: FeatureData[Taxonomy]" help="[required] Reference taxonomy to use for classifier training/testing.">
+            <options options_filter_attribute="metadata.semantic_type">
+                <filter type="add_value" value="FeatureData[Taxonomy]"/>
+            </options>
+            <validator type="expression" message="Incompatible type">hasattr(value.metadata, "semantic_type") and value.metadata.semantic_type in ['FeatureData[Taxonomy]']</validator>
+        </param>
+        <section name="__q2galaxy__GUI__section__extra_opts__" title="Click here for additional options">
+            <conditional name="__q2galaxy__GUI__conditional__reads_per_batch__">
+                <param name="__q2galaxy__GUI__select__" type="select" label="reads_per_batch: Int % Range(1, None) | Str % Choices('auto')" help="[default: 'auto'] Number of reads to process in each batch. If &quot;auto&quot;, this parameter is autoscaled to min( number of query sequences / n_jobs, 20000).">
+                    <option value="auto" selected="true">auto (Str)</option>
+                    <option value="__q2galaxy__::control::Int X Range(1__comma__ None)">Provide a value (Int % Range(1, None))</option>
+                </param>
+                <when value="auto">
+                    <param name="reads_per_batch" type="hidden" value="auto"/>
+                </when>
+                <when value="__q2galaxy__::control::Int X Range(1__comma__ None)">
+                    <param name="reads_per_batch" type="integer" min="1" value="" label="reads_per_batch: Int % Range(1, None)" help="[required] Number of reads to process in each batch. If &quot;auto&quot;, this parameter is autoscaled to min( number of query sequences / n_jobs, 20000)."/>
+                </when>
+            </conditional>
+            <conditional name="__q2galaxy__GUI__conditional__confidence__">
+                <param name="__q2galaxy__GUI__select__" type="select" label="confidence: Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable')" help="[default: 0.7] Confidence threshold for limiting taxonomic depth. Set to &quot;disable&quot; to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments.">
+                    <option value="disable">disable (Str)</option>
+                    <option value="__q2galaxy__::control::Float X Range(0__comma__ 1__comma__ inclusive_end=True)" selected="true">Provide a value (Float % Range(0, 1, inclusive_end=True))</option>
+                </param>
+                <when value="disable">
+                    <param name="confidence" type="hidden" value="disable"/>
+                </when>
+                <when value="__q2galaxy__::control::Float X Range(0__comma__ 1__comma__ inclusive_end=True)">
+                    <param name="confidence" type="float" min="0" max="1" value="0.7" label="confidence: Float % Range(0, 1, inclusive_end=True)" help="[default: 0.7] Confidence threshold for limiting taxonomic depth. Set to &quot;disable&quot; to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments."/>
+                </when>
+            </conditional>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="classifier" format="qza" label="${tool.name} on ${on_string}: classifier.qza" from_work_dir="classifier.qza"/>
+        <data name="evaluation" format="qzv" label="${tool.name} on ${on_string}: evaluation.qzv" from_work_dir="evaluation.qzv"/>
+        <data name="observed_taxonomy" format="qza" label="${tool.name} on ${on_string}: observed_taxonomy.qza" from_work_dir="observed_taxonomy.qza"/>
+    </outputs>
+    <tests/>
+    <help>
+QIIME 2: rescript evaluate-fit-classifier
+=========================================
+Evaluate and train naive Bayes classifier on reference sequences.
+
+
+Outputs:
+--------
+:classifier.qza: Trained naive Bayes taxonomic classifier.
+:evaluation.qzv: Visualization of classification accuracy results.
+:observed_taxonomy.qza: Observed taxonomic label for each input sequence, predicted by the trained classifier.
+
+|
+
+Description:
+------------
+Train a naive Bayes classifier on a set of reference sequences, then test performance accuracy on this same set of sequences. This results in a "perfect" classifier that "knows" the correct identity of each input sequence. Such a leaky classifier indicates the upper limit of classification accuracy based on sequence information alone, as misclassifications are an indication of unresolvable kmer profiles. This test simulates the case where all query sequences are present in a fully comprehensive reference database. To simulate more realistic conditions, see `evaluate_cross_validate`. THE CLASSIFIER OUTPUT BY THIS PIPELINE IS PRODUCTION-READY and can be re-used for classification of other sequences (provided the reference data are viable), hence THIS PIPELINE IS USEFUL FOR TRAINING FEATURE CLASSIFIERS AND THEN EVALUATING THEM ON-THE-FLY.
+
+
+|
+
+</help>
+    <citations>
+        <citation type="doi">10.1186/s40168-018-0470-z</citation>
+        <citation type="doi">10.1371/journal.pcbi.1009581</citation>
+        <citation type="doi">10.1038/s41587-019-0209-9</citation>
+    </citations>
+</tool>