Mercurial > repos > q2d2 > qiime2__vsearch__cluster_features_open_reference
changeset 0:d4d790913b2d draft
planemo upload for repository https://github.com/qiime2/galaxy-tools/tree/main/tools/suite_qiime2__vsearch commit 9023cfd83495a517fbcbb6f91d5b01a6f1afcda1
author | q2d2 |
---|---|
date | Mon, 29 Aug 2022 20:32:14 +0000 |
parents | |
children | df39d339740c |
files | qiime2__vsearch__cluster_features_open_reference.xml test-data/.gitkeep |
diffstat | 1 files changed, 84 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qiime2__vsearch__cluster_features_open_reference.xml Mon Aug 29 20:32:14 2022 +0000
@@ -0,0 +1,84 @@
+<?xml version='1.0' encoding='utf-8'?>
+<!--
+Copyright (c) 2022, QIIME 2 development team.
+
+Distributed under the terms of the Modified BSD License. (SPDX: BSD-3-Clause)
+-->
+<!--
+This tool was automatically generated by:
+    q2galaxy (version: 2022.8.1)
+for:
+    qiime2 (version: 2022.8.1)
+-->
+<tool name="qiime2 vsearch cluster-features-open-reference" id="qiime2__vsearch__cluster_features_open_reference" version="2022.8.0+q2galaxy.2022.8.1.2" profile="22.05" license="BSD-3-Clause">
+    <description>Open-reference clustering of features.</description>
+    <requirements>
+        <container type="docker">quay.io/qiime2/core:2022.8</container>
+    </requirements>
+    <version_command>q2galaxy version vsearch</version_command>
+    <command detect_errors="aggressive">q2galaxy run vsearch cluster_features_open_reference '$inputs'</command>
+    <configfiles>
+        <inputs name="inputs" data_style="paths"/>
+    </configfiles>
+    <inputs>
+        <param name="sequences" type="data" format="qza" label="sequences: FeatureData[Sequence]" help="[required] The sequences corresponding to the features in table.">
+            <options options_filter_attribute="metadata.semantic_type">
+                <filter type="add_value" value="FeatureData[Sequence]"/>
+            </options>
+            <validator type="expression" message="Incompatible type">hasattr(value.metadata, "semantic_type") and value.metadata.semantic_type in ['FeatureData[Sequence]']</validator>
+        </param>
+        <param name="table" type="data" format="qza" label="table: FeatureTable[Frequency]" help="[required] The feature table to be clustered.">
+            <options options_filter_attribute="metadata.semantic_type">
+                <filter type="add_value" value="FeatureTable[Frequency]"/>
+            </options>
+            <validator type="expression" message="Incompatible type">hasattr(value.metadata, "semantic_type") and value.metadata.semantic_type in ['FeatureTable[Frequency]']</validator>
+        </param>
+        <param name="reference_sequences" type="data" format="qza" label="reference_sequences: FeatureData[Sequence]" help="[required] The sequences to use as cluster centroids.">
+            <options options_filter_attribute="metadata.semantic_type">
+                <filter type="add_value" value="FeatureData[Sequence]"/>
+            </options>
+            <validator type="expression" message="Incompatible type">hasattr(value.metadata, "semantic_type") and value.metadata.semantic_type in ['FeatureData[Sequence]']</validator>
+        </param>
+        <param name="perc_identity" type="float" min="1e-06" max="1" value="" label="perc_identity: Float % Range(0, 1, inclusive_start=False, inclusive_end=True)" help="[required] The percent identity at which clustering should be performed. This parameter maps to vsearch's --id parameter."/>
+        <section name="__q2galaxy__GUI__section__extra_opts__" title="Click here for additional options">
+            <param name="strand" type="select" label="strand: Str % Choices('plus', 'both')" display="radio">
+                <option value="plus" selected="true">plus</option>
+                <option value="both">both</option>
+            </param>
+            <param name="threads" type="integer" min="0" max="256" value="1" label="threads: Int % Range(0, 256, inclusive_end=True)" help="[default: 1] The number of threads to use for computation. Passing 0 will launch one thread per CPU core."/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="clustered_table" format="qza" label="${tool.name} on ${on_string}: clustered_table.qza" from_work_dir="clustered_table.qza"/>
+        <data name="clustered_sequences" format="qza" label="${tool.name} on ${on_string}: clustered_sequences.qza" from_work_dir="clustered_sequences.qza"/>
+        <data name="new_reference_sequences" format="qza" label="${tool.name} on ${on_string}: new_reference_sequences.qza" from_work_dir="new_reference_sequences.qza"/>
+    </outputs>
+    <tests/>
+    <help>
+QIIME 2: vsearch cluster-features-open-reference
+================================================
+Open-reference clustering of features.
+
+
+Outputs:
+--------
+:clustered_table.qza: The table following clustering of features.
+:clustered_sequences.qza: Sequences representing clustered features.
+:new_reference_sequences.qza: The new reference sequences. This can be used for subsequent runs of open-reference clustering for consistent definitions of features across open-reference feature tables.
+
+|
+
+Description:
+------------
+Given a feature table and the associated feature sequences, cluster the features against a reference database based on user-specified percent identity threshold of their sequences. Any sequences that don't match are then clustered de novo. This is not a general-purpose clustering method, but rather is intended to be used for clustering the results of quality-filtering/dereplication methods, such as DADA2, or for re-clustering a FeatureTable at a lower percent identity than it was originally clustered at. When a group of features in the input table are clustered into a single feature, the frequency of that single feature in a given sample is the sum of the frequencies of the features that were clustered in that sample. Feature identifiers will be inherited from the centroid feature of each cluster. For features that match a reference sequence, the centroid feature is that reference sequence, so its identifier will become the feature identifier. The clustered_sequences result will contain feature representative sequences that are derived from the sequences input for all features in clustered_table. This will always be the most abundant sequence in the cluster. The new_reference_sequences result will contain the entire reference database, plus feature representative sequences for any de novo features. This is intended to be used as a reference database in subsequent iterations of cluster_features_open_reference, if applicable. See the vsearch documentation for details on how sequence clustering is performed.
+
+
+|
+
+</help>
+    <citations>
+        <citation type="doi">10.7717/peerj.545</citation>
+        <citation type="doi">10.7717/peerj.2584</citation>
+        <citation type="doi">10.1038/s41587-019-0209-9</citation>
+    </citations>
+</tool>