Mercurial > repos > iuc > tn93_cluster

<tool id="tn93_cluster" name="TN93 Cluster" version="@TOOL_VERSION@+galaxy1">
    <!--
        Galaxy wrapper around the bundled tn93_cluster.py script.
        The @TOOL_VERSION@ token and the "requirements" / "citations" macros
        are expected to come from macros.xml (not visible here).
    -->
    <description>sequences that lie within a specific distance of each other</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Python is requested in addition to whatever the shared macro declares. -->
    <expand macro="requirements">
        <requirement type="package" version="3.9">python</requirement>
    </expand>
    <version_command><![CDATA[tn93 --version]]></version_command>
    <!--
        Every parameter is forwarded to the wrapper script. The compressed
        FASTA output path is passed only when the "compress" checkbox is set,
        matching the filter on the corresponding output dataset.
    -->
    <command detect_errors="exit_code"><![CDATA[
    python '$__tool_directory__/tn93_cluster.py' --input '$input_fasta' --reference '$reference' --output '$tn93_clusters'
        --threshold $threshold
        --ambigs $ambigs
        --cluster-type $cluster_type
        --overlap $overlap
        --fraction $fraction
        --cluster-count $cluster_count
        #if $compress:
            --compressed '$tn93_compressed_clusters'
        #end if
    ]]></command>
    <inputs>
        <param name="input_fasta" type="data" format="fasta" label="Input in FASTA format" />
        <param name="reference" type="data" format="fasta" label="Reference in FASTA format" />
        <param argument="--compress" type="boolean" truevalue="--compress" falsevalue="" label="Output additional fasta dataset with compressed clusters" />
        <param argument="--cluster-count" type="integer" value="200" label="Only retain this many clusters" />
        <param argument="--threshold" type="float" value="0.0005" min="0" max="1" label="Distance threshold" help="Sequences which lie within this distance will be clustered" />
        <param argument="--ambigs" type="select" label="Strategy for ambiguous nucleotides">
            <option value="resolve" selected="true">resolve</option>
            <option value="average">average</option>
            <option value="skip">skip</option>
            <option value="gapmm">gapmm</option>
        </param>
        <!-- The argument is spelled with a hyphen so the displayed flag matches the command line; the derived parameter name is still "cluster_type". -->
        <param argument="--cluster-type" type="select" label="Clustering rules" help="&quot;All&quot;: Each sequence within the cluster must be within the specified distance of every other sequence in the cluster. &quot;Any&quot;: Each sequence within the cluster must be within the specified distance of at least one other sequence.">
            <option value="all">All</option>
            <option value="any">Any</option>
        </param>
        <!-- An overlap requirement below one base is meaningless, so it is rejected in the form. -->
        <param name="overlap" argument="-l" type="integer" value="100" min="1"
            label="Only process pairs that overlap by at least N bases" />
        <!-- Bounded to [0, 1] like the distance threshold, since the value is a fraction. -->
        <param name="fraction" argument="-g" type="float" value="1.0" min="0" max="1"
            label="Maximum tolerated fraction of ambiguous characters" />
    </inputs>
    <outputs>
        <!--
            NOTE(review): the command line already hands the dataset paths to the
            script, while from_work_dir additionally collects fixed file names from
            the job working directory. Confirm against tn93_cluster.py which of the
            two mechanisms actually delivers the data.
        -->
        <data format="json" name="tn93_clusters" from_work_dir="clusters.json" />
        <data format="fasta" name="tn93_compressed_clusters" from_work_dir="clusters.fa">
            <filter>compress</filter>
        </data>
    </outputs>
    <tests>
        <!-- Compressed output enabled, so both datasets are expected; unspecified parameters keep their defaults. -->
        <test expect_num_outputs="2">
            <param name="input_fasta" value="cluster-in1-1.fa" />
            <param name="reference" value="cluster-in1-2.fa" />
            <param name="threshold" value="0.35" />
            <param name="compress" value="true" />
            <output file="cluster-out1.json" ftype="json" name="tn93_clusters" />
            <output file="cluster-out1.fa" ftype="fasta" name="tn93_compressed_clusters" />
        </test>
    </tests>
    <help><![CDATA[
TN93-Cluster
============

Clusters sequences that lie within a user-specified distance threshold of each
other, using the Tamura-Nei 1993 (TN93) distance calculation.

Outputs
-------

- A JSON dataset describing the clusters.
- Optionally, a FASTA dataset with the compressed clusters (enable
  "Output additional fasta dataset with compressed clusters").

]]></help>
    <expand macro="citations">
        <citation type="doi">10.1093/oxfordjournals.molbev.a040023</citation>
    </expand>
</tool>
author	iuc
date	Wed, 20 Apr 2022 17:00:11 +0000
parents	af03f3398f03
children	eb6f0ec5b95e