Mercurial > repos > iuc > drep_dereplicate

<?xml version="1.0"?>
<macros>
    <!-- Shared version tokens. @TOOL_VERSION@ is the drep package version used by the
         "requirements" macro in this file. @VERSION_SUFFIX@ and @PROFILE@ are not referenced
         here; presumably the tool wrappers importing this file use them (TODO confirm). -->
    <token name="@TOOL_VERSION@">3.4.2</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <token name="@PROFILE@">20.01</token>
    <!-- Cross-reference block: a single bio.tools xref with the identifier "drep". -->
    <xml name="biotools">
        <xrefs>
            <xref type="bio.tools">drep</xref>
        </xrefs>
    </xml>
    <!-- Requirements block: the drep package at @TOOL_VERSION@. The yield element is the
         insertion point for any extra requirement a caller passes when expanding the macro. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">drep</requirement>
            <yield/>
        </requirements>
    </xml>
    <!-- Citations block: one DOI citation, plus a yield element where a caller can add
         further citations when expanding the macro. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1038/ismej.2017.126</citation>
            <yield />
        </citations>
    </xml>

    <!-- Input parameter for the genome set: one or more FASTA datasets, exposed to the
         command templates as $genomes. The label typo ("filer") is corrected. -->
    <xml name="genomes">
        <param argument="--genomes" type="data" format="fasta" multiple="true" label="Genomes to filter"/>
    </xml>

<!-- Stages every input genome in the working directory as a symlink named
     "<sanitised name>.fasta" and records each sanitised name in $genomefiles, the list
     that @GENOMES@ iterates over.
     The sanitised name is the last "/"-separated part of the element identifier with every
     character other than word characters, "-", "_" and "." replaced by "_".
     The ".fasta" suffix is appended so that a name is not read as an integer
     (bug in dRep; probably fixed in a later version). -->
    <token name="@PREPARE_GENOMES@"><![CDATA[
#import re
#set $genomefiles = []
#for $genome in $genomes
    #set $input_name = $re.sub('[^\w\-_.]', '_',str($genome.element_identifier.split('/')[-1]))
ln -s '${genome}' '${input_name}.fasta' &&
$genomefiles.append($input_name)
#end for
    ]]></token>
    <!-- Emits the -g flag followed by one quoted "<name>.fasta" argument for every entry of
         $genomefiles. That list is not defined here; it must already exist when this token
         is rendered (it is built by @PREPARE_GENOMES@). -->
    <token name="@GENOMES@"><![CDATA[
    -g
#for $staged_name in $genomefiles
    '${staged_name}.fasta'
#end for
    ]]></token>

    <!-- "filter" section: three integer parameters (minimum length, minimum completeness,
         maximum contamination). The two percentages are bounded to 0..100.
         Read by @FILTER_OPTIONS@ as $filter.<name>. -->
    <xml name="filtering_options">
        <section name="filter" title="Genome filtering" expanded="true">
            <param argument="--length" type="integer" value="50000" label="Minimum genome length"/>
            <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/>
            <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/>
        </section>
    </xml>
    <!-- Test values for the "filter" section.
         NOTE(review): contamination is 100 here although the form default in
         "filtering_options" is 25; confirm that this difference is intentional. -->
    <xml name="test_default_filtering_options">
        <section name="filter">
            <param name="length" value="50000"/>
            <param name="completeness" value="75"/>
            <param name="contamination" value="100"/>
        </section>
    </xml>
    <!-- Command-line fragment passing the three values of the "filter" section. -->
    <token name="@FILTER_OPTIONS@"><![CDATA[
    --length ${filter.length}
    --completeness ${filter.completeness}
    --contamination ${filter.contamination}
]]></token>

    <!-- "quality" conditional: selects where genome quality information comes from.
         checkm              run checkM (method, optional recursion limit, group size)
         genomeInfo          the user supplies a CSV with genome, completeness, contamination
         ignoreGenomeQuality no quality filtering at all; this branch has no parameters
         The help text of the selector was reworded; the previous sentence was garbled. -->
    <xml name="quality_assessment_options">
        <conditional name="quality">
            <param name="source" type="select" label="Genome quality filtering" help="Skipping checkM and quality filtering is not recommended, but it is useful with bacteriophages, eukaryotes or other genomes where checkM scoring does not work. Genomes will then only be chosen based on length and N50.">
                <option value="checkm" selected="true">Run checkM</option>
                <option value="genomeInfo">Provide quality information on the genome (CSV file)</option>
                <option value="ignoreGenomeQuality">Don't run checkM or do any quality filtering (--ignoreGenomeQuality) - NOT RECOMMENDED!</option>
            </param>
            <when value="checkm">
                <param argument="--checkM_method" type="select" label="CheckM method">
                    <option value="lineage_wf" selected="true">lineage_wf: Lineage-specific Workflow - quality estimates with lineage-specific markers (more accurate)</option>
                    <option value="taxonomy_wf">taxonomy_wf: Taxonomic-specific Workflow - quality estimates with taxonomic-specific markers (faster)</option>
                </param>
                <param argument="--set_recursion" type="integer" optional="true" label="Increases the python recursion limit" help="NOT RECOMMENDED unless checkM is crashing due to recursion issues. Recommended to set to 2000 if needed, but setting this could crash Python"/>
                <param argument="--checkm_group_size" type="integer" value="2000" min="1" label="Number of genomes passed to checkM at a time" help="Increasing this increases RAM but makes checkM faster"/>
            </when>
            <when value="genomeInfo">
                <param argument="--genomeInfo" type="data" format="csv" label="Quality information on the genomes">
                    <help><![CDATA[
                    A CSV dataset that must contain: [
                    "genome"(history dataset name of .fasta dataset of that genome),
                    "completeness"(0-100 value for completeness of the genome),
                    "contamination"(0-100 value of the contamination of the genome)]
                    ]]></help>
                </param>
            </when>
            <when value="ignoreGenomeQuality"/>
        </conditional>
    </xml>
    <!-- Test values for the "quality" conditional, using the checkm branch.
         NOTE(review): checkM_method is taxonomy_wf here although the form default is
         lineage_wf; confirm that this difference is intentional. -->
    <xml name="test_default_quality_assessment_options">
        <conditional name="quality">
            <param name="source" value="checkm"/>
            <param name="checkM_method" value="taxonomy_wf"/>
            <param name="checkm_group_size" value="2000"/>
        </conditional>
    </xml>
    <!-- Command-line fragment for the "quality" conditional; one branch per source value.
         The recursion limit is only passed when the optional parameter was filled in.
         Fixed: the recursion flag is now spelled set_recursion (it was set_recurison) and
         its value is read from the "quality" conditional, where the parameter is declared,
         instead of from the "filter" section, which has no such parameter. -->
    <token name="@QUALITY_ASSESSMENT_OPTIONS@"><![CDATA[
#if $quality.source == 'checkm'
    --checkM_method '$quality.checkM_method'
    #if str($quality.set_recursion) != ''
    --set_recursion $quality.set_recursion
    #end if
    --checkm_group_size $quality.checkm_group_size
#else if $quality.source == 'genomeInfo'
    --genomeInfo '$quality.genomeInfo'
#else if $quality.source == 'ignoreGenomeQuality'
    --ignoreGenomeQuality
#end if
]]></token>

    <!-- Parameters of the primary (MASH) clustering step: sketch size, primary ANI
         threshold, the multiround switch and its chunk size. Read by @MASH@.
         Label and help typos were corrected ("clunk", "and the cost", stray whitespace). -->
    <xml name="mash">
        <param argument="--MASH_sketch" type="integer" value="1000" min="0" label="MASH sketch size"/>
        <param argument="--P_ani" type="float" value="0.9" min="0." max="1." label="ANI threshold to form primary clusters"/>
        <param argument="--multiround_primary_clustering" type="boolean" checked="false" truevalue="--multiround_primary_clustering" falsevalue="" label="Cluster each primary chunk separately and merge at the end with single linkage?" help="Decreases RAM usage and increases speed, at the cost of a minor loss in precision and the inability to plot primary_clustering_dendrograms. Especially helpful when clustering 5000+ genomes. Will be done with single linkage clustering"/>
        <param argument="--primary_chunksize" type="integer" value="5000" min="1" label="Impacts multiround_primary_clustering" help="If you have more than this many genomes, process them in chunks of this size"/>
    </xml>
    <!-- Test values mirroring the form defaults of the "mash" macro; the empty value
         leaves the multiround switch off. -->
    <xml name="test_default_mash">
        <param name="MASH_sketch" value="1000"/>
        <param name="P_ani" value="0.9"/>
        <param name="multiround_primary_clustering" value=""/>
        <param name="primary_chunksize" value="5000"/>
    </xml>
    <!-- Command-line fragment for the primary (MASH) clustering parameters, read from the
         "steps" conditional of the "comp_clust" section. The boolean renders as its
         truevalue (the flag itself) or as an empty string. -->
    <token name="@MASH@"><![CDATA[
    --MASH_sketch '${comp_clust.steps.MASH_sketch}'
    --P_ani ${comp_clust.steps.P_ani}
    ${comp_clust.steps.multiround_primary_clustering}
    --primary_chunksize ${comp_clust.steps.primary_chunksize}
]]></token>

    <!-- Select parameter for the nucmer preset ("normal" is the default, or "tight").
         Expanded inside the ANImf and ANIn branches of "secondary_clustering". -->
    <xml name="nucmer">
        <param argument="--n_PRESET" type="select" label="Presets to pass to nucmer">
            <option value="normal" selected="true">normal: default ANIn parameters</option>
            <option value="tight">tight: only align highly conserved regions</option>
        </param>
    </xml>
    <!-- Test value for the nucmer preset; same as the form default. -->
    <xml name="test_default_nucmer">
        <param name="n_PRESET" value="normal"/>
    </xml>
    <!-- Command-line fragment passing the nucmer preset chosen in the "clustering"
         conditional. Included by @SECONDARY_CLUSTERING@. -->
    <token name="@NUCMER@"><![CDATA[
    --n_PRESET '${comp_clust.steps.clustering.n_PRESET}'
]]></token>

    <!-- Select parameter for how alignment coverage is computed ("larger" is the default,
         or "total"). Expanded inside the ANImf and ANIn branches of "secondary_clustering". -->
    <xml name="coverage_method">
        <param argument="--coverage_method" type="select" label="Method to calculate coverage of an alignment">
            <option value="larger" selected="true">Larger = max((aligned length / genome 1), (aligned_length / genome2))</option>
            <option value="total">Total = 2*(aligned length) / (sum of total genome lengths)</option>
        </param>
    </xml>
    <!-- Test value for the coverage method; same as the form default. -->
    <xml name="test_default_coverage_method">
        <param name="coverage_method" value="larger"/>
    </xml>
    <!-- Command-line fragment passing the coverage method chosen in the "clustering"
         conditional. Included by @SECONDARY_CLUSTERING@. -->
    <token name="@COVERAGE_METHOD@"><![CDATA[
    --coverage_method '${comp_clust.steps.clustering.coverage_method}'
]]></token>

    <!-- Parameters of the secondary clustering step. The "clustering" conditional picks the
         comparison algorithm: fastANI adds the greedy switch, ANImf and ANIn add the nucmer
         preset and coverage method, gANI and goANI add nothing. The secondary ANI threshold
         and the coverage threshold follow the conditional. Read by @SECONDARY_CLUSTERING@.
         Typos corrected ("nsmimscan", "Minmum") and the goANI option now carries its name
         as a prefix like the other options. -->
    <xml name="secondary_clustering">
        <conditional name="clustering">
            <param argument="--S_algorithm" type="select" label="Algorithm for secondary clustering comparisons">
                <option value="fastANI">fastANI: Kmer-based approach - very fast</option>
                <option value="ANImf" selected="true">ANImf: Align whole genomes with nucmer; filter alignment; compare aligned regions - RECOMMENDED</option>
                <option value="ANIn">ANIn: Align whole genomes with nucmer; compare aligned regions</option>
                <option value="gANI">gANI: Identify and align ORFs; compare aligned ORFS</option>
                <option value="goANI">goANI: Open source version of gANI; requires nsimscan</option>
            </param>
            <when value="fastANI">
                <param argument="--greedy_secondary_clustering" type="boolean" checked="false" truevalue="--greedy_secondary_clustering" falsevalue="" label="Use a heuristic to avoid pair-wise comparisons when doing secondary clustering?" help="Will be done with single linkage clustering"/>
            </when>
            <when value="ANImf">
                <expand macro="nucmer"/>
                <expand macro="coverage_method"/>
            </when>
            <when value="ANIn">
                <expand macro="nucmer"/>
                <expand macro="coverage_method"/>
            </when>
            <when value="gANI"/>
            <when value="goANI"/>
        </conditional>
        <param argument="--S_ani" type="float" value="0.99" min="0." max="1." label="ANI threshold to form secondary clusters"/>
        <param argument="--cov_thresh" type="float" value="0.1" min="0." max="1." label="Minimum level of overlap between genomes when doing secondary comparisons"/>
    </xml>
    <!-- Test values for the secondary clustering parameters: the ANImf branch with the
         default nucmer preset and coverage method, plus the default thresholds. -->
    <xml name="test_default_secondary_clustering">
        <conditional name="clustering">
            <param name="S_algorithm" value="ANImf"/>
            <expand macro="test_default_nucmer"/>
            <expand macro="test_default_coverage_method"/>
        </conditional>
        <param name="S_ani" value="0.99"/>
        <param name="cov_thresh" value="0.1"/>
    </xml>
    <!-- Command-line fragment for the secondary clustering step. The algorithm is always
         passed; fastANI additionally renders the greedy switch, ANImf and ANIn additionally
         include @NUCMER@ and @COVERAGE_METHOD@, and the remaining algorithms add nothing.
         The two thresholds are appended in every case. -->
    <token name="@SECONDARY_CLUSTERING@"><![CDATA[
    #set $secondary_algorithm = str($comp_clust.steps.clustering.S_algorithm)
    --S_algorithm '$secondary_algorithm'
    #if $secondary_algorithm == 'fastANI'
    ${comp_clust.steps.clustering.greedy_secondary_clustering}
    #else if $secondary_algorithm in ('ANImf', 'ANIn')
    @NUCMER@
    @COVERAGE_METHOD@
    #end if
    --S_ani ${comp_clust.steps.S_ani}
    --cov_thresh ${comp_clust.steps.cov_thresh}
]]></token>

    <!-- "comp_clust" section. The "steps" conditional decides which clustering rounds run:
         both (default), secondary only (SkipMash) or primary only (SkipSecondary), and
         expands the matching parameter macros. The linkage algorithm and the tertiary
         clustering switch apply to every choice. Read by @COMPARISON_CLUSTERING_OPTIONS@. -->
    <xml name="comparison_clustering_options">
        <section name="comp_clust" title="Genome comparison and clustering" expanded="false">
            <conditional name="steps">
                <param name="select" type="select" label="Steps in genome comparison">
                    <option value="default" selected="true">Default: Run MASH clustering and a secondary clustering</option>
                    <option value="SkipMash">Skip MASH clustering, just do secondary clustering on all genomes</option>
                    <option value="SkipSecondary">Skip secondary clustering, just perform MASH clustering</option>
                </param>
                <when value="default">
                    <expand macro="mash"/>
                    <expand macro="secondary_clustering"/>
                </when>
                <when value="SkipMash">
                    <expand macro="secondary_clustering"/>
                </when>
                <when value="SkipSecondary">
                    <expand macro="mash"/>
                </when>
            </conditional>
            <param argument="--clusterAlg"
                   type="select"
                   label="Algorithm used to cluster genomes"
                   help="Passed to scipy.cluster.hierarchy.linkage">
                <option value="average" selected="true">average</option>
                <option value="ward">ward</option>
                <option value="single">single</option>
                <option value="median">median</option>
                <option value="centroid">centroid</option>
                <option value="weighted">weighted</option>
            </param>
            <param argument="--run_tertiary_clustering"
                   type="boolean"
                   checked="false"
                   truevalue="--run_tertiary_clustering"
                   falsevalue=""
                   label="Run an additional round of clustering on the final genome set?"
                   help="This is especially useful when greedy clustering is performed and/or to handle cases where similar genomes end up in different primary clusters."/>
        </section>
    </xml>
    <!-- Test values for the "comp_clust" section: both clustering rounds with their default
         test values, average linkage, tertiary clustering off (empty value). -->
    <xml name="test_default_comparison_clustering_options">
        <section name="comp_clust">
            <conditional name="steps">
                <param name="select" value="default"/>
                <expand macro="test_default_mash"/>
                <expand macro="test_default_secondary_clustering"/>
            </conditional>
            <param name="clusterAlg" value="average"/>
            <param name="run_tertiary_clustering" value=""/>
        </section>
    </xml>
    <!-- Command-line fragment for the "comp_clust" section, written as two independent
         decisions: the primary round is either skipped (SkipMash) or configured through
         @MASH@, then the secondary round is either skipped (SkipSecondary) or configured
         through @SECONDARY_CLUSTERING@. The linkage algorithm and the tertiary clustering
         switch are always appended. -->
    <token name="@COMPARISON_CLUSTERING_OPTIONS@"><![CDATA[
#set $step_choice = str($comp_clust.steps.select)
#if $step_choice == 'SkipMash'
    --SkipMash
#else
    @MASH@
#end if
#if $step_choice == 'SkipSecondary'
    --SkipSecondary
#else
    @SECONDARY_CLUSTERING@
#end if
    --clusterAlg '${comp_clust.clusterAlg}'
    ${comp_clust.run_tertiary_clustering}
]]></token>

    <!-- "scoring" section: the six float weights of the scoring formula quoted in the
         section help, plus an optional tabular dataset with genome-specific extra weights.
         Read by @SCORING_OPTIONS@. The section help now names the F weight
         centrality_weight, matching the parameter (it said cent_weight). -->
    <xml name="scoring_options">
        <section name="scoring" title="Scoring criteria" expanded="false" help="Based off of the formula: A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) + F*(centrality - S_ani). With A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight; F = centrality_weight">
            <param argument="--completeness_weight" type="float" value="1" label="Completeness weight"/>
            <param argument="--contamination_weight" type="float" value="5" label="Contamination weight"/>
            <param argument="--strain_heterogeneity_weight" type="float" value="1" min="0." max="1." label="Strain heterogeneity weight"/>
            <param argument="--N50_weight" type="float" value=".5" label="Weight of log(genome N50)"/>
            <param argument="--size_weight" type="float" value="0" label="Weight of log(genome size)"/>
            <param argument="--centrality_weight" type="float" value="1" label="Weight of (centrality - S_ani)"/>
            <param argument="--extra_weight_table" type="data" format="tabular" multiple="false" optional="true" label="Genome specific extra weight"/>
        </section>
    </xml>
    <!-- Test values for the "scoring" section; the six weights equal the form defaults and
         no extra weight table is supplied. -->
    <xml name="test_default_scoring_options">
        <section name="scoring">
            <param name="completeness_weight" value="1"/>
            <param name="contamination_weight" value="5"/>
            <param name="strain_heterogeneity_weight" value="1"/>
            <param name="N50_weight" value=".5" />
            <param name="size_weight" value="0"/>
            <param name="centrality_weight" value="1"/>
        </section>
    </xml>
    <!-- Same test values as "test_default_scoring_options", plus the tabular test dataset
         "extra_weight_table_test" as the extra weight table. -->
    <xml name="test_extra_weight_table_scoring_options">
        <section name="scoring">
            <param name="completeness_weight" value="1"/>
            <param name="contamination_weight" value="5"/>
            <param name="strain_heterogeneity_weight" value="1"/>
            <param name="N50_weight" value=".5" />
            <param name="size_weight" value="0"/>
            <param name="centrality_weight" value="1"/>
            <param name="extra_weight_table" ftype="tabular" value="extra_weight_table_test"/>
        </section>
    </xml>
    <!-- Command-line fragment for the "scoring" section: the six weights are always passed;
         the extra weight table only when a dataset was selected (an unset optional data
         parameter renders as the string None).
         Fixed: the table is now referenced through its section, like every other scoring
         parameter, the path is quoted, and the tab indentation was replaced by spaces. -->
    <token name="@SCORING_OPTIONS@"><![CDATA[
    --completeness_weight $scoring.completeness_weight
    --contamination_weight $scoring.contamination_weight
    --strain_heterogeneity_weight $scoring.strain_heterogeneity_weight
    --N50_weight $scoring.N50_weight
    --size_weight $scoring.size_weight
    --centrality_weight $scoring.centrality_weight
    #if str($scoring.extra_weight_table) != 'None'
    --extra_weight_table '$scoring.extra_weight_table'
    #end if

]]></token>

    <!-- "warning" section: three float thresholds, each bounded to 0..1, that control when
         warnings are raised. Read by @WARNING_OPTIONS@. -->
    <xml name="warning_options">
        <section name="warning" title="Warnings" expanded="false">
            <param argument="--warn_dist" type="float" value="0.25" min="0" max="1" label="How far from the threshold to throw cluster warnings"/>
            <param argument="--warn_sim" type="float" value="0.98" min="0" max="1" label="Similarity threshold for warnings between dereplicated genomes"/>
            <param argument="--warn_aln" type="float" value="0.25" min="0" max="1" label="Minimum aligned fraction for warnings between dereplicated genomes (ANIn)"/>
        </section>
    </xml>
    <!-- Test values for the "warning" section; identical to the form defaults. -->
    <xml name="test_default_warning_options">
        <section name="warning">
            <param name="warn_dist" value="0.25"/>
            <param name="warn_sim" value="0.98"/>
            <param name="warn_aln" value="0.25"/>
        </section>
    </xml>
    <!-- Command-line fragment passing the three thresholds of the "warning" section. -->
    <token name="@WARNING_OPTIONS@"><![CDATA[
    --warn_dist ${warning.warn_dist}
    --warn_sim ${warning.warn_sim}
    --warn_aln ${warning.warn_aln}
]]></token>

    <!-- Mandatory multi-select listing the output files the user wants to keep. Its values
         are tested by the filter expressions of the output macros in this file. The yield
         element is where a caller adds further options (see "select_drep_outputs"). -->
    <xml name="select_outputs">
        <param name="select_outputs" type="select" multiple="true" optional="false" label="Select outputs">
            <option value="log" selected="true">log</option>
            <option value="warnings" selected="true">Warnings</option>
            <option value="Primary_clustering_dendrogram" selected="true">Primary_clustering_dendrogram.pdf</option>
            <option value="Secondary_clustering_dendrograms">Secondary_clustering_dendrograms.pdf</option>
            <option value="Secondary_clustering_MDS">Secondary_clustering_MDS.pdf</option>
            <option value="Clustering_scatterplots" selected="true">Clustering_scatterplots.pdf</option>
            <yield/>
        </param>
    </xml>
    <!-- Expands "select_outputs" and adds four more options through its yield point; none
         of the four is selected by default. Their values match the output names declared
         in "drep_outputs". -->
    <xml name="select_drep_outputs">
        <expand macro="select_outputs">
            <option value="Cluster_scoring">Cluster_scoring.pdf</option>
            <option value="Winning_genomes">Winning_genomes.pdf</option>
            <option value="Widb">Widb.csv</option>
            <option value="Chdb">Chdb.tsv</option>
        </expand>
    </xml>
    <!-- Test selection for the extended output list.
         NOTE(review): besides the four options selected by default in the form, this also
         requests Cluster_scoring, Winning_genomes and Widb, which the form leaves
         unselected; confirm that the "default" in the macro name is intended. -->
    <xml name="test_default_select_drep_outputs">
        <param name="select_outputs" value="log,warnings,Primary_clustering_dendrogram,Clustering_scatterplots,Cluster_scoring,Winning_genomes,Widb" />
    </xml>
    <!-- Test selection for the common output list: exactly the four options that
         "select_outputs" marks as selected. -->
    <xml name="test_default_select_outputs">
        <param name="select_outputs" value="log,warnings,Primary_clustering_dendrogram,Clustering_scatterplots" />
    </xml>

    <!-- Output datasets matching the six options of "select_outputs". Each one is taken from
         a fixed path below outdir/ in the working directory and is only created when its name
         is among the selected outputs. The log filter additionally accepts an empty selection.
         NOTE(review): "select_outputs" is declared with optional="false", so the empty-selection
         case of the log filter looks unreachable; confirm. -->
    <xml name="common_outputs">
        <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outdir/log/logger.log">
            <filter>'log' in select_outputs or not select_outputs</filter>
        </data>
        <data name="warnings" format="txt" label="${tool.name} on ${on_string}: Warnings" from_work_dir="outdir/log/warnings.txt">
            <filter>'warnings' in select_outputs</filter>
        </data>
        <data name="Primary_clustering_dendrogram" format="pdf" label="${tool.name} on ${on_string}: Primary_clustering_dendrogram.pdf" from_work_dir="outdir/figures/Primary_clustering_dendrogram.pdf">
            <filter>'Primary_clustering_dendrogram' in select_outputs</filter>
        </data>
        <data name="Secondary_clustering_dendrograms" format="pdf" label="${tool.name} on ${on_string}: Secondary_clustering_dendrograms.pdf" from_work_dir="outdir/figures/Secondary_clustering_dendrograms.pdf">
            <filter>'Secondary_clustering_dendrograms' in select_outputs</filter>
        </data>
        <data name="Secondary_clustering_MDS" format="pdf" label="${tool.name} on ${on_string}: Secondary_clustering_MDS.pdf" from_work_dir="outdir/figures/Secondary_clustering_MDS.pdf">
            <filter>'Secondary_clustering_MDS' in select_outputs</filter>
        </data>
        <data name="Clustering_scatterplots" format="pdf" label="${tool.name} on ${on_string}: Clustering_scatterplots.pdf" from_work_dir="outdir/figures/Clustering_scatterplots.pdf">
            <filter>'Clustering_scatterplots' in select_outputs</filter>
        </data>
    </xml>
    <!-- Expands "common_outputs" and adds four further datasets (two PDF figures, the Widb
         CSV table and the Chdb table), each created only when its name is among the
         selected outputs. The names match the options added by "select_drep_outputs". -->
    <xml name="drep_outputs">
        <expand macro="common_outputs"/>
        <data name="Cluster_scoring" format="pdf" label="${tool.name} on ${on_string}: Cluster_scoring.pdf" from_work_dir="outdir/figures/Cluster_scoring.pdf">
            <filter>'Cluster_scoring' in select_outputs</filter>
        </data>
        <data name="Winning_genomes" format="pdf" label="${tool.name} on ${on_string}: Winning_genomes.pdf" from_work_dir="outdir/figures/Winning_genomes.pdf">
            <filter>'Winning_genomes' in select_outputs</filter>
        </data>
        <data name="Widb" format="csv" label="${tool.name} on ${on_string}: Widb.csv" from_work_dir="outdir/data_tables/Widb.csv">
            <filter>'Widb' in select_outputs</filter>
        </data>
        <data name="Chdb" format="tabular" label="${tool.name} on ${on_string}: Chdb.tsv" from_work_dir="outdir/data/checkM/checkM_outdir/Chdb.tsv">
            <filter>'Chdb' in select_outputs</filter>
        </data>
    </xml>
    <!-- Test input: three genome files with ordinary text names and three different
         extensions (.fasta, .fna, .fa), all loaded as fasta. -->
    <xml name="test_string_inputs">
        <param name="genomes" ftype="fasta" value="Enterococcus_casseliflavus_EC20.fasta,Enterococcus_faecalis_T2.fna,Enterococcus_faecalis_TX0104.fa"/>
    </xml>
    <!-- Test input: three genome files whose names are purely numeric; presumably this
         exercises the integer-name workaround described above @PREPARE_GENOMES@. -->
    <xml name="test_integer_inputs">
        <param name="genomes" ftype="fasta" value="001,002,003"/>
    </xml>
    <!-- Test assertion wrapper for the "log" output: the caller supplies the individual
         content assertions, which are inserted at the yield point. -->
    <xml name="test_log_output">
        <output name="log">
            <assert_contents>
                <yield/>
            </assert_contents>
        </output>
    </xml>
    <token name="@GENOMES_HELP@"><![CDATA[
I/O PARAMETERS:
  -g [GENOMES [GENOMES ...]], --genomes [GENOMES [GENOMES ...]]
                        genomes to cluster in .fasta format
                        (default: None)


]]></token>
    <token name="@FILTERING_HELP@"><![CDATA[
FILTERING OPTIONS:
  -l LENGTH, --length LENGTH
                        Minimum genome length
                        (default: 50000)


  -comp COMPLETENESS, --completeness COMPLETENESS
                        Minimum genome completeness
                        (default: 75)


  -con CONTAMINATION, --contamination CONTAMINATION
                        Maximum genome contamination
                        (default: 25)


  --ignoreGenomeQuality
                        Don't run checkM or do any quality filtering. NOT
                        RECOMMENDED! This is useful for use with
                        bacteriophages or eukaryotes or things where checkM
                        scoring does not work. Will only choose genomes based
                        on length and N50 (default: False)


]]></token>
    <token name="@GENOME_COMPARISON_HELP@"><![CDATA[
GENOME COMPARISON PARAMETERS:
  -ms MASH_SKETCH, --MASH_sketch MASH_SKETCH
                        MASH sketch size (default: 1000)

  --S_algorithm {fastANI,goANI,ANIn,ANImf,gANI}
                        Algorithm for secondary clustering comparisons:
                        fastANI = Kmer-based approach; very fast
                        ANImf = (RECOMMENDED) Align whole genomes with nucmer; filter alignment; compare aligned regions
                        ANIn  = Align whole genomes with nucmer; compare aligned regions
                        gANI  = Identify and align ORFs; compare aligned ORFS
                        goANI = Open source version of gANI; requires nsimscan
                        (default: ANImf)

  --n_PRESET {normal,tight}
                        Presets to pass to nucmer
                        tight   = only align highly conserved regions
                        normal  = default ANIn parameters (default: normal)

]]></token>
    <token name="@CLUSTERING_HELP@"><![CDATA[
CLUSTERING PARAMETERS:
  -pa P_ANI, --P_ani P_ANI
                        ANI threshold to form primary (MASH) clusters
                        (default: 0.9)
  -sa S_ANI, --S_ani S_ANI
                        ANI threshold to form secondary clusters
                        (default: 0.99)

  --SkipMash            Skip MASH clustering, just do secondary clustering on
                        all genomes (default: False)
  --SkipSecondary       Skip secondary clustering, just perform MASH clustering
                        (default: False)

  -nc COV_THRESH, --cov_thresh COV_THRESH
                        Minimum level of overlap between genomes when doing
                        secondary comparisons (default: 0.1)
  -cm {total,larger}, --coverage_method {total,larger}
                        Method to calculate coverage of an alignment
                        (for ANIn/ANImf only; gANI can only do larger method)
                        total   = 2*(aligned length) / (sum of total genome lengths)
                        larger  = max((aligned length / genome 1), (aligned_length / genome2))
                        (default: larger)

  --clusterAlg CLUSTERALG
                        Algorithm used to cluster genomes (passed to
                        scipy.cluster.hierarchy.linkage) (default: average)

]]></token>
    <token name="@SCORING_HELP@"><![CDATA[
SCORING CRITERIA
Based off of the formula:
A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) + F*(centrality - S_ani)

A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight; F = centrality_weight:
  -comW COMPLETENESS_WEIGHT, --completeness_weight COMPLETENESS_WEIGHT
                        completeness weight (default: 1)
  -conW CONTAMINATION_WEIGHT, --contamination_weight CONTAMINATION_WEIGHT
                        contamination weight (default: 5)
  -strW STRAIN_HETEROGENEITY_WEIGHT, --strain_heterogeneity_weight STRAIN_HETEROGENEITY_WEIGHT
                        strain heterogeneity weight (default: 1)
  -N50W N50_WEIGHT, --N50_weight N50_WEIGHT
                        weight of log(genome N50) (default: 0.5)
  -sizeW SIZE_WEIGHT, --size_weight SIZE_WEIGHT
                        weight of log(genome size) (default: 0)
  -extraW EXTRA_WEIGHT_TABLE, --extra_weight_table EXTRA_WEIGHT_TABLE
                        Path to a tab-separated file with two columns, no headers, listing genome and extra score to apply to that genome (optional)


]]></token>
    <token name="@TAXONOMY_HELP@"><![CDATA[
TAXONOMY:
  --run_tax             generate taxonomy information (Tdb)
                        (default: False)

  --tax_method {percent,max}
                        Method of determining taxonomy
                        percent = The most descriptive taxonomic level with at least (per) hits
                        max     = The centrifuge taxonomic level with the most overall hits
                        (default: percent)


  -per PERCENT, --percent PERCENT
                        minimum percent for percent method
                        (default: 50)


  --cent_index CENT_INDEX
                        path to centrifuge index (for example,
                        /home/mattolm/download/centrifuge/indices/b+h+v)
                        (default: None)

]]></token>
    <token name="@WARNINGS_HELP@"><![CDATA[
WARNINGS:
  --warn_dist WARN_DIST
                        How far from the threshold to throw cluster warnings
                        (default: 0.25)
  --warn_sim WARN_SIM   Similarity threshold for warnings between dereplicated
                        genomes (default: 0.98)
  --warn_aln WARN_ALN   Minimum aligned fraction for warnings between
                        dereplicated genomes (ANIn) (default: 0.25)

]]></token>
</macros>
author	iuc
date	Wed, 15 Mar 2023 11:05:04 +0000
parents	368cb4bef9d8
children	1faea6e8c92b