view blast2lca.xml @ 0:ad69d2a05c3c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/megan commit 2a49a6cdc1b4d37ab30eb85b8c658ccf9f5a0644"
author iuc
date Wed, 24 Nov 2021 21:52:36 +0000
parents
children 1930eb870dca
line wrap: on
line source

<tool id="megan_blast2lca" name="MEGAN Blast2LCA: apply LCA alignment" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>to produce a taxonomic classification</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
#import re

## Map the Galaxy datatype of the input onto the value expected by the
## blast2lca --format option.  is_of_type() also matches subclasses, and in
## Galaxy the blastxml, tabular and sam datatypes all derive from txt (sam
## additionally derives from tabular), so the most specific datatypes have to
## be tested first; otherwise such inputs would be reported as BlastText.
#if $blast_input.is_of_type('daa'):
    #set blast_format = 'DAA'
#else if $blast_input.is_of_type('blastxml'):
    #set blast_format = 'BlastXML'
#else if $blast_input.is_of_type('sam'):
    #set blast_format = 'SAM'
#else if $blast_input.is_of_type('tabular'):
    #set blast_format = 'BlastTab'
#else if $blast_input.is_of_type('txt'):
    #set blast_format = 'BlastText'
#end if

## Build a file extension from the format name, keeping a .gz suffix when the
## Galaxy extension of the input carries one.
#set blast_ext = '.' + $blast_format
#if $blast_input.ext.endswith('.gz'):
    #set blast_ext = $blast_ext + '.gz'
#end if

## Expose the dataset to blast2lca under a name that ends with that extension.
#set blast_input_identifier = 'blast_input' + $blast_ext
ln -s '${blast_input}' '${blast_input_identifier}' &&

blast2lca
    --input '${blast_input_identifier}'
    --format '${blast_format}'
    --mode '${mode}'
    $advanced_options.showRanks
    $advanced_options.officialRanksOnly
    $advanced_options.showTaxIds
    --minScore $advanced_options.minScore
    --maxExpected $advanced_options.maxExpected
    --topPercent $advanced_options.topPercent
    --minPercentIdentity $advanced_options.minPercentIdentity
    --maxKeggPerRead $advanced_options.maxKeggPerRead
    $advanced_options.applyTopPercentKegg
    $advanced_options.parseTaxonNames
    #if $advanced_options.mapDB:
        --mapDB '$advanced_options.mapDB'
    #end if
    #if $advanced_options.acc2taxa:
        --acc2taxa '$advanced_options.acc2taxa'
    #end if
    #if $advanced_options.syn2taxa:
        --syn2taxa '$advanced_options.syn2taxa'
    #end if
    #if $advanced_options.acc2kegg:
        --acc2kegg '$advanced_options.acc2kegg'
    #end if
    #if $advanced_options.syn2kegg:
        --syn2kegg '$advanced_options.syn2kegg'
    #end if
    $advanced_options.firstWordIsAccession
    #if str($advanced_options.accessionTags) != '':
        --accessionTags '$advanced_options.accessionTags'
    #end if
    #if $advanced_options.kegg:
        --kegg
        --keggOutput '$kegg_output'
    #end if
    --output '${taxonomy_output}'
]]></command>
    <inputs>
        <!-- Alignment file to classify; the command section derives the MEGAN format option from its datatype. -->
        <param name="blast_input" argument="--input" type="data" format="daa,blastxml,sam,tabular,txt" label="Blast file"/>
        <param argument="--mode" type="select" label="Blast mode">
            <expand macro="blast_mode_options"/>
        </param>
        <section name="advanced_options" title="Advanced options" expanded="false">
            <!-- When enabled, the command also writes the KEGG output dataset. -->
            <param argument="--kegg" type="boolean" truevalue="--kegg" falsevalue="" checked="false" label="Map reads to KEGG KOs?"/>
            <param argument="--showRanks" type="boolean" truevalue="--showRanks" falsevalue="" checked="true" label="Show taxonomic ranks?"/>
            <param argument="--officialRanksOnly" type="boolean" truevalue="--officialRanksOnly" falsevalue="" checked="true" label="Report only taxa that have an official rank?"/>
            <param argument="--showTaxIds" type="boolean" truevalue="--showTaxIds" falsevalue="" checked="false" label="Report taxon ids rather than taxon names?"/>
            <!-- Presumably defines minScore, maxExpected, topPercent and minPercentIdentity, which the command reads; confirm in macros.xml. -->
            <expand macro="common_blast_params"/>
            <param argument="--maxKeggPerRead" type="integer" value="4" label="Maximum number of KEGG assignments to report for a read"/>
            <param argument="--applyTopPercentKegg" type="boolean" truevalue="--applyTopPercentKegg" falsevalue="" checked="true" label="Apply top percent filter in KEGG KO analysis?"/>
            <param argument="--parseTaxonNames" type="boolean" truevalue="--parseTaxonNames" falsevalue="" checked="true" label="Parse taxon names?"/>
            <!-- Optional mapping files; each one is passed to blast2lca only when a dataset is selected. -->
            <param argument="--mapDB" type="data" format="sqlite" optional="true" label="MEGAN mapping db"/>
            <param argument="--acc2taxa" type="data" format="sqlite" optional="true" label="Accession-to-Taxonomy mapping file"/>
            <param argument="--syn2taxa" type="data" format="sqlite" optional="true" label="Synonyms-to-Taxonomy mapping file"/>
            <param argument="--acc2kegg" type="data" format="sqlite" optional="true" label="Accession-to-KEGG mapping file"/>
            <param argument="--syn2kegg" type="data" format="sqlite" optional="true" label="Synonyms-to-KEGG mapping file"/>
            <param argument="--firstWordIsAccession" type="boolean" truevalue="--firstWordIsAccession" falsevalue="" checked="true" label="First word in reference header is accession number?" help="Set to true for NCBI-nr downloaded Sep 2016 or later"/>
            <param argument="--accessionTags" type="text" optional="true" label="List of accession tags" help="Specify a space-separated list of tags (e.g., 'gb|' 'ref|')">
                <expand macro="sanitize_query" validinitial="string.ascii_letters,string.punctuation"/>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Taxonomic classification; always produced. -->
        <data format="txt" name="taxonomy_output"/>
        <!-- KEGG classification; only created when the "kegg" advanced option is enabled. -->
        <data format="txt" name="kegg_output" label="${tool.name} on ${on_string} (KEGG)">
            <filter>advanced_options['kegg']</filter>
        </data>
    </outputs>
    <tests>
        <!-- BlastN text input with KEGG mapping switched on, so both outputs are expected. -->
        <test expect_num_outputs="2">
            <param name="mode" value="BlastN"/>
            <param name="blast_input" ftype="txt" value="blast_R1.txt"/>
            <section name="advanced_options">
                <param name="kegg" value="true"/>
            </section>
            <output name="kegg_output" ftype="txt" file="kegg_output.txt"/>
            <output name="taxonomy_output" ftype="txt" file="taxonomy_output.txt"/>
        </test>
    </tests>
    <help>
**What it does**

Applies the LCA alignment to reads and can also perform KEGG classification.  The input is a BLAST file or something similar.
This wrapper supports the following formats for the input Blast file.  The SAM, Tabular and Text formats can be produced by
the Galaxy MALT Analyzer tool.  When these formats are used, this tool will apply the SAM, BlastTab and BlastText format options
required by MEGAN.

 * **DIAMOND Alignment Archive (DAA)** - a binary alignment format produced by the DIAMOND sequence aligner
 * **BlastXML** - XML output from Blast
 * **Sequence Alignment/Map (SAM)** - a tab-delimited text format consisting of a header section, which is optional, and an alignment section
 * **Tabular** - information presented in the form of a table with rows and columns
 * **Text** - plain text format

The tool produces a text file for the LCA alignment.

If the option to Map reads to KEGG KOs is selected, an additional text file containing the KEGG classification is produced.
The KEGG database provides a collection of metabolic pathways and other pathways, but due to KEGG licensing restrictions, the
Community Edition of KEGG (used by this tool) ships with an early 2011 version of the KEGG classification, so KEGG pathways
cannot be viewed in the output.

The KEGG classification can be displayed as a tree.  Genes are mapped onto so-called KO groups and these are present in one or
more pathways.  The MEGAN program will attempt to map each read onto a gene that has a valid KO identifier and thus, to one or
more pathways.

To perform this analysis, MEGAN uses a mapping of GI numbers to KO groups. Hence, if a KEGG-based analysis is desired, then
the database that is used in the BLAST alignment must contain GI numbers.
    </help>
    <citations>
        <!-- A "doi" citation takes the bare DOI, not a resolver URL. -->
        <citation type="doi">10.1101/050559</citation>
    </citations>
</tool>