view eggnog_mapper.xml @ 14:d9c3016f7283 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper commit 192779247c3385704ebd98a2cc47a210fb1f4d0c
author galaxyp
date Thu, 07 Sep 2023 18:51:23 +0000
parents 844fa988236b
children
line wrap: on
line source

<tool id="eggnog_mapper" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short description of what the tool does. -->
    <description>functional sequence annotation by orthology</description>
    <!-- eggnog_macros.xml is the only macro file imported here; the expand elements
         used throughout this file refer to definitions pulled in through it. -->
    <macros>
        <import>eggnog_macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <!-- Builds the emapper.py command line.
         * When "Annotate seed orthologs" is "No", only the no_annot flag is passed;
           otherwise the annotation arguments are expanded from the annotation token.
         * The CPU count comes from GALAXY_SLOTS (default 4).
         * Scratch and temp directories use TEMP, falling back to the Galaxy job
           tmp dir. Both are quoted so that a path containing whitespace is passed
           to emapper.py as a single argument. -->
    <command detect_errors="aggressive"><![CDATA[
        @MERGE_ANNOTATIONS@

        emapper.py
        @DB_TOKEN@
        @ORTHO_SEARCH_TOKEN@
        #if $annotation_options.no_annot == "--no_annot"
            --no_annot
        #else
            @ANNOTATION_TOKEN@
        #end if
        $output_options.no_file_comments
        $output_options.report_orthologs
        $output_options.md5
        --output='results'
        --cpu "\${GALAXY_SLOTS:-4}"
        --scratch_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
        --temp_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
    ]]></command>
    <inputs>
        <expand macro="db_macro"/>
        <expand macro="ortho_full_macro"/>
        <!-- Switch for the annotation phase: the empty value ("Yes") exposes the
             annotation options, the other value ("No") has no further parameters.
             The selected value is tested in the command template and in the
             filter of the annotation output. -->
        <conditional name="annotation_options">
            <param argument="--no_annot" type="select" label="Annotate seed orthologs">
                <option value="">Yes</option>
                <option value="--no_annot">No</option>
            </param>
            <when value="">
                <expand macro="annotation_options_macro"/>
            </when>
            <when value="--no_annot"/>
        </conditional>
        
        <expand macro="output_options_annotate_macro"/>
    </inputs>
    <outputs>
        <expand macro="ortho_search_output_macro"/>
        <!-- The annotation output is only kept when "Annotate seed orthologs" is
             "Yes", i.e. when the no_annot select holds the empty value. -->
        <expand macro="annotation_output_macro">
            <filter>annotation_options['no_annot'] == ''</filter>
        </expand>
        <expand macro="annotation_orthologs_output_macro"/>
    </outputs>
    <tests>
        <!-- test producing only seed orthologs: annotation is switched off via no_annot,
             so a single output (checked by the seed orthologs assertion) is expected -->
        <test expect_num_outputs="1">
            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>
            <conditional name="ortho_method">
                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
            </conditional>
            <conditional name="annotation_options">
                <param name="no_annot" value="--no_annot"/>
            </conditional>
            <section name="output_options">
            </section>
            <expand macro="seed_orthologs_assertion"/>
            <expand macro="stdout_assertion"/>
        </test>
        
        <!-- test producing annotations from seed orthologs: the method is set to no_search
             and an existing seed orthologs table is supplied as annotate_hits_table -->
        <test expect_num_outputs="2">
            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
            <conditional name="ortho_method">
                <param name="m" value="no_search"/>
                <param name="annotate_hits_table" value="DIA_nlim.emapper.seed_orthologs" ftype="tabular">
                    <!-- this has no effect at the moment since column_names can not be set in uploads <metadata name="column_names" value="@SEED_ORTHOLOG_COLUMNS@"/> -->
                </param>
            </conditional>
            <conditional name="annotation_options">
            </conditional>
            <section name="output_options">
                <param name="report_orthologs" value="true"/>
            </section>
            <expand macro="annotations_assertion"/>
            <expand macro="annotations_orthologs_assertion"/>
            <expand macro="stdout_assertion"/>
        </test>

        <!-- test producing seed orthologs and annotations at once (ie the same as the previous two tests but in a single tool run) -->
        <!-- md5 is enabled here, hence the additional md5 column (22 columns) in the annotations
             assertion; file comments are kept (no_file_comments is false), hence nocomments="false" -->
        <test expect_num_outputs="3">
            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
            <conditional name="ortho_method">
                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
            </conditional>
            <section name="output_options">
                <param name="report_orthologs" value="true"/>
                <param name="no_file_comments" value="false"/>
                <param name="md5" value="true"/>
            </section>
            <expand macro="seed_orthologs_assertion" nocomments="false"/>
            <!-- NOTE(review): "columm" in add_metadata_columm_names looks like a typo, but the attribute
                 presumably has to match the token name declared in the macros file; confirm before renaming -->
            <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="&#009;md5" add_column_re="\t[\d\w]+" nocomments="false"/>
            <expand macro="annotations_orthologs_assertion" nocomments="false"/>
            <expand macro="stdout_assertion"/>
        </test>
        
        <!-- test using cached annotations from previous run: the method is set to cache and a
             cached annotations table is supplied; the no_annotations output is expected to be empty -->
        <test expect_num_outputs="2">
            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
            <conditional name="ortho_method">
                <param name="m" value="cache"/>
                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
                <param name="cache" value="DIA_nlim.emapper.annotations_cached" ftype="tabular"/>
            </conditional>
            <section name="output_options">
                <param name="report_orthologs" value="true"/>
                <param name="md5" value="true"/>
            </section>
            <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="&#009;md5" add_column_re="\t[\d\w]+"/>
            <output name="no_annotations" ftype="fasta">
                <assert_contents>
                    <has_n_lines n="0"/>
                </assert_contents>
            </output>
            <expand macro="stdout_assertion"/>
        </test>

        <!-- test setting tax scope: the chosen taxon id must show up in the stdout assertion.
             annotation_options is declared as a conditional in the inputs, so the test
             addresses it with a conditional element as well (it was written as a section). -->
        <test expect_num_outputs="3">
            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
            <conditional name="ortho_method">
                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
            </conditional>
            <conditional name="annotation_options">
                <param name="tax_scope" value="651137" />
            </conditional>
            <section name="output_options">
                <param name="report_orthologs" value="true"/>
            </section>
            <expand macro="seed_orthologs_assertion"/>
            <expand macro="annotations_assertion"/>
            <expand macro="annotations_orthologs_assertion"/>
            <expand macro="stdout_assertion">
                <has_text text="--tax_scope=651137"/>
            </expand>
        </test>

        <!-- test setting a diamond option: sensmode is set to fast and the resulting
             argument is checked through the stdout assertion -->
        <test expect_num_outputs="3">
            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
            <conditional name="ortho_method">
                <param name="m" value="diamond" />
                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
                <param name="sensmode" value="fast" />
            </conditional>
            <section name="output_options">
                <param name="report_orthologs" value="true"/>
            </section>
            <expand macro="seed_orthologs_assertion"/>
            <expand macro="annotations_assertion"/>
            <expand macro="annotations_orthologs_assertion"/>
            <expand macro="stdout_assertion">
                <has_text text="--sensmode fast"/>
            </expand>
        </test>
        <!-- not enabled as it requires a specific .db file, hard to minimize -->
        <!--test>
            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>
            <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
            <section name="seed_ortho_options">
                <conditional name="ortho_method">
                    <param name="m" value="mmseqs" />
                    <param name="start_sens" value="4" />
                </conditional>
            </section>
            <param name="report_orthologs" value="true"/>
            <param name="no_file_comments" value="true"/>
            <output name="seed_orthologs" file="DIA_nlim.emapper.seed_orthologs" ftype="tabular" compare="sim_size"/>
            <output name="annotations" file="DIA_nlim.emapper.annotations" ftype="tabular" compare="sim_size"/>
            <output name="annotations_orthologs" file="DIA_nlim.emapper.annotations_orthologs" ftype="tabular"/>
        </test-->
    </tests>
    <help><![CDATA[

eggnog-mapper
=============
Overview
--------

``eggnog-mapper`` is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments.
Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.
The use of orthology predictions for functional annotation is considered more precise than traditional homology searches,
as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).

EggNOG-mapper is also available as a public online resource:  `<http://beta-eggnogdb.embl.de/#/app/emapper>`_.

Outputs
-------

@HELP_SEARCH_OUTPUTS@

@HELP_ANNOTATION_OUTPUTS@



**Recommendation for large input data**

EggNOG-mapper consists of two phases

1. finding seed orthologous sequences (compute intensive)
2. expanding annotations (IO intensive)

By default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
both phases are executed within one tool run. 

For large input FASTA datasets it can be favourable to split this into two separate
tool runs as follows:

1. Split the FASTA (e.g. 1M seqs per data set)
2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)

See also the `eggNOG-mapper wiki <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_.

Another alternative is to use cached annotations (produced in a run with --md5 enabled).

    ]]></help>
    <expand macro="citations"/>
</tool>