Galaxy |

Changeset 13:844fa988236b (2023-09-04)

Previous changeset 12:9d1fbff733cf (2022-07-19) Next changeset 14:d9c3016f7283 (2023-09-07)

Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper commit 468bd31b8858adbba2854f118e4cbe31f4cd68cb

modified:
eggnog_macros.xml
eggnog_mapper.xml

added:
README
eggnog_mapper_annotate.xml
eggnog_mapper_search.xml

diff -r 9d1fbff733cf -r 844fa988236b README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Mon Sep 04 12:47:09 2023 +0000

@@ -0,0 +1,20 @@
+This folder contains three tools:
+
+1. eggnog_mapper: which runs the search and annotation phase in a single tool
+2. eggnog_mapper_search: which implements the search phase
+3. eggnog_mapper_annotate: which implements the annotation phase
+
+While the search phase of eggnog_mapper is very CPU intensive and scales well
+to a larger number of threads, the annotation phase is very IO intensive
+and can be very inefficient (depending on the configuration, e.g. if the
+reference data is located on a slow partition).
+
+While for most applications eggnog_mapper will be sufficient, separating the
+two phases can be more efficient:
+
+- sending eggnog_mapper_search to a destination using many threads
+- and eggnog_mapper_annotate to a destination using a small number of threads
+
+Admins can choose to set the environment variable ``EGGNOG_DBMEM=--dbmem``
+which will copy the complete EggNOG annotation DB into memory which is usually
+much faster than using multiple cores (but needs approx. 37GB of RAM).
\ No newline at end of file

diff -r 9d1fbff733cf -r 844fa988236b eggnog_macros.xml
--- a/eggnog_macros.xml Tue Jul 19 15:14:52 2022 +0000
+++ b/eggnog_macros.xml Mon Sep 04 12:47:09 2023 +0000

[

b'@@ -3,6 +3,7 @@\n <token name="@TOOL_VERSION@">2.1.8</token>\n <token name="@VERSION_SUFFIX@">3</token>\n <token name="@EGGNOG_DB_VERSION@">5.0.2</token>\n+ <token name="@PROFILE@">22.01</token>\n \n+ \n <xml name="fasta_input">\n <param argument="-i" name="input" type="data" format="fasta" label="Fasta sequences to annotate"/>\n <conditional name="input_trans">\n@@ -105,41 +127,494 @@\n <param argument="--evalue" type="float" optional="true" min="0" label="Minimum query coverage" help="Report only alignments below or equal the e-value" />\n <param argument="--score" type="float" value="0.001" optional="true" min="0" label="Minimum query coverage" help="Report only alignments above or equal the score" />\n </xml>\n- <token name="@SEED_ORTHOLOG_COLUMNS@">query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,query_start,query_end,seed_start,seed_end,pident,query_cov,seed_cov</token>\n+\n+ <xml name="ortho_macro">\n+ <conditional name="ortho_method">\n+ <param argument="-m" type="select" label="Basis for annotation">\n+ <yield name="search_options"/>\n+ <yield name="reuse_options"/>\n+ </param>\n+ <yield name="search_whens"/>\n+ <yield name="reuse_whens"/>\n+ </conditional>\n+ </xml>\n+\n+ <xml name="ortho_search_macro">\n+ <expand macro="ortho_macro">\n+ <token name="search_options">\n+ <expand macro="search_options_macro"/>\n+ </token>\n+ <token name="search_whens">\n+ <expand macro="search_whens_macro"/>\n+ </token>\n+ </expand>\n+ </xml>\n+\n+ <xml name="ortho_annotate_macro">\n+ <expand macro="ortho_macro">\n+ <token name="reuse_options">\n+ <expand macro="reuse_options_macro"/>\n+ </token>\n+ <token name="reuse_whens">\n+ <expand macro="reuse_whens_macro"/>\n+ </token>\n+ </expand>\n+ </xml>\n+\n+ <xml name="ortho_full_macro">\n+ <expand macro="ortho_macro">\n+ <token name="search_options">\n+ <expand macro="search_options_macro"/>\n+ </token>\n+ <token name="reuse_options">\n+ <expand macro="reuse_options_macro"/>\n+ </token>\n+ <token 
name="search_whens">\n+ <expand macro="search_whens_macro"/>\n+ </token>\n+ <token name="reuse_whens">\n+ <expand macro="reuse_whens_macro"/>\n+ </token>\n+ </expand>\n+ </xml>\n+\n+ <xml name="search_options_macro">\n+ <option value="diamond">Seed orthologs computed with Diamond (diamond)</option>\n+ <option value="mmseqs">Seed orthologs computed with MMseqs2 (mmseqs)</option>\n+ </xml>\n+\n+ <xml name="reuse_options_macro">\n+ <option value="no_search">Use existing seed orthologs (no_search)</option>\n+ <option value="cache">Use cached annotations (cache). See also --md5</option>\n+ </xml>\n+\n+ <xml name="search_whens_macro">\n+ <when v'..b' <metadata name="columns" value="@COLUMNS@" />\n \n <assert_contents>\n <has_line line="#query	seed_ortholog	evalue	score	eggNOG_OGs	max_annot_lvl	COG_category	Description	Preferred_name	GOs	EC	KEGG_ko	KEGG_Pathway	KEGG_Module	KEGG_Reaction	KEGG_rclass	BRITE	KEGG_TC	CAZy	BiGG_Reaction	PFAMs@ADD_COLUMN_NAMES@"/>\n <has_line_matching expression="(\\S+\\t){2}[-+.e\\d]+\\t[.\\d]+(\\t\\S+){7}\\tko:\\S+(\\t\\S+){9}@ADD_COLUMN_RE@" n="1"/>\n+ <has_line_matching expression="##.*" negate="@NOCOMMENTS@"/>\n </assert_contents>\n </output>\n </xml>\n- <xml name="annotations_orthologs_assertion">\n+ <xml name="annotations_orthologs_assertion" token_nocomments="true">\n <output name="annotations_orthologs" ftype="tabular">\n <metadata name="columns" value="4" />\n <metadata name="column_names" value="query,orth_type,species,orthologs" />\n <assert_contents>\n <has_line line="#query	orth_type	species	orthologs"/>\n <has_line_matching expression="\\S+\\t(one2one|many2one|one2many|many2many|seed)(\\t[^\\t]+){2}" n="2"/>\n+ <has_line_matching expression="##.*" negate="@NOCOMMENTS@"/>\n </assert_contents>\n </output>\n </xml>\n+ <token name="@HELP_ANNOTATION_OUTPUTS@"><![CDATA[\n+ **annotations**\n+\n+ This file provides final annotations of each query. 
Tab-delimited columns in the file are:\n+ \n+ - ``query_name``: query sequence name\n+ - ``seed_eggNOG_ortholog``: best protein match in eggNOG\n+ - ``seed_ortholog_evalue``: best protein match (e-value)\n+ - ``seed_ortholog_score``: best protein match (bit-score)\n+ - ``predicted_taxonomic_group``\n+ - ``predicted_protein_name``: Predicted protein name for query sequences\n+ - ``GO_terms``: Comma delimited list of predicted Gene Ontology terms\n+ - ``EC_number``\n+ - ``KEGG_KO``\n+ - ``KEGG_Pathway``: Comma delimited list of predicted KEGG pathways\n+ - ``KEGG_Module``\n+ - ``KEGG_Reaction``\n+ - ``KEGG_rclass``\n+ - ``BRITE``\n+ - ``KEGG_TC``\n+ - ``CAZy``\n+ - ``BiGG_Reactions``\n+ - ``Annotation_tax_scope``: The taxonomic scope used to annotate this query sequence\n+ - ``Matching_OGs``: Comma delimited list of matching eggNOG Orthologous Groups\n+ - ``best_OG|evalue|score``: Best matching Orthologous Groups (deprecated, use smallest from eggnog OGs)\n+ - ``COG_functional_categories``: COG functional category inferred from best matching OG\n+ - ``eggNOG_free_text_description``\n+ \n+ **orthologs**\n+\n+ This output is only created if the option ``--report_orthologs`` is checked.\n+ It provides the orthologs used for the annotation. It\'s a tab delimited file with the following columns:\n+ \n+ - ``query``\n+ - ``orth_type`` Type of orthologs in this row. See --target_orthologs.\n+ - ``species``\n+ - ``orthologs`` comma-separated list of orthologs (If an ortholog shows a "*", such ortholog was used to transfer its annotations to the query.)\n+ \n+ **sequences without annotation **\n+\n+ This output is created if cached annotations are used as input. \n+ It is a FASTA file containing all sequences that are not found in the cached annotations.\n+ These sequences can then be used as input for another run of the EggNOG mapper\n+ computing seed orthologs with diamond, etc.\n+ ]]></token>\n </macros>\n'

diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper.xml
--- a/eggnog_mapper.xml Tue Jul 19 15:14:52 2022 +0000
+++ b/eggnog_mapper.xml Mon Sep 04 12:47:09 2023 +0000

[

b'@@ -1,4 +1,4 @@\n-<tool id="eggnog_mapper" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">\n+<tool id="eggnog_mapper" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n <description>functional sequence annotation by orthology</description>\n <macros>\n <import>eggnog_macros.xml</import>\n@@ -6,86 +6,15 @@\n <expand macro="requirements"/>\n <expand macro="version_command"/>\n <command detect_errors="aggressive"><![CDATA[\n- #if $ortho_method.m == "no_search"\n- cat \n- #for aht in $ortho_method.annotate_hits_table\n- $aht\n- #end for\n- > annotate_hits_table.tsv\n- &&\n- #end if\n+ @MERGE_ANNOTATIONS@\n \n emapper.py\n- --data_dir \'$eggnog_data.fields.path\'\n- -m \'$ortho_method.m\'\n-\n- #if $ortho_method.m in [\'diamond\', \'mmseqs\', \'cache\']:\n- -i \'$ortho_method.input\'\n- --itype \'$ortho_method.input_trans.itype\'\n- #if $ortho_method.input_trans.itype in [\'CDS\', \'genome\', \'metagenome\']:\n- $ortho_method.input_trans.translate\n- #end if\n- #if $ortho_method.input_trans.itype in [\'genome\', \'metagenome\']:\n- --genepred $ortho_method.input_trans.genepred\n- #end if\n- #elif $ortho_method.m == "no_search"\n- --annotate_hits_table annotate_hits_table.tsv\n- #end if\n- \n- #if $ortho_method.m == \'cache\'\n- --cache \'$ortho_method.cache\'\n- #end if\n-\n- #if $ortho_method.m in [\'diamond\', \'mmseqs\']:\n- ## Diamond option\n- #if $ortho_method.m == "diamond":\n- --matrix \'$ortho_method.matrix_gapcosts.matrix\'\n- $ortho_method.matrix_gapcosts.gap_costs\n- --sensmode $ortho_method.sensmode\n- $ortho_method.dmnd_iterate\n- $ortho_method.dmnd_ignore_warnings\n- #elif $ortho_method.m == "mmseqs":\n- --start_sens $ortho_method.start_sens\n- --sens_steps $ortho_method.sens_steps\n- --final_sens $ortho_method.final_sens\n- #end if\n-\n- ## Common options for search filtering (applies to diamond and mmseqs only)\n- #if str($ortho_method.query_cover):\n- --query_cover 
$ortho_method.query_cover\n- #end if\n- #if str($ortho_method.subject_cover):\n- --subject_cover $ortho_method.subject_cover\n- #end if\n- #if str($ortho_method.pident):\n- --pident $ortho_method.pident\n- #end if\n- #if str($ortho_method.evalue):\n- --evalue $ortho_method.evalue\n- #end if\n- #if str($ortho_method.score):\n- --score $ortho_method.score\n- #end if\n- #end if\n-\n+ @DB_TOKEN@\n+ @ORTHO_SEARCH_TOKEN@\n #if $annotation_options.no_annot == "--no_annot"\n --no_annot\n #else\n- #if str($annotation_options.seed_ortholog_evalue):\n- --seed_ortholog_evalue $annotation_options.seed_ortholog_evalue\n- #end if\n- #if str($annotation_options.seed_ortholog_score):\n- --seed_ortholog_score $annotation_options.seed_ortholog_score\n- #end if\n- #if $annotation_options.tax_scope:\n- --tax_scope=$annotation_options.tax_scope\n- #end if\n- #if $annotation_options.target_orthologs:\n- --target_orthologs=$annotation_options.target_orthologs\n- #end if\n- #if $annotation_options.go_evidence:\n- --go_evidence=$annotation_options.go_evidence\n- #end if\n+ @ANNOTATION_TOKEN@\n #end if\n $output_options.no_file_comments\n $output_options.report_orthologs\n@@ -96,261 +25,27 @@\n --temp_dir \\${TEMP:-\\$_GA'..b' <param name="report_orthologs" value="true"/>\n- <param name="no_file_comments" value="true"/>\n+ <param name="no_file_comments" value="false"/>\n <param name="md5" value="true"/>\n </section>\n- <expand macro="seed_orthologs_assertion"/>\n- <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="	md5" add_column_re="\\t[\\d\\w]+"/>\n- <expand macro="annotations_orthologs_assertion"/>\n+ <expand macro="seed_orthologs_assertion" nocomments="false"/>\n+ <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="	md5" add_column_re="\\t[\\d\\w]+" nocomments="false"/>\n+ <expand macro="annotations_orthologs_assertion" nocomments="false"/>\n <expand macro="stdout_assertion"/>\n 
</test>\n \n@@ -416,7 +109,6 @@\n </conditional>\n <section name="output_options">\n <param name="report_orthologs" value="true"/>\n- <param name="no_file_comments" value="true"/>\n <param name="md5" value="true"/>\n </section>\n <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="	md5" add_column_re="\\t[\\d\\w]+"/>\n@@ -439,7 +131,6 @@\n </section>\n <section name="output_options">\n <param name="report_orthologs" value="true"/>\n- <param name="no_file_comments" value="true"/>\n </section>\n <expand macro="seed_orthologs_assertion"/>\n <expand macro="annotations_assertion"/>\n@@ -459,7 +150,6 @@\n </conditional>\n <section name="output_options">\n <param name="report_orthologs" value="true"/>\n- <param name="no_file_comments" value="true"/>\n </section>\n <expand macro="seed_orthologs_assertion"/>\n <expand macro="annotations_assertion"/>\n@@ -502,40 +192,11 @@\n Outputs\n -------\n \n-**seed_orthologs**\n-\n-each line in the file provides the best match of each query within the best Orthologous Group (OG)\n-reported in the [project].hmm_hits file, obtained running PHMMER against all sequences within the best OG.\n-The seed ortholog is used to fetch fine-grained orthology relationships from eggNOG.\n-If using the diamond search mode, seed orthologs are directly\n-obtained from the best matching sequences by running DIAMOND against the whole eggNOG protein space.\n-\n-**annotations**\n-\n-This file provides final annotations of each query. 
Tab-delimited columns in the file are:\n+@HELP_SEARCH_OUTPUTS@\n \n-- ``query_name``: query sequence name\n-- ``seed_eggNOG_ortholog``: best protein match in eggNOG\n-- ``seed_ortholog_evalue``: best protein match (e-value)\n-- ``seed_ortholog_score``: best protein match (bit-score)\n-- ``predicted_taxonomic_group``\n-- ``predicted_protein_name``: Predicted protein name for query sequences\n-- ``GO_terms``: Comma delimited list of predicted Gene Ontology terms\n-- ``EC_number``\n-- ``KEGG_KO``\n-- ``KEGG_Pathway``: Comma delimited list of predicted KEGG pathways\n-- ``KEGG_Module``\n-- ``KEGG_Reaction``\n-- ``KEGG_rclass``\n-- ``BRITE``\n-- ``KEGG_TC``\n-- ``CAZy``\n-- ``BiGG_Reactions``\n-- ``Annotation_tax_scope``: The taxonomic scope used to annotate this query sequence\n-- ``Matching_OGs``: Comma delimited list of matching eggNOG Orthologous Groups\n-- ``best_OG|evalue|score``: Best matching Orthologous Groups (deprecated, use smallest from eggnog OGs)\n-- ``COG_functional_categories``: COG functional category inferred from best matching OG\n-- ``eggNOG_free_text_description``\n+@HELP_ANNOTATION_OUTPUTS@\n+\n+\n \n **Recommentation for large input data**\n \n@@ -558,7 +219,6 @@\n \n Another alternative is to use cached annotations (produced in a run with --md5 enabled).\n \n-\n ]]></help>\n <expand macro="citations"/>\n </tool>\n'

diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper_annotate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/eggnog_mapper_annotate.xml Mon Sep 04 12:47:09 2023 +0000

[

@@ -0,0 +1,141 @@
+<tool id="eggnog_mapper_annotate" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>annotation phase</description>
+    <macros>
+        <import>eggnog_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="aggressive"><![CDATA[
+        ## Annotation phase: an optional merge step followed by a single emapper.py call.
+        ## The upper-case placeholders are tokens imported from eggnog_macros.xml.
+        @MERGE_ANNOTATIONS@
+
+        emapper.py
+        @DB_TOKEN@
+        ## NOTE(review): the mode is hard-coded to no_search although the
+        ## ortho_method select of this tool also offers "cache" (used by the
+        ## second test). Confirm how this interacts with the search token below.
+        -m no_search
+        @ORTHO_SEARCH_TOKEN@
+
+        @ANNOTATION_TOKEN@
+        $output_options.no_file_comments
+        $output_options.report_orthologs
+        $output_options.md5
+        --output='results'
+        --cpu "\${GALAXY_SLOTS:-1}"
+        ## Quoted so that a directory containing whitespace stays a single argument.
+        --scratch_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+        --temp_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+    ]]></command>
+    <inputs>
+        <expand macro="db_macro"/> <!-- presumably declares the eggnog_data parameter that the tests below set; confirm in eggnog_macros.xml -->
+        <expand macro="ortho_annotate_macro"/> <!-- ortho_method conditional limited to the reuse options (no_search, cache) -->
+
+        <section name="annotation_options" title="Annotation options">
+            <expand macro="annotation_options_macro"/>
+        </section>
+
+        <expand macro="output_options_annotate_macro"/> <!-- presumably the output_options section (report_orthologs, no_file_comments, md5) read by the command; confirm -->
+    </inputs>
+    <outputs>
+        <expand macro="annotation_output_macro"/> <!-- output definitions live in eggnog_macros.xml and are not visible here -->
+        <expand macro="annotation_orthologs_output_macro"/> <!-- NOTE(review): the tests also reference an output named no_annotations; confirm which macro declares it -->
+    </outputs>
+    <tests>
+        
+        <test expect_num_outputs="1">
+            <!-- Annotate an existing seed orthologs table without reporting orthologs.
+                 annotation_options is a section (not a conditional) and is left at
+                 its defaults, so it is not listed here. -->
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>
+            <param name="annotate_hits_table" value="DIA_nlim.emapper.seed_orthologs" ftype="tabular"/>
+            <section name="output_options">
+                <param name="report_orthologs" value="false"/>
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="annotations_assertion"/>
+            <expand macro="stdout_assertion"/>
+        </test>
+
+        
+        <test expect_num_outputs="2"> <!-- cache mode with md5 column enabled; the no_annotations FASTA is asserted to have 0 lines -->
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> 
+            <conditional name="ortho_method">
+                <param name="m" value="cache"/>
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+                <param name="cache" value="DIA_nlim.emapper.annotations_cached" ftype="tabular"/>
+            </conditional>
+            <section name="output_options">
+                <param name="report_orthologs" value="true"/>
+                <param name="no_file_comments" value="true"/>
+                <param name="md5" value="true"/>
+            </section>
+            <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="	md5" add_column_re="\t[\d\w]+"/>
+            <output name="no_annotations" ftype="fasta">
+                <assert_contents>
+                    <has_n_lines n="0"/>
+                </assert_contents>
+            </output>
+            <expand macro="stdout_assertion"/>
+        </test>
+
+        
+        <test expect_num_outputs="2"> <!-- seed orthologs input with tax_scope set and orthologs reported; stdout must contain the tax_scope option -->
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> 
+            <param name="annotate_hits_table" value="DIA_nlim.emapper.seed_orthologs" ftype="tabular">
+                <!-- existing seed orthologs table passed as the annotate_hits_table input -->
+            </param>
+            <section name="annotation_options">
+                <param name="tax_scope" value="651137" />
+            </section>
+            <section name="output_options">
+                <param name="report_orthologs" value="true"/>
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="annotations_assertion"/>
+            <expand macro="annotations_orthologs_assertion"/>
+            <expand macro="stdout_assertion">
+                <has_text text="--tax_scope=651137"/>
+            </expand>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+eggnog-mapper
+=============
+Overview
+--------
+
+``eggnog-mapper`` is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments.
+Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.
+The use of orthology predictions for functional annotation is considered more precise than traditional homology searches,
+as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).
+
+EggNOG-mapper is also available as a public online resource:  `<http://beta-eggnogdb.embl.de/#/app/emapper>`_.
+
+Outputs
+-------
+
+@HELP_ANNOTATION_OUTPUTS@
+
+**Recommendation for large input data**
+
+EggNOG-mapper consists of two phases
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+by default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run.
+
+For large input FASTA datasets it can be favourable to split this into two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See also the `eggNOG-mapper wiki <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_.
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>

diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper_search.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/eggnog_mapper_search.xml Mon Sep 04 12:47:09 2023 +0000

[

@@ -0,0 +1,101 @@
+<tool id="eggnog_mapper_search" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>search phase</description>
+    <macros>
+        <import>eggnog_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="aggressive"><![CDATA[
+        ## Search phase: a single emapper.py call. The upper-case placeholders
+        ## are tokens imported from eggnog_macros.xml.
+        emapper.py
+        @DB_TOKEN@
+        @ORTHO_SEARCH_TOKEN@
+
+        ## Only seed orthologs are declared as output of this tool, so skip the
+        ## IO intensive annotation phase explicitly.
+        ## NOTE(review): confirm that none of the tokens above already emits this flag.
+        --no_annot
+        $output_options.no_file_comments
+        --output='results'
+        --cpu "\${GALAXY_SLOTS:-4}"
+        ## Quoted so that a directory containing whitespace stays a single argument.
+        --scratch_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+        --temp_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+    ]]></command>
+    <inputs>
+        <expand macro="db_macro"/> <!-- presumably declares the eggnog_data parameter that the tests below set; confirm in eggnog_macros.xml -->
+        <expand macro="ortho_search_macro"/> <!-- ortho_method conditional limited to the search options (diamond, mmseqs) -->
+        <expand macro="output_options_macro"/> <!-- presumably the output_options section holding no_file_comments, which the command reads; confirm -->
+    </inputs>
+    <outputs>
+        <expand macro="ortho_search_output_macro"/> <!-- defined in eggnog_macros.xml (not visible here); the tests below expect exactly one output and check it with seed_orthologs_assertion -->
+    </outputs>
+    <tests>
+        
+        <test expect_num_outputs="1"> <!-- search with the default method: only the input FASTA is set inside ortho_method -->
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>
+            <conditional name="ortho_method">
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+            </conditional>
+            <section name="output_options">
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="seed_orthologs_assertion"/>
+            <expand macro="stdout_assertion"/>
+        </test>
+
+        
+        <test expect_num_outputs="1"> <!-- explicit diamond search with sensmode fast; stdout must contain the sensmode option -->
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> 
+            <conditional name="ortho_method">
+                <param name="m" value="diamond" />
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+                <param name="sensmode" value="fast" />
+            </conditional>
+            <section name="output_options">
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="seed_orthologs_assertion"/>
+            <expand macro="stdout_assertion">
+                <has_text text="--sensmode fast"/>
+            </expand>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+eggnog-mapper
+=============
+Overview
+--------
+
+``eggnog-mapper`` is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments.
+Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.
+The use of orthology predictions for functional annotation is considered more precise than traditional homology searches,
+as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).
+
+EggNOG-mapper is also available as a public online resource:  `<http://beta-eggnogdb.embl.de/#/app/emapper>`_.
+
+Outputs
+-------
+
+@HELP_SEARCH_OUTPUTS@
+
+**Recommendation for large input data**
+
+EggNOG-mapper consists of two phases
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+by default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run.
+
+For large input FASTA datasets it can be favourable to split this into two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See also the `eggNOG-mapper wiki <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_.
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>