Repository 'eggnog_mapper'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/eggnog_mapper

Changeset 13:844fa988236b (2023-09-04)
Previous changeset 12:9d1fbff733cf (2022-07-19) Next changeset 14:d9c3016f7283 (2023-09-07)
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper commit 468bd31b8858adbba2854f118e4cbe31f4cd68cb
modified:
eggnog_macros.xml
eggnog_mapper.xml
added:
README
eggnog_mapper_annotate.xml
eggnog_mapper_search.xml
b
diff -r 9d1fbff733cf -r 844fa988236b README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Mon Sep 04 12:47:09 2023 +0000
b
@@ -0,0 +1,20 @@
+This folder contains three tools: 
+
+1. eggnog_mapper: which runs the search and annotation phases in a single tool
+2. eggnog_mapper_search: which implements the search phase
+3. eggnog_mapper_annotate: which implements the annotation phase
+
+While the search phase of eggnog_mapper is very CPU intensive and also scales
+efficiently to a larger number of threads, the annotation phase is very IO intensive
+and can be very inefficient (depending on the configuration, e.g. if the
+reference data is located on a slow partition).
+
+While eggnog_mapper will be sufficient for most applications, separating the
+two phases can be more efficient:
+
+- sending eggnog_mapper_search to a destination using many threads
+- and eggnog_mapper_annotate to a destination using a small number of threads
+
+Admins can choose to set the environment variable ``EGGNOG_DBMEM=--dbmem``,
+which will copy the complete EggNOG annotation DB into memory. This is usually
+much faster than using multiple cores (but needs approx. 37GB of RAM).
\ No newline at end of file
b
diff -r 9d1fbff733cf -r 844fa988236b eggnog_macros.xml
--- a/eggnog_macros.xml Tue Jul 19 15:14:52 2022 +0000
+++ b/eggnog_macros.xml Mon Sep 04 12:47:09 2023 +0000
[
b'@@ -3,6 +3,7 @@\n    <token name="@TOOL_VERSION@">2.1.8</token>\n    <token name="@VERSION_SUFFIX@">3</token>\n    <token name="@EGGNOG_DB_VERSION@">5.0.2</token>\n+   <token name="@PROFILE@">22.01</token>\n     <!--\n     # DB versionning was super confusing for eggnog-mapper 2.0.x:\n     # eggnog-mapper 1.* needed a db v4.5 (based on eggnog v4.5)\n@@ -65,6 +66,27 @@\n         </test>\n     </xml>\n \n+    \n+    <xml name="stdout_assertion">\n+        <assert_stdout>\n+            <has_line line="#  emapper-@TOOL_VERSION@"/>\n+            <has_line line="FINISHED"/>\n+            <yield/>\n+        </assert_stdout>\n+    </xml>\n+    <xml name="db_macro">\n+        <param name="eggnog_data" type="select" label="Version of eggNOG Database">\n+            <options from_data_table="eggnog_mapper_db_versioned">\n+                <filter type="static_value" column="3" value="@IDX_VERSION@" />\n+            </options>\n+        </param>\n+    </xml>\n+    <token name="@DB_TOKEN@"><![CDATA[\n+        --data_dir \'$eggnog_data.fields.path\'\n+    ]]></token>\n+\n+    <!-- macros and tokens for search -->\n+    \n     <xml name="fasta_input">\n         <param argument="-i" name="input" type="data" format="fasta" label="Fasta sequences to annotate"/>\n         <conditional name="input_trans">\n@@ -105,41 +127,494 @@\n         <param argument="--evalue" type="float" optional="true" min="0" label="Minimum query coverage" help="Report only alignments below or equal the e-value" />\n         <param argument="--score" type="float" value="0.001" optional="true" min="0" label="Minimum query coverage" help="Report only alignments above or equal the score" />\n     </xml>\n-    <token name="@SEED_ORTHOLOG_COLUMNS@">query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,query_start,query_end,seed_start,seed_end,pident,query_cov,seed_cov</token>\n+\n+    <xml name="ortho_macro">\n+        <conditional name="ortho_method">\n+            <param argument="-m" 
type="select" label="Basis for annotation">\n+                <yield name="search_options"/>\n+                <yield name="reuse_options"/>\n+            </param>\n+            <yield name="search_whens"/>\n+            <yield name="reuse_whens"/>\n+        </conditional>\n+    </xml>\n+\n+    <xml name="ortho_search_macro">\n+        <expand macro="ortho_macro">\n+            <token name="search_options">\n+                <expand macro="search_options_macro"/>\n+            </token>\n+            <token name="search_whens">\n+                <expand macro="search_whens_macro"/>\n+            </token>\n+        </expand>\n+    </xml>\n+\n+    <xml name="ortho_annotate_macro">\n+        <expand macro="ortho_macro">\n+            <token name="reuse_options">\n+                <expand macro="reuse_options_macro"/>\n+            </token>\n+            <token name="reuse_whens">\n+                <expand macro="reuse_whens_macro"/>\n+            </token>\n+        </expand>\n+    </xml>\n+\n+    <xml name="ortho_full_macro">\n+        <expand macro="ortho_macro">\n+            <token name="search_options">\n+                <expand macro="search_options_macro"/>\n+            </token>\n+            <token name="reuse_options">\n+                <expand macro="reuse_options_macro"/>\n+            </token>\n+            <token name="search_whens">\n+                <expand macro="search_whens_macro"/>\n+            </token>\n+            <token name="reuse_whens">\n+                <expand macro="reuse_whens_macro"/>\n+            </token>\n+        </expand>\n+    </xml>\n+\n+    <xml name="search_options_macro">\n+        <option value="diamond">Seed orthologs computed with Diamond (diamond)</option>\n+        <option value="mmseqs">Seed orthologs computed with MMseqs2 (mmseqs)</option>\n+    </xml>\n+\n+    <xml name="reuse_options_macro">\n+        <option value="no_search">Use existing seed orthologs (no_search)</option>\n+        <option value="cache">Use cached 
annotations (cache). See also --md5</option>\n+    </xml>\n+\n+    <xml name="search_whens_macro">\n+        <when v'..b'   <metadata name="columns" value="@COLUMNS@" />\n             <!-- <metadata name="column_names" value="query,seed_ortholog,evalue,score,max_annot_lvl,COG_category,Description,Preferred_name,GOs,EC,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMseggNOG_OGs@ADD_METADATA_COLUMN_NAMES@" /> -->\n             <assert_contents>\n                 <has_line line="#query&#009;seed_ortholog&#009;evalue&#009;score&#009;eggNOG_OGs&#009;max_annot_lvl&#009;COG_category&#009;Description&#009;Preferred_name&#009;GOs&#009;EC&#009;KEGG_ko&#009;KEGG_Pathway&#009;KEGG_Module&#009;KEGG_Reaction&#009;KEGG_rclass&#009;BRITE&#009;KEGG_TC&#009;CAZy&#009;BiGG_Reaction&#009;PFAMs@ADD_COLUMN_NAMES@"/>\n                 <has_line_matching expression="(\\S+\\t){2}[-+.e\\d]+\\t[.\\d]+(\\t\\S+){7}\\tko:\\S+(\\t\\S+){9}@ADD_COLUMN_RE@" n="1"/>\n+                <has_line_matching expression="##.*" negate="@NOCOMMENTS@"/>\n             </assert_contents>\n         </output>\n     </xml>\n-    <xml name="annotations_orthologs_assertion">\n+    <xml name="annotations_orthologs_assertion" token_nocomments="true">\n         <output name="annotations_orthologs" ftype="tabular">\n             <metadata name="columns" value="4" />\n             <metadata name="column_names" value="query,orth_type,species,orthologs" />\n             <assert_contents>\n                 <has_line line="#query&#009;orth_type&#009;species&#009;orthologs"/>\n                 <has_line_matching expression="\\S+\\t(one2one|many2one|one2many|many2many|seed)(\\t[^\\t]+){2}" n="2"/>\n+                <has_line_matching expression="##.*" negate="@NOCOMMENTS@"/>\n             </assert_contents>\n         </output>\n     </xml>\n+    <token name="@HELP_ANNOTATION_OUTPUTS@"><![CDATA[\n+        **annotations**\n+\n+        This file provides final annotations of each 
query. Tab-delimited columns in the file are:\n+        \n+        - ``query_name``: query sequence name\n+        - ``seed_eggNOG_ortholog``: best protein match in eggNOG\n+        - ``seed_ortholog_evalue``: best protein match (e-value)\n+        - ``seed_ortholog_score``: best protein match (bit-score)\n+        - ``predicted_taxonomic_group``\n+        - ``predicted_protein_name``: Predicted protein name for query sequences\n+        - ``GO_terms``: Comma delimited list of predicted Gene Ontology terms\n+        - ``EC_number``\n+        - ``KEGG_KO``\n+        - ``KEGG_Pathway``: Comma delimited list of predicted KEGG pathways\n+        - ``KEGG_Module``\n+        - ``KEGG_Reaction``\n+        - ``KEGG_rclass``\n+        - ``BRITE``\n+        - ``KEGG_TC``\n+        - ``CAZy``\n+        - ``BiGG_Reactions``\n+        - ``Annotation_tax_scope``: The taxonomic scope used to annotate this query sequence\n+        - ``Matching_OGs``: Comma delimited list of matching eggNOG Orthologous Groups\n+        - ``best_OG|evalue|score``: Best matching Orthologous Groups (deprecated, use smallest from eggnog OGs)\n+        - ``COG_functional_categories``: COG functional category inferred from best matching OG\n+        - ``eggNOG_free_text_description``\n+        \n+        **orthologs**\n+\n+        This output is only created if the option ``--report_orthologs`` is checked.\n+        It provides the orthologs used for the annotation. It\'s a tab delimited file with the following columns:\n+        \n+        - ``query``\n+        - ``orth_type`` Type of orthologs in this row. See --target_orthologs.\n+        - ``species``\n+        - ``orthologs`` comma-separated list of orthologs (If an ortholog shows a "*", such ortholog was used to transfer its annotations to the query.)\n+        \n+        **sequences without annotation **\n+\n+        This output is created if cached annotations are used as input. 
\n+        It is a FASTA file containing all sequences that are not found in the cached annotations.\n+        These sequences can then be used as input for another run of the EggNOG mapper\n+        computing seed orthologs with diamond, etc.\n+    ]]></token>\n </macros>\n'
b
diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper.xml
--- a/eggnog_mapper.xml Tue Jul 19 15:14:52 2022 +0000
+++ b/eggnog_mapper.xml Mon Sep 04 12:47:09 2023 +0000
[
b'@@ -1,4 +1,4 @@\n-<tool id="eggnog_mapper" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">\n+<tool id="eggnog_mapper" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n     <description>functional sequence annotation by orthology</description>\n     <macros>\n         <import>eggnog_macros.xml</import>\n@@ -6,86 +6,15 @@\n     <expand macro="requirements"/>\n     <expand macro="version_command"/>\n     <command detect_errors="aggressive"><![CDATA[\n-        #if $ortho_method.m == "no_search"\n-            cat \n-            #for aht in $ortho_method.annotate_hits_table\n-                $aht\n-            #end for\n-            > annotate_hits_table.tsv\n-            &&\n-        #end if\n+        @MERGE_ANNOTATIONS@\n \n         emapper.py\n-        --data_dir \'$eggnog_data.fields.path\'\n-        -m \'$ortho_method.m\'\n-\n-        #if $ortho_method.m in [\'diamond\', \'mmseqs\', \'cache\']:\n-            -i \'$ortho_method.input\'\n-            --itype \'$ortho_method.input_trans.itype\'\n-            #if $ortho_method.input_trans.itype in [\'CDS\', \'genome\', \'metagenome\']:\n-                $ortho_method.input_trans.translate\n-            #end if\n-            #if $ortho_method.input_trans.itype in [\'genome\', \'metagenome\']:\n-                --genepred $ortho_method.input_trans.genepred\n-            #end if\n-        #elif $ortho_method.m == "no_search"\n-            --annotate_hits_table annotate_hits_table.tsv\n-        #end if\n-        \n-        #if $ortho_method.m == \'cache\'\n-            --cache \'$ortho_method.cache\'\n-        #end if\n-\n-        #if $ortho_method.m in [\'diamond\', \'mmseqs\']:\n-            ## Diamond option\n-            #if $ortho_method.m == "diamond":\n-                --matrix \'$ortho_method.matrix_gapcosts.matrix\'\n-                $ortho_method.matrix_gapcosts.gap_costs\n-                --sensmode $ortho_method.sensmode\n-                
$ortho_method.dmnd_iterate\n-                $ortho_method.dmnd_ignore_warnings\n-            #elif $ortho_method.m == "mmseqs":\n-                --start_sens $ortho_method.start_sens\n-                --sens_steps $ortho_method.sens_steps\n-                --final_sens $ortho_method.final_sens\n-            #end if\n-\n-            ## Common options for search filtering (applies to diamond and mmseqs only)\n-            #if str($ortho_method.query_cover):\n-                --query_cover $ortho_method.query_cover\n-            #end if\n-            #if str($ortho_method.subject_cover):\n-                --subject_cover $ortho_method.subject_cover\n-            #end if\n-            #if str($ortho_method.pident):\n-                --pident $ortho_method.pident\n-            #end if\n-            #if str($ortho_method.evalue):\n-                --evalue $ortho_method.evalue\n-            #end if\n-            #if str($ortho_method.score):\n-                --score $ortho_method.score\n-            #end if\n-        #end if\n-\n+        @DB_TOKEN@\n+        @ORTHO_SEARCH_TOKEN@\n         #if $annotation_options.no_annot == "--no_annot"\n             --no_annot\n         #else\n-            #if str($annotation_options.seed_ortholog_evalue):\n-                --seed_ortholog_evalue $annotation_options.seed_ortholog_evalue\n-            #end if\n-            #if str($annotation_options.seed_ortholog_score):\n-                --seed_ortholog_score $annotation_options.seed_ortholog_score\n-            #end if\n-            #if $annotation_options.tax_scope:\n-                --tax_scope=$annotation_options.tax_scope\n-            #end if\n-            #if $annotation_options.target_orthologs:\n-                --target_orthologs=$annotation_options.target_orthologs\n-            #end if\n-            #if $annotation_options.go_evidence:\n-                --go_evidence=$annotation_options.go_evidence\n-            #end if\n+            @ANNOTATION_TOKEN@\n         #end 
if\n         $output_options.no_file_comments\n         $output_options.report_orthologs\n@@ -96,261 +25,27 @@\n         --temp_dir \\${TEMP:-\\$_GA'..b'      <param name="report_orthologs" value="true"/>\n-                <param name="no_file_comments" value="true"/>\n+                <param name="no_file_comments" value="false"/>\n                 <param name="md5" value="true"/>\n             </section>\n-            <expand macro="seed_orthologs_assertion"/>\n-            <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="&#009;md5" add_column_re="\\t[\\d\\w]+"/>\n-            <expand macro="annotations_orthologs_assertion"/>\n+            <expand macro="seed_orthologs_assertion" nocomments="false"/>\n+            <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="&#009;md5" add_column_re="\\t[\\d\\w]+" nocomments="false"/>\n+            <expand macro="annotations_orthologs_assertion" nocomments="false"/>\n             <expand macro="stdout_assertion"/>\n         </test>\n         \n@@ -416,7 +109,6 @@\n             </conditional>\n             <section name="output_options">\n                 <param name="report_orthologs" value="true"/>\n-                <param name="no_file_comments" value="true"/>\n                 <param name="md5" value="true"/>\n             </section>\n             <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="&#009;md5" add_column_re="\\t[\\d\\w]+"/>\n@@ -439,7 +131,6 @@\n             </section>\n             <section name="output_options">\n                 <param name="report_orthologs" value="true"/>\n-                <param name="no_file_comments" value="true"/>\n             </section>\n             <expand macro="seed_orthologs_assertion"/>\n             <expand macro="annotations_assertion"/>\n@@ -459,7 +150,6 @@\n             </conditional>\n             <section 
name="output_options">\n                 <param name="report_orthologs" value="true"/>\n-                <param name="no_file_comments" value="true"/>\n             </section>\n             <expand macro="seed_orthologs_assertion"/>\n             <expand macro="annotations_assertion"/>\n@@ -502,40 +192,11 @@\n Outputs\n -------\n \n-**seed_orthologs**\n-\n-each line in the file provides the best match of each query within the best Orthologous Group (OG)\n-reported in the [project].hmm_hits file, obtained running PHMMER against all sequences within the best OG.\n-The seed ortholog is used to fetch fine-grained orthology relationships from eggNOG.\n-If using the diamond search mode, seed orthologs are directly\n-obtained from the best matching sequences by running DIAMOND against the whole eggNOG protein space.\n-\n-**annotations**\n-\n-This file provides final annotations of each query. Tab-delimited columns in the file are:\n+@HELP_SEARCH_OUTPUTS@\n \n-- ``query_name``: query sequence name\n-- ``seed_eggNOG_ortholog``: best protein match in eggNOG\n-- ``seed_ortholog_evalue``: best protein match (e-value)\n-- ``seed_ortholog_score``: best protein match (bit-score)\n-- ``predicted_taxonomic_group``\n-- ``predicted_protein_name``: Predicted protein name for query sequences\n-- ``GO_terms``: Comma delimited list of predicted Gene Ontology terms\n-- ``EC_number``\n-- ``KEGG_KO``\n-- ``KEGG_Pathway``: Comma delimited list of predicted KEGG pathways\n-- ``KEGG_Module``\n-- ``KEGG_Reaction``\n-- ``KEGG_rclass``\n-- ``BRITE``\n-- ``KEGG_TC``\n-- ``CAZy``\n-- ``BiGG_Reactions``\n-- ``Annotation_tax_scope``: The taxonomic scope used to annotate this query sequence\n-- ``Matching_OGs``: Comma delimited list of matching eggNOG Orthologous Groups\n-- ``best_OG|evalue|score``: Best matching Orthologous Groups (deprecated, use smallest from eggnog OGs)\n-- ``COG_functional_categories``: COG functional category inferred from best matching OG\n-- 
``eggNOG_free_text_description``\n+@HELP_ANNOTATION_OUTPUTS@\n+\n+\n \n **Recommentation for large input data**\n \n@@ -558,7 +219,6 @@\n \n Another alternative is to use cached annotations (produced in a run with --md5 enabled).\n \n-\n     ]]></help>\n     <expand macro="citations"/>\n </tool>\n'
b
diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper_annotate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/eggnog_mapper_annotate.xml Mon Sep 04 12:47:09 2023 +0000
[
@@ -0,0 +1,141 @@
+<tool id="eggnog_mapper_annotate" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>annotation phase</description>
+    <macros>
+        <import>eggnog_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="aggressive"><![CDATA[
+        @MERGE_ANNOTATIONS@
+
+        emapper.py
+        @DB_TOKEN@
+        -m no_search
+        @ORTHO_SEARCH_TOKEN@
+
+        @ANNOTATION_TOKEN@
+        $output_options.no_file_comments
+        $output_options.report_orthologs
+        $output_options.md5
+        --output='results'
+        --cpu "\${GALAXY_SLOTS:-1}"
+        --scratch_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+        --temp_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+    ]]></command> <!-- Annotation phase command line. The scratch_dir/temp_dir values are double-quoted like the cpu value so that a temp path containing whitespace stays a single argument. NOTE(review): "-m no_search" is fixed here although one test selects m=cache; confirm against the ORTHO_SEARCH_TOKEN definition in eggnog_macros.xml. -->
+    <inputs> <!-- all macros expanded below are imported from eggnog_macros.xml -->
+        <expand macro="db_macro"/> <!-- eggnog_data select, filled from the eggnog_mapper_db_versioned data table -->
+        <expand macro="ortho_annotate_macro"/> <!-- ortho_method conditional restricted to the reuse options (no_search, cache) -->
+
+        <section name="annotation_options" title="Annotation options">
+            <expand macro="annotation_options_macro"/>
+        </section>
+        
+        <expand macro="output_options_annotate_macro"/>
+    </inputs>
+    <outputs> <!-- output definitions live in eggnog_macros.xml -->
+        <expand macro="annotation_output_macro"/>
+        <expand macro="annotation_orthologs_output_macro"/>
+    </outputs>
+    <tests>
+        <!-- test producing annotations from seed orthologs -->
+        <test expect_num_outputs="1">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <param name="annotate_hits_table" value="DIA_nlim.emapper.seed_orthologs" ftype="tabular">
+                <!-- this has no effect at the moment since column_names can not be set in uploads <metadata name="column_names" value="@SEED_ORTHOLOG_COLUMNS@"/> -->
+            </param>
+            <section name="annotation_options">
+            </section> <!-- annotation_options is declared as a section in the inputs; no parameters are set here -->
+            <section name="output_options">
+                <param name="report_orthologs" value="false"/>
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="annotations_assertion"/>
+            <expand macro="stdout_assertion"/>
+        </test>
+
+        <!-- test using cached annotations from previous run. NOTE(review): the expand below passes "add_metadata_columm_names" (double m) while eggnog_macros.xml refers to ADD_METADATA_COLUMN_NAMES; confirm which spelling the macro expects -->
+        <test expect_num_outputs="2">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <conditional name="ortho_method">
+                <param name="m" value="cache"/>
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+                <param name="cache" value="DIA_nlim.emapper.annotations_cached" ftype="tabular"/>
+            </conditional>
+            <section name="output_options">
+                <param name="report_orthologs" value="true"/>
+                <param name="no_file_comments" value="true"/>
+                <param name="md5" value="true"/>
+            </section>
+            <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="&#009;md5" add_column_re="\t[\d\w]+"/>
+            <output name="no_annotations" ftype="fasta">
+                <assert_contents>
+                    <has_n_lines n="0"/>
+                </assert_contents>
+            </output>
+            <expand macro="stdout_assertion"/>
+        </test>
+
+        <!-- test setting tax scope: the has_text assertion checks that the tax_scope option shows up on stdout -->
+        <test expect_num_outputs="2">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <param name="annotate_hits_table" value="DIA_nlim.emapper.seed_orthologs" ftype="tabular">
+                <!-- this has no effect at the moment since column_names can not be set in uploads <metadata name="column_names" value="@SEED_ORTHOLOG_COLUMNS@"/> -->
+            </param>
+            <section name="annotation_options">
+                <param name="tax_scope" value="651137" />
+            </section>
+            <section name="output_options">
+                <param name="report_orthologs" value="true"/>
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="annotations_assertion"/>
+            <expand macro="annotations_orthologs_assertion"/>
+            <expand macro="stdout_assertion">
+                <has_text text="--tax_scope=651137"/>
+            </expand>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+eggnog-mapper
+=============
+Overview
+--------
+
+``eggnog-mapper`` is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments.
+Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.
+The use of orthology predictions for functional annotation is considered more precise than traditional homology searches,
+as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).
+
+EggNOG-mapper is also available as a public online resource:  `<http://beta-eggnogdb.embl.de/#/app/emapper>`_.
+
+Outputs
+-------
+
+@HELP_ANNOTATION_OUTPUTS@
+
+**Recommendation for large input data**
+
+EggNOG-mapper consists of two phases
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+by default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run. 
+
+For large input FASTA datasets it can be favourable to split this in two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See `also <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper_search.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/eggnog_mapper_search.xml Mon Sep 04 12:47:09 2023 +0000
[
@@ -0,0 +1,101 @@
+<tool id="eggnog_mapper_search" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>search phase</description>
+    <macros>
+        <import>eggnog_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="aggressive"><![CDATA[
+        emapper.py
+        @DB_TOKEN@
+        @ORTHO_SEARCH_TOKEN@
+
+        $output_options.no_file_comments
+        --output='results'
+        --cpu "\${GALAXY_SLOTS:-4}"
+        --scratch_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+        --temp_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+    ]]></command> <!-- Search phase command line. The scratch_dir/temp_dir values are double-quoted like the cpu value so that a temp path containing whitespace stays a single argument. -->
+    <inputs> <!-- all macros expanded below are imported from eggnog_macros.xml -->
+        <expand macro="db_macro"/> <!-- eggnog_data select, filled from the eggnog_mapper_db_versioned data table -->
+        <expand macro="ortho_search_macro"/> <!-- ortho_method conditional restricted to the search options (diamond, mmseqs) -->
+        <expand macro="output_options_macro"/>
+    </inputs>
+    <outputs> <!-- output definitions live in eggnog_macros.xml -->
+        <expand macro="ortho_search_output_macro"/>
+    </outputs>
+    <tests>
+        <!-- test producing only seed orthologs; the m select is not set, so this relies on its default value -->
+        <test expect_num_outputs="1">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>
+            <conditional name="ortho_method">
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+            </conditional>
+            <section name="output_options">
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="seed_orthologs_assertion"/>
+            <expand macro="stdout_assertion"/>
+        </test>
+        
+        <!-- test setting a diamond option: the has_text assertion checks that the sensmode flag shows up on stdout -->
+        <test expect_num_outputs="1">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <conditional name="ortho_method">
+                <param name="m" value="diamond" />
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+                <param name="sensmode" value="fast" />
+            </conditional>
+            <section name="output_options">
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="seed_orthologs_assertion"/>
+            <expand macro="stdout_assertion">
+                <has_text text="--sensmode fast"/>
+            </expand>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+eggnog-mapper
+=============
+Overview
+--------
+
+``eggnog-mapper`` is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments.
+Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.
+The use of orthology predictions for functional annotation is considered more precise than traditional homology searches,
+as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).
+
+EggNOG-mapper is also available as a public online resource:  `<http://beta-eggnogdb.embl.de/#/app/emapper>`_.
+
+Outputs
+-------
+
+@HELP_SEARCH_OUTPUTS@
+
+**Recommendation for large input data**
+
+EggNOG-mapper consists of two phases
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+by default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run. 
+
+For large input FASTA datasets it can be favourable to split this in two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See `also <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>