# HG changeset patch
# User galaxyp
# Date 1693831629 0
# Node ID 844fa988236b9a07be8f1de9931cf7cb1fc806c9
# Parent 9d1fbff733cfbc64fb3538e8c604517d9f43cedb
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper commit 468bd31b8858adbba2854f118e4cbe31f4cd68cb
diff -r 9d1fbff733cf -r 844fa988236b README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Mon Sep 04 12:47:09 2023 +0000
@@ -0,0 +1,20 @@
+This folder contains three tools:
+
+1. eggnog_mapper: which runs the search and annotation phase in a single tool
+2. eggnog_mapper_search: which implements the search phase
+3. eggnog_mapper_annotate: which implements the annotation phase
+
+While the search phase of eggnog_mapper is very CPU intensive and remains
+efficient with a larger number of threads, the annotation phase is very IO
+intensive and can be very inefficient (depending on the configuration, e.g. if
+the reference data is located on a slow partition).
+
+While for most applications eggnog_mapper will be sufficient, separating the
+two phases can be more efficient:
+
+- sending eggnog_mapper_search to a destination using many threads
+- and eggnog_mapper_annotate to a destination using a small number of threads
+
+Admins can choose to set the environment variable ``EGGNOG_DBMEM=--dbmem``,
+which will copy the complete EggNOG annotation DB into memory; this is usually
+much faster than using multiple cores (but needs approx. 37GB of RAM).
\ No newline at end of file
diff -r 9d1fbff733cf -r 844fa988236b eggnog_macros.xml
--- a/eggnog_macros.xml Tue Jul 19 15:14:52 2022 +0000
+++ b/eggnog_macros.xml Mon Sep 04 12:47:09 2023 +0000
@@ -3,6 +3,7 @@
2.1.8
3
5.0.2
+ 22.01
+
@@ -105,41 +127,494 @@
- query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,query_start,query_end,seed_start,seed_end,pident,query_cov,seed_cov
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value.metadata.columns == 11
+
+
+
+
+
+
+ value.metadata.columns == 22
+
+
+
+
+
+ annotate_hits_table.tsv
+ &&
+ #end if
+ ]]>
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
+
+ query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,query_start,query_end,seed_start,seed_end,pident,query_cov,seed_cov
+
+
+
+ ortho_method['m'] not in ['no_search', 'cache']
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+ Min E-value expected when searching for seed eggNOG ortholog. Applies to phmmer/diamond searches.
+ Queries not having a significant seed ortholog (E-value less than threshold) will not be annotated.
+
+
+
+
+ Min bit score expected when searching for seed eggNOG ortholog.
+ Queries not having a significant seed ortholog will not be annotated.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ortho_method['m'] == 'cache' and ortho_method['output_no_annotations']
+
+
+
+
+
+ ortho_method['m'] != 'cache'
+ output_options['report_orthologs']
+
+
+
+
+
+
+
-
+
+
diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper.xml
--- a/eggnog_mapper.xml Tue Jul 19 15:14:52 2022 +0000
+++ b/eggnog_mapper.xml Mon Sep 04 12:47:09 2023 +0000
@@ -1,4 +1,4 @@
-
+
functional sequence annotation by orthology
eggnog_macros.xml
@@ -6,86 +6,15 @@
annotate_hits_table.tsv
- &&
- #end if
+ @MERGE_ANNOTATIONS@
emapper.py
- --data_dir '$eggnog_data.fields.path'
- -m '$ortho_method.m'
-
- #if $ortho_method.m in ['diamond', 'mmseqs', 'cache']:
- -i '$ortho_method.input'
- --itype '$ortho_method.input_trans.itype'
- #if $ortho_method.input_trans.itype in ['CDS', 'genome', 'metagenome']:
- $ortho_method.input_trans.translate
- #end if
- #if $ortho_method.input_trans.itype in ['genome', 'metagenome']:
- --genepred $ortho_method.input_trans.genepred
- #end if
- #elif $ortho_method.m == "no_search"
- --annotate_hits_table annotate_hits_table.tsv
- #end if
-
- #if $ortho_method.m == 'cache'
- --cache '$ortho_method.cache'
- #end if
-
- #if $ortho_method.m in ['diamond', 'mmseqs']:
- ## Diamond option
- #if $ortho_method.m == "diamond":
- --matrix '$ortho_method.matrix_gapcosts.matrix'
- $ortho_method.matrix_gapcosts.gap_costs
- --sensmode $ortho_method.sensmode
- $ortho_method.dmnd_iterate
- $ortho_method.dmnd_ignore_warnings
- #elif $ortho_method.m == "mmseqs":
- --start_sens $ortho_method.start_sens
- --sens_steps $ortho_method.sens_steps
- --final_sens $ortho_method.final_sens
- #end if
-
- ## Common options for search filtering (applies to diamond and mmseqs only)
- #if str($ortho_method.query_cover):
- --query_cover $ortho_method.query_cover
- #end if
- #if str($ortho_method.subject_cover):
- --subject_cover $ortho_method.subject_cover
- #end if
- #if str($ortho_method.pident):
- --pident $ortho_method.pident
- #end if
- #if str($ortho_method.evalue):
- --evalue $ortho_method.evalue
- #end if
- #if str($ortho_method.score):
- --score $ortho_method.score
- #end if
- #end if
-
+ @DB_TOKEN@
+ @ORTHO_SEARCH_TOKEN@
#if $annotation_options.no_annot == "--no_annot"
--no_annot
#else
- #if str($annotation_options.seed_ortholog_evalue):
- --seed_ortholog_evalue $annotation_options.seed_ortholog_evalue
- #end if
- #if str($annotation_options.seed_ortholog_score):
- --seed_ortholog_score $annotation_options.seed_ortholog_score
- #end if
- #if $annotation_options.tax_scope:
- --tax_scope=$annotation_options.tax_scope
- #end if
- #if $annotation_options.target_orthologs:
- --target_orthologs=$annotation_options.target_orthologs
- #end if
- #if $annotation_options.go_evidence:
- --go_evidence=$annotation_options.go_evidence
- #end if
+ @ANNOTATION_TOKEN@
#end if
$output_options.no_file_comments
$output_options.report_orthologs
@@ -96,261 +25,27 @@
--temp_dir \${TEMP:-\$_GALAXY_JOB_TMP_DIR}
]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- value.metadata.columns == 11
-
-
-
-
-
-
- value.metadata.columns == 22
-
-
-
-
-
+
+
-
-
- Min E-value expected when searching for seed eggNOG ortholog. Applies to phmmer/diamond searches.
- Queries not having a significant seed orthologs (E-value less than threshold) will not be annotated.
-
-
-
-
- Min bit score expected when searching for seed eggNOG ortholog.
- Queries not having a significant seed orthologs will not be annotated.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
+
-
- ortho_method['m'] not in ['no_search', 'cache']
-
-
-
-
-
+
+
annotation_options['no_annot'] == ''
-
-
-
-
-
-
-
-
-
-
-
-
- ortho_method['m'] != 'cache' and output_options['report_orthologs']
-
-
-
-
-
- ortho_method['m'] == 'cache' and output_options['output_no_annotations']
-
+
+
@@ -363,7 +58,6 @@
@@ -382,7 +76,6 @@
@@ -397,12 +90,12 @@
-
-
-
+
+
+
@@ -416,7 +109,6 @@
@@ -439,7 +131,6 @@
@@ -459,7 +150,6 @@
@@ -502,40 +192,11 @@
Outputs
-------
-**seed_orthologs**
-
-each line in the file provides the best match of each query within the best Orthologous Group (OG)
-reported in the [project].hmm_hits file, obtained running PHMMER against all sequences within the best OG.
-The seed ortholog is used to fetch fine-grained orthology relationships from eggNOG.
-If using the diamond search mode, seed orthologs are directly
-obtained from the best matching sequences by running DIAMOND against the whole eggNOG protein space.
-
-**annotations**
-
-This file provides final annotations of each query. Tab-delimited columns in the file are:
+@HELP_SEARCH_OUTPUTS@
-- ``query_name``: query sequence name
-- ``seed_eggNOG_ortholog``: best protein match in eggNOG
-- ``seed_ortholog_evalue``: best protein match (e-value)
-- ``seed_ortholog_score``: best protein match (bit-score)
-- ``predicted_taxonomic_group``
-- ``predicted_protein_name``: Predicted protein name for query sequences
-- ``GO_terms``: Comma delimited list of predicted Gene Ontology terms
-- ``EC_number``
-- ``KEGG_KO``
-- ``KEGG_Pathway``: Comma delimited list of predicted KEGG pathways
-- ``KEGG_Module``
-- ``KEGG_Reaction``
-- ``KEGG_rclass``
-- ``BRITE``
-- ``KEGG_TC``
-- ``CAZy``
-- ``BiGG_Reactions``
-- ``Annotation_tax_scope``: The taxonomic scope used to annotate this query sequence
-- ``Matching_OGs``: Comma delimited list of matching eggNOG Orthologous Groups
-- ``best_OG|evalue|score``: Best matching Orthologous Groups (deprecated, use smallest from eggnog OGs)
-- ``COG_functional_categories``: COG functional category inferred from best matching OG
-- ``eggNOG_free_text_description``
+@HELP_ANNOTATION_OUTPUTS@
+
+
**Recommentation for large input data**
@@ -558,7 +219,6 @@
Another alternative is to use cached annotations (produced in a run with --md5 enabled).
-
]]>
diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper_annotate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/eggnog_mapper_annotate.xml Mon Sep 04 12:47:09 2023 +0000
@@ -0,0 +1,141 @@
+
+ annotation phase
+
+ eggnog_macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `_.
+
+Outputs
+-------
+
+@HELP_ANNOTATION_OUTPUTS@
+
+**Recommendation for large input data**
+
+EggNOG-mapper consists of two phases
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+by default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run.
+
+For large input FASTA datasets it can be favourable to split this in two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See also `Setting up large annotation jobs <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_.
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+ ]]>
+
+
diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper_search.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/eggnog_mapper_search.xml Mon Sep 04 12:47:09 2023 +0000
@@ -0,0 +1,101 @@
+
+ search phase
+
+ eggnog_macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `_.
+
+Outputs
+-------
+
+@HELP_SEARCH_OUTPUTS@
+
+**Recommendation for large input data**
+
+EggNOG-mapper consists of two phases
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+by default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run.
+
+For large input FASTA datasets it can be favourable to split this in two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See also `Setting up large annotation jobs <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_.
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+ ]]>
+
+