Previous changeset 12:9d1fbff733cf (2022-07-19) Next changeset 14:d9c3016f7283 (2023-09-07) |
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper commit 468bd31b8858adbba2854f118e4cbe31f4cd68cb |
modified:
eggnog_macros.xml eggnog_mapper.xml |
added:
README eggnog_mapper_annotate.xml eggnog_mapper_search.xml |
b |
diff -r 9d1fbff733cf -r 844fa988236b README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Mon Sep 04 12:47:09 2023 +0000 |
b |
@@ -0,0 +1,20 @@ +This folder contains three tools: + +1. eggnog_mapper: which runs the search and annotation phase in a single tool +2. eggnog_mapper_search: which implements the search phase +3. eggnog_mapper_annotate: which implements the annotation phase + +While the search phase of eggnog_mapper is very CPU intensive and remains efficient +even with a larger number of threads, the annotation phase is very IO intensive +and can be very inefficient (depending on the configuration, e.g. if the +reference data is located on a slow partition). + +While eggnog_mapper will be sufficient for most applications, separating the +two phases can be more efficient: + +- sending eggnog_mapper_search to a destination using many threads +- and eggnog_mapper_annotate to a destination using a small number of threads + +Admins can choose to set the environment variable ``EGGNOG_DBMEM=--dbmem``, +which will copy the complete EggNOG annotation DB into memory; this is usually +much faster than using multiple cores (but needs approx. 37GB of RAM). \ No newline at end of file |
b |
diff -r 9d1fbff733cf -r 844fa988236b eggnog_macros.xml --- a/eggnog_macros.xml Tue Jul 19 15:14:52 2022 +0000 +++ b/eggnog_macros.xml Mon Sep 04 12:47:09 2023 +0000 |
[ |
b'@@ -3,6 +3,7 @@\n <token name="@TOOL_VERSION@">2.1.8</token>\n <token name="@VERSION_SUFFIX@">3</token>\n <token name="@EGGNOG_DB_VERSION@">5.0.2</token>\n+ <token name="@PROFILE@">22.01</token>\n <!--\n # DB versionning was super confusing for eggnog-mapper 2.0.x:\n # eggnog-mapper 1.* needed a db v4.5 (based on eggnog v4.5)\n@@ -65,6 +66,27 @@\n </test>\n </xml>\n \n+ \n+ <xml name="stdout_assertion">\n+ <assert_stdout>\n+ <has_line line="# emapper-@TOOL_VERSION@"/>\n+ <has_line line="FINISHED"/>\n+ <yield/>\n+ </assert_stdout>\n+ </xml>\n+ <xml name="db_macro">\n+ <param name="eggnog_data" type="select" label="Version of eggNOG Database">\n+ <options from_data_table="eggnog_mapper_db_versioned">\n+ <filter type="static_value" column="3" value="@IDX_VERSION@" />\n+ </options>\n+ </param>\n+ </xml>\n+ <token name="@DB_TOKEN@"><![CDATA[\n+ --data_dir \'$eggnog_data.fields.path\'\n+ ]]></token>\n+\n+ <!-- macros and tokens for search -->\n+ \n <xml name="fasta_input">\n <param argument="-i" name="input" type="data" format="fasta" label="Fasta sequences to annotate"/>\n <conditional name="input_trans">\n@@ -105,41 +127,494 @@\n <param argument="--evalue" type="float" optional="true" min="0" label="Minimum query coverage" help="Report only alignments below or equal the e-value" />\n <param argument="--score" type="float" value="0.001" optional="true" min="0" label="Minimum query coverage" help="Report only alignments above or equal the score" />\n </xml>\n- <token name="@SEED_ORTHOLOG_COLUMNS@">query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,query_start,query_end,seed_start,seed_end,pident,query_cov,seed_cov</token>\n+\n+ <xml name="ortho_macro">\n+ <conditional name="ortho_method">\n+ <param argument="-m" type="select" label="Basis for annotation">\n+ <yield name="search_options"/>\n+ <yield name="reuse_options"/>\n+ </param>\n+ <yield name="search_whens"/>\n+ <yield name="reuse_whens"/>\n+ </conditional>\n+ </xml>\n+\n+ <xml 
name="ortho_search_macro">\n+ <expand macro="ortho_macro">\n+ <token name="search_options">\n+ <expand macro="search_options_macro"/>\n+ </token>\n+ <token name="search_whens">\n+ <expand macro="search_whens_macro"/>\n+ </token>\n+ </expand>\n+ </xml>\n+\n+ <xml name="ortho_annotate_macro">\n+ <expand macro="ortho_macro">\n+ <token name="reuse_options">\n+ <expand macro="reuse_options_macro"/>\n+ </token>\n+ <token name="reuse_whens">\n+ <expand macro="reuse_whens_macro"/>\n+ </token>\n+ </expand>\n+ </xml>\n+\n+ <xml name="ortho_full_macro">\n+ <expand macro="ortho_macro">\n+ <token name="search_options">\n+ <expand macro="search_options_macro"/>\n+ </token>\n+ <token name="reuse_options">\n+ <expand macro="reuse_options_macro"/>\n+ </token>\n+ <token name="search_whens">\n+ <expand macro="search_whens_macro"/>\n+ </token>\n+ <token name="reuse_whens">\n+ <expand macro="reuse_whens_macro"/>\n+ </token>\n+ </expand>\n+ </xml>\n+\n+ <xml name="search_options_macro">\n+ <option value="diamond">Seed orthologs computed with Diamond (diamond)</option>\n+ <option value="mmseqs">Seed orthologs computed with MMseqs2 (mmseqs)</option>\n+ </xml>\n+\n+ <xml name="reuse_options_macro">\n+ <option value="no_search">Use existing seed orthologs (no_search)</option>\n+ <option value="cache">Use cached annotations (cache). 
See also --md5</option>\n+ </xml>\n+\n+ <xml name="search_whens_macro">\n+ <when v'..b' <metadata name="columns" value="@COLUMNS@" />\n <!-- <metadata name="column_names" value="query,seed_ortholog,evalue,score,max_annot_lvl,COG_category,Description,Preferred_name,GOs,EC,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMseggNOG_OGs@ADD_METADATA_COLUMN_NAMES@" /> -->\n <assert_contents>\n <has_line line="#query	seed_ortholog	evalue	score	eggNOG_OGs	max_annot_lvl	COG_category	Description	Preferred_name	GOs	EC	KEGG_ko	KEGG_Pathway	KEGG_Module	KEGG_Reaction	KEGG_rclass	BRITE	KEGG_TC	CAZy	BiGG_Reaction	PFAMs@ADD_COLUMN_NAMES@"/>\n <has_line_matching expression="(\\S+\\t){2}[-+.e\\d]+\\t[.\\d]+(\\t\\S+){7}\\tko:\\S+(\\t\\S+){9}@ADD_COLUMN_RE@" n="1"/>\n+ <has_line_matching expression="##.*" negate="@NOCOMMENTS@"/>\n </assert_contents>\n </output>\n </xml>\n- <xml name="annotations_orthologs_assertion">\n+ <xml name="annotations_orthologs_assertion" token_nocomments="true">\n <output name="annotations_orthologs" ftype="tabular">\n <metadata name="columns" value="4" />\n <metadata name="column_names" value="query,orth_type,species,orthologs" />\n <assert_contents>\n <has_line line="#query	orth_type	species	orthologs"/>\n <has_line_matching expression="\\S+\\t(one2one|many2one|one2many|many2many|seed)(\\t[^\\t]+){2}" n="2"/>\n+ <has_line_matching expression="##.*" negate="@NOCOMMENTS@"/>\n </assert_contents>\n </output>\n </xml>\n+ <token name="@HELP_ANNOTATION_OUTPUTS@"><![CDATA[\n+ **annotations**\n+\n+ This file provides final annotations of each query. 
Tab-delimited columns in the file are:\n+ \n+ - ``query_name``: query sequence name\n+ - ``seed_eggNOG_ortholog``: best protein match in eggNOG\n+ - ``seed_ortholog_evalue``: best protein match (e-value)\n+ - ``seed_ortholog_score``: best protein match (bit-score)\n+ - ``predicted_taxonomic_group``\n+ - ``predicted_protein_name``: Predicted protein name for query sequences\n+ - ``GO_terms``: Comma delimited list of predicted Gene Ontology terms\n+ - ``EC_number``\n+ - ``KEGG_KO``\n+ - ``KEGG_Pathway``: Comma delimited list of predicted KEGG pathways\n+ - ``KEGG_Module``\n+ - ``KEGG_Reaction``\n+ - ``KEGG_rclass``\n+ - ``BRITE``\n+ - ``KEGG_TC``\n+ - ``CAZy``\n+ - ``BiGG_Reactions``\n+ - ``Annotation_tax_scope``: The taxonomic scope used to annotate this query sequence\n+ - ``Matching_OGs``: Comma delimited list of matching eggNOG Orthologous Groups\n+ - ``best_OG|evalue|score``: Best matching Orthologous Groups (deprecated, use smallest from eggnog OGs)\n+ - ``COG_functional_categories``: COG functional category inferred from best matching OG\n+ - ``eggNOG_free_text_description``\n+ \n+ **orthologs**\n+\n+ This output is only created if the option ``--report_orthologs`` is checked.\n+ It provides the orthologs used for the annotation. It\'s a tab delimited file with the following columns:\n+ \n+ - ``query``\n+ - ``orth_type``: Type of orthologs in this row. See --target_orthologs.\n+ - ``species``\n+ - ``orthologs``: comma-separated list of orthologs (If an ortholog shows a "*", such ortholog was used to transfer its annotations to the query.)\n+ \n+ **sequences without annotation**\n+\n+ This output is created if cached annotations are used as input. \n+ It is a FASTA file containing all sequences that are not found in the cached annotations.\n+ These sequences can then be used as input for another run of the EggNOG mapper\n+ computing seed orthologs with diamond, etc.\n+ ]]></token>\n </macros>\n' |
b |
diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper.xml --- a/eggnog_mapper.xml Tue Jul 19 15:14:52 2022 +0000 +++ b/eggnog_mapper.xml Mon Sep 04 12:47:09 2023 +0000 |
[ |
b'@@ -1,4 +1,4 @@\n-<tool id="eggnog_mapper" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">\n+<tool id="eggnog_mapper" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n <description>functional sequence annotation by orthology</description>\n <macros>\n <import>eggnog_macros.xml</import>\n@@ -6,86 +6,15 @@\n <expand macro="requirements"/>\n <expand macro="version_command"/>\n <command detect_errors="aggressive"><![CDATA[\n- #if $ortho_method.m == "no_search"\n- cat \n- #for aht in $ortho_method.annotate_hits_table\n- $aht\n- #end for\n- > annotate_hits_table.tsv\n- &&\n- #end if\n+ @MERGE_ANNOTATIONS@\n \n emapper.py\n- --data_dir \'$eggnog_data.fields.path\'\n- -m \'$ortho_method.m\'\n-\n- #if $ortho_method.m in [\'diamond\', \'mmseqs\', \'cache\']:\n- -i \'$ortho_method.input\'\n- --itype \'$ortho_method.input_trans.itype\'\n- #if $ortho_method.input_trans.itype in [\'CDS\', \'genome\', \'metagenome\']:\n- $ortho_method.input_trans.translate\n- #end if\n- #if $ortho_method.input_trans.itype in [\'genome\', \'metagenome\']:\n- --genepred $ortho_method.input_trans.genepred\n- #end if\n- #elif $ortho_method.m == "no_search"\n- --annotate_hits_table annotate_hits_table.tsv\n- #end if\n- \n- #if $ortho_method.m == \'cache\'\n- --cache \'$ortho_method.cache\'\n- #end if\n-\n- #if $ortho_method.m in [\'diamond\', \'mmseqs\']:\n- ## Diamond option\n- #if $ortho_method.m == "diamond":\n- --matrix \'$ortho_method.matrix_gapcosts.matrix\'\n- $ortho_method.matrix_gapcosts.gap_costs\n- --sensmode $ortho_method.sensmode\n- $ortho_method.dmnd_iterate\n- $ortho_method.dmnd_ignore_warnings\n- #elif $ortho_method.m == "mmseqs":\n- --start_sens $ortho_method.start_sens\n- --sens_steps $ortho_method.sens_steps\n- --final_sens $ortho_method.final_sens\n- #end if\n-\n- ## Common options for search filtering (applies to diamond and mmseqs only)\n- #if str($ortho_method.query_cover):\n- --query_cover 
$ortho_method.query_cover\n- #end if\n- #if str($ortho_method.subject_cover):\n- --subject_cover $ortho_method.subject_cover\n- #end if\n- #if str($ortho_method.pident):\n- --pident $ortho_method.pident\n- #end if\n- #if str($ortho_method.evalue):\n- --evalue $ortho_method.evalue\n- #end if\n- #if str($ortho_method.score):\n- --score $ortho_method.score\n- #end if\n- #end if\n-\n+ @DB_TOKEN@\n+ @ORTHO_SEARCH_TOKEN@\n #if $annotation_options.no_annot == "--no_annot"\n --no_annot\n #else\n- #if str($annotation_options.seed_ortholog_evalue):\n- --seed_ortholog_evalue $annotation_options.seed_ortholog_evalue\n- #end if\n- #if str($annotation_options.seed_ortholog_score):\n- --seed_ortholog_score $annotation_options.seed_ortholog_score\n- #end if\n- #if $annotation_options.tax_scope:\n- --tax_scope=$annotation_options.tax_scope\n- #end if\n- #if $annotation_options.target_orthologs:\n- --target_orthologs=$annotation_options.target_orthologs\n- #end if\n- #if $annotation_options.go_evidence:\n- --go_evidence=$annotation_options.go_evidence\n- #end if\n+ @ANNOTATION_TOKEN@\n #end if\n $output_options.no_file_comments\n $output_options.report_orthologs\n@@ -96,261 +25,27 @@\n --temp_dir \\${TEMP:-\\$_GA'..b' <param name="report_orthologs" value="true"/>\n- <param name="no_file_comments" value="true"/>\n+ <param name="no_file_comments" value="false"/>\n <param name="md5" value="true"/>\n </section>\n- <expand macro="seed_orthologs_assertion"/>\n- <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="	md5" add_column_re="\\t[\\d\\w]+"/>\n- <expand macro="annotations_orthologs_assertion"/>\n+ <expand macro="seed_orthologs_assertion" nocomments="false"/>\n+ <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="	md5" add_column_re="\\t[\\d\\w]+" nocomments="false"/>\n+ <expand macro="annotations_orthologs_assertion" nocomments="false"/>\n <expand macro="stdout_assertion"/>\n 
</test>\n \n@@ -416,7 +109,6 @@\n </conditional>\n <section name="output_options">\n <param name="report_orthologs" value="true"/>\n- <param name="no_file_comments" value="true"/>\n <param name="md5" value="true"/>\n </section>\n <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="	md5" add_column_re="\\t[\\d\\w]+"/>\n@@ -439,7 +131,6 @@\n </section>\n <section name="output_options">\n <param name="report_orthologs" value="true"/>\n- <param name="no_file_comments" value="true"/>\n </section>\n <expand macro="seed_orthologs_assertion"/>\n <expand macro="annotations_assertion"/>\n@@ -459,7 +150,6 @@\n </conditional>\n <section name="output_options">\n <param name="report_orthologs" value="true"/>\n- <param name="no_file_comments" value="true"/>\n </section>\n <expand macro="seed_orthologs_assertion"/>\n <expand macro="annotations_assertion"/>\n@@ -502,40 +192,11 @@\n Outputs\n -------\n \n-**seed_orthologs**\n-\n-each line in the file provides the best match of each query within the best Orthologous Group (OG)\n-reported in the [project].hmm_hits file, obtained running PHMMER against all sequences within the best OG.\n-The seed ortholog is used to fetch fine-grained orthology relationships from eggNOG.\n-If using the diamond search mode, seed orthologs are directly\n-obtained from the best matching sequences by running DIAMOND against the whole eggNOG protein space.\n-\n-**annotations**\n-\n-This file provides final annotations of each query. 
Tab-delimited columns in the file are:\n+@HELP_SEARCH_OUTPUTS@\n \n-- ``query_name``: query sequence name\n-- ``seed_eggNOG_ortholog``: best protein match in eggNOG\n-- ``seed_ortholog_evalue``: best protein match (e-value)\n-- ``seed_ortholog_score``: best protein match (bit-score)\n-- ``predicted_taxonomic_group``\n-- ``predicted_protein_name``: Predicted protein name for query sequences\n-- ``GO_terms``: Comma delimited list of predicted Gene Ontology terms\n-- ``EC_number``\n-- ``KEGG_KO``\n-- ``KEGG_Pathway``: Comma delimited list of predicted KEGG pathways\n-- ``KEGG_Module``\n-- ``KEGG_Reaction``\n-- ``KEGG_rclass``\n-- ``BRITE``\n-- ``KEGG_TC``\n-- ``CAZy``\n-- ``BiGG_Reactions``\n-- ``Annotation_tax_scope``: The taxonomic scope used to annotate this query sequence\n-- ``Matching_OGs``: Comma delimited list of matching eggNOG Orthologous Groups\n-- ``best_OG|evalue|score``: Best matching Orthologous Groups (deprecated, use smallest from eggnog OGs)\n-- ``COG_functional_categories``: COG functional category inferred from best matching OG\n-- ``eggNOG_free_text_description``\n+@HELP_ANNOTATION_OUTPUTS@\n+\n+\n \n **Recommentation for large input data**\n \n@@ -558,7 +219,6 @@\n \n Another alternative is to use cached annotations (produced in a run with --md5 enabled).\n \n-\n ]]></help>\n <expand macro="citations"/>\n </tool>\n' |
b |
diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper_annotate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/eggnog_mapper_annotate.xml Mon Sep 04 12:47:09 2023 +0000 |
[ |
@@ -0,0 +1,144 @@
+<tool id="eggnog_mapper_annotate" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>annotation phase</description>
+    <!-- Annotation-only wrapper around emapper.py (always run with "-m no_search"); all macros and tokens used below are imported from eggnog_macros.xml. -->
+    <macros>
+        <import>eggnog_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="aggressive"><![CDATA[
+        @MERGE_ANNOTATIONS@
+
+        emapper.py
+            @DB_TOKEN@
+            -m no_search
+            @ORTHO_SEARCH_TOKEN@
+
+            @ANNOTATION_TOKEN@
+            $output_options.no_file_comments
+            $output_options.report_orthologs
+            $output_options.md5
+            --output='results'
+            --cpu "\${GALAXY_SLOTS:-1}"
+            ## quoted so that a temporary directory containing spaces is passed as a single argument
+            --scratch_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+            --temp_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+    ]]></command>
+    <inputs>
+        <expand macro="db_macro"/>
+        <expand macro="ortho_annotate_macro"/>
+
+        <section name="annotation_options" title="Annotation options">
+            <expand macro="annotation_options_macro"/>
+        </section>
+
+        <expand macro="output_options_annotate_macro"/>
+    </inputs>
+    <outputs>
+        <expand macro="annotation_output_macro"/>
+        <expand macro="annotation_orthologs_output_macro"/>
+    </outputs>
+    <tests>
+        <!-- test producing annotations from seed orthologs -->
+        <test expect_num_outputs="1">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <param name="annotate_hits_table" value="DIA_nlim.emapper.seed_orthologs" ftype="tabular">
+                <!-- this has no effect at the moment since column_names can not be set in uploads <metadata name="column_names" value="@SEED_ORTHOLOG_COLUMNS@"/> -->
+            </param>
+            <section name="annotation_options">
+                <!-- annotation_options is declared as a section in the inputs; left empty here to run with its defaults -->
+            </section>
+            <section name="output_options">
+                <param name="report_orthologs" value="false"/>
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="annotations_assertion"/>
+            <expand macro="stdout_assertion"/>
+        </test>
+
+        <!-- test using cached annotations from previous run -->
+        <test expect_num_outputs="2">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <conditional name="ortho_method">
+                <param name="m" value="cache"/>
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+                <param name="cache" value="DIA_nlim.emapper.annotations_cached" ftype="tabular"/>
+            </conditional>
+            <section name="output_options">
+                <param name="report_orthologs" value="true"/>
+                <param name="no_file_comments" value="true"/>
+                <param name="md5" value="true"/>
+            </section>
+            <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="&#9;md5" add_column_re="\t[\d\w]+"/>
+            <output name="no_annotations" ftype="fasta">
+                <assert_contents>
+                    <has_n_lines n="0"/>
+                </assert_contents>
+            </output>
+            <expand macro="stdout_assertion"/>
+        </test>
+
+        <!-- test setting tax scope -->
+        <test expect_num_outputs="2">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <param name="annotate_hits_table" value="DIA_nlim.emapper.seed_orthologs" ftype="tabular">
+                <!-- this has no effect at the moment since column_names can not be set in uploads <metadata name="column_names" value="@SEED_ORTHOLOG_COLUMNS@"/> -->
+            </param>
+            <section name="annotation_options">
+                <param name="tax_scope" value="651137" />
+            </section>
+            <section name="output_options">
+                <param name="report_orthologs" value="true"/>
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="annotations_assertion"/>
+            <expand macro="annotations_orthologs_assertion"/>
+            <expand macro="stdout_assertion">
+                <has_text text="--tax_scope=651137"/>
+            </expand>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+eggnog-mapper
+=============
+Overview
+--------
+
+``eggnog-mapper`` is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments.
+Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.
+The use of orthology predictions for functional annotation is considered more precise than traditional homology searches,
+as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).
+
+EggNOG-mapper is also available as a public online resource: `<http://beta-eggnogdb.embl.de/#/app/emapper>`_.
+
+Outputs
+-------
+
+@HELP_ANNOTATION_OUTPUTS@
+
+**Recommendation for large input data**
+
+EggNOG-mapper consists of two phases
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+by default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run.
+
+For large input FASTA datasets it can be favourable to split this in two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See also `Setting up large annotation jobs <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_.
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool> |
b |
diff -r 9d1fbff733cf -r 844fa988236b eggnog_mapper_search.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/eggnog_mapper_search.xml Mon Sep 04 12:47:09 2023 +0000 |
[ |
@@ -0,0 +1,103 @@
+<tool id="eggnog_mapper_search" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>search phase</description>
+    <!-- Search-only wrapper around emapper.py; all macros and tokens used below are imported from eggnog_macros.xml. -->
+    <macros>
+        <import>eggnog_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="aggressive"><![CDATA[
+        emapper.py
+            @DB_TOKEN@
+            @ORTHO_SEARCH_TOKEN@
+
+            $output_options.no_file_comments
+            --output='results'
+            --cpu "\${GALAXY_SLOTS:-4}"
+            ## quoted so that a temporary directory containing spaces is passed as a single argument
+            --scratch_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+            --temp_dir "\${TEMP:-\$_GALAXY_JOB_TMP_DIR}"
+    ]]></command>
+    <inputs>
+        <expand macro="db_macro"/>
+        <expand macro="ortho_search_macro"/>
+        <expand macro="output_options_macro"/>
+    </inputs>
+    <outputs>
+        <expand macro="ortho_search_output_macro"/>
+    </outputs>
+    <tests>
+        <!-- test producing only seed orthologs -->
+        <test expect_num_outputs="1">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>
+            <conditional name="ortho_method">
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+            </conditional>
+            <section name="output_options">
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="seed_orthologs_assertion"/>
+            <expand macro="stdout_assertion"/>
+        </test>
+
+        <!-- test setting a diamond option -->
+        <test expect_num_outputs="1">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <conditional name="ortho_method">
+                <param name="m" value="diamond" />
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+                <param name="sensmode" value="fast" />
+            </conditional>
+            <section name="output_options">
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="seed_orthologs_assertion"/>
+            <expand macro="stdout_assertion">
+                <has_text text="--sensmode fast"/>
+            </expand>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+eggnog-mapper
+=============
+Overview
+--------
+
+``eggnog-mapper`` is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments.
+Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.
+The use of orthology predictions for functional annotation is considered more precise than traditional homology searches,
+as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).
+
+EggNOG-mapper is also available as a public online resource: `<http://beta-eggnogdb.embl.de/#/app/emapper>`_.
+
+Outputs
+-------
+
+@HELP_SEARCH_OUTPUTS@
+
+**Recommendation for large input data**
+
+EggNOG-mapper consists of two phases
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+by default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run.
+
+For large input FASTA datasets it can be favourable to split this in two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See also `Setting up large annotation jobs <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_.
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool> |