Previous changeset 11:5a30ae278db0 (2022-06-20) Next changeset 13:844fa988236b (2023-09-04) |
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper commit 07877ba6d3fb6e28e94500f2392db6393cf325fd |
modified:
eggnog_macros.xml eggnog_mapper.xml |
added:
test-data/DIA_nlim.emapper.annotations_cached |
removed:
test-data/DIA_nlim.emapper.annotations test-data/DIA_nlim.emapper.annotations_orthologs test-data/nlim_1033.fasta test-data/scoped.emapper.annotations test-data/scoped.emapper.annotations_orthologs test-data/scoped.emapper.seed_orthologs |
b |
diff -r 5a30ae278db0 -r 9d1fbff733cf eggnog_macros.xml --- a/eggnog_macros.xml Mon Jun 20 12:49:52 2022 +0000 +++ b/eggnog_macros.xml Tue Jul 19 15:14:52 2022 +0000 |
[ |
@@ -1,7 +1,7 @@ <?xml version="1.0"?> <macros> <token name="@TOOL_VERSION@">2.1.8</token> - <token name="@VERSION_SUFFIX@">2.1.8</token> + <token name="@VERSION_SUFFIX@">3</token> <token name="@EGGNOG_DB_VERSION@">5.0.2</token> <!-- # DB versionning was super confusing for eggnog-mapper 2.0.x: @@ -64,10 +64,82 @@ </output> </test> </xml> + + <xml name="fasta_input"> + <param argument="-i" name="input" type="data" format="fasta" label="Fasta sequences to annotate"/> + <conditional name="input_trans"> + <param argument="--itype" type="select" label="Type of sequences"> + <option value="proteins" selected="true">proteins</option> + <option value="CDS">CDS</option> + <option value="genome">genome</option> + <option value="metagenome">metagenome</option> + </param> + <when value="proteins"/> + <when value="CDS"> + <param argument="--translate" type="boolean" truevalue="--translate" falsevalue="" checked="false" + label="Translate CDS to proteins before search"/> + </when> + <when value="genome"> + <param argument="--translate" type="boolean" truevalue="--translate" falsevalue="" checked="false" + label="Translate predicted CDS from blastx hits to proteins"/> + <param argument="--genepred" type="select" label="Gene prediction method"> + <option value="search">Inferred from Diamond/MMseqs2 blastx hits</option> + <option value="prodigal" selected="true">Performed using Prodigal</option> + </param> + </when> + <when value="metagenome"> + <param argument="--translate" type="boolean" truevalue="--translate" falsevalue="" checked="false" + label="Translate predicted CDS from blastx hits to proteins"/> + <param argument="--genepred" type="select" label="Gene prediction method"> + <option value="search">Inferred from Diamond/MMseqs2 blastx hits</option> + <option value="prodigal" selected="true">Performed using Prodigal</option> + </param> + </when> + </conditional> + </xml> + <!-- Common options for search filtering (applies to diamond and mmseqs only) --> + <xml 
name="common_search_options"> + <param argument="--query_cover" type="integer" optional="true" min="0" max="100" label="Minimum query coverage" help="Report only alignments above the given percentage of query cover" /> + <param argument="--subject_cover" type="integer" optional="true" min="0" max="100" label="Minimum subject coverage" help="Report only alignments above the given percentage of subject cover" /> + <param argument="--pident" type="integer" optional="true" min="0" max="100" label="Minimum percentage identity" help="Report only alignments above the given percentage of identity" /> + <param argument="--evalue" type="float" optional="true" min="0" label="Maximum e-value" help="Report only alignments below or equal the e-value" /> + <param argument="--score" type="float" value="0.001" optional="true" min="0" label="Minimum bit score" help="Report only alignments above or equal the score" /> + </xml> + <token name="@SEED_ORTHOLOG_COLUMNS@">query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,query_start,query_end,seed_start,seed_end,pident,query_cov,seed_cov</token> + <xml name="stdout_assertion"> <assert_stdout> <has_line line="# emapper-@TOOL_VERSION@"/> <has_line line="FINISHED"/> + <yield/> </assert_stdout> </xml> + <xml name="seed_orthologs_assertion"> + <output name="seed_orthologs" ftype="tabular"> + <assert_contents> + <has_line line="#qseqid	sseqid	evalue	bitscore	qstart	qend	sstart	send	pident	qcov	scov"/> + <has_line_matching expression="(\S+\t){2}[-+.e\d]+\t[.\d]+(\t\d+){4}(\t[.\d]+){3}" n="1"/> + </assert_contents> + </output> + </xml> + <xml name="annotations_assertion" token_columns="21" token_add_metadata_columm_names="" token_add_column_names="" token_add_column_re=""> + <output name="annotations" ftype="tabular"> + <metadata name="columns" value="@COLUMNS@" /> + <!-- <metadata name="column_names" 
value="query,seed_ortholog,evalue,score,max_annot_lvl,COG_category,Description,Preferred_name,GOs,EC,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMseggNOG_OGs@ADD_METADATA_COLUMN_NAMES@" /> --> + <assert_contents> + <has_line line="#query	seed_ortholog	evalue	score	eggNOG_OGs	max_annot_lvl	COG_category	Description	Preferred_name	GOs	EC	KEGG_ko	KEGG_Pathway	KEGG_Module	KEGG_Reaction	KEGG_rclass	BRITE	KEGG_TC	CAZy	BiGG_Reaction	PFAMs@ADD_COLUMN_NAMES@"/> + <has_line_matching expression="(\S+\t){2}[-+.e\d]+\t[.\d]+(\t\S+){7}\tko:\S+(\t\S+){9}@ADD_COLUMN_RE@" n="1"/> + </assert_contents> + </output> + </xml> + <xml name="annotations_orthologs_assertion"> + <output name="annotations_orthologs" ftype="tabular"> + <metadata name="columns" value="4" /> + <metadata name="column_names" value="query,orth_type,species,orthologs" /> + <assert_contents> + <has_line line="#query	orth_type	species	orthologs"/> + <has_line_matching expression="\S+\t(one2one|many2one|one2many|many2many|seed)(\t[^\t]+){2}" n="2"/> + </assert_contents> + </output> + </xml> </macros> |
b |
diff -r 5a30ae278db0 -r 9d1fbff733cf eggnog_mapper.xml --- a/eggnog_mapper.xml Mon Jun 20 12:49:52 2022 +0000 +++ b/eggnog_mapper.xml Tue Jul 19 15:14:52 2022 +0000 |
[ |
b'@@ -6,370 +6,472 @@\n <expand macro="requirements"/>\n <expand macro="version_command"/>\n <command detect_errors="aggressive"><![CDATA[\n- emapper.py\n- --data_dir \'$eggnog_data.fields.path\'\n- -m \'$seed_ortho_options.ortho_method.m\'\n- --itype \'${input_trans.itype}\'\n- #if $input_trans.itype in [\'CDS\', \'genome\', \'metagenome\']:\n- $input_trans.translate\n- #end if\n- #if $input_trans.itype in [\'genome\', \'metagenome\']:\n- --genepred $input_trans.genepred\n+ #if $ortho_method.m == "no_search"\n+ cat \n+ #for aht in $ortho_method.annotate_hits_table\n+ $aht\n+ #end for\n+ > annotate_hits_table.tsv\n+ &&\n #end if\n \n- ## Diamond option\n- #if $seed_ortho_options.ortho_method.m == "diamond":\n- --matrix \'$seed_ortho_options.ortho_method.matrix_gapcosts.matrix\'\n- $seed_ortho_options.ortho_method.matrix_gapcosts.gap_costs\n- --sensmode $seed_ortho_options.ortho_method.sensmode\n- $seed_ortho_options.ortho_method.dmnd_iterate\n- $seed_ortho_options.ortho_method.dmnd_ignore_warnings\n- #elif $seed_ortho_options.ortho_method.m == "mmseqs":\n- --start_sens $seed_ortho_options.ortho_method.start_sens\n- --sens_steps $seed_ortho_options.ortho_method.sens_steps\n- --final_sens $seed_ortho_options.ortho_method.final_sens\n+ emapper.py\n+ --data_dir \'$eggnog_data.fields.path\'\n+ -m \'$ortho_method.m\'\n+\n+ #if $ortho_method.m in [\'diamond\', \'mmseqs\', \'cache\']:\n+ -i \'$ortho_method.input\'\n+ --itype \'$ortho_method.input_trans.itype\'\n+ #if $ortho_method.input_trans.itype in [\'CDS\', \'genome\', \'metagenome\']:\n+ $ortho_method.input_trans.translate\n+ #end if\n+ #if $ortho_method.input_trans.itype in [\'genome\', \'metagenome\']:\n+ --genepred $ortho_method.input_trans.genepred\n+ #end if\n+ #elif $ortho_method.m == "no_search"\n+ --annotate_hits_table annotate_hits_table.tsv\n #end if\n-\n- ## Common options for search filtering\n- #if $seed_ortho_options.query_cover:\n- --query_cover $seed_ortho_options.query_cover\n- #end if\n- #if 
$seed_ortho_options.subject_cover:\n- --subject_cover $seed_ortho_options.subject_cover\n- #end if\n- #if $seed_ortho_options.pident:\n- --pident $seed_ortho_options.pident\n+ \n+ #if $ortho_method.m == \'cache\'\n+ --cache \'$ortho_method.cache\'\n #end if\n \n- #if $annotation_options.tax_scope:\n- --tax_scope=$annotation_options.tax_scope\n- #end if\n- #if $annotation_options.target_orthologs:\n- --target_orthologs=$annotation_options.target_orthologs\n+ #if $ortho_method.m in [\'diamond\', \'mmseqs\']:\n+ ## Diamond option\n+ #if $ortho_method.m == "diamond":\n+ --matrix \'$ortho_method.matrix_gapcosts.matrix\'\n+ $ortho_method.matrix_gapcosts.gap_costs\n+ --sensmode $ortho_method.sensmode\n+ $ortho_method.dmnd_iterate\n+ $ortho_method.dmnd_ignore_warnings\n+ #elif $ortho_method.m == "mmseqs":\n+ --start_sens $ortho_method.start_sens\n+ --sens_steps $ortho_method.sens_steps\n+ --final_sens $ortho_method.final_sens\n+ #end if\n+\n+ ## Common options for search filtering (applies to diamond and mmseqs only)\n+ #if str($ortho_method.query_cover):\n+ --query_cover $ortho_method.query_cover\n+ #end if\n+ #if str($ortho_method.subject_cover):\n+ --subject_cover $ortho_method.subject_cover\n+ #end if\n+ #if str($ortho_method.pident):\n+ --pident $ortho_method.pident\n+ #end if\n'..b'_orthologs" ftype="tabular"/>\n- <expand macro="stdout_assertion"/>\n+ <section name="output_options">\n+ <param name="report_orthologs" value="true"/>\n+ <param name="no_file_comments" value="true"/>\n+ </section>\n+ <expand macro="seed_orthologs_assertion"/>\n+ <expand macro="annotations_assertion"/>\n+ <expand macro="annotations_orthologs_assertion"/>\n+ <expand macro="stdout_assertion">\n+ <has_text text="--tax_scope=651137"/>\n+ </expand>\n </test>\n+\n+ <!-- test setting a diamond option-->\n <test expect_num_outputs="3">\n- <param name="input" value="Nmar_0135.fa" ftype="fasta"/>\n <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test 
to work -->\n- <section name="seed_ortho_options">\n- <conditional name="ortho_method">\n- <param name="m" value="diamond" />\n- <param name="sensmode" value="fast" />\n- </conditional>\n+ <conditional name="ortho_method">\n+ <param name="m" value="diamond" />\n+ <param name="input" value="Nmar_0135.fa" ftype="fasta"/>\n+ <param name="sensmode" value="fast" />\n+ </conditional>\n+ <section name="output_options">\n+ <param name="report_orthologs" value="true"/>\n+ <param name="no_file_comments" value="true"/>\n </section>\n- <param name="report_orthologs" value="true"/>\n- <param name="no_file_comments" value="true"/>\n- <output name="seed_orthologs" file="DIA_nlim.emapper.seed_orthologs" ftype="tabular" compare="sim_size"/>\n- <output name="annotations" file="DIA_nlim.emapper.annotations" ftype="tabular" compare="sim_size"/>\n- <output name="annotations_orthologs" file="DIA_nlim.emapper.annotations_orthologs" ftype="tabular"/>\n- <expand macro="stdout_assertion"/>\n+ <expand macro="seed_orthologs_assertion"/>\n+ <expand macro="annotations_assertion"/>\n+ <expand macro="annotations_orthologs_assertion"/>\n+ <expand macro="stdout_assertion">\n+ <has_text text="--sensmode fast"/>\n+ </expand>\n </test>\n <!-- not enabled as it requires a specific .db file, hard to minimize -->\n <!--test>\n+ <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>\n <param name="input" value="Nmar_0135.fa" ftype="fasta"/>\n- <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>\n <section name="seed_ortho_options">\n <conditional name="ortho_method">\n <param name="m" value="mmseqs" />\n@@ -435,6 +537,28 @@\n - ``COG_functional_categories``: COG functional category inferred from best matching OG\n - ``eggNOG_free_text_description``\n \n+**Recommendation for large input data**\n+\n+EggNOG-mapper consists of two phases:\n+\n+1. finding seed orthologous sequences (compute intensive)\n+2. expanding annotations (IO intensive)\n+\n+By default (i.e. 
if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)\n+both phases are executed within one tool run. \n+\n+For large input FASTA datasets it can be favourable to split this into two separate\n+tool runs as follows:\n+\n+1. Split the FASTA (e.g. 1M seqs per data set)\n+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.\n+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)\n+\n+See also the `eggNOG-mapper wiki <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_\n+\n+Another alternative is to use cached annotations (produced in a run with --md5 enabled).\n+\n+\n ]]></help>\n <expand macro="citations"/>\n </tool>\n' |
b |
diff -r 5a30ae278db0 -r 9d1fbff733cf test-data/DIA_nlim.emapper.annotations --- a/test-data/DIA_nlim.emapper.annotations Mon Jun 20 12:49:52 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -#query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs -Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 COG1083@1|root,arCOG04817@2157|Archaea,41T2K@651137|Thaumarchaeota 651137|Thaumarchaeota M Cytidylyltransferase - - - ko:K07257 - - - - ko00000 - - - - |
b |
diff -r 5a30ae278db0 -r 9d1fbff733cf test-data/DIA_nlim.emapper.annotations_cached --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DIA_nlim.emapper.annotations_cached Tue Jul 19 15:14:52 2022 +0000 |
b |
@@ -0,0 +1,2 @@ +#query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs md5 +Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 COG1083@1|root,arCOG04817@2157|Archaea,41T2K@651137|Thaumarchaeota 651137|Thaumarchaeota M Cytidylyltransferase - - - ko:K07257 - - - - ko00000 - - - - 9fb52f96004c566b17893d597c94054e |
b |
diff -r 5a30ae278db0 -r 9d1fbff733cf test-data/DIA_nlim.emapper.annotations_orthologs --- a/test-data/DIA_nlim.emapper.annotations_orthologs Mon Jun 20 12:49:52 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,3 +0,0 @@ -#query orth_type species orthologs -Nmar_0135 one2one Marine Group I thaumarchaeote SCGC AB-629-I23(1131266) *ARWQ01000003_gene1537 -Nmar_0135 seed Nitrosopumilus maritimus SCM1(436308) *Nmar_0135 |
b |
diff -r 5a30ae278db0 -r 9d1fbff733cf test-data/nlim_1033.fasta --- a/test-data/nlim_1033.fasta Mon Jun 20 12:49:52 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ ->Nlim_1033886738.Nlim_1033Cytidyltransferase-like protein -MELIKKSILTELYLSGITGKSHIDNLTKKGFTQKLIDLE -IDELIKNKLVKEDRAILTELGRSSLRVVLAGGVFDIIHP -GHIYTLNAAKSLGDVLIVVVATDNTALKMKKRQPLHSKE -QRQELVNSLIMVDLCLIGQEDDIFKTVNLVKPQIIALGY -DQVHQEKFIIDGCKKIQLDAKVARLQSPIPESSSSKIQK -EYGESIHGI |
b |
diff -r 5a30ae278db0 -r 9d1fbff733cf test-data/scoped.emapper.annotations --- a/test-data/scoped.emapper.annotations Mon Jun 20 12:49:52 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -#query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs -Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 COG1083@1|root,arCOG04817@2157|Archaea,41T2K@651137|Thaumarchaeota 651137|Thaumarchaeota M Cytidylyltransferase - - - ko:K07257 - - - - ko00000 - - - - |
b |
diff -r 5a30ae278db0 -r 9d1fbff733cf test-data/scoped.emapper.annotations_orthologs --- a/test-data/scoped.emapper.annotations_orthologs Mon Jun 20 12:49:52 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,3 +0,0 @@ -#query orth_type species orthologs -Nmar_0135 one2one Marine Group I thaumarchaeote SCGC AB-629-I23(1131266) *ARWQ01000003_gene1537 -Nmar_0135 seed Nitrosopumilus maritimus SCM1(436308) *Nmar_0135 |
b |
diff -r 5a30ae278db0 -r 9d1fbff733cf test-data/scoped.emapper.seed_orthologs --- a/test-data/scoped.emapper.seed_orthologs Mon Jun 20 12:49:52 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -#qseqid sseqid evalue bitscore qstart qend sstart send pident qcov scov -Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 1 252 1 252 100.0 100.0 100.0 |