Repository 'eggnog_mapper'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/eggnog_mapper

Changeset 8:96cac424c870 (2022-01-25)
Previous changeset 7:4e4c6329f6cd (2021-02-19) Next changeset 9:63662ae295d6 (2022-02-13)
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper commit e45c15081260025e470d23975ef5a734d3f8fc66"
modified:
eggnog_macros.xml
eggnog_mapper.xml
test-data/DIA_nlim.emapper.annotations
test-data/DIA_nlim.emapper.annotations_orthologs
test-data/DIA_nlim.emapper.seed_orthologs
test-data/README
test-data/cached_locally/eggnog.db
test-data/cached_locally/eggnog_mapper_db_versioned.loc
test-data/scoped.emapper.annotations
test-data/scoped.emapper.annotations_orthologs
test-data/scoped.emapper.seed_orthologs
tool-data/eggnog_mapper_db_versioned.loc.sample
added:
test-data/cached_locally/eggnog.taxa.db
test-data/eggnogg_tiny.sh
test-data/eggnogg_tiny_taxa.sh
b
diff -r 4e4c6329f6cd -r 96cac424c870 eggnog_macros.xml
--- a/eggnog_macros.xml Fri Feb 19 18:54:25 2021 +0000
+++ b/eggnog_macros.xml Tue Jan 25 13:51:50 2022 +0000
b
@@ -1,14 +1,15 @@
 <?xml version="1.0"?>
 <macros>
-   <token name="@VERSION@">2.0.1</token>
-   <token name="@EGGNOG_DB_VERSION@">2.0</token>
+   <token name="@VERSION@">2.1.6</token>
+   <token name="@EGGNOG_DB_VERSION@">5.0.2</token>
     <!--
-    # Versionning is super confusing:
+    # DB versioning was super confusing for eggnog-mapper 2.0.x:
     # eggnog-mapper 1.* needed a db v4.5 (based on eggnog v4.5)
-    # eggnog-mapper 2.0 needs a db v2.0 (based on eggnog v5.0)
-    # db v4.5 are not compatible with eggnog-mapper 2.0
+    # eggnog-mapper 2.0.x needed a db v2.0 (based on eggnog v5.0)
+    # (db v4.5 is not compatible with eggnog-mapper 2.0.x)
+    # Starting with eggnog-mapper 2.1.*, db versioning is clearer: 2.1.0 requires db v5.0.2
     -->
-   <token name="@IDX_VERSION@">2.0</token>
+   <token name="@IDX_VERSION@">5.0.2</token>
    <xml name="citations">
         <citations>
             <citation type="doi">10.1093/nar/gkv1248</citation>
@@ -26,6 +27,8 @@
     <xml name="data_manager_params">
         <param name="test" type="hidden" value="false" />
         <param name="diamond_database" type="boolean" truevalue="" falsevalue="-D" checked="true" label="Install the diamond database" help="Takes ~9Gb, you most probably want it."/>
+        <param name="mmseqs_database" type="boolean" truevalue="-M" falsevalue="" checked="true" label="Install the MMseqs2 database" help="Required for mmseqs seed ortholog search mode. Takes ~11Gb, you most probably want it."/>
+        <param name="pfam_database" type="boolean" truevalue="-P" falsevalue="" checked="true" label="Install the Pfam database" help="Rquired for de novo annotation or realignment. Takes ~3Gb, you most probably want it."/>
     </xml>
     <xml name="data_manager_outputs">
         <outputs>
@@ -40,7 +43,7 @@
 #end if
 mkdir -p '${install_path}' &&
 download_eggnog_data.py
-  $diamond_database -y -q
+  $diamond_database $mmseqs_database $pfam_database -y -q
 #if $test == 'true'
   -s
 #end if
b
diff -r 4e4c6329f6cd -r 96cac424c870 eggnog_mapper.xml
--- a/eggnog_mapper.xml Fri Feb 19 18:54:25 2021 +0000
+++ b/eggnog_mapper.xml Tue Jan 25 13:51:50 2022 +0000
[
b'@@ -8,14 +8,38 @@\n     <command detect_errors="aggressive"><![CDATA[\n         emapper.py\n         --data_dir \'$eggnog_data.fields.path\'\n-        -m diamond\n-        $translate\n+        -m \'$seed_ortho_options.ortho_method.m\'\n+        --itype \'${input_trans.itype}\'\n+        #if $input_trans.itype in [\'CDS\', \'genome\', \'metagenome\']:\n+            $input_trans.translate\n+        #end if\n+        #if $input_trans.itype in [\'genome\', \'metagenome\']:\n+            $input_trans.genepred\n+        #end if\n \n         ## Diamond option\n-        --matrix \'$diamond.matrix_gapcosts.matrix\'\n-        $diamond.matrix_gapcosts.gap_costs\n-        --query-cover $diamond.query_cover\n-        --subject-cover $diamond.subject_cover\n+        #if $seed_ortho_options.ortho_method.m == "diamond":\n+            --matrix \'$seed_ortho_options.ortho_method.matrix_gapcosts.matrix\'\n+            $seed_ortho_options.ortho_method.matrix_gapcosts.gap_costs\n+            --sensmode $seed_ortho_options.ortho_method.sensmode\n+            $seed_ortho_options.ortho_method.dmnd_iterate\n+            $seed_ortho_options.ortho_method.dmnd_ignore_warnings\n+        #elif $seed_ortho_options.ortho_method.m == "mmseqs":\n+            --start_sens $seed_ortho_options.ortho_method.start_sens\n+            --sens_steps $seed_ortho_options.ortho_method.sens_steps\n+            --final_sens $seed_ortho_options.ortho_method.final_sens\n+        #end if\n+\n+        ## Common options for search filtering\n+        #if $seed_ortho_options.query_cover:\n+        --query_cover $seed_ortho_options.query_cover\n+        #end if\n+        #if $seed_ortho_options.subject_cover:\n+        --subject_cover $seed_ortho_options.subject_cover\n+        #end if\n+        #if $seed_ortho_options.pident:\n+        --pident $seed_ortho_options.pident\n+        #end if\n \n         #if $annotation_options.tax_scope:\n             --tax_scope=$annotation_options.tax_scope\n@@ -27,10 +51,10 @@\n    
         --go_evidence=$annotation_options.go_evidence\n         #end if\n         #if $seed_ortholog_options.seed_ortholog_evalue:\n-            --seed_ortholog_evalue=$seed_ortholog_options.seed_ortholog_evalue\n+            --evalue=$seed_ortholog_options.seed_ortholog_evalue\n         #end if\n         #if str($seed_ortholog_options.seed_ortholog_score):\n-            --seed_ortholog_score=$seed_ortholog_options.seed_ortholog_score\n+            --score=$seed_ortholog_options.seed_ortholog_score\n         #end if\n         $output_options.no_file_comments\n         $output_options.no_annot\n@@ -41,145 +65,199 @@\n     ]]></command>\n     <inputs>\n         <param name="input" type="data" format="fasta" label="Fasta sequences to annotate"/>\n+        <conditional name="input_trans">\n+            <param argument="--itype" type="select" label="Type of sequences">\n+                <option value="proteins" selected="true">proteins</option>\n+                <option value="CDS">CDS</option>\n+                <option value="genome">genome</option>\n+                <option value="metagenome">metagenome</option>\n+            </param>\n+            <when value="proteins"/>\n+            <when value="CDS">\n+                <param name="translate" type="boolean" truevalue="--translate" falsevalue="" checked="false"\n+                    label="Translate CDS to proteins before search"/>\n+            </when>\n+            <when value="genome">\n+                <param name="translate" type="boolean" truevalue="--translate" falsevalue="" checked="false"\n+                    label="Translate predicted CDS from blastx hits to proteins"/>\n+                <param argument="--genepred" type="select" label="Type of sequences">\n+                    <option value="search" selected="true">Inferred from Diamond/MMseqs2 blastx hits</option>\n+                    <option value="prodigal">Performed using Prodigal</option>\n+                </param>\n+            </when>\n+        
    <when value="metagenome">\n+                <param name="translate" type="boolean" truevalue="--tran'..b',query_end,seed_start,seed_end,pident,query_cov,seed_cov"/>\n             </actions>\n         </data>\n         <data name="annotations" format="tabular" label="${tool.name} on ${on_string}: annotations" from_work_dir="results.emapper.annotations">\n             <filter>not output_options[\'no_annot\']</filter>\n             <actions>\n-                <action name="column_names" type="metadata" default="query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,predicted_taxonomic_group,predicted_protein_name,GO_terms,EC_number,KEGG_KO,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reactions,Annotation_tax_scope,Matching_OGs,best_OG|evalue|score,COG_functional_categories,eggNOG_free_text_description"/>\n+                <action name="column_names" type="metadata" default="query,seed_ortholog,evalue,score,max_annot_lvl,COG_category,Description,Preferred_name,GOs,EC,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMseggNOG_OGs"/>\n             </actions>\n         </data>\n-        <data name="annotations_orthologs" format="tabular" label="${tool.name} on ${on_string}: annotations.orthologs"  from_work_dir="results.emapper.annotations.orthologs">\n+        <data name="annotations_orthologs" format="tabular" label="${tool.name} on ${on_string}: orthologs"  from_work_dir="results.emapper.orthologs">\n             <filter>output_options[\'report_orthologs\']</filter>\n             <actions>\n-                <action name="column_names" type="metadata" default="query_name,orthologs"/>\n+                <action name="column_names" type="metadata" default="query,orth_type,species,orthologs"/>\n             </actions>\n         </data>\n     </outputs>\n@@ -268,6 +347,37 @@\n             <output name="annotations" file="scoped.emapper.annotations" ftype="tabular" 
compare="sim_size"/>\n             <output name="annotations_orthologs" file="scoped.emapper.annotations_orthologs" ftype="tabular"/>\n         </test>\n+        <test>\n+            <param name="input" value="Nmar_0135.fa" ftype="fasta"/>\n+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->\n+            <section name="seed_ortho_options">\n+                <conditional name="ortho_method">\n+                    <param name="m" value="diamond" />\n+                    <param name="sensmode" value="fast" />\n+                </conditional>\n+            </section>\n+            <param name="report_orthologs" value="true"/>\n+            <param name="no_file_comments" value="true"/>\n+            <output name="seed_orthologs" file="DIA_nlim.emapper.seed_orthologs" ftype="tabular" compare="sim_size"/>\n+            <output name="annotations" file="DIA_nlim.emapper.annotations" ftype="tabular" compare="sim_size"/>\n+            <output name="annotations_orthologs" file="DIA_nlim.emapper.annotations_orthologs" ftype="tabular"/>\n+        </test>\n+        <!-- not enabled as it requires a specific .db file, hard to minimize -->\n+        <!--test>\n+            <param name="input" value="Nmar_0135.fa" ftype="fasta"/>\n+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>\n+            <section name="seed_ortho_options">\n+                <conditional name="ortho_method">\n+                    <param name="m" value="mmseqs" />\n+                    <param name="start_sens" value="4" />\n+                </conditional>\n+            </section>\n+            <param name="report_orthologs" value="true"/>\n+            <param name="no_file_comments" value="true"/>\n+            <output name="seed_orthologs" file="DIA_nlim.emapper.seed_orthologs" ftype="tabular" compare="sim_size"/>\n+            <output name="annotations" file="DIA_nlim.emapper.annotations" ftype="tabular" 
compare="sim_size"/>\n+            <output name="annotations_orthologs" file="DIA_nlim.emapper.annotations_orthologs" ftype="tabular"/>\n+        </test-->\n     </tests>\n     <help><![CDATA[\n \n'
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/DIA_nlim.emapper.annotations
--- a/test-data/DIA_nlim.emapper.annotations Fri Feb 19 18:54:25 2021 +0000
+++ b/test-data/DIA_nlim.emapper.annotations Tue Jan 25 13:51:50 2022 +0000
b
@@ -1,1 +1,2 @@
-Nmar_0135 436308.Nmar_0135 3.8e-149 510.8 Thaumarchaeota Archaea 41T2K@651137,COG1083@1,arCOG04817@2157 NA|NA|NA M Cytidylyltransferase
+#query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs
+Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 COG1083@1|root,arCOG04817@2157|Archaea,41T2K@651137|Thaumarchaeota 651137|Thaumarchaeota M Cytidylyltransferase - - - ko:K07257 - - - - ko00000 - - - -
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/DIA_nlim.emapper.annotations_orthologs
--- a/test-data/DIA_nlim.emapper.annotations_orthologs Fri Feb 19 18:54:25 2021 +0000
+++ b/test-data/DIA_nlim.emapper.annotations_orthologs Tue Jan 25 13:51:50 2022 +0000
b
@@ -1,1 +1,3 @@
-Nmar_0135
+#query orth_type species orthologs
+Nmar_0135 one2one Marine Group I thaumarchaeote SCGC AB-629-I23(1131266) *ARWQ01000003_gene1537
+Nmar_0135 one2one Nitrosopumilus maritimus SCM1(436308) *Nmar_0135
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/DIA_nlim.emapper.seed_orthologs
--- a/test-data/DIA_nlim.emapper.seed_orthologs Fri Feb 19 18:54:25 2021 +0000
+++ b/test-data/DIA_nlim.emapper.seed_orthologs Tue Jan 25 13:51:50 2022 +0000
b
@@ -1,1 +1,2 @@
-Nmar_0135 436308.Nmar_0135 3.8e-149 510.8
+#qseqid sseqid evalue bitscore qstart qend sstart send pident qcov scov
+Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 1 252 1 252 100.0 100.0 100.0
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/README
--- a/test-data/README Fri Feb 19 18:54:25 2021 +0000
+++ b/test-data/README Tue Jan 25 13:51:50 2022 +0000
b
@@ -1,1 +1,1 @@
-cached_locally content is a reduced database, following instructions on https://github.com/galaxyproteomics/egglet
+cached_locally content is a reduced database, produced with the eggnogg_tiny.sh and eggnogg_tiny_taxa.sh scripts (inspired by the instructions on https://github.com/galaxyproteomics/egglet)
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/cached_locally/eggnog.db
b
Binary file test-data/cached_locally/eggnog.db has changed
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/cached_locally/eggnog.taxa.db
b
Binary file test-data/cached_locally/eggnog.taxa.db has changed
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/cached_locally/eggnog_mapper_db_versioned.loc
--- a/test-data/cached_locally/eggnog_mapper_db_versioned.loc Fri Feb 19 18:54:25 2021 +0000
+++ b/test-data/cached_locally/eggnog_mapper_db_versioned.loc Tue Jan 25 13:51:50 2022 +0000
b
@@ -1,2 +1,2 @@
 #value name path version
-2.0 eggNOG_2.0 ${__HERE__} 2.0
+5.0.2 eggNOG_5.0.2 ${__HERE__} 5.0.2
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/eggnogg_tiny.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/eggnogg_tiny.sh Tue Jan 25 13:51:50 2022 +0000
b
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Script adapted from https://github.com/galaxyproteomics/egglet to produce a minimal eggnog 5.0.2 database
+#
+# Usage: eggnogg_tiny.sh <eggnog.db>
+# Copies a small subset of the og, event, prots and version tables of the given
+# database into same-named TEMP tables, then backs that temp schema up to ./eggnog_tiny.db
+
+# Refuse to run without an existing input file: sqlite3 would otherwise create a new, empty database at a missing path
+if [ "$#" -lt 1 ] || [ ! -f "$1" ]; then
+    echo "Usage: $0 <eggnog.db>" >&2
+    exit 1
+fi
+
+# "$1" is quoted so that a path containing spaces reaches sqlite3 as a single argument.
+# SQL string literals use single quotes: SQLite treats double-quoted tokens as identifiers first.
+sqlite3 "$1" << "EOF"
+
+CREATE TEMP TABLE og
+AS SELECT * FROM og
+WHERE description = 'Cytidylyltransferase'
+AND level LIKE '651137'
+LIMIT 1;
+
+CREATE TEMP TABLE event
+AS SELECT * FROM event
+WHERE level=651137
+AND og='41T2K'
+LIMIT 20;
+
+CREATE TEMP TABLE prots
+AS SELECT * FROM prots
+WHERE name = '436308.Nmar_0135';
+
+CREATE TEMP TABLE version
+AS SELECT * FROM version;
+
+
+.backup temp eggnog_tiny.db
+EOF
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/eggnogg_tiny_taxa.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/eggnogg_tiny_taxa.sh Tue Jan 25 13:51:50 2022 +0000
b
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Script adapted from https://github.com/galaxyproteomics/egglet to produce a minimal eggnog 5.0.2 taxa database
+#
+# Usage: eggnogg_tiny_taxa.sh <taxa database>   (presumably eggnog.taxa.db; confirm against the eggnog data directory)
+# Copies the species, synonym and merged rows for taxids 1131266 and 436308, plus the
+# whole stats table, into same-named TEMP tables, then backs them up to ./eggnog_tiny_taxa.db
+
+# Refuse to run without an existing input file: sqlite3 would otherwise create a new, empty database at a missing path
+if [ "$#" -lt 1 ] || [ ! -f "$1" ]; then
+    echo "Usage: $0 <taxa database>" >&2
+    exit 1
+fi
+
+# "$1" is quoted so that a path containing spaces reaches sqlite3 as a single argument.
+sqlite3 "$1" << "EOF"
+
+CREATE TEMP TABLE species
+AS SELECT * FROM species
+WHERE taxid in (1131266, 436308);
+
+CREATE TEMP TABLE synonym
+AS SELECT * FROM synonym
+WHERE taxid in (1131266, 436308);
+
+CREATE TEMP TABLE merged
+AS SELECT * FROM merged
+WHERE taxid_old in (1131266, 436308);
+
+CREATE TEMP TABLE stats
+AS SELECT * FROM stats;
+
+.backup temp eggnog_tiny_taxa.db
+EOF
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/scoped.emapper.annotations
--- a/test-data/scoped.emapper.annotations Fri Feb 19 18:54:25 2021 +0000
+++ b/test-data/scoped.emapper.annotations Tue Jan 25 13:51:50 2022 +0000
b
@@ -1,1 +1,2 @@
-Nmar_0135 436308.Nmar_0135 3.8e-149 510.8 Thaumarchaeota ko:K07257 ko00000 Thaumarchaeota 41T2K@651137,COG1083@1,arCOG04817@2157 NA|NA|NA M Cytidylyltransferase
+#query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs
+Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 COG1083@1|root,arCOG04817@2157|Archaea,41T2K@651137|Thaumarchaeota 651137|Thaumarchaeota M Cytidylyltransferase - - - ko:K07257 - - - - ko00000 - - - -
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/scoped.emapper.annotations_orthologs
--- a/test-data/scoped.emapper.annotations_orthologs Fri Feb 19 18:54:25 2021 +0000
+++ b/test-data/scoped.emapper.annotations_orthologs Tue Jan 25 13:51:50 2022 +0000
b
@@ -1,1 +1,3 @@
-Nmar_0135 1131266.ARWQ01000003_gene1537,436308.Nmar_0135
+#query orth_type species orthologs
+Nmar_0135 one2one Marine Group I thaumarchaeote SCGC AB-629-I23(1131266) *ARWQ01000003_gene1537
+Nmar_0135 one2one Nitrosopumilus maritimus SCM1(436308) *Nmar_0135
b
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/scoped.emapper.seed_orthologs
--- a/test-data/scoped.emapper.seed_orthologs Fri Feb 19 18:54:25 2021 +0000
+++ b/test-data/scoped.emapper.seed_orthologs Tue Jan 25 13:51:50 2022 +0000
b
@@ -1,1 +1,2 @@
-Nmar_0135 436308.Nmar_0135 3.8e-149 510.8
+#qseqid sseqid evalue bitscore qstart qend sstart send pident qcov scov
+Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 1 252 1 252 100.0 100.0 100.0
b
diff -r 4e4c6329f6cd -r 96cac424c870 tool-data/eggnog_mapper_db_versioned.loc.sample
--- a/tool-data/eggnog_mapper_db_versioned.loc.sample Fri Feb 19 18:54:25 2021 +0000
+++ b/tool-data/eggnog_mapper_db_versioned.loc.sample Tue Jan 25 13:51:50 2022 +0000
b
@@ -3,9 +3,9 @@
 #
 # eggnog-mapper requires the following files to be installed in the data directory:
 #  https://github.com/jhcepas/eggnog-mapper/blob/master/data/og2level.tsv.gz
-#  http://eggnog5.embl.de/download/emapperdb-5.0.0/eggnog.db.gz
+#  http://eggnog5.embl.de/download/emapperdb-5.0.2/eggnog.db.gz
 # A complete diamond database is available from:
-#  http://eggnog5.embl.de/download/emapperdb-5.0.0/eggnog_proteins.dmnd.gz
+#  http://eggnog5.embl.de/download/emapperdb-5.0.2/eggnog_proteins.dmnd.gz
 #
 # The python script download_eggnog_data.py,
 # included with eggnog_mapper, can be used to download the files to the correct directory