Previous changeset 7:4e4c6329f6cd (2021-02-19) Next changeset 9:63662ae295d6 (2022-02-13) |
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper commit e45c15081260025e470d23975ef5a734d3f8fc66" |
modified:
eggnog_macros.xml eggnog_mapper.xml test-data/DIA_nlim.emapper.annotations test-data/DIA_nlim.emapper.annotations_orthologs test-data/DIA_nlim.emapper.seed_orthologs test-data/README test-data/cached_locally/eggnog.db test-data/cached_locally/eggnog_mapper_db_versioned.loc test-data/scoped.emapper.annotations test-data/scoped.emapper.annotations_orthologs test-data/scoped.emapper.seed_orthologs tool-data/eggnog_mapper_db_versioned.loc.sample |
added:
test-data/cached_locally/eggnog.taxa.db test-data/eggnogg_tiny.sh test-data/eggnogg_tiny_taxa.sh |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 eggnog_macros.xml --- a/eggnog_macros.xml Fri Feb 19 18:54:25 2021 +0000 +++ b/eggnog_macros.xml Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -1,14 +1,15 @@ <?xml version="1.0"?> <macros> - <token name="@VERSION@">2.0.1</token> - <token name="@EGGNOG_DB_VERSION@">2.0</token> + <token name="@VERSION@">2.1.6</token> + <token name="@EGGNOG_DB_VERSION@">5.0.2</token> <!-- - # Versionning is super confusing: + # DB versioning was super confusing for eggnog-mapper 2.0.x: # eggnog-mapper 1.* needed a db v4.5 (based on eggnog v4.5) - # eggnog-mapper 2.0 needs a db v2.0 (based on eggnog v5.0) - # db v4.5 are not compatible with eggnog-mapper 2.0 + # eggnog-mapper 2.x needed a db v2.0 (based on eggnog v5.0) + # (db v4.5 are not compatible with eggnog-mapper 2.0) + # Starting with eggnog-mapper 2.1.* db versioning looks better: 2.1.0 requires db v5.0.2 --> - <token name="@IDX_VERSION@">2.0</token> + <token name="@IDX_VERSION@">5.0.2</token> <xml name="citations"> <citations> <citation type="doi">10.1093/nar/gkv1248</citation> @@ -26,6 +27,8 @@ <xml name="data_manager_params"> <param name="test" type="hidden" value="false" /> <param name="diamond_database" type="boolean" truevalue="" falsevalue="-D" checked="true" label="Install the diamond database" help="Takes ~9Gb, you most probably want it."/> + <param name="mmseqs_database" type="boolean" truevalue="-M" falsevalue="" checked="true" label="Install the MMseqs2 database" help="Required for mmseqs seed ortholog search mode. Takes ~11Gb, you most probably want it."/> + <param name="pfam_database" type="boolean" truevalue="-P" falsevalue="" checked="true" label="Install the Pfam database" help="Required for de novo annotation or realignment. Takes ~3Gb, you most probably want it."/> </xml> <xml name="data_manager_outputs"> <outputs> @@ -40,7 +43,7 @@ #end if mkdir -p '${install_path}' && download_eggnog_data.py - $diamond_database -y -q + $diamond_database $mmseqs_database $pfam_database -y -q #if $test == 'true' -s #end if |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 eggnog_mapper.xml --- a/eggnog_mapper.xml Fri Feb 19 18:54:25 2021 +0000 +++ b/eggnog_mapper.xml Tue Jan 25 13:51:50 2022 +0000 |
[ |
b'@@ -8,14 +8,38 @@\n <command detect_errors="aggressive"><![CDATA[\n emapper.py\n --data_dir \'$eggnog_data.fields.path\'\n- -m diamond\n- $translate\n+ -m \'$seed_ortho_options.ortho_method.m\'\n+ --itype \'${input_trans.itype}\'\n+ #if $input_trans.itype in [\'CDS\', \'genome\', \'metagenome\']:\n+ $input_trans.translate\n+ #end if\n+ #if $input_trans.itype in [\'genome\', \'metagenome\']:\n+ $input_trans.genepred\n+ #end if\n \n ## Diamond option\n- --matrix \'$diamond.matrix_gapcosts.matrix\'\n- $diamond.matrix_gapcosts.gap_costs\n- --query-cover $diamond.query_cover\n- --subject-cover $diamond.subject_cover\n+ #if $seed_ortho_options.ortho_method.m == "diamond":\n+ --matrix \'$seed_ortho_options.ortho_method.matrix_gapcosts.matrix\'\n+ $seed_ortho_options.ortho_method.matrix_gapcosts.gap_costs\n+ --sensmode $seed_ortho_options.ortho_method.sensmode\n+ $seed_ortho_options.ortho_method.dmnd_iterate\n+ $seed_ortho_options.ortho_method.dmnd_ignore_warnings\n+ #elif $seed_ortho_options.ortho_method.m == "mmseqs":\n+ --start_sens $seed_ortho_options.ortho_method.start_sens\n+ --sens_steps $seed_ortho_options.ortho_method.sens_steps\n+ --final_sens $seed_ortho_options.ortho_method.final_sens\n+ #end if\n+\n+ ## Common options for search filtering\n+ #if $seed_ortho_options.query_cover:\n+ --query_cover $seed_ortho_options.query_cover\n+ #end if\n+ #if $seed_ortho_options.subject_cover:\n+ --subject_cover $seed_ortho_options.subject_cover\n+ #end if\n+ #if $seed_ortho_options.pident:\n+ --pident $seed_ortho_options.pident\n+ #end if\n \n #if $annotation_options.tax_scope:\n --tax_scope=$annotation_options.tax_scope\n@@ -27,10 +51,10 @@\n --go_evidence=$annotation_options.go_evidence\n #end if\n #if $seed_ortholog_options.seed_ortholog_evalue:\n- --seed_ortholog_evalue=$seed_ortholog_options.seed_ortholog_evalue\n+ --evalue=$seed_ortholog_options.seed_ortholog_evalue\n #end if\n #if str($seed_ortholog_options.seed_ortholog_score):\n- 
--seed_ortholog_score=$seed_ortholog_options.seed_ortholog_score\n+ --score=$seed_ortholog_options.seed_ortholog_score\n #end if\n $output_options.no_file_comments\n $output_options.no_annot\n@@ -41,145 +65,199 @@\n ]]></command>\n <inputs>\n <param name="input" type="data" format="fasta" label="Fasta sequences to annotate"/>\n+ <conditional name="input_trans">\n+ <param argument="--itype" type="select" label="Type of sequences">\n+ <option value="proteins" selected="true">proteins</option>\n+ <option value="CDS">CDS</option>\n+ <option value="genome">genome</option>\n+ <option value="metagenome">metagenome</option>\n+ </param>\n+ <when value="proteins"/>\n+ <when value="CDS">\n+ <param name="translate" type="boolean" truevalue="--translate" falsevalue="" checked="false"\n+ label="Translate CDS to proteins before search"/>\n+ </when>\n+ <when value="genome">\n+ <param name="translate" type="boolean" truevalue="--translate" falsevalue="" checked="false"\n+ label="Translate predicted CDS from blastx hits to proteins"/>\n+ <param argument="--genepred" type="select" label="Type of sequences">\n+ <option value="search" selected="true">Inferred from Diamond/MMseqs2 blastx hits</option>\n+ <option value="prodigal">Performed using Prodigal</option>\n+ </param>\n+ </when>\n+ <when value="metagenome">\n+ <param name="translate" type="boolean" truevalue="--tran'..b',query_end,seed_start,seed_end,pident,query_cov,seed_cov"/>\n </actions>\n </data>\n <data name="annotations" format="tabular" label="${tool.name} on ${on_string}: annotations" from_work_dir="results.emapper.annotations">\n <filter>not output_options[\'no_annot\']</filter>\n <actions>\n- <action name="column_names" type="metadata" 
default="query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,predicted_taxonomic_group,predicted_protein_name,GO_terms,EC_number,KEGG_KO,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reactions,Annotation_tax_scope,Matching_OGs,best_OG|evalue|score,COG_functional_categories,eggNOG_free_text_description"/>\n+ <action name="column_names" type="metadata" default="query,seed_ortholog,evalue,score,eggNOG_OGs,max_annot_lvl,COG_category,Description,Preferred_name,GOs,EC,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMs"/>\n </actions>\n </data>\n- <data name="annotations_orthologs" format="tabular" label="${tool.name} on ${on_string}: annotations.orthologs" from_work_dir="results.emapper.annotations.orthologs">\n+ <data name="annotations_orthologs" format="tabular" label="${tool.name} on ${on_string}: orthologs" from_work_dir="results.emapper.orthologs">\n <filter>output_options[\'report_orthologs\']</filter>\n <actions>\n- <action name="column_names" type="metadata" default="query_name,orthologs"/>\n+ <action name="column_names" type="metadata" default="query,orth_type,species,orthologs"/>\n </actions>\n </data>\n </outputs>\n@@ -268,6 +347,37 @@\n <output name="annotations" file="scoped.emapper.annotations" ftype="tabular" compare="sim_size"/>\n <output name="annotations_orthologs" file="scoped.emapper.annotations_orthologs" ftype="tabular"/>\n </test>\n+ <test>\n+ <param name="input" value="Nmar_0135.fa" ftype="fasta"/>\n+ <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->\n+ <section name="seed_ortho_options">\n+ <conditional name="ortho_method">\n+ <param name="m" value="diamond" />\n+ <param name="sensmode" value="fast" />\n+ </conditional>\n+ </section>\n+ <param name="report_orthologs" value="true"/>\n+ <param name="no_file_comments" value="true"/>\n+ <output name="seed_orthologs" 
file="DIA_nlim.emapper.seed_orthologs" ftype="tabular" compare="sim_size"/>\n+ <output name="annotations" file="DIA_nlim.emapper.annotations" ftype="tabular" compare="sim_size"/>\n+ <output name="annotations_orthologs" file="DIA_nlim.emapper.annotations_orthologs" ftype="tabular"/>\n+ </test>\n+ <!-- not enabled as it requires a specific .db file, hard to minimize -->\n+ <!--test>\n+ <param name="input" value="Nmar_0135.fa" ftype="fasta"/>\n+ <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>\n+ <section name="seed_ortho_options">\n+ <conditional name="ortho_method">\n+ <param name="m" value="mmseqs" />\n+ <param name="start_sens" value="4" />\n+ </conditional>\n+ </section>\n+ <param name="report_orthologs" value="true"/>\n+ <param name="no_file_comments" value="true"/>\n+ <output name="seed_orthologs" file="DIA_nlim.emapper.seed_orthologs" ftype="tabular" compare="sim_size"/>\n+ <output name="annotations" file="DIA_nlim.emapper.annotations" ftype="tabular" compare="sim_size"/>\n+ <output name="annotations_orthologs" file="DIA_nlim.emapper.annotations_orthologs" ftype="tabular"/>\n+ </test-->\n </tests>\n <help><![CDATA[\n \n' |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/DIA_nlim.emapper.annotations --- a/test-data/DIA_nlim.emapper.annotations Fri Feb 19 18:54:25 2021 +0000 +++ b/test-data/DIA_nlim.emapper.annotations Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -1,1 +1,2 @@ -Nmar_0135 436308.Nmar_0135 3.8e-149 510.8 Thaumarchaeota Archaea 41T2K@651137,COG1083@1,arCOG04817@2157 NA|NA|NA M Cytidylyltransferase +#query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs +Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 COG1083@1|root,arCOG04817@2157|Archaea,41T2K@651137|Thaumarchaeota 651137|Thaumarchaeota M Cytidylyltransferase - - - ko:K07257 - - - - ko00000 - - - - |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/DIA_nlim.emapper.annotations_orthologs --- a/test-data/DIA_nlim.emapper.annotations_orthologs Fri Feb 19 18:54:25 2021 +0000 +++ b/test-data/DIA_nlim.emapper.annotations_orthologs Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -1,1 +1,3 @@ -Nmar_0135 +#query orth_type species orthologs +Nmar_0135 one2one Marine Group I thaumarchaeote SCGC AB-629-I23(1131266) *ARWQ01000003_gene1537 +Nmar_0135 one2one Nitrosopumilus maritimus SCM1(436308) *Nmar_0135 |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/DIA_nlim.emapper.seed_orthologs --- a/test-data/DIA_nlim.emapper.seed_orthologs Fri Feb 19 18:54:25 2021 +0000 +++ b/test-data/DIA_nlim.emapper.seed_orthologs Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -1,1 +1,2 @@ -Nmar_0135 436308.Nmar_0135 3.8e-149 510.8 +#qseqid sseqid evalue bitscore qstart qend sstart send pident qcov scov +Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 1 252 1 252 100.0 100.0 100.0 |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/README --- a/test-data/README Fri Feb 19 18:54:25 2021 +0000 +++ b/test-data/README Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -1,1 +1,1 @@ -cached_locally content is a reduced database, following instructions on https://github.com/galaxyproteomics/egglet +cached_locally content is a reduced database, built using the eggnogg_tiny.sh and eggnogg_tiny_taxa.sh scripts (inspired by the instructions on https://github.com/galaxyproteomics/egglet) |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/cached_locally/eggnog.db |
b |
Binary file test-data/cached_locally/eggnog.db has changed |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/cached_locally/eggnog.taxa.db |
b |
Binary file test-data/cached_locally/eggnog.taxa.db has changed |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/cached_locally/eggnog_mapper_db_versioned.loc --- a/test-data/cached_locally/eggnog_mapper_db_versioned.loc Fri Feb 19 18:54:25 2021 +0000 +++ b/test-data/cached_locally/eggnog_mapper_db_versioned.loc Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -1,2 +1,2 @@ #value name path version -2.0 eggNOG_2.0 ${__HERE__} 2.0 +5.0.2 eggNOG_5.0.2 ${__HERE__} 5.0.2 |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/eggnogg_tiny.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/eggnogg_tiny.sh Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -0,0 +1,28 @@ +#!/bin/bash + +# Script adapted from https://github.com/galaxyproteomics/egglet to produce a minimal eggnog 5.0.2 database + +sqlite3 $1 << "EOF" + +CREATE TEMP TABLE og +AS SELECT * FROM og +WHERE description = 'Cytidylyltransferase' +AND level LIKE "651137" +LIMIT 1; + +CREATE TEMP TABLE event +AS SELECT * FROM event +WHERE level=651137 +AND og='41T2K' +LIMIT 20; + +CREATE TEMP TABLE prots +AS SELECT * FROM prots +WHERE name = "436308.Nmar_0135"; + +CREATE TEMP TABLE version +AS SELECT * FROM version; + + +.backup temp eggnog_tiny.db +EOF |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/eggnogg_tiny_taxa.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/eggnogg_tiny_taxa.sh Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -0,0 +1,23 @@ +#!/bin/bash + +# Script adapted from https://github.com/galaxyproteomics/egglet to produce a minimal eggnog 5.0.2 database + +sqlite3 $1 << "EOF" + +CREATE TEMP TABLE species +AS SELECT * FROM species +WHERE taxid in (1131266, 436308); + +CREATE TEMP TABLE synonym +AS SELECT * FROM synonym +WHERE taxid in (1131266, 436308); + +CREATE TEMP TABLE merged +AS SELECT * FROM merged +WHERE taxid_old in (1131266, 436308); + +CREATE TEMP TABLE stats +AS SELECT * FROM stats; + +.backup temp eggnog_tiny_taxa.db +EOF |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/scoped.emapper.annotations --- a/test-data/scoped.emapper.annotations Fri Feb 19 18:54:25 2021 +0000 +++ b/test-data/scoped.emapper.annotations Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -1,1 +1,2 @@ -Nmar_0135 436308.Nmar_0135 3.8e-149 510.8 Thaumarchaeota ko:K07257 ko00000 Thaumarchaeota 41T2K@651137,COG1083@1,arCOG04817@2157 NA|NA|NA M Cytidylyltransferase +#query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs +Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 COG1083@1|root,arCOG04817@2157|Archaea,41T2K@651137|Thaumarchaeota 651137|Thaumarchaeota M Cytidylyltransferase - - - ko:K07257 - - - - ko00000 - - - - |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/scoped.emapper.annotations_orthologs --- a/test-data/scoped.emapper.annotations_orthologs Fri Feb 19 18:54:25 2021 +0000 +++ b/test-data/scoped.emapper.annotations_orthologs Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -1,1 +1,3 @@ -Nmar_0135 1131266.ARWQ01000003_gene1537,436308.Nmar_0135 +#query orth_type species orthologs +Nmar_0135 one2one Marine Group I thaumarchaeote SCGC AB-629-I23(1131266) *ARWQ01000003_gene1537 +Nmar_0135 one2one Nitrosopumilus maritimus SCM1(436308) *Nmar_0135 |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 test-data/scoped.emapper.seed_orthologs --- a/test-data/scoped.emapper.seed_orthologs Fri Feb 19 18:54:25 2021 +0000 +++ b/test-data/scoped.emapper.seed_orthologs Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -1,1 +1,2 @@ -Nmar_0135 436308.Nmar_0135 3.8e-149 510.8 +#qseqid sseqid evalue bitscore qstart qend sstart send pident qcov scov +Nmar_0135 436308.Nmar_0135 7.67e-188 503.0 1 252 1 252 100.0 100.0 100.0 |
b |
diff -r 4e4c6329f6cd -r 96cac424c870 tool-data/eggnog_mapper_db_versioned.loc.sample --- a/tool-data/eggnog_mapper_db_versioned.loc.sample Fri Feb 19 18:54:25 2021 +0000 +++ b/tool-data/eggnog_mapper_db_versioned.loc.sample Tue Jan 25 13:51:50 2022 +0000 |
b |
@@ -3,9 +3,9 @@ # # eggnog-mapper requires the following files to be installed in the data directory: # https://github.com/jhcepas/eggnog-mapper/blob/master/data/og2level.tsv.gz -# http://eggnog5.embl.de/download/emapperdb-5.0.0/eggnog.db.gz +# http://eggnog5.embl.de/download/emapperdb-5.0.2/eggnog.db.gz # A complete diamond database is available from: -# http://eggnog5.embl.de/download/emapperdb-5.0.0/eggnog_proteins.dmnd.gz +# http://eggnog5.embl.de/download/emapperdb-5.0.2/eggnog_proteins.dmnd.gz # # The python script download_eggnog_data.py, # included with eggnog_mapper, can be used to download the files to the correct directory |