Mercurial > repos > galaxyp > data_manager_eggnog_mapper
changeset 1:077cf0a99144 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper_data_manager commit 2200885b5049b2d952959001c8a9b5ae5c62bee5"
field | value |
---|---|
author | galaxyp |
date | Fri, 04 Sep 2020 21:38:59 +0000 |
parents | 6d8144eef202 |
children | 3d82020b1e3b |
files | data_manager/data_manager_eggnog.py data_manager/data_manager_eggnog.xml data_manager/eggnog_macros.xml data_manager_conf.xml test-data/cached_locally/eggnog.db test-data/cached_locally/eggnog_mapper_db.loc test-data/cached_locally/eggnog_mapper_hmm_dbs.loc tool-data/eggnog_mapper_db.loc.sample tool-data/eggnog_mapper_hmm_dbs.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 11 files changed, 28 insertions(+), 207 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_eggnog.py Mon Nov 11 11:49:16 2019 -0500 +++ b/data_manager/data_manager_eggnog.py Fri Sep 04 21:38:59 2020 +0000 @@ -11,7 +11,7 @@ def _get_db_version(sqlitedb_path): - version = '4.5' + version = '5.0' try: query = 'select version from version' conn = sqlite3.connect(sqlitedb_path) @@ -28,7 +28,6 @@ parser = argparse.ArgumentParser() parser.add_argument('--config_file') parser.add_argument('--install_path') - parser.add_argument('--dbs', default='') args = parser.parse_args() eggnog_db_path = os.path.join(args.install_path, 'eggnog.db') @@ -46,19 +45,9 @@ data_table_entry = dict(value=db_version, name=db_version, path=args.install_path) dm_dict['data_tables'][data_table].append(data_table_entry) - data_table = 'eggnog_mapper_hmm_dbs' - dm_dict['data_tables'][data_table]\ - = dm_dict['data_tables'].get(data_table, []) - if args.dbs: - dbs = [x.strip() for x in args.dbs.split(',')] - for db in dbs: - key = '%s_%s' % (db_version, db) - data_table_entry = dict(key=key, db_version=db_version, - value=db, name=db, path=db) - dm_dict['data_tables'][data_table].append(data_table_entry) # save info to json file - open(args.config_file, 'wb').write(json.dumps(dm_dict)) + open(args.config_file, 'w').write(json.dumps(dm_dict)) if __name__ == "__main__":
--- a/data_manager/data_manager_eggnog.xml Mon Nov 11 11:49:16 2019 -0500 +++ b/data_manager/data_manager_eggnog.xml Fri Sep 04 21:38:59 2020 +0000 @@ -1,4 +1,4 @@ -<tool id="data_manager_eggnog" name="EggNOG DB Download" version="@VERSION@.1" tool_type="manage_data"> +<tool id="data_manager_eggnog" name="EggNOG DB Download" version="@VERSION@" tool_type="manage_data"> <description>eggnog data</description> <macros> <import>eggnog_macros.xml</import> @@ -19,15 +19,14 @@ <expand macro="data_manager_test"/> </tests> <help><![CDATA[ -This tool downloads eggnog data using download_eggnog_data.py -and populates the data tables: eggnog_mapper_db and eggnog_mapper_hmm_dbs. -The data is located at: http://eggnogdb.embl.de/download/emapperdb-4.5.1/ -The optional eggNOG HMM databases: http://eggnogdb.embl.de/download/emapperdb-4.5.1/hmmdb_levels/ +This tool downloads eggnog data using download_eggnog_data.py +and populates the data tables: eggnog_mapper_db. +The data is located at: http://eggnog5.embl.de/download/emapperdb-5.0.0/ can vary from 1G to 80G in size. -This will install data relative to the galaxy_data_manager_data_path declared in config/galaxy.yml +This will install data relative to the galaxy_data_manager_data_path declared in config/galaxy.yml in directory eggnog_data/<eggnog data version>/. -The eggnog version is queried from the downloaded SQlite eggnog.db from table version. +The eggnog version is queried from the downloaded SQlite eggnog.db from table version. ]]></help> <expand macro="citations"/> </tool>
--- a/data_manager/eggnog_macros.xml Mon Nov 11 11:49:16 2019 -0500 +++ b/data_manager/eggnog_macros.xml Fri Sep 04 21:38:59 2020 +0000 @@ -1,9 +1,11 @@ <?xml version="1.0"?> <macros> - <token name="@VERSION@">1.0.3</token> + <token name="@VERSION@">2.0.1</token> + <token name="@EGGNOG_DB_VERSION@">5.0</token> <xml name="citations"> <citations> <citation type="doi">10.1093/nar/gkv1248</citation> + <citation type="doi">10.1093/molbev/msx148</citation> </citations> </xml> <xml name="requirements"> @@ -16,119 +18,7 @@ </xml> <xml name="data_manager_params"> <param name="test" type="hidden" value="false" /> - <param name="diamond_database" type="boolean" truevalue="" falsevalue="-D" checked="true" label="Install the diamond database"/> - <param argument="dbs" type="select" multiple="true" label="eggNOG HMM databases to download. If none are selected only diamond can be used'"> - <option value="arch" selected="true">Archea arch_1 (arch)</option> - <option value="bact" selected="true">Bacteria bact_50 (bact)</option> - <option value="euk" selected="true">Eukaryote euk_500 (euk)</option> - <option value="NOG" selected="true">All organisms (NOG)</option> - <option value="aciNOG">Acidobacteria (aciNOG)</option> - <option value="acidNOG">Acidobacteriia (acidNOG)</option> - <option value="acoNOG">Aconoidasida (acoNOG)</option> - <option value="actNOG">Actinobacteria (actNOG)</option> - <option value="agaNOG">Agaricales (agaNOG)</option> - <option value="agarNOG">Agaricomycetes (agarNOG)</option> - <option value="apiNOG">Apicomplexa (apiNOG)</option> - <option value="aproNOG">Proteobacteria_alpha (aproNOG)</option> - <option value="aquNOG">Aquificae (aquNOG)</option> - <option value="arNOG">Archaea (arNOG)</option> - <option value="arcNOG">Archaeoglobi (arcNOG)</option> - <option value="artNOG">Arthropoda (artNOG)</option> - <option value="arthNOG">Arthrodermataceae (arthNOG)</option> - <option value="ascNOG">Ascomycota (ascNOG)</option> - <option value="aveNOG">Aves 
(aveNOG)</option> - <option value="bacNOG">Bacilli (bacNOG)</option> - <option value="bactNOG">Bacteria (bactNOG)</option> - <option value="bacteNOG">Bacteroidia (bacteNOG)</option> - <option value="basNOG">Basidiomycota (basNOG)</option> - <option value="bctoNOG">Bacteroidetes (bctoNOG)</option> - <option value="biNOG">Bilateria (biNOG)</option> - <option value="bproNOG">Proteobacteria_beta (bproNOG)</option> - <option value="braNOG">Brassicales (braNOG)</option> - <option value="carNOG">Carnivora (carNOG)</option> - <option value="chaNOG">Chaetomiaceae (chaNOG)</option> - <option value="chlNOG">Chlorobi (chlNOG)</option> - <option value="chlaNOG">Chlamydiae (chlaNOG)</option> - <option value="chloNOG">Chloroflexi (chloNOG)</option> - <option value="chlorNOG">Chloroflexi (chlorNOG)</option> - <option value="chloroNOG">Chlorophyta (chloroNOG)</option> - <option value="chorNOG">Chordata (chorNOG)</option> - <option value="chrNOG">Chromadorea (chrNOG)</option> - <option value="cloNOG">Clostridia (cloNOG)</option> - <option value="cocNOG">Coccidia (cocNOG)</option> - <option value="creNOG">Crenarchaeota (creNOG)</option> - <option value="cryNOG">Cryptosporidiidae (cryNOG)</option> - <option value="cyaNOG">Cyanobacteria (cyaNOG)</option> - <option value="cytNOG">Cytophagia (cytNOG)</option> - <option value="debNOG">Debaryomycetaceae (debNOG)</option> - <option value="defNOG">Deferribacteres (defNOG)</option> - <option value="dehNOG">Dehalococcoidetes (dehNOG)</option> - <option value="deiNOG">Deinococcusthermus (deiNOG)</option> - <option value="delNOG">delta/epsilon (delNOG)</option> - <option value="dipNOG">Diptera (dipNOG)</option> - <option value="dotNOG">Dothideomycetes (dotNOG)</option> - <option value="dproNOG">Proteobacteria_delta (dproNOG)</option> - <option value="droNOG">Drosophilidae (droNOG)</option> - <option value="eproNOG">Proteobacteria_epsilon (eproNOG)</option> - <option value="eryNOG">Erysipelotrichi (eryNOG)</option> - <option 
value="euNOG">Eukaryotes (euNOG)</option> - <option value="eurNOG">Euryarchaeota (eurNOG)</option> - <option value="euroNOG">Eurotiomycetes (euroNOG)</option> - <option value="eurotNOG">Eurotiales (eurotNOG)</option> - <option value="fiNOG">Fishes (fiNOG)</option> - <option value="firmNOG">Firmicutes (firmNOG)</option> - <option value="flaNOG">Flavobacteriia (flaNOG)</option> - <option value="fuNOG">Fungi (fuNOG)</option> - <option value="fusoNOG">Fusobacteria (fusoNOG)</option> - <option value="gproNOG">Proteobacteria_gamma (gproNOG)</option> - <option value="haeNOG">Haemosporida (haeNOG)</option> - <option value="halNOG">Halobacteria (halNOG)</option> - <option value="homNOG">Hominidae (homNOG)</option> - <option value="hymNOG">Hymenoptera (hymNOG)</option> - <option value="hypNOG">Hypocreales (hypNOG)</option> - <option value="inNOG">Insects (inNOG)</option> - <option value="kinNOG">Kinetoplastida (kinNOG)</option> - <option value="lepNOG">Lepidoptera (lepNOG)</option> - <option value="lilNOG">Liliopsida (lilNOG)</option> - <option value="maNOG">Mammals (maNOG)</option> - <option value="magNOG">Magnaporthales (magNOG)</option> - <option value="meNOG">Animals (meNOG)</option> - <option value="metNOG">Methanobacteria (metNOG)</option> - <option value="methNOG">Methanococci (methNOG)</option> - <option value="methaNOG">Methanomicrobia (methaNOG)</option> - <option value="necNOG">Nectriaceae (necNOG)</option> - <option value="negNOG">Negativicutes (negNOG)</option> - <option value="nemNOG">Nematodes (nemNOG)</option> - <option value="onyNOG">Onygenales (onyNOG)</option> - <option value="opiNOG">Opisthokonts (opiNOG)</option> - <option value="perNOG">Peronosporales (perNOG)</option> - <option value="plaNOG">Planctomycetes (plaNOG)</option> - <option value="pleNOG">Pleosporales (pleNOG)</option> - <option value="poaNOG">Poales (poaNOG)</option> - <option value="prNOG">Primates (prNOG)</option> - <option value="proNOG">Proteobacteria (proNOG)</option> - <option 
value="rhaNOG">Rhabditida (rhaNOG)</option> - <option value="roNOG">Rodents (roNOG)</option> - <option value="sacNOG">Saccharomycetaceae (sacNOG)</option> - <option value="saccNOG">Saccharomycetes (saccNOG)</option> - <option value="sorNOG">Sordariales (sorNOG)</option> - <option value="sordNOG">Sordariomycetes (sordNOG)</option> - <option value="sphNOG">Sphingobacteriia (sphNOG)</option> - <option value="spiNOG">Spirochaetes (spiNOG)</option> - <option value="spriNOG">Supraprimates (spriNOG)</option> - <option value="strNOG">Streptophyta (strNOG)</option> - <option value="synNOG">Synergistetes (synNOG)</option> - <option value="tenNOG">Tenericutes (tenNOG)</option> - <option value="thaNOG">Thaumarchaeota (thaNOG)</option> - <option value="theNOG">Thermoplasmata (theNOG)</option> - <option value="therNOG">Thermotogae (therNOG)</option> - <option value="thermNOG">Thermococci (thermNOG)</option> - <option value="treNOG">Tremellales (treNOG)</option> - <option value="veNOG">Vertebrates (veNOG)</option> - <option value="verNOG">Verrucomicrobia (verNOG)</option> - <option value="verrNOG">Verrucomicrobiae (verrNOG)</option> - <option value="virNOG">Viridiplantae (virNOG)</option> - </param> + <param name="diamond_database" type="boolean" truevalue="" falsevalue="-D" checked="true" label="Install the diamond database" help="Takes ~9Gb, you most probably want it."/> </xml> <xml name="data_manager_outputs"> <outputs> @@ -141,33 +31,26 @@ #import os.path #set $install_path = $os.path.join($os.path.dirname($__tool_directory__), 'test-data/cached_locally') #end if -#if $dbs: -#set $eggnogdbs = ' '.join(str($dbs).split(',')) -#else -#set $eggnogdbs = 'none' -#end if mkdir -p '${install_path}' && -download_eggnog_data.py - $diamond_database -y -q +download_eggnog_data.py + $diamond_database -y -q #if $test == 'true' -s #end if - --data_dir '$install_path' - $eggnogdbs && -python '${__tool_directory__}/data_manager_eggnog.py' --config_file '$out_file' --install_path 
'$install_path' --dbs '$dbs' + --data_dir '$install_path' && +python '${__tool_directory__}/data_manager_eggnog.py' --config_file '$out_file' --install_path '$install_path' ]]></token> <xml name="data_manager_test"> - <!-- <test> <param name="test" value="true"/> - <param name="diamond_database" value="false"/> - <param name="dbs" value="thaNOG"/> + <param name="diamond_database" value="true"/> + <yield /> <output name="out_file"> <assert_contents> <has_text text="eggnog_mapper_db" /> + <has_text text="@EGGNOG_DB_VERSION@" /> </assert_contents> </output> </test> - --> </xml> </macros>
--- a/data_manager_conf.xml Mon Nov 11 11:49:16 2019 -0500 +++ b/data_manager_conf.xml Fri Sep 04 21:38:59 2020 +0000 @@ -14,14 +14,5 @@ </column> </output> </data_table> - <data_table name="eggnog_mapper_hmm_dbs"> <!-- Defines a Data Table to be modified. --> - <output> <!-- Handle the output of the Data Manager Tool --> - <column name="key" /> <!-- columns that are going to be specified by the Data Manager Tool --> - <column name="db_version" /> <!-- columns that are going to be specified by the Data Manager Tool --> - <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> - <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> - <column name="path" /> <!-- columns that are going to be specified by the Data Manager Tool --> - </output> - </data_table> </data_manager> </data_managers>
--- a/test-data/cached_locally/eggnog_mapper_db.loc Mon Nov 11 11:49:16 2019 -0500 +++ b/test-data/cached_locally/eggnog_mapper_db.loc Fri Sep 04 21:38:59 2020 +0000 @@ -1,2 +1,2 @@ #value name path -4.5 eggNOG_4.5 ${__HERE__} +5.0 eggNOG_5.0 ${__HERE__}
--- a/test-data/cached_locally/eggnog_mapper_hmm_dbs.loc Mon Nov 11 11:49:16 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -#key db_version value name path -4.5_ENOG411CB2I 4.5 ENOG411CB2I ENOG411CB2I ${__HERE__}/hmmdb_levels/ENOG411CB2I/ENOG411CB2I
--- a/tool-data/eggnog_mapper_db.loc.sample Mon Nov 11 11:49:16 2019 -0500 +++ b/tool-data/eggnog_mapper_db.loc.sample Fri Sep 04 21:38:59 2020 +0000 @@ -1,25 +1,22 @@ #This is a sample file distributed with Galaxy that enables tools -#to use a directory of eggnog_mapper data files. +#to use a directory of eggnog_mapper data files. # # eggnog-mapper requires the following files to be installed in the data directory: # https://github.com/jhcepas/eggnog-mapper/blob/master/data/og2level.tsv.gz -# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog.db.gz -# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/OG_fasta.tar.gz -# In addition individual HMM DBs can be installed from: -# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/hmmdb_levels/ +# http://eggnog5.embl.de/download/emapperdb-5.0.0/eggnog.db.gz # A complete diamond database is available from: -# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog_proteins.dmnd.gz +# http://eggnog5.embl.de/download/emapperdb-5.0.0/eggnog_proteins.dmnd.gz # -# The python script download_eggnog_data.py, +# The python script download_eggnog_data.py, # included with eggnog_mapper, can be used to download the files to the correct directory # # The near-equivalence of columns "value" and "db" is needed for the tests to work, # and for the setting of --data_dir to the parent directory of eggnog.db -# The complicated eggNOG database structure makes passing custom HMM databases somewhat tricky. +# The complicated eggNOG database structure makes passing custom HMM databases somewhat tricky. 
# See test-data/cached_locally/eggnog_mapper.loc for how this was done with the included test databases -# In all other cases, when the appropriate HMM database (for example, "thaNOG") was downloaded from eggnogdb.embl.de, +# In all other cases, when the appropriate HMM database (for example, "thaNOG") was downloaded from eggnogdb.embl.de, # value and db should be the same (in the example, both should be "thaNOG") # # -#db_version name path -#4.5.1 eggnog_4.5.1 /path/to/directory/that/contains/eggnog.db +#db_version name path +#5.0 eggnog_5.0 /path/to/directory/that/contains/eggnog.db
--- a/tool-data/eggnog_mapper_hmm_dbs.loc.sample Mon Nov 11 11:49:16 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of eggnog_mapper data files. -# -# eggnog-mapper requires the following files to be installed in the data directory: -# https://github.com/jhcepas/eggnog-mapper/blob/master/data/og2level.tsv.gz -# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog.db.gz -# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/OG_fasta.tar.gz -# In addition individual HMM DBs can be installed from: -# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/hmmdb_levels/ -# A complete diamond database is available from: -# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog_proteins.dmnd.gz -# -# The python script download_eggnog_data.py, -# included with eggnog_mapper, can be used to download the files to the correct directory -# -# The near-equivalence of columns "value" and "db" is needed for the tests to work, -# and for the setting of --data_dir to the parent directory of eggnog.db -# The complicated eggNOG database structure makes passing custom HMM databases somewhat tricky. -# See test-data/cached_locally/eggnog_mapper.loc for how this was done with the included test databases -# In all other cases, when the appropriate HMM database (for example, "thaNOG") was downloaded from eggnogdb.embl.de, -# value and db should be the same (in the example, both should be "thaNOG") -# -# -#key db_version value name path -#4.5.1_NOG 4.5.1 NOG Full eggNOG database (NOG) -#4.5.1_euk 4.5.1 euk Eukaryotes (euk) -#4.5.1_aproNOG 4.5.1 aproNOG Proteobacteria_alpha (aproNOG) -#4.5.1_aproNOG 4.5.1 ENOG411CB2I ENOG411CB2I (custom) /path/to/custom/hmmdb/ENOG411CB2I
--- a/tool_data_table_conf.xml.sample Mon Nov 11 11:49:16 2019 -0500 +++ b/tool_data_table_conf.xml.sample Fri Sep 04 21:38:59 2020 +0000 @@ -4,8 +4,4 @@ <columns>value,name,path</columns> <file path="tool-data/eggnog_mapper_db.loc" /> </table> - <table name="eggnog_mapper_hmm_dbs" comment_char="#" allow_duplicate_entries="False"> - <columns>key,db_version,value,name,path</columns> - <file path="tool-data/eggnog_mapper_hmm_dbs.loc" /> - </table> </tables>
--- a/tool_data_table_conf.xml.test Mon Nov 11 11:49:16 2019 -0500 +++ b/tool_data_table_conf.xml.test Fri Sep 04 21:38:59 2020 +0000 @@ -4,8 +4,4 @@ <columns>value,name,path</columns> <file path="${__HERE__}/test-data/cached_locally/eggnog_mapper_db.loc" /> </table> - <table name="eggnog_mapper_hmm_dbs" comment_char="#"> - <columns>key,db_version,value,name,path</columns> - <file path="${__HERE__}/test-data/cached_locally/eggnog_mapper_hmm_dbs.loc" /> - </table> </tables>