Next changeset 1:9e86c09a6cae (2017-06-23) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_qiime_database_downloader commit 4934eb34300b5fa54d62d8b67e5b6e989e963ac9 |
added:
data_manager/data_manager_qiime_download.py data_manager/data_manager_qiime_download.xml data_manager_conf.xml tool-data/qiime_rep_set.loc.sample tool-data/qiime_rep_set_aligned.loc.sample tool-data/qiime_taxonomy.loc.sample tool-data/qiime_trees.loc.sample tool_data_table_conf.xml.sample |
b |
diff -r 000000000000 -r f8608fddfb23 data_manager/data_manager_qiime_download.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_qiime_download.py Mon May 15 11:08:43 2017 -0400 |
[ |
b'@@ -0,0 +1,378 @@\n+#!/usr/bin/env python\n+# Data manager for reference data for the QIIME Galaxy tools\n+\n+import argparse\n+import ftplib\n+import json\n+import os\n+import tarfile\n+import zipfile\n+\n+import requests\n+\n+\n+protocol = {\n+ "unite": "http",\n+ "greengenes": "ftp",\n+ "silva": "http",\n+ "img": "ftp"\n+}\n+baseUrl = {\n+ "unite": "http://unite.ut.ee/sh_files/sh_qiime_release_",\n+ "greengenes": "greengenes.microbio.me",\n+ "silva": "http://www.arb-silva.de/fileadmin/silva_databases/qiime/Silva_",\n+ "img": "ftp.microbio.me"\n+}\n+ftp_dir = {\n+ "greengenes": "/greengenes_release/gg_",\n+ "img": ""\n+}\n+ftp_file_prefix = {\n+ "greengenes": "gg_",\n+ "img": ""\n+}\n+ftp_file_suffix = {\n+ "greengenes": "_otus",\n+ "img": ""\n+}\n+extension = {\n+ "unite": "zip",\n+ "greengenes": "tar.gz",\n+ "silva": {\n+ "104_release": "tgz",\n+ "108_release": "tgz",\n+ "108_release_curated": "tgz",\n+ "111_release": "tgz",\n+ "119_consensus_majority_taxonomy": "zip",\n+ "119_release": "zip",\n+ "119_release_aligned_rep_files": "tar.gz",\n+ "123_release": "zip",\n+ "128_release": "tgz"},\n+ "img": "tgz"\n+}\n+filetypes = ["rep_set", "rep_set_aligned", "taxonomy", "trees"]\n+\n+\n+# Utility functions for interacting with Galaxy JSON\n+def read_input_json(jsonfile):\n+ """Read the JSON supplied from the data manager tool\n+\n+ Returns a tuple (param_dict,extra_files_path)\n+\n+ \'param_dict\' is an arbitrary dictionary of parameters\n+ input into the tool; \'extra_files_path\' is the path\n+ to a directory where output files must be put for the\n+ receiving data manager to pick them up.\n+\n+ NB the directory pointed to by \'extra_files_path\'\n+ doesn\'t exist initially, it is the job of the script\n+ to create it if necessary.\n+\n+ """\n+ params = json.loads(open(jsonfile).read())\n+ return (params[\'param_dict\'],\n+ params[\'output_data\'][0][\'extra_files_path\'])\n+\n+\n+# Utility functions for creating data table dictionaries\n+#\n+# Example usage:\n+# >>> d = create_data_tables_dict()\n+# >>> add_data_table(d,\'my_data\')\n+# >>> add_data_table_entry(dict(dbkey=\'hg19\',value=\'human\'))\n+# >>> add_data_table_entry(dict(dbkey=\'mm9\',value=\'mouse\'))\n+# >>> print str(json.dumps(d))\n+def create_data_tables_dict():\n+ """Return a dictionary for storing data table information\n+\n+ Returns a dictionary that can be used with \'add_data_table\'\n+ and \'add_data_table_entry\' to store information about a\n+ data table. It can be converted to JSON to be sent back to\n+ the data manager.\n+\n+ """\n+ d = {}\n+ d[\'data_tables\'] = {}\n+ return d\n+\n+\n+def add_data_table(d, table):\n+ """Add a data table to the data tables dictionary\n+\n+ Creates a placeholder for a data table called \'table\'.\n+\n+ """\n+ d[\'data_tables\'][table] = []\n+\n+\n+def add_data_table_entry(d, table, entry):\n+ """Add an entry to a data table\n+\n+ Appends an entry to the data table \'table\'. \'entry\'\n+ should be a dictionary where the keys are the names of\n+ columns in the data table.\n+\n+ Raises an exception if the named data table doesn\'t\n+ exist.\n+\n+ """\n+ try:\n+ d[\'data_tables\'][table].append(entry)\n+ except KeyError:\n+ raise Exception("add_data_table_entry: no table \'%s\'" % table)\n+\n+\n+def get_ftp_file(ftp, filename):\n+ """\n+ """\n+ try:\n+ ftp.retrbinary("RETR " + filename, open(filename, \'wb\').write)\n+ except:\n+ print("Error")\n+\n+\n+def download_archive(db, version, ext):\n+ """\n+\n+ """\n+ filepath = "%s_%s.%s" % (db, version, ext)\n+ if protocol[db] == "http":\n+ url = "%s%s.%s" % (baseUrl[db], version, ext)\n+ r = requests.get(url, stream=True)\n+ r.raise_for_status()\n+ with open(filepath, "wb") as fd:\n+ for chunk in r.iter_content(chunk_size=128):\n+ fd.write(chunk)\n+ elif protocol[db] == "ftp":\n+ ftp = ftplib.FTP(baseUrl[db])\n+ ftp'..b' content_name_prefix,\n+ data_tables,\n+ target_dir,\n+ filetype)\n+ else:\n+ move_file(\n+ content_path,\n+ content_filename_prefix,\n+ content_name_prefix,\n+ data_tables,\n+ target_dir,\n+ filetype)\n+\n+\n+def move_files(archive_content_path, filename_prefix, name_prefix, data_tables, target_dir, db, version):\n+ """\n+ """\n+ for filetype in filetypes:\n+ if filetype == "rep_set_aligned":\n+ if db == "greengenes" and version == "12_10":\n+ continue\n+ filetype_target_dir = os.path.join(\n+ target_dir,\n+ filetype)\n+ filetype_path = os.path.join(\n+ archive_content_path,\n+ filetype)\n+ move_dir_content(\n+ filetype_path,\n+ filename_prefix,\n+ name_prefix,\n+ data_tables,\n+ filetype_target_dir,\n+ filetype)\n+\n+\n+def download_db(data_tables, db, version, target_dir):\n+ """Download QIIME database\n+\n+ Creates references to the specified file(s) on the Galaxy\n+ server in the appropriate data table (determined from the\n+ file extension).\n+\n+ The \'data_tables\' dictionary should have been created using\n+ the \'create_data_tables_dict\' and \'add_data_table\' functions.\n+\n+ Arguments:\n+ data_tables: a dictionary containing the data table info\n+ db: name of the database\n+ version: version of the database\n+ table_name: name of the table\n+ target_dir: directory to put copy or link to the data file\n+\n+ """\n+ ext = extension[db]\n+ if db == "silva":\n+ ext = ext[version]\n+\n+ print("Download archive")\n+ filepath = download_archive(db, version, ext)\n+\n+ print("Extract archive %s" % filepath)\n+ archive_content_path = extract_archive(filepath, ext, db)\n+\n+ print("Moving file from %s" % archive_content_path)\n+ filename_prefix = "%s_%s" % (db, version)\n+ name_prefix = "%s (%s)" % (db, version)\n+ if db == "greengenes" or db == "silva":\n+ move_files(\n+ archive_content_path,\n+ filename_prefix,\n+ name_prefix,\n+ data_tables,\n+ target_dir,\n+ db,\n+ version)\n+ elif db == "unite":\n+ move_unite_files(\n+ archive_content_path,\n+ filename_prefix,\n+ name_prefix,\n+ data_tables,\n+ target_dir)\n+\n+\n+if __name__ == "__main__":\n+ print("Starting...")\n+\n+ # Read command line\n+ parser = argparse.ArgumentParser(\n+ description=\'Download QIIME reference database\')\n+ parser.add_argument(\'--database\', help="Database name")\n+ parser.add_argument(\'--version\', help="Database version")\n+ parser.add_argument(\'--jsonfile\', help="Output JSON file")\n+ args = parser.parse_args()\n+\n+ jsonfile = args.jsonfile\n+\n+ # Read the input JSON\n+ params, target_dir = read_input_json(jsonfile)\n+\n+ # Make the target directory\n+ print("Making %s" % target_dir)\n+ os.mkdir(target_dir)\n+ os.mkdir(os.path.join(target_dir, "rep_set"))\n+ os.mkdir(os.path.join(target_dir, "rep_set_aligned"))\n+ os.mkdir(os.path.join(target_dir, "taxonomy"))\n+ os.mkdir(os.path.join(target_dir, "trees"))\n+\n+ # Set up data tables dictionary\n+ data_tables = create_data_tables_dict()\n+ add_data_table(data_tables, "qiime_rep_set")\n+ add_data_table(data_tables, "qiime_rep_set_aligned")\n+ add_data_table(data_tables, "qiime_taxonomy")\n+ add_data_table(data_tables, "qiime_trees")\n+\n+ # Fetch data from specified data sources\n+ download_db(\n+ data_tables,\n+ args.database,\n+ args.version,\n+ target_dir)\n+\n+ # Write output JSON\n+ print("Outputting JSON")\n+ print(str(json.dumps(data_tables)))\n+ with open(jsonfile, \'w\') as out:\n+ json.dump(data_tables, out)\n+ print("Done.")\n' |
b |
diff -r 000000000000 -r f8608fddfb23 data_manager/data_manager_qiime_download.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_qiime_download.xml Mon May 15 11:08:43 2017 -0400 |
[ |
@@ -0,0 +1,95 @@ +<tool id="data_manager_qiime_download" name="Download QIIME" version="1.9.1" tool_type="manage_data"> + <description>reference databases</description> + <requirements> + <requirement type="package" version="2.13.0">requests</requirement> + </requirements> + <stdio> + <exit_code range=":-1" level="fatal" description="Error: Cannot open file" /> + <exit_code range="1:" level="fatal" description="Error" /> + </stdio> + <command><![CDATA[ + python '$__tool_directory__/data_manager_qiime_download.py' + --database '$db.database' + --version '$db.version' + --jsonfile '${out_file}' + ]]></command> + <inputs> + <conditional name="db"> + <param name="database" type="select" label="Database to download"> + <option value="greengenes" selected="true">Greengenes OTUs</option> + <option value="silva">SILVA OTUs (16S/18S)</option> + <option value="unite">UNITE OTUs (ITS)</option> + <!--<option value="img">IMG/QIIME reference protein sequences</option>--> + </param> + <when value="greengenes"> + <param name="version" type="select" label="Version of Greengenes"> + <option value="13_8" selected="true">13.8</option> + <option value="13_5">13.5</option> + <option value="12_10">12.10</option> + </param> + </when> + <when value="silva"> + <param name="version" type="select" label="Version of SILVA OTUs"> + <option value="128_release" selected="true">128</option> + <option value="123_release">123</option> + <!--<option value="119_release_aligned_rep_files">119 (aligned rep)</option> + <option value="119_release">119</option> + <option value="119_consensus_majority_taxonomy">119 (consensus majority taxonomy)</option> + <option value="111_release">111</option> + <option value="108_release">108</option> + <option value="108_release_curated">108 (curated)</option> + <option value="104_release">104</option>--> + </param> + </when> + <when value="unite"> + <param name="version" type="select" label="Version of UNITE OTUs"> + <option value="20.11.2016">7.1 (2016-11-20, with singletons set as RefS)</option> + <option value="s_20.11.2016">7.1 (2016-11-20, with global and 97% singletons)</option> + <option value="22.08.2016">7.1 (2016-08-22, with singletons set as RefS)</option> + <option value="s_22.08.2016">7.1 (2016-08-22, with global and 97% singletons)</option> + <option value="31.01.2016">7.0 (2016-01-31, with singletons set as RefS)</option> + <option value="s_31.01.2016">7.0 (2016-01-31, with global and 97% singletons)</option> + <option value="01.08.2015">7.0 (2015-08-01, with singletons set as RefS)</option> + <option value="s_01.08.2015">7.0 (2015-08-01, with global and 97% singletons)</option> + <option value="02.03.2015">7.0 (2015-03-02, with singletons set as RefS)</option> + <option value="s_02.03.2015">7.0 (2015-03-02, with global and 97% singletons)</option> + <option value="30.12.2014">6.0 (2014-12-30, with singletons set as RefS)</option> + <option value="s_30.12.2014">6.0 (2014-12-30, with global and 97% singletons)</option> + <option value="10.09.2014">6.0 (2014-09-10, with singletons set as RefS)</option> + <option value="s_10.09.2014">6.0 (2014-09-10, with global and 97% singletons)</option> + <option value="04.07.2014">6.0 (2014-07-04, with singletons set as RefS)</option> + <option value="s_04.07.2014">6.0 (2014-07-04, with global and 97% singletons)</option> + <option value="13.05.2014">6.0 (2014-05-13, with singletons set as RefS)</option> + <option value="s_13.05.2014">6.0 (2014-05-13, with global and 97% singletons)</option> + <option value="09.02.2014">6.0 (2014-02-09, with singletons set as RefS)</option> + <option value="s_09.02.2014">6.0 (2014-02-09, with global and 97% singletons)</option> + <option value="15.01.2014">6.0 (2014-01-15, with singletons set as RefS)</option> + <option value="s_15.01.2014">6.0 (2014-01-15, with global and 97% singletons)</option> + <option value="19.12.2013">6.0 (2013-12-19, with singletons set as RefS)</option> + <option value="s_19.12.2013">6.0 (2013-12-19, with global and 97% singletons)</option> + <option value="08.12.2013">6.0 (2013-12-08, with singletons set as RefS)</option> + <option value="s_08.12.2013">6.0 (2013-12-08, with global and 97% singletons)</option> + <option value="15.10.2013">5.0 (2013-10-15, with singletons set as RefS)</option> + <option value="s_15.10.2013">5.0 (2013-10-15, with global and 97% singletons)</option> + </param> + </when> + <!--<when value="img"> + <param name="version" type="select" label="Version of IMG/QIIME reference protein sequences"> + <option value="img-qiime-25oct2012" selected="true">img-qiime-25oct2012</option> + </param> + </when>--> + </conditional> + </inputs> + <outputs> + <data name="out_file" format="data_manager_json" label="${tool.name}"/> + </outputs> + <tests> + </tests> + <help><![CDATA[ +This tool downloads the reference databases for QIIME + ]]></help> + <citations> + <citation type="doi">10.1038/nmeth.f.303</citation> + <yield /> + </citations> +</tool> \ No newline at end of file |
b |
diff -r 000000000000 -r f8608fddfb23 data_manager_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Mon May 15 11:08:43 2017 -0400 |
b |
@@ -0,0 +1,65 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/data_manager_qiime_download.xml" id="data_manager_qiime_download" > + <data_table name="qiime_rep_set"> <!-- Defines a Data Table to be modified. --> + <output> <!-- Handle the output of the Data Manager Tool --> + <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="path" output_ref="out_file" > + <move type="file"> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">qiime_rep_set/${dbkey}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/qiime_rep_set/${dbkey}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="qiime_rep_set_aligned"> <!-- Defines a Data Table to be modified. --> + <output> <!-- Handle the output of the Data Manager Tool --> + <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="path" output_ref="out_file" > + <move type="file"> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">qiime_rep_set_aligned/${dbkey}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/qiime_rep_set_aligned/${dbkey}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="qiime_taxonomy"> <!-- Defines a Data Table to be modified. --> + <output> <!-- Handle the output of the Data Manager Tool --> + <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="path" output_ref="out_file" > + <move type="file"> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">qiime_taxonomy/${dbkey}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/qiime_taxonomy/${dbkey}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="qiime_trees"> <!-- Defines a Data Table to be modified. --> + <output> <!-- Handle the output of the Data Manager Tool --> + <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="path" output_ref="out_file" > + <move type="file"> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">qiime_trees/${dbkey}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/qiime_trees/${dbkey}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> +</data_managers> |
b |
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_rep_set.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/qiime_rep_set.loc.sample Mon May 15 11:08:43 2017 -0400 |
b |
@@ -0,0 +1,6 @@ +#<unique_id> <name> <database_caption> <fasta_file_path> +# +#For each reference database, you need to download the fasta file in qiime path +# +#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html +# \ No newline at end of file |
b |
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_rep_set_aligned.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/qiime_rep_set_aligned.loc.sample Mon May 15 11:08:43 2017 -0400 |
b |
@@ -0,0 +1,6 @@ +#<unique_id> <name> <database_caption> <fasta_file_path> +# +#For each reference database, you need to download the fasta file in qiime path +# +#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html +# \ No newline at end of file |
b |
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_taxonomy.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/qiime_taxonomy.loc.sample Mon May 15 11:08:43 2017 -0400 |
b |
@@ -0,0 +1,6 @@ +#<unique_id> <name> <database_caption> <fasta_file_path> +# +#For each reference database, you need to download the fasta file in qiime path +# +#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html +# \ No newline at end of file |
b |
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_trees.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/qiime_trees.loc.sample Mon May 15 11:08:43 2017 -0400 |
b |
@@ -0,0 +1,6 @@ +#<unique_id> <name> <database_caption> <fasta_file_path> +# +#For each reference database, you need to download the fasta file in qiime path +# +#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html +# \ No newline at end of file |
b |
diff -r 000000000000 -r f8608fddfb23 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon May 15 11:08:43 2017 -0400 |
b |
@@ -0,0 +1,18 @@ +<tables> + <table name="qiime_rep_set" comment_char="#"> + <columns>value, name, dbkey, path</columns> + <file path="tool-data/qiime_rep_set.loc" /> + </table> + <table name="qiime_rep_set_aligned" comment_char="#"> + <columns>value, name, dbkey, path</columns> + <file path="tool-data/qiime_rep_set_aligned.loc" /> + </table> + <table name="qiime_taxonomy" comment_char="#"> + <columns>value, name, dbkey, path</columns> + <file path="tool-data/qiime_taxonomy.loc" /> + </table> + <table name="qiime_trees" comment_char="#"> + <columns>value, name, dbkey, path</columns> + <file path="tool-data/qiime_trees.loc" /> + </table> +</tables> \ No newline at end of file |