Repository 'data_manager_qiime_database_downloader'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/data_manager_qiime_database_downloader

Changeset 0:f8608fddfb23 (2017-05-15)
Next changeset 1:9e86c09a6cae (2017-06-23)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_qiime_database_downloader commit 4934eb34300b5fa54d62d8b67e5b6e989e963ac9
added:
data_manager/data_manager_qiime_download.py
data_manager/data_manager_qiime_download.xml
data_manager_conf.xml
tool-data/qiime_rep_set.loc.sample
tool-data/qiime_rep_set_aligned.loc.sample
tool-data/qiime_taxonomy.loc.sample
tool-data/qiime_trees.loc.sample
tool_data_table_conf.xml.sample
diff -r 000000000000 -r f8608fddfb23 data_manager/data_manager_qiime_download.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_qiime_download.py Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,378 @@
+#!/usr/bin/env python
+# Data manager for reference data for the QIIME Galaxy tools
+
+import argparse
+import ftplib
+import json
+import os
+import tarfile
+import zipfile
+
+import requests
+
+
+protocol = {
+    "unite": "http",
+    "greengenes": "ftp",
+    "silva": "http",
+    "img": "ftp"
+}
+baseUrl = {
+    "unite": "http://unite.ut.ee/sh_files/sh_qiime_release_",
+    "greengenes": "greengenes.microbio.me",
+    "silva": "http://www.arb-silva.de/fileadmin/silva_databases/qiime/Silva_",
+    "img": "ftp.microbio.me"
+}
+ftp_dir = {
+    "greengenes": "/greengenes_release/gg_",
+    "img": ""
+}
+ftp_file_prefix = {
+    "greengenes": "gg_",
+    "img": ""
+}
+ftp_file_suffix = {
+    "greengenes": "_otus",
+    "img": ""
+}
+extension = {
+    "unite": "zip",
+    "greengenes": "tar.gz",
+    "silva": {
+        "104_release": "tgz",
+        "108_release": "tgz",
+        "108_release_curated": "tgz",
+        "111_release": "tgz",
+        "119_consensus_majority_taxonomy": "zip",
+        "119_release": "zip",
+        "119_release_aligned_rep_files": "tar.gz",
+        "123_release": "zip",
+        "128_release": "tgz"},
+    "img": "tgz"
+}
+filetypes = ["rep_set", "rep_set_aligned", "taxonomy", "trees"]
+
+
+# Utility functions for interacting with Galaxy JSON
+def read_input_json(jsonfile):
+    """Read the JSON supplied from the data manager tool
+
+    Returns a tuple (param_dict,extra_files_path)
+
+    'param_dict' is an arbitrary dictionary of parameters
+    input into the tool; 'extra_files_path' is the path
+    to a directory where output files must be put for the
+    receiving data manager to pick them up.
+
+    NB the directory pointed to by 'extra_files_path'
+    doesn't exist initially, it is the job of the script
+    to create it if necessary.
+
+    """
+    params = json.loads(open(jsonfile).read())
+    return (params['param_dict'],
+            params['output_data'][0]['extra_files_path'])
+
+
+# Utility functions for creating data table dictionaries
+#
+# Example usage:
+# >>> d = create_data_tables_dict()
+# >>> add_data_table(d,'my_data')
+# >>> add_data_table_entry(dict(dbkey='hg19',value='human'))
+# >>> add_data_table_entry(dict(dbkey='mm9',value='mouse'))
+# >>> print str(json.dumps(d))
+def create_data_tables_dict():
+    """Return a dictionary for storing data table information
+
+    Returns a dictionary that can be used with 'add_data_table'
+    and 'add_data_table_entry' to store information about a
+    data table. It can be converted to JSON to be sent back to
+    the data manager.
+
+    """
+    d = {}
+    d['data_tables'] = {}
+    return d
+
+
+def add_data_table(d, table):
+    """Add a data table to the data tables dictionary
+
+    Creates a placeholder for a data table called 'table'.
+
+    """
+    d['data_tables'][table] = []
+
+
+def add_data_table_entry(d, table, entry):
+    """Add an entry to a data table
+
+    Appends an entry to the data table 'table'. 'entry'
+    should be a dictionary where the keys are the names of
+    columns in the data table.
+
+    Raises an exception if the named data table doesn't
+    exist.
+
+    """
+    try:
+        d['data_tables'][table].append(entry)
+    except KeyError:
+        raise Exception("add_data_table_entry: no table '%s'" % table)
+
+
+def get_ftp_file(ftp, filename):
+    """
+    """
+    try:
+        ftp.retrbinary("RETR " + filename, open(filename, 'wb').write)
+    except:
+        print("Error")
+
+
+def download_archive(db, version, ext):
+    """
+
+    """
+    filepath = "%s_%s.%s" % (db, version, ext)
+    if protocol[db] == "http":
+        url = "%s%s.%s" % (baseUrl[db], version, ext)
+        r = requests.get(url, stream=True)
+        r.raise_for_status()
+        with open(filepath, "wb") as fd:
+            for chunk in r.iter_content(chunk_size=128):
+                fd.write(chunk)
+    elif protocol[db] == "ftp":
+        ftp = ftplib.FTP(baseUrl[db])
+        ftp
[...]
+                content_name_prefix,
+                data_tables,
+                target_dir,
+                filetype)
+        else:
+            move_file(
+                content_path,
+                content_filename_prefix,
+                content_name_prefix,
+                data_tables,
+                target_dir,
+                filetype)
+
+
+def move_files(archive_content_path, filename_prefix, name_prefix, data_tables, target_dir, db, version):
+    """
+    """
+    for filetype in filetypes:
+        if filetype == "rep_set_aligned":
+            if db == "greengenes" and version == "12_10":
+                continue
+        filetype_target_dir = os.path.join(
+            target_dir,
+            filetype)
+        filetype_path = os.path.join(
+            archive_content_path,
+            filetype)
+        move_dir_content(
+            filetype_path,
+            filename_prefix,
+            name_prefix,
+            data_tables,
+            filetype_target_dir,
+            filetype)
+
+
+def download_db(data_tables, db, version, target_dir):
+    """Download QIIME database
+
+    Creates references to the specified file(s) on the Galaxy
+    server in the appropriate data table (determined from the
+    file extension).
+
+    The 'data_tables' dictionary should have been created using
+    the 'create_data_tables_dict' and 'add_data_table' functions.
+
+    Arguments:
+      data_tables: a dictionary containing the data table info
+      db: name of the database
+      version: version of the database
+      table_name: name of the table
+      target_dir: directory to put copy or link to the data file
+
+    """
+    ext = extension[db]
+    if db == "silva":
+        ext = ext[version]
+
+    print("Download archive")
+    filepath = download_archive(db, version, ext)
+
+    print("Extract archive %s" % filepath)
+    archive_content_path = extract_archive(filepath, ext, db)
+
+    print("Moving file from %s" % archive_content_path)
+    filename_prefix = "%s_%s" % (db, version)
+    name_prefix = "%s (%s)" % (db, version)
+    if db == "greengenes" or db == "silva":
+        move_files(
+            archive_content_path,
+            filename_prefix,
+            name_prefix,
+            data_tables,
+            target_dir,
+            db,
+            version)
+    elif db == "unite":
+        move_unite_files(
+            archive_content_path,
+            filename_prefix,
+            name_prefix,
+            data_tables,
+            target_dir)
+
+
+if __name__ == "__main__":
+    print("Starting...")
+
+    # Read command line
+    parser = argparse.ArgumentParser(
+        description='Download QIIME reference database')
+    parser.add_argument('--database', help="Database name")
+    parser.add_argument('--version', help="Database version")
+    parser.add_argument('--jsonfile', help="Output JSON file")
+    args = parser.parse_args()
+
+    jsonfile = args.jsonfile
+
+    # Read the input JSON
+    params, target_dir = read_input_json(jsonfile)
+
+    # Make the target directory
+    print("Making %s" % target_dir)
+    os.mkdir(target_dir)
+    os.mkdir(os.path.join(target_dir, "rep_set"))
+    os.mkdir(os.path.join(target_dir, "rep_set_aligned"))
+    os.mkdir(os.path.join(target_dir, "taxonomy"))
+    os.mkdir(os.path.join(target_dir, "trees"))
+
+    # Set up data tables dictionary
+    data_tables = create_data_tables_dict()
+    add_data_table(data_tables, "qiime_rep_set")
+    add_data_table(data_tables, "qiime_rep_set_aligned")
+    add_data_table(data_tables, "qiime_taxonomy")
+    add_data_table(data_tables, "qiime_trees")
+
+    # Fetch data from specified data sources
+    download_db(
+        data_tables,
+        args.database,
+        args.version,
+        target_dir)
+
+    # Write output JSON
+    print("Outputting JSON")
+    print(str(json.dumps(data_tables)))
+    with open(jsonfile, 'w') as out:
+        json.dump(data_tables, out)
+    print("Done.")
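Note: the script above is driven entirely by the JSON hand-off Galaxy uses for data manager jobs. read_input_json() pulls param_dict and extra_files_path out of the file passed via --jsonfile, and the same file is overwritten at the end with the data_tables dictionary. The sketch below walks through that round trip outside Galaxy; the file locations and the Greengenes entry values are illustrative assumptions, not output produced by the tool.

# Sketch of the data manager JSON round trip (standalone, outside Galaxy).
# All paths and entry values below are illustrative assumptions.
import json
import os
import tempfile

workdir = tempfile.mkdtemp()
jsonfile = os.path.join(workdir, "galaxy.json")

# What Galaxy writes before the job starts: the tool parameters plus the
# directory where the data manager must place its files.
galaxy_input = {
    "param_dict": {"database": "greengenes", "version": "13_8"},
    "output_data": [{"extra_files_path": os.path.join(workdir, "extra_files")}],
}
with open(jsonfile, "w") as handle:
    json.dump(galaxy_input, handle)

# What the script writes back: one entry per downloaded file, keyed by the
# columns declared in tool_data_table_conf.xml.sample (value, name, dbkey, path).
data_tables = {"data_tables": {"qiime_rep_set": [{
    "value": "greengenes_13_8_rep_set_97_otus",   # invented example values
    "name": "greengenes (13_8) - 97_otus",
    "dbkey": "greengenes_13_8",
    "path": "rep_set/97_otus.fasta",
}]}}
with open(jsonfile, "w") as handle:
    json.dump(data_tables, handle)
print(json.dumps(data_tables, indent=2))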
diff -r 000000000000 -r f8608fddfb23 data_manager/data_manager_qiime_download.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_qiime_download.xml Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,95 @@
+<tool id="data_manager_qiime_download" name="Download QIIME" version="1.9.1" tool_type="manage_data">
+    <description>reference databases</description>
+    <requirements>
+        <requirement type="package" version="2.13.0">requests</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range=":-1" level="fatal" description="Error: Cannot open file" />
+        <exit_code range="1:" level="fatal" description="Error" />
+    </stdio>
+    <command><![CDATA[
+        python '$__tool_directory__/data_manager_qiime_download.py'
+            --database '$db.database'
+            --version '$db.version'
+            --jsonfile '${out_file}'
+    ]]></command>
+    <inputs>
+        <conditional name="db">
+            <param name="database" type="select" label="Database to download">
+                <option value="greengenes" selected="true">Greengenes OTUs</option>
+                <option value="silva">SILVA OTUs (16S/18S)</option>
+                <option value="unite">UNITE OTUs (ITS)</option>
+                <!--<option value="img">IMG/QIIME reference protein sequences</option>-->
+            </param>
+            <when value="greengenes">
+                <param name="version" type="select" label="Version of Greengenes">
+                    <option value="13_8" selected="true">13.8</option>
+                    <option value="13_5">13.5</option>
+                    <option value="12_10">12.10</option>
+                </param>
+            </when>
+            <when value="silva">
+                <param name="version" type="select" label="Version of SILVA OTUs">
+                    <option value="128_release" selected="true">128</option>
+                    <option value="123_release">123</option>
+                    <!--<option value="119_release_aligned_rep_files">119 (aligned rep)</option>
+                    <option value="119_release">119</option>
+                    <option value="119_consensus_majority_taxonomy">119 (consensus majority taxonomy)</option>
+                    <option value="111_release">111</option>
+                    <option value="108_release">108</option>
+                    <option value="108_release_curated">108 (curated)</option>
+                    <option value="104_release">104</option>-->
+                </param>
+            </when>
+            <when value="unite">
+                <param name="version" type="select" label="Version of UNITE OTUs">
+                    <option value="20.11.2016">7.1 (2016-11-20, with singletons set as RefS)</option>
+                    <option value="s_20.11.2016">7.1 (2016-11-20, with global and 97% singletons)</option>
+                    <option value="22.08.2016">7.1 (2016-08-22, with singletons set as RefS)</option>
+                    <option value="s_22.08.2016">7.1 (2016-08-22, with global and 97% singletons)</option>
+                    <option value="31.01.2016">7.0 (2016-01-31, with singletons set as RefS)</option> 
+                    <option value="s_31.01.2016">7.0 (2016-01-31, with global and 97% singletons)</option> 
+                    <option value="01.08.2015">7.0 (2015-08-01, with singletons set as RefS)</option>
+                    <option value="s_01.08.2015">7.0 (2015-08-01, with global and 97% singletons)</option>
+                    <option value="02.03.2015">7.0 (2015-03-02, with singletons set as RefS)</option>
+                    <option value="s_02.03.2015">7.0 (2015-03-02, with global and 97% singletons)</option>
+                    <option value="30.12.2014">6.0 (2014-12-30, with singletons set as RefS)</option>
+                    <option value="s_30.12.2014">6.0 (2014-12-30, with global and 97% singletons)</option>
+                    <option value="10.09.2014">6.0 (2014-09-10, with singletons set as RefS)</option>
+                    <option value="s_10.09.2014">6.0 (2014-09-10, with global and 97% singletons)</option>
+                    <option value="04.07.2014">6.0 (2014-07-04, with singletons set as RefS)</option>
+                    <option value="s_04.07.2014">6.0 (2014-07-04, with global and 97% singletons)</option>
+                    <option value="13.05.2014">6.0 (2014-05-13, with singletons set as RefS)</option> 
+                    <option value="s_13.05.2014">6.0 (2014-05-13, with global and 97% singletons)</option>
+                    <option value="09.02.2014">6.0 (2014-02-09, with singletons set as RefS)</option>
+                    <option value="s_09.02.2014">6.0 (2014-02-09, with global and 97% singletons)</option>
+                    <option value="15.01.2014">6.0 (2014-01-15, with singletons set as RefS)</option>
+                    <option value="s_15.01.2014">6.0 (2014-01-15, with global and 97% singletons)</option>
+                    <option value="19.12.2013">6.0 (2013-12-19, with singletons set as RefS)</option>
+                    <option value="s_19.12.2013">6.0 (2013-12-19, with global and 97% singletons)</option>
+                    <option value="08.12.2013">6.0 (2013-12-08, with singletons set as RefS)</option>
+                    <option value="s_08.12.2013">6.0 (2013-12-08, with global and 97% singletons)</option>
+                    <option value="15.10.2013">5.0 (2013-10-15, with singletons set as RefS)</option>   
+                    <option value="s_15.10.2013">5.0 (2013-10-15, with global and 97% singletons)</option>
+                </param>
+            </when>
+            <!--<when value="img">
+                <param name="version" type="select" label="Version of IMG/QIIME reference protein sequences">
+                    <option value="img-qiime-25oct2012" selected="true">img-qiime-25oct2012</option>
+                </param>
+            </when>-->
+        </conditional>
+    </inputs>
+    <outputs>
+           <data name="out_file" format="data_manager_json" label="${tool.name}"/>
+    </outputs>
+    <tests>
+    </tests>
+    <help><![CDATA[
+This tool downloads the reference databases for QIIME
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/nmeth.f.303</citation>
+        <yield />
+    </citations>
+</tool>
\ No newline at end of file
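Note: the Cheetah variables in the command block above expand to an ordinary command line, so the data manager can also be exercised by hand. A rough sketch of that invocation, assuming the Greengenes 13_8 selection and an input JSON prepared as in the earlier example (the file names here are placeholders):

# Hypothetical manual invocation mirroring the <command> template above;
# galaxy.json must already exist and follow the layout shown earlier.
import subprocess

subprocess.check_call([
    "python", "data_manager/data_manager_qiime_download.py",
    "--database", "greengenes",
    "--version", "13_8",
    "--jsonfile", "galaxy.json",
])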
diff -r 000000000000 -r f8608fddfb23 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,65 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_qiime_download.xml" id="data_manager_qiime_download" >
+        <data_table name="qiime_rep_set">  <!-- Defines a Data Table to be modified. -->
+            <output> <!-- Handle the output of the Data Manager Tool -->
+                <column name="value" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="path" output_ref="out_file" >
+                    <move type="file">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">qiime_rep_set/${dbkey}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/qiime_rep_set/${dbkey}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+        <data_table name="qiime_rep_set_aligned">  <!-- Defines a Data Table to be modified. -->
+            <output> <!-- Handle the output of the Data Manager Tool -->
+                <column name="value" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="path" output_ref="out_file" >
+                    <move type="file">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">qiime_rep_set_aligned/${dbkey}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/qiime_rep_set_aligned/${dbkey}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+        <data_table name="qiime_taxonomy">  <!-- Defines a Data Table to be modified. -->
+            <output> <!-- Handle the output of the Data Manager Tool -->
+                <column name="value" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="path" output_ref="out_file" >
+                    <move type="file">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">qiime_taxonomy/${dbkey}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/qiime_taxonomy/${dbkey}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+        <data_table name="qiime_trees">  <!-- Defines a Data Table to be modified. -->
+            <output> <!-- Handle the output of the Data Manager Tool -->
+                <column name="value" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="path" output_ref="out_file" >
+                    <move type="file">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">qiime_trees/${dbkey}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/qiime_trees/${dbkey}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
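Note: each move/value_translation pair above relocates the downloaded file under GALAXY_DATA_MANAGER_DATA_PATH and rewrites the path column before the row is appended to the corresponding .loc file. The snippet below is only an approximation of that translation, not Galaxy's own code; the data path and entry values are assumed examples.

# Approximation of the path handling for the qiime_rep_set table.
# GALAXY_DATA_MANAGER_DATA_PATH and the entry values are assumed examples.
import os

GALAXY_DATA_MANAGER_DATA_PATH = "/galaxy/tool-data"
entry = {"dbkey": "greengenes_13_8", "path": "rep_set/97_otus.fasta"}

# The <move> element drops the file into this directory ...
target_dir = os.path.join(GALAXY_DATA_MANAGER_DATA_PATH, "qiime_rep_set", entry["dbkey"])
# ... and the <value_translation> elements rewrite the path column to its absolute form.
entry["path"] = os.path.abspath(target_dir)
print(entry["path"])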
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_rep_set.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/qiime_rep_set.loc.sample Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,6 @@
+#<unique_id>  <name>  <database_caption>  <fasta_file_path>
+#
+#For each reference database, you need to download the fasta file in qiime path
+#
+#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html
+#
\ No newline at end of file
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_rep_set_aligned.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/qiime_rep_set_aligned.loc.sample Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,6 @@
+#<unique_id>  <name>  <database_caption>  <fasta_file_path>
+#
+#For each reference database, you need to download the fasta file in qiime path
+#
+#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html
+#
\ No newline at end of file
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_taxonomy.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/qiime_taxonomy.loc.sample Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,6 @@
+#<unique_id>  <name>  <database_caption>  <fasta_file_path>
+#
+#For each reference database, you need to download the fasta file in qiime path
+#
+#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html
+#
\ No newline at end of file
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_trees.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/qiime_trees.loc.sample Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,6 @@
+#<unique_id>  <name>  <database_caption>  <fasta_file_path>
+#
+#For each reference database, you need to download the fasta file in qiime path
+#
+#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html
+#
\ No newline at end of file
diff -r 000000000000 -r f8608fddfb23 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,18 @@
+<tables>
+    <table name="qiime_rep_set" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/qiime_rep_set.loc" />
+    </table>
+    <table name="qiime_rep_set_aligned" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/qiime_rep_set_aligned.loc" />
+    </table>
+    <table name="qiime_taxonomy" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/qiime_taxonomy.loc" />
+    </table>
+    <table name="qiime_trees" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/qiime_trees.loc" />
+    </table>
+</tables>
\ No newline at end of file
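Note: each table above is backed by a tab-separated .loc file with the four columns declared in its <columns> element (value, name, dbkey, path). As an illustration, a populated qiime_rep_set.loc row could be read as below; the row contents are invented for the example.

# Parse an illustrative qiime_rep_set.loc row; the values are invented.
row = ("greengenes_13_8_rep_set_97_otus\tgreengenes (13_8) - 97_otus\t"
       "greengenes_13_8\t/galaxy/tool-data/qiime_rep_set/greengenes_13_8")
value, name, dbkey, path = row.split("\t")
print(value, dbkey, path)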