changeset 0:bd342a6a5b0f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dram_database_downloader commit 52575ece22fcdbb6fc3aa3582ea377075aaa4db1
author iuc
date Thu, 01 Sep 2022 17:16:07 +0000
parents
children 40c8fc0a2bb0
files data_manager/data_manager_dram_download.py data_manager/data_manager_dram_download.xml data_manager_conf.xml test-data/dram_databases.loc tool-data/dram_databases.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 7 files changed, 377 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_dram_download.py	Thu Sep 01 17:16:07 2022 +0000
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import os
+import subprocess
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('--kofam_hmm_loc', action='store', dest='kofam_hmm_loc', default=None, help='hmm file for KOfam')
+parser.add_argument('--kofam_ko_list_loc', action='store', dest='kofam_ko_list_loc', default=None, help='KOfam ko list file')
+parser.add_argument('--skip_uniref', action='store', dest='skip_uniref', default='no', help='Flag to Download and process uniref')
+parser.add_argument('--uniref_loc', action='store', dest='uniref_loc', default=None, help='uniref file')
+parser.add_argument('--uniref_version', action='store', dest='uniref_version', type=int, default=90, help='uniref version to download')
+parser.add_argument('--pfam_loc', action='store', dest='pfam_loc', default=None, help='pfam-A full file')
+parser.add_argument('--pfam_hmm_dat', action='store', dest='pfam_hmm_dat', help='pfam hmm .dat file to get PF descriptions')
+parser.add_argument('--dbcan_loc', action='store', dest='dbcan_loc', default=None, help='dbCAN file')
+parser.add_argument('--dbcan_fam_activities', action='store', dest='dbcan_fam_activities', default=None, help='CAZY family activities file')
+parser.add_argument('--dbcan_version', action='store', dest='dbcan_version', type=int, default=10, help='Version of dbCAN to use')
+parser.add_argument('--vogdb_loc', action='store', dest='vogdb_loc', default=None, help='hmm file for vogdb')
+parser.add_argument('--vog_annotations', action='store', dest='vog_annotations', default=None, help='vogdb annotations file')
+parser.add_argument('--viral_loc', action='store', dest='viral_loc', default=None, help='merged viral protein faa file')
+parser.add_argument('--peptidase_loc', action='store', dest='peptidase_loc', default=None, help='MEROPS peptidase fasta file')
+parser.add_argument('--genome_summary_form_loc', action='store', dest='genome_summary_form_loc', default=None, help='genome summary form file')
+parser.add_argument('--module_step_form_loc', action='store', dest='module_step_form_loc', default=None, help='module step form file')
+parser.add_argument('--etc_module_database_loc', action='store', dest='etc_module_database_loc', default=None, help='etc module database file')
+parser.add_argument('--function_heatmap_form_loc', action='store', dest='function_heatmap_form_loc', default=None, help='function heatmap form file')
+parser.add_argument('--amg_database_loc', action='store', dest='amg_database_loc', default=None, help='amg database file')
+parser.add_argument('--db_version', action='store', dest='db_version', help='Version of DRAM databases')
+parser.add_argument('--threads', action='store', dest='threads', type=int, help='Number of processes')
+parser.add_argument('--out_file', action='store', dest='out_file', help='JSON output file')
+
+args = parser.parse_args()
+
+with open(args.out_file) as fh:
+    params = json.load(fh)
+
+target_directory = params['output_data'][0]['extra_files_path']
+os.makedirs(target_directory)
+
+# Download the data.
+cmd = 'DRAM-setup.py prepare_databases --output_dir %s' % target_directory
+if args.kofam_hmm_loc is not None:
+    cmd = '%s --kofam_hmm_loc %s' % (cmd, args.kofam_hmm_loc)
+if args.kofam_ko_list_loc is not None:
+    cmd = '%s --kofam_ko_list_loc %s' % (cmd, args.kofam_ko_list_loc)
+if args.skip_uniref == 'yes':
+    cmd = '%s --skip_uniref' % cmd
+else:
+    if args.uniref_loc is not None:
+        cmd = '%s --uniref_loc %s' % (cmd, args.uniref_loc)
+    cmd = '%s --uniref_version %d' % (cmd, args.uniref_version)
+if args.pfam_loc is not None:
+    cmd = '%s --pfam_loc %s' % (cmd, args.pfam_loc)
+if args.pfam_hmm_dat is not None:
+    cmd = '%s --pfam_hmm_dat %s' % (cmd, args.pfam_hmm_dat)
+if args.dbcan_loc is not None:
+    cmd = '%s --dbcan_loc %s' % (cmd, args.dbcan_loc)
+if args.dbcan_fam_activities is not None:
+    cmd = '%s --dbcan_fam_activities %s' % (cmd, args.dbcan_fam_activities)
+cmd = '%s --dbcan_version %d' % (cmd, args.dbcan_version)
+if args.vogdb_loc is not None:
+    cmd = '%s --vogdb_loc %s' % (cmd, args.vogdb_loc)
+if args.vog_annotations is not None:
+    cmd = '%s --vog_annotations %s' % (cmd, args.vog_annotations)
+if args.viral_loc is not None:
+    cmd = '%s --viral_loc %s' % (cmd, args.viral_loc)
+if args.peptidase_loc is not None:
+    cmd = '%s --peptidase_loc %s' % (cmd, args.peptidase_loc)
+if args.genome_summary_form_loc is not None:
+    cmd = '%s --genome_summary_form_loc %s' % (cmd, args.genome_summary_form_loc)
+if args.module_step_form_loc is not None:
+    cmd = '%s --module_step_form_loc %s' % (cmd, args.module_step_form_loc)
+if args.etc_module_database_loc is not None:
+    cmd = '%s --etc_module_database_loc %s' % (cmd, args.etc_module_database_loc)
+if args.function_heatmap_form_loc is not None:
+    cmd = '%s --function_heatmap_form_loc %s' % (cmd, args.function_heatmap_form_loc)
+if args.amg_database_loc is not None:
+    cmd = '%s --amg_database_loc %s' % (cmd, args.amg_database_loc)
+cmd = '%s --threads %d' % (cmd, args.threads)
+
+subprocess.check_call(cmd, shell=True)
+
+data_manager_json = {'data_tables': {}}
+data_manager_entry = {}
+data_manager_entry['value'] = args.db_version
+data_manager_entry['name'] = 'DRAM %s databases' % args.db_version
+data_manager_entry['path'] = target_directory
+data_manager_json['data_tables']['dram_databases'] = data_manager_entry
+
+with open(args.out_file, 'w') as fh:
+    json.dump(data_manager_json, fh, sort_keys=True)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_dram_download.xml	Thu Sep 01 17:16:07 2022 +0000
@@ -0,0 +1,235 @@
+<tool id="data_manager_dram_download" name="DRAM: Download databases" version="1.3.5" tool_type="manage_data" profile="21.05"> <!-- tool_type manage_data: this wrapper is a Galaxy data manager -->
+    <description>required by the DRAM suite of tools</description>
+    <requirements>
+        <requirement type="package" version="1.3.5">dram</requirement> <!-- pinned to the same version as the tool itself -->
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+python '$__tool_directory__/data_manager_dram_download.py'
+--out_file '$out_file'
+#if str($kofam_hmm_loc) != '':
+    --kofam_hmm_loc '$kofam_hmm_loc'
+#end if
+#if str($kofam_ko_list_loc) != '':
+    --kofam_ko_list_loc '$kofam_ko_list_loc'
+#end if
+#if str($skip_uniref_cond.skip_uniref) == 'yes':
+    --skip_uniref '$skip_uniref_cond.skip_uniref'
+#else:
+    #if str($skip_uniref_cond.uniref_loc) != '':
+        --uniref_loc '$skip_uniref_cond.uniref_loc'
+    #end if
+    --uniref_version $skip_uniref_cond.uniref_version
+#end if
+#if str($pfam_loc) != '':
+    --pfam_loc '$pfam_loc'
+#end if
+#if str($pfam_hmm_dat) != '':
+    --pfam_hmm_dat '$pfam_hmm_dat'
+#end if
+#if str($dbcan_loc) != '':
+    --dbcan_loc '$dbcan_loc'
+#end if
+#if str($dbcan_fam_activities) != '':
+    --dbcan_fam_activities '$dbcan_fam_activities'
+#end if
+--dbcan_version $dbcan_version
+#if str($vogdb_loc) != '':
+    --vogdb_loc '$vogdb_loc'
+#end if
+#if str($vog_annotations) != '':
+    --vog_annotations '$vog_annotations'
+#end if
+#if str($viral_loc) != '':
+    --viral_loc '$viral_loc'
+#end if
+#if str($peptidase_loc) != '':
+    --peptidase_loc '$peptidase_loc'
+#end if
+#if str($genome_summary_form_loc) != '':
+    --genome_summary_form_loc '$genome_summary_form_loc'
+#end if
+#if str($module_step_form_loc) != '':
+    --module_step_form_loc '$module_step_form_loc'
+#end if
+#if str($etc_module_database_loc) != '':
+    --etc_module_database_loc '$etc_module_database_loc'
+#end if
+#if str($function_heatmap_form_loc) != '':
+    --function_heatmap_form_loc '$function_heatmap_form_loc'
+#end if
+#if str($amg_database_loc) != '':
+    --amg_database_loc '$amg_database_loc'
+#end if
+--threads \${GALAXY_SLOTS:-10}
+--db_version '$db_version'
+    ]]></command>
+    <inputs> <!-- every *_loc text param is optional: the command block forwards it only when non-empty, and per the param help a blank value means "install" -->
+        <param argument="--kofam_hmm_loc" type="text" value="" label="Absolute path to hmm file for KOfam (profiles.tar.gz) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char=""> <!-- same sanitizer on every text param: all printable characters are kept except the single quote, which would end the '...' quoting in the command block -->
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--kofam_ko_list_loc" type="text" value="" label="Absolute path to KOfam ko list file (ko_list.gz) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+
+        <conditional name="skip_uniref_cond"> <!-- the uniref path and version are only offered, and only passed to the script, when uniref is not skipped -->
+            <param argument="--skip_uniref" type="select" label="Skip downloading and processing uniref?" help="Choosing Yes saves time and memory and does not impact DRAM distillation">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no">
+                <param argument="--uniref_loc" type="text" value="" label="Absolute path to uniref (uniref90.fasta.gz) if already installed" help="Leave blank to install">
+                    <sanitizer invalid_char="">
+                        <valid initial="string.printable">
+                            <remove value="&apos;"/>
+                        </valid>
+                    </sanitizer>
+                </param>
+                <param argument="--uniref_version" type="integer" value="90" label="UniRef version to download" help="Ignored if uniref is not downloaded and processed"/>
+            </when>
+            <when value="yes"/>
+        </conditional>
+        <param argument="--pfam_loc" type="text" value="" label="Absolute path to pfam-A full file (Pfam-A.full.gz) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--pfam_hmm_dat" type="text" value="" label="Absolute path to pfam hmm .dat file to get PF descriptions (Pfam-A.hmm.dat.gz) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--dbcan_loc" type="text" value="" label="Absolute path to dbCAN file (dbCAN-HMMdb-V9.txt) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--dbcan_fam_activities" type="text" value="" label="Absolute path to CAZY family activities file (CAZyDB.07302020.fam-activities.txt) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--dbcan_version" type="integer" value="10" label="Version of dbCAN to use"/>
+        <param argument="--vogdb_loc" type="text" value="" label="Absolute path to hmm file for vogdb (vog.hmm.tar.gz) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--vog_annotations" type="text" value="" label="Absolute path to vogdb annotations file (vog.annotations.tsv.gz) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--viral_loc" type="text" value="" label="Absolute path to merged viral protein faa file (viral.x.protein.faa.gz) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--peptidase_loc" type="text" value="" label="Absolute path to MEROPS peptidase fasta file (pepunit.lib) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--genome_summary_form_loc" type="text" value="" label="Absolute path to genome summary form file (genome_summary_form.YYYYMMDD.tsv) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--module_step_form_loc" type="text" value="" label="Absolute path to module step form file (module_step_form.YYYYMMDD.tsv) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--etc_module_database_loc" type="text" value="" label="Absolute path to etc module database file (etc_mdoule_database.YYYYMMDD.tsv) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--function_heatmap_form_loc" type="text" value="" label="Absolute path to function heatmap form file (function_heatmap_form.YYYYMMDD.tsv) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--amg_database_loc" type="text" value="" label="Absolute path to amg database file (amg_database.YYYYMMDD.tsv) if already installed" help="Leave blank to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--db_version" type="text" value="1.3.5" label="Version of DRAM databases to install">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json"/> <!-- handed to the script through its out_file option -->
+    </outputs>
+    <tests>
+        <test expect_failure="true"> <!-- failure test: the paths below are bare file names (presumably not present), and the run must stop with the stderr message asserted at the end -->
+            <param name="kofam_hmm_loc" value="profiles.tar.gz"/>
+            <param name="kofam_ko_list_loc" value="ko_list.gz"/>
+            <param name="skip_uniref" value="yes"/>
+            <param name="pfam_loc" value="Pfam-A.full.gz"/>
+            <param name="pfam_hmm_dat" value="Pfam-A.hmm.dat.gz"/>
+            <param name="dbcan_loc" value="dbCAN-HMMdb-V9.txt"/>
+            <param name="dbcan_fam_activities" value="CAZyDB.07302020.fam-activities.txt"/>
+            <param name="vogdb_loc" value="vog.hmm.tar.gz"/>
+            <param name="vog_annotations" value="vog.annotations.tsv.gz"/>
+            <param name="viral_loc" value="viral.x.protein.faa.gz"/>
+            <param name="peptidase_loc" value="pepunit.lib"/>
+            <param name="genome_summary_form_loc" value="genome_summary_form.20220715.tsv"/>
+            <param name="module_step_form_loc" value="module_step_form.20220715.tsv"/>
+            <param name="etc_module_database_loc" value="etc_mdoule_database.20220715.tsv"/>
+            <param name="function_heatmap_form_loc" value="function_heatmap_form.20220715.tsv"/>
+            <param name="amg_database_loc" value="amg_database.20220715.tsv"/>
+            <assert_stderr>
+                <has_text text="Database location does not exist"/>
+            </assert_stderr>
+        </test>
+    </tests>
+    <help>
+This tool downloads and processes DRAM databases for annotation and makes them available to corresponding versions
+of the DRAM suite of tools.
+
+See https://github.com/WrightonLabCSU/DRAM/wiki for details about DRAM.
+    </help>
+    <citations>
+        <citation type="doi">10.1093/nar/gkaa621</citation>
+    </citations>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Thu Sep 01 17:16:07 2022 +0000
@@ -0,0 +1,19 @@
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_dram_download.xml" id="data_manager_dram_download"> <!-- ties the download tool to the data table it fills -->
+        <data_table name="dram_databases"> <!-- same key the script writes under data_tables in its JSON output -->
+            <output>
+                <column name="value"/> <!-- value, name and path match the keys of the JSON entry written by the script -->
+                <column name="name"/>
+                <column name="path" output_ref="out_file">
+                    <move type="directory" relativize_symlinks="True"> <!-- relocates the downloaded directory to the target below -->
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">DRAM/${value}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/DRAM/${value}/</value_translation> <!-- recorded path points at the post-move location -->
+                    <value_translation type="function">abspath</value_translation> <!-- presumably applied after the template above, making the path absolute -->
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dram_databases.loc	Thu Sep 01 17:16:07 2022 +0000
@@ -0,0 +1,12 @@
+# This is a sample file distributed with Galaxy that enables tools
+# to use a directory of DRAM databases.  The dram_databases.loc
+# file has this format (longer white space characters are TAB characters):
+#
+# <db_version> <display_name> <directory_path>
+#
+# So, for example, if you have the DRAM databases stored in 
+# /depot/data2/galaxy/dram/, 
+# then the dram_databases.loc entry would look like this:
+#
+# 3.5.1	DRAM database	/depot/data2/galaxy/dram
+3.5.1   DRAM 3.5.1 databases    ${__HERE__}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/dram_databases.loc.sample	Thu Sep 01 17:16:07 2022 +0000
@@ -0,0 +1,3 @@
+## DRAM databases
+#Value	Name	Path
+#2022-07-04	DRAM annotations (2022-07-04)	/depot/data2/galaxy/tool-data/dram_databases/2022-07-04
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Sep 01 17:16:07 2022 +0000
@@ -0,0 +1,8 @@
+<tables>
+    <!-- Locations of DRAM databases version 1.3.5 and higher -->
+    <table name="dram_databases" comment_char="#">
+        <columns>value, name, path</columns> <!-- column order of each row in the .loc file; lines starting with the comment_char are ignored -->
+        <file path="tool-data/dram_databases.loc" />
+    </table>
+</tables>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Thu Sep 01 17:16:07 2022 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of DRAM databases version 1.3.5 and higher -->
+    <table name="dram_databases" comment_char="#">
+        <columns>value, name, path</columns> <!-- column order of each row in the .loc file; lines starting with the comment_char are ignored -->
+        <file path="${__HERE__}/test-data/dram_databases.loc" /> <!-- test variant: points at the .loc file under test-data; ${__HERE__} is presumably resolved to this file's directory -->
+    </table>
+</tables>