# HG changeset patch
# User iuc
# Date 1636996882 0
# Node ID e93e32359b67c20f72f6578e513bf49bd8b89c9c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_interproscan commit 2f5d27a375fcc2e8d77914b3d9e402a9e2df2d97"
diff -r 000000000000 -r e93e32359b67 data_manager/interproscan.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/interproscan.py Mon Nov 15 17:21:22 2021 +0000
@@ -0,0 +1,160 @@
+#!/usr/bin/env python
+
+import argparse
+import hashlib
+import json
+import operator
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tarfile
+
+import requests
+
+
+# Base URL of the GitHub REST API for the InterProScan repository
+GH_REPO_API = 'https://api.github.com/repos/ebi-pf-team/interproscan/'
+# URL templates, filled in with a release tag as {version}:
+# the MD5 checksum file of the full tarball, and the full tarball itself
+MD5_URL = 'http://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/{version}/interproscan-{version}-64-bit.tar.gz.md5'
+DATA_URL = 'http://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/{version}/interproscan-{version}-64-bit.tar.gz'
+
+# For tests: download a smaller archive containing *some* data
+# (the GitHub archive of the tagged release, again keyed by {version})
+PARTIAL_URL = 'https://github.com/ebi-pf-team/interproscan/archive/{version}.tar.gz'
+
+
+def list_tags(url=None):
+    """Return the sorted InterProScan release tags published on GitHub.
+
+    Walks every page of the GitHub tag listing, following the ``next`` link
+    of each response, and keeps only the tag names shaped like an
+    InterProScan 5 release (e.g. ``5.52-86.0``).
+
+    :param url: API page to start from; defaults to ``GH_REPO_API + 'tags'``.
+    :returns: the matching tag names, sorted as strings in ascending order.
+    :raises requests.HTTPError: if the API answers with an error status.
+    """
+    tag_pattern = re.compile(r"^[0-9]\.[0-9]{2}-[0-9]{2}\.[0-9]$")
+
+    tags = []
+    next_url = url or GH_REPO_API + 'tags'
+    # Follow the pagination links in a loop instead of recursing (and
+    # re-sorting) once per page.
+    while next_url:
+        resp = requests.get(url=next_url)
+        # An error reply is a JSON object rather than a list of tags; stop
+        # here with a clear HTTP error instead of a TypeError further down.
+        resp.raise_for_status()
+        tags.extend(
+            tag['name'] for tag in resp.json() if tag_pattern.match(tag['name'])
+        )
+        next_url = resp.links.get('next', {}).get('url')
+
+    return sorted(tags)
+
+
+def download_file(url, dest):
+    """Stream the content served at *url* into the local file *dest*.
+
+    The body is fetched in 8192-byte chunks so it is never held in memory
+    as a whole. An HTTP error status raises before *dest* is opened.
+    """
+    with requests.get(url, stream=True) as response:
+        response.raise_for_status()
+        with open(dest, 'wb') as out:
+            out.writelines(response.iter_content(chunk_size=8192))
+
+
+def main():
+ """Download InterProScan data, index it, and record it in the data manager JSON file.
+
+ Command line: [--partial] [-v VERSION] datatable_name galaxy_datamanager_filename
+
+ The JSON file named by galaxy_datamanager_filename is read to find the
+ output directory and is overwritten at the end with the updated data table.
+
+ Raises RuntimeError when the requested version is not among the known tags,
+ when the MD5 text fetched from the server has an unexpected format, or when
+ the MD5 check fails. Exits with the return code of initial_setup.py when
+ that command reports a failure.
+ """
+ parser = argparse.ArgumentParser(description='Download data for InterProScan')
+ parser.add_argument('--partial', dest='partial', action='store_true', help='Only download a small subset of data (for testing)')
+ parser.add_argument('-v', '--version', help='Specify an InterProScan version (default: latest)')
+ parser.add_argument("datatable_name")
+ parser.add_argument("galaxy_datamanager_filename")
+
+ args = parser.parse_args()
+
+ # Load the JSON configuration whose path was given on the command line
+ with open(args.galaxy_datamanager_filename) as fh:
+ config = json.load(fh)
+
+ # Target directory: "extra_files_path" of the first "output_data" entry (None when absent)
+ output_directory = config.get("output_data", [{}])[0].get("extra_files_path", None)
+ # Start from the data tables already present in the config and make sure
+ # the requested table has a list to append to
+ data_manager_dict = {}
+ data_manager_dict["data_tables"] = config.get("data_tables", {})
+ data_manager_dict["data_tables"][args.datatable_name] = data_manager_dict[
+ "data_tables"
+ ].get(args.datatable_name, [])
+
+ # NOTE: os.mkdir raises if the directory already exists or its parent is missing
+ os.mkdir(output_directory)
+
+ all_tags = list_tags()
+
+ if args.version:
+ if args.version not in all_tags:
+ raise RuntimeError("Version '%s' is not valid" % args.version)
+ tag = args.version
+ else:
+ # list_tags() returns the tags sorted, so the last one is used as the latest
+ tag = all_tags[-1]
+
+ print("Will download data for InterProScan version: %s" % tag)
+
+ print("Getting MD5 checksum:")
+ md5 = requests.get(url=MD5_URL.format(version=tag)).text
+ # The reply must be 32 hex digits followed by the tarball name; anything else is rejected
+ if not re.match(r"^([a-fA-F\d]{32}) interproscan-[0-9]\.[0-9]{2}-[0-9]{2}\.[0-9]-64-bit.tar.gz$", md5):
+ raise RuntimeError("Got invalid MD5 from the InterProScan FTP server: '%s'" % md5)
+ print("%s" % md5)
+
+ # In both cases the tarball is saved under the last path component of its URL
+ if args.partial:
+ print("Downloading partial data tarball...")
+ dest_tar = os.path.join(output_directory, PARTIAL_URL.format(version=tag).split('/')[-1])
+ download_file(PARTIAL_URL.format(version=tag), dest_tar)
+ else:
+ print("Downloading data tarball...")
+ dest_tar = os.path.join(output_directory, DATA_URL.format(version=tag).split('/')[-1])
+ download_file(DATA_URL.format(version=tag), dest_tar)
+
+ # NOTE(review): indentation is not preserved in this view, so it is unclear whether
+ # this check runs for both downloads or only for the full tarball - confirm.
+ # NOTE(review): the whole tarball is read into memory to compute the digest.
+ print("Finished, now checking md5...")
+ md5_computed = hashlib.md5(open(dest_tar, 'rb').read()).hexdigest()
+ # The fetched MD5 text begins with the hex digest, hence the prefix comparison
+ if not md5.startswith(md5_computed):
+ raise RuntimeError("MD5 check failed: computed '%s', expected '%s'" % (md5_computed, md5))
+
+ print("Ok, now extracting data...")
+ tar = tarfile.open(dest_tar, "r:gz")
+ tar.extractall(output_directory)
+ tar.close()
+
+ if args.partial:
+ # Partial archive: only the bundled sample data directory is kept, as "data"
+ print("Moving partial data files around...")
+ shutil.move(os.path.join(output_directory, 'interproscan-%s' % tag, 'core/jms-implementation/support-mini-x86-32/data/'), os.path.join(output_directory, 'data'))
+ else:
+ # Full archive: the whole extracted directory becomes "data"
+ print("Moving data files around...")
+ shutil.move(os.path.join(output_directory, 'interproscan-%s' % tag), os.path.join(output_directory, 'data'))
+
+ print("Done, removing tarball and unneeded files...")
+ os.remove(dest_tar)
+ # NOTE(review): in the full-download case this directory appears to have been
+ # moved away just above - confirm that rmtree does not fail there.
+ shutil.rmtree(os.path.join(output_directory, 'interproscan-%s' % tag))
+
+ print("Running initial_setup.py (index hmm models)...")
+ # Write a temp properties file in work dir
+ # The template is the interproscan.properties file located next to the
+ # (symlink-resolved) interproscan.sh found on the PATH
+ prop_file_src = os.path.join(os.path.dirname(os.path.realpath(shutil.which("interproscan.sh"))), 'interproscan.properties')
+ with open(prop_file_src, 'r') as prop:
+ prop_content = prop.read()
+ # Point the data.directory setting at the freshly downloaded data
+ prop_content = re.sub(r'^data\.directory=.*$', 'data.directory=%s' % os.path.join(output_directory, 'data'), prop_content, flags=re.M)
+ with open('interproscan.properties', 'w') as prop:
+ prop.write(prop_content)
+ # Run the index command
+ cmd_args = [os.path.join(os.path.dirname(os.path.realpath(shutil.which("interproscan.sh"))), 'initial_setup.py')]
+ proc = subprocess.Popen(args=cmd_args, shell=False)
+ # NOTE: no stdout/stderr pipes are requested, so communicate() returns (None, None)
+ # and the two prints below output "None"
+ out, err = proc.communicate()
+ print(out)
+ print(err, file=sys.stderr)
+ return_code = proc.wait()
+ if return_code:
+ print("Error running initial_setup.py.", file=sys.stderr)
+ sys.exit(return_code)
+
+ # Add one row for this release to the data table
+ data_manager_dict["data_tables"][args.datatable_name].append(
+ dict(
+ value=tag,
+ description="InterProScan %s" % tag,
+ interproscan_version=tag,
+ path=output_directory,
+ )
+ )
+
+ print("Saving data table content...")
+
+ # Rows are ordered by their "value" field, highest first
+ data_manager_dict["data_tables"][args.datatable_name].sort(
+ key=operator.itemgetter("value"), reverse=True
+ )
+ # The input JSON file is overwritten; only the "data_tables" key is written back
+ with open(args.galaxy_datamanager_filename, "w") as fh:
+ json.dump(data_manager_dict, fh, indent=2, sort_keys=True)
+
+ print("Finished.")
+
+
+# Run the data manager only when this file is executed as a script
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r e93e32359b67 data_manager/interproscan.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/interproscan.xml Mon Nov 15 17:21:22 2021 +0000
@@ -0,0 +1,69 @@
+
+
+ interproscan
+ requests
+
+
+
+
+
+ ^([0-9]+\.[0-9]+-[0-9]+\.[0-9]+)?$
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1093/bioinformatics/btu031
+ 10.7717/peerj.167
+ 10.1093/bioinformatics/17.9.847
+ 10.1093/nar/gki442
+ 10.1093/nar/gkn785
+
+
diff -r 000000000000 -r e93e32359b67 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Mon Nov 15 17:21:22 2021 +0000
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
diff -r 000000000000 -r e93e32359b67 test-data/interproscan.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/interproscan.loc Mon Nov 15 17:21:22 2021 +0000
@@ -0,0 +1,8 @@
+# this is a tab-separated file describing the location of interproscan databases used for the
+# interproscan annotation tool
+#
+# the columns are:
+# value description interproscan_version path
+#
+# for example
+# 5.52-86.0 InterProScan 5.52-86.0 5.52-86.0 /tmp/database/interproscan/5.52-86.0/
diff -r 000000000000 -r e93e32359b67 tool-data/interproscan.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/interproscan.loc.sample Mon Nov 15 17:21:22 2021 +0000
@@ -0,0 +1,8 @@
+# this is a tab-separated file describing the location of interproscan databases used for the
+# interproscan annotation tool
+#
+# the columns are:
+# value description interproscan_version path
+#
+# for example
+# 5.52-86.0 InterProScan 5.52-86.0 5.52-86.0 /tmp/database/interproscan/5.52-86.0/
diff -r 000000000000 -r e93e32359b67 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Nov 15 17:21:22 2021 +0000
@@ -0,0 +1,6 @@
+
+
+ value, description, interproscan_version, path
+
+
+
diff -r 000000000000 -r e93e32359b67 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Mon Nov 15 17:21:22 2021 +0000
@@ -0,0 +1,6 @@
+
+
+ value, description, interproscan_version, path
+
+
+