# HG changeset patch
# User iuc
# Date 1652274186 0
# Node ID 1439dface5bff18972d9add01062e414a1500701
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_vep_cache_downloader commit 2db33cd5bcf5e2d7e3a43f11855c4cfc3b1b9f56
diff -r 000000000000 -r 1439dface5bf data_manager/data_manager_vep_cache_download.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_vep_cache_download.py Wed May 11 13:03:06 2022 +0000
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+import json
+import os
+import re
+import sys
+import tarfile
+from urllib.request import urlretrieve
+
+
+def main():
+    """Download a VEP cache archive and write its Galaxy data table entry.
+
+    The JSON file named by ``sys.argv[1]`` supplies the parameters and is then
+    overwritten with the result; exits if the archive name cannot be parsed.
+    """
+    with open(sys.argv[1]) as fh:
+        params = json.load(fh)
+    param_dict = params['param_dict']
+    file_name = param_dict['file_name']
+
+    # Derive species, cache type and version from the archive name before touching the disk
+    m = re.search(r"(.*?)(merged|refseq)?_vep_(\d+?)_", file_name)
+    if m is None:
+        sys.exit(f"Cannot parse species and version from file name: {file_name}")
+    species, cache_type, version = m.group(1).rstrip("_"), m.group(2) or "default", m.group(3)
+    suffix = "" if cache_type == "default" else ", " + cache_type.capitalize()
+    display_name = f"{species.capitalize().replace('_', ' ')} {param_dict['dbkey']} (V{version}{suffix})"
+    # Cut the literal suffix: str.strip(".tar.gz") would strip a character set from both ends
+    value = file_name[:-len(".tar.gz")] if file_name.endswith(".tar.gz") else file_name
+
+    # Download and extract given cache archive, remove archive afterwards
+    target_directory = params['output_data'][0]['extra_files_path']
+    os.mkdir(target_directory)
+    url = param_dict['url'].rstrip("/") + "/" + file_name.lstrip("/")
+    final_file, _ = urlretrieve(url, os.path.join(target_directory, file_name))
+    with tarfile.open(final_file, "r:gz") as tar:  # closed even if extraction fails
+        tar.extractall(target_directory)
+    os.remove(final_file)
+
+    # Build the new data table entry and save it to out_file
+    entry = {
+        'value': value,
+        'dbkey': param_dict['dbkey'],
+        'version': version,
+        'cachetype': cache_type,
+        'name': display_name,
+        'species': species,
+        'path': './%s' % value,
+    }
+    with open(sys.argv[1], 'w') as fh:
+        json.dump({'data_tables': {'vep_versioned_annotation_cache': [entry]}}, fh, sort_keys=True)
+
+
+if __name__ == "__main__":  # run main() only when executed as a script, not on import
+ main()
diff -r 000000000000 -r 1439dface5bf data_manager/data_manager_vep_cache_download.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_vep_cache_download.xml Wed May 11 13:03:06 2022 +0000
@@ -0,0 +1,45 @@
+
+ versioned annotation files for VEP
+
+ 106
+
+
+ python
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool downloads given versions of VEP cache annotation files and makes them available to Ensembl VEP in Galaxy via the
+"vep_versioned_annotation_cache" data table. You should use the indexed version of the cache files and it is strongly
+recommended to use the cache files whose version number matches the VEP version number. Note that for most genomes there
+are three versions of cache data available: default, refseq and merged (combining the former two). Choose the one suitable
+for your usage.
+
+A general introduction to the VEP cache and download links can be found on the official website:
+https://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html
+
+
+ 10.1186/s13059-016-0974-4
+
+
\ No newline at end of file
diff -r 000000000000 -r 1439dface5bf data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Wed May 11 13:03:06 2022 +0000
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1439dface5bf test-data/dbkeys.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dbkeys.loc Wed May 11 13:03:06 2022 +0000
@@ -0,0 +1,3 @@
+#
+hg38 Human hg38 a_path
+ce11 C. elegans ce11 a_path
\ No newline at end of file
diff -r 000000000000 -r 1439dface5bf test-data/from_test-meta.data_manager.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/from_test-meta.data_manager.json Wed May 11 13:03:06 2022 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"vep_versioned_annotation_cache": [{"cachetype": "refseq", "dbkey": "ci3", "name": "Ciona intestinalis ci3 (V106, Refseq)", "path": "./ciona_intestinalis_refseq_vep_106_KH", "species": "ciona_intestinalis", "value": "ciona_intestinalis_refseq_vep_106_KH", "version": "106"}]}}
\ No newline at end of file
diff -r 000000000000 -r 1439dface5bf test-data/vep_versioned_annotation_cache.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vep_versioned_annotation_cache.loc Wed May 11 13:03:06 2022 +0000
@@ -0,0 +1,6 @@
+#
+#
+ciona_intestinalis_refseq_vep_105_KH ci3 105 refseq Ciona intestinalis ci3 (V105, Refseq) ciona_intestinalis /home/sebastian/galaxy/tool-data/vep/105/ci3/refseq
+caenorhabditis_elegans_vep_105_WBcel235 ce11 105 default Caenorhabditis elegans ce11 (V105) caenorhabditis_elegans /home/sebastian/galaxy/tool-data/vep/105/ce11/default
+caenorhabditis_elegans_vep_104_WBcel235 ce11 104 default Caenorhabditis elegans ce11 (V104) caenorhabditis_elegans /home/sebastian/galaxy/tool-data/vep/104/ce11/default
+drosophila_melanogaster_vep_105_BDGP6.32 dm6 105 default Drosophila melanogaster dm6 (V105) drosophila_melanogaster /home/sebastian/galaxy/tool-data/vep/105/dm6/default
diff -r 000000000000 -r 1439dface5bf tool-data/dbkeys.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/dbkeys.loc.sample Wed May 11 13:03:06 2022 +0000
@@ -0,0 +1,1 @@
+#
\ No newline at end of file
diff -r 000000000000 -r 1439dface5bf tool-data/vep_versioned_annotation_cache.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/vep_versioned_annotation_cache.loc.sample Wed May 11 13:03:06 2022 +0000
@@ -0,0 +1,11 @@
+#This file describes vep cache data and its metadata available on the server.
+#The data table has the format (white space characters are TAB characters):
+#
+#
+#
+#So, vep_versioned_annotation_cache.loc tables could look like this:
+#
+#homo_sapiens_vep_105_GRCh38 hg38 105 default Homo sapiens hg38 (V105) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/default
+#homo_sapiens_refseq_vep_105_GRCh38 hg38 105 refseq Homo sapiens hg38 (V105, Refseq) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/refseq
+#homo_sapiens_merged_vep_105_GRCh38 hg38 105 merged Homo sapiens hg38 (V105, Merged) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/merged
+#
\ No newline at end of file
diff -r 000000000000 -r 1439dface5bf tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed May 11 13:03:06 2022 +0000
@@ -0,0 +1,12 @@
+
+
+
+ value, dbkey, version, cachetype, name, species, path
+
+
+
+
+ value, name, len_path
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1439dface5bf tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed May 11 13:03:06 2022 +0000
@@ -0,0 +1,12 @@
+
+
+
+ value, dbkey, version, cachetype, name, species, path
+
+
+
+
+ value, name, len_path
+
+
+
\ No newline at end of file