Mercurial > repos > iuc > data_manager_plant_tribes_scaffolds_downloader
annotate data_manager/data_manager_plant_tribes_scaffolds_download.py @ 4:93253aebaf2e draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader commit 6085b7d47fcb47ad1587ac2542abdef782f61fe4"
author | iuc |
---|---|
date | Fri, 17 Jul 2020 04:19:35 -0400 |
parents | 5833ef61c1f8 |
children | 1550b1741780 |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 # | |
3 # Data manager for downloading Plant Tribes scaffolds data. | |
4 import argparse | |
5 import json | |
6 import os | |
7 import shutil | |
8 import sys | |
9 import tarfile | |
10 import zipfile | |
4
93253aebaf2e
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader commit 6085b7d47fcb47ad1587ac2542abdef782f61fe4"
iuc
parents:
3
diff
changeset
|
11 from urllib.request import Request, urlopen |
0 | 12 |
13 DEFAULT_DATA_TABLE_NAMES = ["plant_tribes_scaffolds"] | |
14 | |
15 | |
def add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
    """Append ``data_table_entry`` to the named table in ``data_manager_dict``.

    The ``'data_tables'`` mapping and the per-table list are created on first
    use.  ``data_manager_dict`` is modified in place and is also returned.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    tables.setdefault(data_table_name, []).append(data_table_entry)
    return data_manager_dict
21 | |
22 | |
def make_directory(dir):
    """Create ``dir`` (including missing parents) unless the path already exists."""
    if os.path.exists(dir):
        # Anything already present at this path is left untouched.
        return
    os.makedirs(dir)
26 | |
27 | |
def remove_directory(dir):
    """Recursively delete ``dir``; do nothing when the path does not exist."""
    if not os.path.exists(dir):
        return
    shutil.rmtree(dir)
31 | |
32 | |
def extract_archive(file_path, work_directory):
    """Extract the archive at ``file_path`` into ``work_directory``.

    Tar archives (any compression that ``tarfile`` detects with ``'r:*'``) and
    zip archives are supported.  A file that is neither is silently ignored.

    Always returns ``None``.
    """
    if tarfile.is_tarfile(file_path):
        archive = tarfile.open(file_path, 'r:*')
    elif zipfile.is_zipfile(file_path):
        archive = zipfile.ZipFile(file_path, 'r')
    else:
        return
    # Both archive types are context managers; using one guarantees the
    # underlying file handle is closed even if extraction fails.
    with archive:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive.  Only use this with archives from a trusted source, or
        # validate member names before extracting.
        archive.extractall(work_directory)
41 | |
42 | |
def move_files(source_directory, target_directory):
    """Move every entry found in ``source_directory`` to ``target_directory``."""
    # Move the files into defined output directory.
    entries = os.listdir(source_directory)
    for entry in entries:
        source_path = os.path.join(source_directory, entry)
        shutil.move(source_path, target_directory)
47 | |
48 | |
def url_download(url, work_directory):
    """Download ``url`` into ``work_directory`` and return the local file path.

    The local file name is the last path component of ``url`` as given by
    ``os.path.basename``.

    Any exception raised while opening the URL or writing the file terminates
    the program through ``sys.exit`` with the exception text as the message.
    """
    file_path = os.path.join(work_directory, os.path.basename(url))
    try:
        # Context managers close both the response and the output file on
        # every path; copyfileobj streams the body in buffered chunks.
        with urlopen(Request(url)) as src, open(file_path, 'wb') as dst:
            shutil.copyfileobj(src, dst)
    except Exception as e:
        sys.exit(str(e))
    return file_path
69 | |
70 | |
def download(target_directory, web_url, config_web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
    """Download scaffolds data and default configs, and build the data table entries.

    Steps:
      1. Download ``web_url`` into a ``scaffolds`` working directory under the
         current directory, extract it, and move the result into
         ``target_directory``.
      2. Add one data table entry (``value``, ``name``, ``path``,
         ``description``) per item in ``target_directory`` to every table
         named in ``data_table_names``.
      3. Download ``config_web_url`` into a ``configs`` working directory,
         extract it, and move the contents of its ``<entry_name>``
         sub-directory into ``target_directory/<entry_name>``, where
         ``entry_name`` is the last item listed in step 2.

    Returns the populated data manager dictionary.  Exits through ``sys.exit``
    when ``target_directory`` is empty after step 1.
    """
    data_manager_dict = {}
    # Download the scaffolds data.
    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
    make_directory(work_directory)
    file_path = url_download(web_url, work_directory)
    extract_archive(file_path, work_directory)
    os.remove(file_path)
    # Move the scaffolds data files into the defined output directory.
    move_files(work_directory, target_directory)
    remove_directory(work_directory)
    # Populate the data_manager_dict with the scaffolds data entry.
    entry_name = None
    for item in os.listdir(target_directory):
        full_path = os.path.abspath(os.path.join(target_directory, item))
        entry_name = "%s" % os.path.basename(item)
        # Build a fresh dictionary for every item: re-using a single dict
        # would make all appended entries alias the same object, so every
        # row would end up describing the last item only.
        data_table_entry = {
            'value': entry_name,
            'name': entry_name,
            'path': full_path,
            'description': description,
        }
        # Populate the data_manager_dict.
        for data_table_name in data_table_names:
            data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
    if entry_name is None:
        # Without an entry name the configs cannot be placed; fail with a
        # clear message instead of an unbound-variable error further down.
        sys.exit('No scaffolds data found in %s after extracting %s' % (target_directory, web_url))
    # Download the default configuration files.
    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'configs'))
    make_directory(work_directory)
    file_path = url_download(config_web_url, work_directory)
    extract_archive(file_path, work_directory)
    os.remove(file_path)
    # Move the default configuration files into the defined output directory.
    source_configs_directory = os.path.join(work_directory, entry_name)
    target_configs_directory = os.path.join(target_directory, entry_name)
    move_files(source_configs_directory, target_configs_directory)
    remove_directory(work_directory)
    return data_manager_dict
106 | |
107 | |
# Command line entry point.  The file named by --out_file is read first as
# JSON parameters and is overwritten at the end with the resulting data
# manager dictionary.
parser = argparse.ArgumentParser()
parser.add_argument('--description', dest='description', default=None, help='Description')
parser.add_argument('--name', dest='name', help='Data table entry unique ID')
parser.add_argument('--out_file', dest='out_file', help='JSON output file')
parser.add_argument('--web_url', dest='web_url', help='URL for downloading scaffolds')
parser.add_argument('--config_web_url', dest='config_web_url', help='URL for downloading default configs')

args = parser.parse_args()

# The target directory is taken from the first output dataset's
# extra_files_path in the incoming parameters.
with open(args.out_file) as fh:
    params = json.load(fh)
target_directory = params['output_data'][0]['extra_files_path']
make_directory(target_directory)

description = '' if args.description is None else args.description.strip()

# Get the scaffolds data.
data_manager_dict = download(target_directory, args.web_url, args.config_web_url, description)
# Write the JSON output dataset.
with open(args.out_file, 'w') as fh:
    json.dump(data_manager_dict, fh, sort_keys=True)