comparison data_source.py @ 0:cc18c3bf2666 draft default tip

planemo upload for repository https://github.com/AquaINFRA/tools-ecology/tree/aquainfra_importer commit 2b586af4c987b6105356736f875d2517b25a8be6
author ecology
date Tue, 14 May 2024 20:10:29 +0000
#!/usr/bin/env python
# Retrieves data from external data source applications and
# stores it in a dataset file.
#
# Data source application parameters are temporarily stored
# in the dataset file.
import json
import os
import sys
from urllib.parse import urlencode, urlparse
from urllib.request import urlopen

from galaxy.datatypes import sniff
from galaxy.datatypes.registry import Registry
from galaxy.util import (
    DEFAULT_SOCKET_TIMEOUT,
    get_charset_from_http_headers,
    stream_to_open_named_file,
)

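# "GALAXY|<out_data_name>|URL" parameters let the remote application supply a
# separate URL per output dataset (see the loop in main() below). The two
# path constants are module-level defaults resolved relative to this script;
# main() itself reads these locations from the job_config parameters instead.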
GALAXY_PARAM_PREFIX = "GALAXY"
GALAXY_ROOT_DIR = os.path.realpath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
)
GALAXY_DATATYPES_CONF_FILE = os.path.join(
    GALAXY_ROOT_DIR, "datatypes_conf.xml"
)


def main():
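    # sys.argv[1] is the JSON parameter file that Galaxy writes for the job;
    # an optional sys.argv[2] caps the download size (0 disables the check).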
    if len(sys.argv) >= 3:
        max_file_size = int(sys.argv[2])
    else:
        max_file_size = 0

    with open(sys.argv[1]) as fh:
        params = json.load(fh)

    out_data_name = params["output_data"][0]["out_data_name"]

    URL = params["param_dict"].get("URL", None)
    URL_method = params["param_dict"].get("URL_method", "get")

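    # A sketch of the parameter file consumed above, reduced to the fields
    # this script actually reads (values are illustrative only):
    #   {
    #     "param_dict": {"URL": "https://example.org/data.csv",
    #                    "URL_method": "get",
    #                    "incoming_request_params": {...}},
    #     "output_data": [{"out_data_name": "output",
    #                      "file_name": "/path/to/dataset.dat",
    #                      "ext": "auto"}],
    #     "job_config": {"GALAXY_ROOT_DIR": "...",
    #                    "GALAXY_DATATYPES_CONF_FILE": "...",
    #                    "TOOL_PROVIDED_JOB_METADATA_FILE": "..."}
    #   }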
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=params["job_config"]["GALAXY_ROOT_DIR"],
        config=params["job_config"]["GALAXY_DATATYPES_CONF_FILE"],
    )

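    # Fetch each output dataset in turn. A per-output
    # "GALAXY|<out_data_name>|URL" parameter, if the remote application sent
    # one, overrides the global URL for that dataset.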
    for data_dict in params["output_data"]:
        cur_filename = data_dict["file_name"]
        cur_URL = params["param_dict"].get(
            "%s|%s|URL" % (GALAXY_PARAM_PREFIX, data_dict["out_data_name"]),
            URL,
        )
        if not cur_URL or urlparse(cur_URL).scheme not in (
            "http", "https", "ftp"
        ):
            open(cur_filename, "w").close()
            sys.exit(
                "The remote data source application has not sent back a "
                "valid URL parameter in the request."
            )

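        # Both request methods pass Galaxy's default socket timeout so that
        # a stalled remote server fails the job instead of hanging it.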
        try:
            if URL_method == "get":
                page = urlopen(cur_URL, timeout=DEFAULT_SOCKET_TIMEOUT)
            elif URL_method == "post":
                param_dict = params["param_dict"]
                page = urlopen(
                    cur_URL,
                    urlencode(param_dict["incoming_request_params"]).encode(
                        "utf-8"
                    ),
                    timeout=DEFAULT_SOCKET_TIMEOUT,
                )
            else:
                # Guard against "page" being unbound below if the data
                # source sent an unexpected method.
                sys.exit("Unsupported URL_method: %s" % URL_method)
        except Exception as e:
            sys.exit(
                "The remote data source application may "
                "be offline, please try again later. Error: %s" % str(e)
            )
        if max_file_size:
            file_size = int(page.info().get("Content-Length", 0))
            if file_size > max_file_size:
                sys.exit(
                    "The requested data size (%d bytes) exceeds the maximum "
                    "allowed size (%d bytes) on this server."
                    % (file_size, max_file_size)
                )
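        # Note that this only inspects the advertised Content-Length header;
        # a response that omits the header bypasses the size check.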
        try:
            cur_filename = stream_to_open_named_file(
                page,
                os.open(cur_filename, os.O_WRONLY | os.O_TRUNC | os.O_CREAT),
                cur_filename,
                source_encoding=get_charset_from_http_headers(page.headers),
            )
        except Exception as e:
            sys.exit("Unable to fetch %s:\n%s" % (cur_URL, e))

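        # Let Galaxy sniff the downloaded file to resolve its datatype
        # extension (e.g. when the tool requested "auto").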
        try:
            ext = sniff.handle_uploaded_dataset_file(
                cur_filename, datatypes_registry, ext=data_dict["ext"]
            )
        except Exception as e:
            sys.exit(str(e))

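        # Report the resolved extension back to Galaxy via the tool-provided
        # job metadata file.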
        tool_provided_metadata = {out_data_name: {"ext": ext}}

        with open(
            params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"], "w"
        ) as json_file:
            json.dump(tool_provided_metadata, json_file)


if __name__ == "__main__":
    main()
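
# Typical invocation, a sketch only -- in practice the Galaxy job runner
# builds this command line and the parameter file itself:
#   python data_source.py <galaxy_params.json> [max_file_size_bytes]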