comparison data_source.py @ 0:fe2937ae3ee2 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mave_tools/mavedb/ commit 13791ad3a67f107e7c5cfd925a2cbc0fb5656ab3
author bgruening
date Wed, 13 Dec 2023 16:04:48 +0000
#!/usr/bin/env python
# Retrieves data from an external data source application and stores it in a dataset file.
# Data source application parameters are temporarily stored in the dataset file.
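#
# Usage (a sketch; Galaxy normally invokes this tool itself):
#   python data_source.py <output_dataset_file> <max_file_size_in_bytes>
# where <output_dataset_file> initially contains the data source parameters and
# is overwritten with the downloaded data, and a <max_file_size_in_bytes> of 0
# disables the size check.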
import os
import sys
from json import (
    dumps,
    loads,
)
from urllib.parse import (
    urlencode,
    urlparse,
)
from urllib.request import urlopen

from galaxy.datatypes import sniff
from galaxy.datatypes.registry import Registry
from galaxy.jobs import TOOL_PROVIDED_JOB_METADATA_FILE
from galaxy.util import (
    DEFAULT_SOCKET_TIMEOUT,
    get_charset_from_http_headers,
    stream_to_open_named_file,
)

GALAXY_PARAM_PREFIX = "GALAXY"
GALAXY_ROOT_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
GALAXY_DATATYPES_CONF_FILE = os.path.join(GALAXY_ROOT_DIR, "datatypes_conf.xml")
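# Note: GALAXY_ROOT_DIR assumes this script sits two directory levels below the
# Galaxy root (e.g. tools/data_source/), so datatypes_conf.xml resolves relative
# to that root. These constants are only fallbacks for the older tabular
# parameter format; JSON parameter files carry their own job_config paths.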


def stop_err(msg):
    # Galaxy's legacy stdio handling treats any output on stderr as a tool
    # failure, which is why no explicit exit code is set here.
    sys.stderr.write(msg)
    sys.exit()


def load_input_parameters(filename, erase_file=True):
    datasource_params = {}
    try:
        # Preferred format: the whole file is a JSON document with a "param_dict" key.
        json_params = loads(open(filename).read())
        datasource_params = json_params.get("param_dict")
    except Exception:
        # Fall back to the older tab-separated "name<TAB>value" format.
        json_params = None
        for line in open(filename):
            try:
                line = line.strip()
                fields = line.split("\t")
                datasource_params[fields[0]] = fields[1]
            except Exception:
                continue
    if erase_file:
        open(filename, "w").close()  # open file for writing, then close, removes params from file
    return json_params, datasource_params


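# For illustration (hypothetical values), a JSON parameter file might look like:
#   {"param_dict": {"URL": "https://example.org/data.tsv", "URL_method": "get"},
#    "output_data": [...], "job_config": {...}}
# while the older tabular format holds one "name<TAB>value" pair per line:
#   URL<TAB>https://example.org/data.tsv
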
def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int(sys.argv[2])
    except Exception:
        max_file_size = 0

    job_params, params = load_input_parameters(filename)
    if job_params is None:  # using an older tabular file
        enhanced_handling = False
        job_params = dict(param_dict=params)
        job_params["output_data"] = [
            dict(out_data_name="output", ext="data", file_name=filename, extra_files_path=None)
        ]
        job_params["job_config"] = dict(
            GALAXY_ROOT_DIR=GALAXY_ROOT_DIR,
            GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE,
            TOOL_PROVIDED_JOB_METADATA_FILE=TOOL_PROVIDED_JOB_METADATA_FILE,
        )
    else:
        enhanced_handling = True
        json_file = open(
            job_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"], "w"
        )  # specially named file for output metadata to pass on to the set-metadata step

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=job_params["job_config"]["GALAXY_ROOT_DIR"],
        config=job_params["job_config"]["GALAXY_DATATYPES_CONF_FILE"],
    )

    URL = params.get("URL", None)  # a bare "URL" parameter indicates that only one dataset is being downloaded
    URL_method = params.get("URL_method", None)

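    # Each output can override the single URL above with a parameter named
    # "GALAXY|<out_data_name>|URL" (see GALAXY_PARAM_PREFIX); a hypothetical
    # example would be "GALAXY|output2|URL" for an output named "output2".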
    for data_dict in job_params["output_data"]:
        cur_filename = data_dict.get("file_name", filename)
        cur_URL = params.get("%s|%s|URL" % (GALAXY_PARAM_PREFIX, data_dict["out_data_name"]), URL)
        if not cur_URL or urlparse(cur_URL).scheme not in ("http", "https", "ftp"):
            open(cur_filename, "w").write("")
            stop_err("The remote data source application has not sent back a URL parameter in the request.")

        # The following calls to urlopen() will use the default socket timeout
        try:
            if not URL_method or URL_method == "get":
                page = urlopen(cur_URL, timeout=DEFAULT_SOCKET_TIMEOUT)
            elif URL_method == "post":
                page = urlopen(cur_URL, urlencode(params).encode("utf-8"), timeout=DEFAULT_SOCKET_TIMEOUT)
            else:
                stop_err("Unsupported URL_method: %s" % URL_method)
        except Exception as e:
            stop_err("The remote data source application may be offline; please try again later. Error: %s" % str(e))
        if max_file_size:
            # Note: this size check trusts the server-reported Content-Length header.
            file_size = int(page.info().get("Content-Length", 0))
            if file_size > max_file_size:
                stop_err(
                    "The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server."
                    % (file_size, max_file_size)
                )
        try:
            cur_filename = stream_to_open_named_file(
                page,
                os.open(cur_filename, os.O_WRONLY | os.O_CREAT),
                cur_filename,
                source_encoding=get_charset_from_http_headers(page.headers),
            )
        except Exception as e:
            stop_err("Unable to fetch %s:\n%s" % (cur_URL, e))

        # Apply the same datatype-sniffing checks that the upload tool performs.
        if enhanced_handling:
            try:
                ext = sniff.handle_uploaded_dataset_file(filename, datatypes_registry, ext=data_dict["ext"])
            except Exception as e:
                stop_err(str(e))
            info = dict(type="dataset", dataset_id=data_dict["dataset_id"], ext=ext)

            json_file.write("%s\n" % dumps(info))
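            # A written metadata line might look like (hypothetical values):
            #   {"type": "dataset", "dataset_id": 42, "ext": "tabular"}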


if __name__ == "__main__":
    __main__()