Mercurial > repos > ecology > aquainfra_importer
comparison data_source.py @ 0:cc18c3bf2666 draft default tip
planemo upload for repository https://github.com/AquaINFRA/tools-ecology/tree/aquainfra_importer commit 2b586af4c987b6105356736f875d2517b25a8be6
| author | ecology |
|---|---|
| date | Tue, 14 May 2024 20:10:29 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cc18c3bf2666 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # Retrieves data from external data source applications and | |
| 3 # stores in a dataset file. | |
| 4 # | |
| 5 # Data source application parameters are temporarily stored | |
| 6 # in the dataset file. | |
| 7 import json | |
| 8 import os | |
| 9 import sys | |
| 10 from urllib.parse import urlencode, urlparse | |
| 11 from urllib.request import urlopen | |
| 12 | |
| 13 from galaxy.datatypes import sniff | |
| 14 from galaxy.datatypes.registry import Registry | |
| 15 from galaxy.util import ( | |
| 16 DEFAULT_SOCKET_TIMEOUT, | |
| 17 get_charset_from_http_headers, | |
| 18 stream_to_open_named_file, | |
| 19 ) | |
| 20 | |
# Prefix used for per-output parameter names ("GALAXY|<output>|URL").
GALAXY_PARAM_PREFIX = "GALAXY"

# The Galaxy root directory sits two levels above this script.
_here = os.path.dirname(__file__)
GALAXY_ROOT_DIR = os.path.realpath(os.path.join(_here, os.pardir, os.pardir))

# Default datatypes configuration file shipped at the Galaxy root.
GALAXY_DATATYPES_CONF_FILE = os.path.join(GALAXY_ROOT_DIR, "datatypes_conf.xml")
| 29 | |
def main():
    """Fetch data from a remote data-source application into dataset files.

    Usage: data_source.py <json_param_file> [max_file_size]

    The JSON parameter file (written by Galaxy) supplies ``param_dict``
    (request parameters, including the URL to fetch), ``output_data``
    (target dataset files) and ``job_config`` (Galaxy paths).  Each
    output's content is streamed to its dataset file, sniffed for a
    datatype, and the resulting extension is recorded in the
    tool-provided-metadata file.  Exits via sys.exit() with a message on
    any failure.
    """
    # Optional byte cap on downloads; 0 disables the size check.
    max_file_size = int(sys.argv[2]) if len(sys.argv) >= 3 else 0

    with open(sys.argv[1]) as fh:
        params = json.load(fh)

    out_data_name = params["output_data"][0]["out_data_name"]

    URL = params["param_dict"].get("URL", None)
    URL_method = params["param_dict"].get("URL_method", "get")

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=params["job_config"]["GALAXY_ROOT_DIR"],
        config=params["job_config"]["GALAXY_DATATYPES_CONF_FILE"],
    )

    for data_dict in params["output_data"]:
        cur_filename = data_dict["file_name"]
        # A per-output override ("GALAXY|<out_data_name>|URL") takes
        # precedence over the shared URL parameter.
        cur_URL = params["param_dict"].get(
            "%s|%s|URL" % (GALAXY_PARAM_PREFIX, data_dict["out_data_name"]),
            URL,
        )
        if not cur_URL or urlparse(cur_URL).scheme not in ("http", "https",
                                                           "ftp"):
            # Leave an empty dataset file behind so Galaxy still sees the
            # output, then abort.  Fix: use "with" so the handle is closed
            # instead of leaked.
            with open(cur_filename, "w"):
                pass
            sys.exit(
                "The remote data source application has not sent "
                "back a URL parameter in the request."
            )

        try:
            if URL_method == "get":
                page = urlopen(cur_URL, timeout=DEFAULT_SOCKET_TIMEOUT)
            elif URL_method == "post":
                param_dict = params["param_dict"]
                page = urlopen(
                    cur_URL,
                    urlencode(param_dict["incoming_request_params"]).encode(
                        "utf-8"
                    ),
                    timeout=DEFAULT_SOCKET_TIMEOUT,
                )
            else:
                # Fix: an unrecognized method previously left "page" unbound
                # and crashed later with a NameError.  SystemExit is not
                # caught by the "except Exception" below.
                sys.exit("Unsupported URL method: %s" % URL_method)
        except Exception as e:
            sys.exit(
                "The remote data source application may "
                "be off line, please try again later. Error: %s"
                % str(e)
            )

        if max_file_size:
            file_size = int(page.info().get("Content-Length", 0))
            if file_size > max_file_size:
                # Fix: the adjacent literals previously joined without a
                # space, producing "maximumallowed" in the message.
                sys.exit(
                    "The requested data size (%d bytes) exceeds the maximum "
                    "allowed size (%d bytes) on this server."
                    % (file_size, max_file_size)
                )

        try:
            # Stream the response body into the dataset file, honoring the
            # charset advertised by the server.
            cur_filename = stream_to_open_named_file(
                page,
                os.open(
                    cur_filename,
                    os.O_WRONLY | os.O_TRUNC | os.O_CREAT
                ),
                cur_filename,
                source_encoding=get_charset_from_http_headers(page.headers),
            )
        except Exception as e:
            sys.exit("Unable to fetch %s:\n%s" % (cur_URL, e))

        try:
            # Sniff the downloaded file to determine its Galaxy datatype.
            ext = sniff.handle_uploaded_dataset_file(
                cur_filename, datatypes_registry, ext=data_dict["ext"]
            )
        except Exception as e:
            sys.exit(str(e))

        tool_provided_metadata = {out_data_name: {"ext": ext}}

        with open(
            params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"], "w"
        ) as json_file:
            json.dump(tool_provided_metadata, json_file)


if __name__ == "__main__":
    main()
