mavedb_importer: comparison of data_source.py @ 0:fe2937ae3ee2 (draft)
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mave_tools/mavedb/ commit 13791ad3a67f107e7c5cfd925a2cbc0fb5656ab3
| author | bgruening |
|---|---|
| date | Wed, 13 Dec 2023 16:04:48 +0000 |
| parents | |
| children | |
| previous revision | this revision |
|---|---|
| -1:000000000000 | 0:fe2937ae3ee2 |
```python
#!/usr/bin/env python
# Retrieves data from external data source applications and stores in a dataset file.
# Data source application parameters are temporarily stored in the dataset file.
import os
import sys
from json import (
    dumps,
    loads,
)
from urllib.parse import (
    urlencode,
    urlparse,
)
from urllib.request import urlopen

from galaxy.datatypes import sniff
from galaxy.datatypes.registry import Registry
from galaxy.jobs import TOOL_PROVIDED_JOB_METADATA_FILE
from galaxy.util import (
    DEFAULT_SOCKET_TIMEOUT,
    get_charset_from_http_headers,
    stream_to_open_named_file,
)

GALAXY_PARAM_PREFIX = "GALAXY"
GALAXY_ROOT_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
GALAXY_DATATYPES_CONF_FILE = os.path.join(GALAXY_ROOT_DIR, "datatypes_conf.xml")


def stop_err(msg):
    sys.stderr.write(msg)
    sys.exit()


def load_input_parameters(filename, erase_file=True):
    datasource_params = {}
    try:
        json_params = loads(open(filename).read())
        datasource_params = json_params.get("param_dict")
    except Exception:
        json_params = None
        for line in open(filename):
            try:
                line = line.strip()
                fields = line.split("\t")
                datasource_params[fields[0]] = fields[1]
            except Exception:
                continue
    if erase_file:
        open(filename, "w").close()  # open file for writing, then close, removes params from file
    return json_params, datasource_params


def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int(sys.argv[2])
    except Exception:
        max_file_size = 0

    job_params, params = load_input_parameters(filename)
    if job_params is None:  # using an older tabular file
        enhanced_handling = False
        job_params = dict(param_dict=params)
        job_params["output_data"] = [
            dict(out_data_name="output", ext="data", file_name=filename, extra_files_path=None)
        ]
        job_params["job_config"] = dict(
            GALAXY_ROOT_DIR=GALAXY_ROOT_DIR,
            GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE,
            TOOL_PROVIDED_JOB_METADATA_FILE=TOOL_PROVIDED_JOB_METADATA_FILE,
        )
    else:
        enhanced_handling = True
        json_file = open(
            job_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"], "w"
        )  # specially named file for output junk to pass onto set metadata

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=job_params["job_config"]["GALAXY_ROOT_DIR"],
        config=job_params["job_config"]["GALAXY_DATATYPES_CONF_FILE"],
    )

    URL = params.get("URL", None)  # using exactly URL indicates that only one dataset is being downloaded
    URL_method = params.get("URL_method", None)

    for data_dict in job_params["output_data"]:
        cur_filename = data_dict.get("file_name", filename)
        cur_URL = params.get("%s|%s|URL" % (GALAXY_PARAM_PREFIX, data_dict["out_data_name"]), URL)
        if not cur_URL or urlparse(cur_URL).scheme not in ("http", "https", "ftp"):
            open(cur_filename, "w").write("")
            stop_err("The remote data source application has not sent back a URL parameter in the request.")

        # The following calls to urlopen() will use the above default timeout
        try:
            if not URL_method or URL_method == "get":
                page = urlopen(cur_URL, timeout=DEFAULT_SOCKET_TIMEOUT)
            elif URL_method == "post":
                page = urlopen(cur_URL, urlencode(params).encode("utf-8"), timeout=DEFAULT_SOCKET_TIMEOUT)
        except Exception as e:
            stop_err("The remote data source application may be off line, please try again later. Error: %s" % str(e))
        if max_file_size:
            file_size = int(page.info().get("Content-Length", 0))
            if file_size > max_file_size:
                stop_err(
                    "The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server."
                    % (file_size, max_file_size)
                )
        try:
            cur_filename = stream_to_open_named_file(
                page,
                os.open(cur_filename, os.O_WRONLY | os.O_CREAT),
                cur_filename,
                source_encoding=get_charset_from_http_headers(page.headers),
            )
        except Exception as e:
            stop_err("Unable to fetch %s:\n%s" % (cur_URL, e))

        # here import checks that upload tool performs
        if enhanced_handling:
            try:
                ext = sniff.handle_uploaded_dataset_file(filename, datatypes_registry, ext=data_dict["ext"])
            except Exception as e:
                stop_err(str(e))
            info = dict(type="dataset", dataset_id=data_dict["dataset_id"], ext=ext)

            json_file.write("%s\n" % dumps(info))


if __name__ == "__main__":
    __main__()
```
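
For context, the sketch below shows what the JSON parameter file passed to this script as `sys.argv[1]` could look like, based purely on the keys that `load_input_parameters()` and `__main__()` read. All file names, URLs and IDs are placeholders; in a real run Galaxy writes this file for the tool and supplies the paths itself.

```python
# Hypothetical sketch of the JSON parameter file handed to data_source.py as
# sys.argv[1]; the keys mirror what the script reads, the values are placeholders.
import json

job_params = {
    # request parameters sent back by the remote data source application
    "param_dict": {
        "URL": "https://example.org/download?format=csv",  # placeholder URL
        "URL_method": "get",  # or "post" to re-send the parameters in the request body
    },
    # one entry per output dataset declared by the tool
    "output_data": [
        {
            "out_data_name": "output",
            "ext": "data",
            "dataset_id": 1,            # placeholder id
            "file_name": "output.dat",  # where the downloaded data is written
            "extra_files_path": None,
        }
    ],
    # paths used to load the datatypes registry and to write job metadata
    "job_config": {
        "GALAXY_ROOT_DIR": "/path/to/galaxy",
        "GALAXY_DATATYPES_CONF_FILE": "/path/to/galaxy/datatypes_conf.xml",
        "TOOL_PROVIDED_JOB_METADATA_FILE": "galaxy.json",
    },
}

with open("galaxy_params.json", "w") as fh:  # assumed file name, for illustration
    json.dump(job_params, fh)

# The script would then be invoked roughly as:
#   python data_source.py galaxy_params.json 1048576
# where the optional second argument is the maximum allowed download size in
# bytes (0 or absent disables the size check).
```

If the parameter file is not valid JSON, `load_input_parameters()` falls back to the older tabular layout, one tab-separated name/value pair per line. A legacy file of that shape (again with made-up values) could be produced like this:

```python
# Hypothetical example of the legacy, pre-JSON parameter format parsed by the
# fallback branch of load_input_parameters().
legacy = "URL\thttps://example.org/download?format=csv\nURL_method\tget\n"
with open("galaxy_params.txt", "w") as fh:  # assumed file name, for illustration
    fh.write(legacy)
```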
