Mercurial repository: genouest / baric_archive_toulouse
baric_archive.py @ 0:1ae6b80f1e03 (draft, default, tip)
"planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/baric_archive commit 6419e960f00c0e1c3950bad500487d3681797b40"
author: genouest
date: Fri, 04 Mar 2022 11:16:34 +0000
parents: (none)
children: (none)
#!/usr/bin/env python
# Retrieves data from external data source applications and stores in a dataset file.
# Data source application parameters are temporarily stored in the dataset file.
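#
# Typical invocation (argument order as read in __main__() below; the actual
# command line is built by the Galaxy data source tool wrapper and is shown
# here only for illustration):
#   baric_archive.py <output_dataset_file> <user_email> <user_id> [<max_file_size>]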
import os
import socket
import sys
import urllib
from json import dumps, loads
from urllib.parse import urlparse
from urllib.request import urlopen

from galaxy.datatypes import sniff
from galaxy.datatypes.registry import Registry
from galaxy.jobs import TOOL_PROVIDED_JOB_METADATA_FILE
from galaxy.util import get_charset_from_http_headers

GALAXY_PARAM_PREFIX = 'GALAXY'
GALAXY_ROOT_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
GALAXY_DATATYPES_CONF_FILE = os.path.join(GALAXY_ROOT_DIR, 'datatypes_conf.xml')


def stop_err(msg, json_file=None):
    sys.stderr.write(msg)
    # Need to write valid (but empty) json to avoid metadata collection failure
    # leading to "unable to finish job" error with no logs
    if json_file is not None:
        json_file.write("%s\n" % dumps({}))
    sys.exit(1)


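# The parameter file written by Galaxy is either a JSON document containing a
# 'param_dict' entry (plus 'output_data' and 'job_config'), or a legacy
# tab-separated "key<TAB>value" file; load_input_parameters() supports both.
# Illustrative JSON shape only, with key names taken from the code below:
#   {"param_dict": {"URL": "...", "URL_method": "get", ...},
#    "output_data": [{"out_data_name": "output", "ext": "auto", ...}],
#    "job_config": {"GALAXY_ROOT_DIR": "...", ...}}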
def load_input_parameters(filename, erase_file=True):
    datasource_params = {}
    try:
        json_params = loads(open(filename, 'r').read())
        datasource_params = json_params.get('param_dict')
    except Exception:
        json_params = None
        for line in open(filename, 'r'):
            try:
                line = line.strip()
                fields = line.split('\t')
                datasource_params[fields[0]] = fields[1]
            except Exception:
                continue
    if erase_file:
        open(filename, 'w').close()  # open file for writing, then close, removes params from file
    return json_params, datasource_params


def __main__():
    filename = sys.argv[1]

    user_email = sys.argv[2]
    user_id = sys.argv[3]

    try:
        max_file_size = int(sys.argv[4])
    except Exception:
        max_file_size = 0

    job_params, params = load_input_parameters(filename)
    if job_params is None:  # using an older tabular file
        enhanced_handling = False
        json_file = None  # no metadata file in legacy mode; stop_err() accepts None
        job_params = dict(param_dict=params)
        job_params['output_data'] = [dict(out_data_name='output',
                                          ext='auto',
                                          file_name=filename,
                                          extra_files_path=None)]
        job_params['job_config'] = dict(GALAXY_ROOT_DIR=GALAXY_ROOT_DIR, GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE, TOOL_PROVIDED_JOB_METADATA_FILE=TOOL_PROVIDED_JOB_METADATA_FILE)
    else:
        enhanced_handling = True
        json_file = open(job_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'w')  # specially named file for output junk to pass onto set metadata

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(root_dir=job_params['job_config']['GALAXY_ROOT_DIR'], config=job_params['job_config']['GALAXY_DATATYPES_CONF_FILE'])

    URL = params.get('URL', None)  # using exactly URL indicates that only one dataset is being downloaded
    export = params.get('export', None)
    userkey = params.get('userkey', 'none')
    URL_method = params.get('URL_method', None)

    # Guard against a missing URL so the per-dataset check below can report a
    # clean error instead of failing on string concatenation.
    if URL:
        URL = URL + "&userkey=" + userkey + "&user_email=" + user_email + "&user_id=" + user_id

    # urllib.request is built on http.client, which in turn uses the socket
    # module. By default sockets have no timeout and a request can hang
    # indefinitely, so set a global default timeout (in seconds) for all sockets.
    socket.setdefaulttimeout(600)

    for data_dict in job_params['output_data']:
        cur_filename = data_dict.get('file_name', filename)
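        # The data source may override the URL per output dataset with a
        # parameter named 'GALAXY|<out_data_name>|URL' (e.g. 'GALAXY|output|URL');
        # otherwise the global URL above is used.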
        cur_URL = params.get('%s|%s|URL' % (GALAXY_PARAM_PREFIX, data_dict['out_data_name']), URL)
        if not cur_URL or urlparse(cur_URL).scheme not in ('http', 'https', 'ftp'):
            open(cur_filename, 'w').write("")
            stop_err('The remote data source application has not sent back a URL parameter in the request.', json_file)

        # The following calls to urlopen() will use the above default timeout
        try:
            if not URL_method or URL_method == 'get':
                page = urlopen(cur_URL)
            elif URL_method == 'post':
                page = urlopen(cur_URL, urllib.parse.urlencode(params).encode("utf-8"))
        except Exception as e:
            stop_err('The remote data source application may be off line, please try again later. Error: %s' % str(e), json_file)
        if max_file_size:
            file_size = int(page.info().get('Content-Length', 0))
            if file_size > max_file_size:
                stop_err('The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % (file_size, max_file_size), json_file)
        # Handle files available locally: in export mode the remote application
        # returns a path suffix in the response body, and the dataset is symlinked
        # to export + <suffix> instead of being downloaded.
        if export:
            try:
                # The response body is bytes under Python 3; decode it before
                # joining it with the export path prefix.
                local_file = export + page.read().decode()
                os.remove(cur_filename)
                os.symlink(local_file, cur_filename)
            except Exception as e:
                stop_err('Unable to symlink %s to %s:\n%s' % (local_file, cur_filename, e), json_file)
        else:
            try:
                cur_filename = sniff.stream_to_open_named_file(page, os.open(cur_filename, os.O_WRONLY | os.O_CREAT), cur_filename, source_encoding=get_charset_from_http_headers(page.headers))
            except Exception as e:
                stop_err('Unable to fetch %s:\n%s' % (cur_URL, e), json_file)

        # Run the same import checks that the upload tool performs
        if enhanced_handling:
            try:
                ext = sniff.handle_uploaded_dataset_file(filename, datatypes_registry, ext=data_dict['ext'])
            except Exception as e:
                stop_err(str(e), json_file)
            info = dict(type='dataset',
                        dataset_id=data_dict['dataset_id'],
                        ext=ext)

            json_file.write("%s\n" % dumps(info))


if __name__ == "__main__":
    __main__()