Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/galaxy/util/__init__.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """ | |
2 Utility functions used systemwide. | |
3 | |
4 """ | |
5 | |
6 import binascii | |
7 import collections | |
8 import errno | |
9 import importlib | |
10 import json | |
11 import os | |
12 import random | |
13 import re | |
14 import shutil | |
15 import smtplib | |
16 import stat | |
17 import string | |
18 import sys | |
19 import tempfile | |
20 import textwrap | |
21 import threading | |
22 import time | |
23 import unicodedata | |
24 import xml.dom.minidom | |
25 from datetime import datetime | |
26 from email.mime.multipart import MIMEMultipart | |
27 from email.mime.text import MIMEText | |
28 from functools import partial | |
29 from hashlib import md5 | |
30 from os.path import relpath | |
31 from urllib.parse import ( | |
32 urlencode, | |
33 urlparse, | |
34 urlsplit, | |
35 urlunsplit, | |
36 ) | |
37 | |
38 import requests | |
39 try: | |
40 import grp | |
41 except ImportError: | |
42 # For Pulsar on Windows (which does not use the function that uses grp) | |
43 grp = None # type: ignore | |
44 from boltons.iterutils import ( | |
45 default_enter, | |
46 remap, | |
47 ) | |
48 LXML_AVAILABLE = True | |
49 try: | |
50 from lxml import etree | |
51 except ImportError: | |
52 LXML_AVAILABLE = False | |
53 import xml.etree.ElementTree as etree # type: ignore | |
54 from requests.adapters import HTTPAdapter | |
55 from requests.packages.urllib3.util.retry import Retry | |
56 | |
57 try: | |
58 import docutils.core as docutils_core | |
59 import docutils.writers.html4css1 as docutils_html4css1 | |
60 except ImportError: | |
61 docutils_core = None | |
62 docutils_html4css1 = None | |
63 | |
64 try: | |
65 import uwsgi | |
66 except ImportError: | |
67 uwsgi = None | |
68 | |
69 from .custom_logging import get_logger | |
70 from .inflection import Inflector | |
71 from .path import safe_contains, safe_makedirs, safe_relpath # noqa: F401 | |
72 | |
# Shared singletons and aliases used throughout this module.
inflector = Inflector()

log = get_logger(__name__)
_lock = threading.RLock()  # module-wide lock used by the synchronized() decorator

namedtuple = collections.namedtuple

CHUNK_SIZE = 65536  # 64k

# Hard cap (and its human-readable form) for strings persisted to the database.
DATABASE_MAX_STRING_SIZE = 32768
DATABASE_MAX_STRING_SIZE_PRETTY = '32K'

# Magic-number prefixes identifying compressed content.
gzip_magic = b'\x1f\x8b'
bz2_magic = b'BZh'
DEFAULT_ENCODING = os.environ.get('GALAXY_DEFAULT_ENCODING', 'utf-8')
NULL_CHAR = b'\x00'
BINARY_CHARS = [NULL_CHAR]  # bytes treated as "binary" markers by is_binary()
FILENAME_VALID_CHARS = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Convenience permission masks (octal 644, 755 and 777 respectively).
RW_R__R__ = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
RWXR_XR_X = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
RWXRWXRWX = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

# etree here is either lxml or the stdlib fallback chosen at import time.
XML = etree.XML

defaultdict = collections.defaultdict
99 | |
100 | |
def remove_protocol_from_url(url):
    """Strip any scheme prefix (``http://``, ``https://``, ...) from *url*.

    ``None`` is returned unchanged; trailing slashes are removed as well.
    """
    if url is None:
        return None
    # Keep everything after the first '://' if one occurs past index 0.
    remainder = url.split('://')[1] if url.find('://') > 0 else url
    return remainder.rstrip('/')
114 | |
115 | |
def is_binary(value):
    """Report whether *value* looks binary, i.e. contains a NUL byte.

    Mirrors the default heuristic of tools such as ``grep``.  It may fail
    for utf-16 content, but so would ASCII encoding.
    >>> is_binary( string.printable )
    False
    >>> is_binary( b'\\xce\\x94' )
    False
    >>> is_binary( b'\\x00' )
    True
    """
    data = smart_str(value)
    return any(marker in data for marker in BINARY_CHARS)
132 | |
133 | |
def is_uuid(value):
    """Return ``True`` when ``str(value)`` starts with a canonical lowercase
    hex UUID (8-4-4-4-12 groups), ``False`` otherwise.

    >>> is_uuid( "123e4567-e89b-12d3-a456-426655440000" )
    True
    >>> is_uuid( "0x3242340298902834" )
    False
    """
    pattern = re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
    return pattern.match(str(value)) is not None
147 | |
148 | |
def directory_hash_id(id):
    """Map an object id onto the list of directory names used to shard files.

    Numeric ids are zero-padded to a multiple of three digits, the last three
    digits are dropped (1000 files per leaf directory), and the rest is split
    into three-digit chunks.  UUID-style ids use their first three characters
    as individual levels.

    >>> directory_hash_id( 100 )
    ['000']
    >>> directory_hash_id( "90000" )
    ['090']
    >>> directory_hash_id("777777777")
    ['000', '777', '777']
    >>> directory_hash_id("135ee48a-4f51-470c-ae2f-ce8bd78799e6")
    ['1', '3', '5']
    """
    text = str(id)
    # Shortcut -- ids 0-999 all live under ../000/
    if len(text) < 4:
        return ["000"]
    if is_uuid(text):
        # UUID: one directory level per leading character
        return [char for char in text[:3]]
    # Pad with zeros until a multiple of three digits
    padded = "0" * (3 - len(text) % 3) + text
    # Drop the last three digits -- 1000 files per directory
    padded = padded[:-3]
    # Break into chunks of three
    return [padded[start:start + 3] for start in range(0, len(padded), 3)]
176 | |
177 | |
def get_charset_from_http_headers(headers, default=None):
    """Extract the charset token from a Content-Type header, or *default*."""
    content_type = headers.get('content-type', None)
    if content_type and 'charset=' in content_type:
        charset = content_type.split('charset=')[-1].split(';')[0].strip()
        if charset:
            return charset
    return default
185 | |
186 | |
def synchronized(func):
    """Decorator serializing every call to *func* through the module lock."""
    def caller(*args, **kwargs):
        # The re-entrant module lock blocks until available, then is always
        # released, exactly like acquire(True)/release in a finally clause.
        with _lock:
            return func(*args, **kwargs)
    return caller
196 | |
197 | |
def iter_start_of_line(fh, chunk_size=None):
    """Yield successive ``fh.readline(chunk_size)`` results until EOF ('')."""
    while True:
        piece = fh.readline(chunk_size)
        if piece == "":
            break
        yield piece
203 | |
204 | |
def file_reader(fp, chunk_size=CHUNK_SIZE):
    """Yield the open file object *fp* in *chunk_size* pieces (default 64k).

    The file is closed once it is exhausted.
    """
    while True:
        piece = fp.read(chunk_size)
        if not piece:
            break
        yield piece
    fp.close()
213 | |
214 | |
def unique_id(KEY_SIZE=128):
    """Generate a pseudo-random unique id as a 32-character md5 hex digest.

    >>> ids = [ unique_id() for i in range(1000) ]
    >>> len(set(ids))
    1000
    """
    # Hash the decimal representation of KEY_SIZE random bits.
    seed = str(random.getrandbits(KEY_SIZE)).encode("UTF-8")
    return md5(seed).hexdigest()
225 | |
226 | |
def parse_xml(fname, strip_whitespace=True, remove_comments=True):
    """Parse the XML file *fname* and return the resulting tree.

    When *strip_whitespace* is true, surrounding whitespace is stripped from
    every element's text and tail.  Comment removal only needs to be requested
    explicitly under lxml; the stdlib parser drops comments regardless.
    Raises OSError for missing files and a parse error for malformed XML.
    """
    parser = None
    if remove_comments and LXML_AVAILABLE:
        # If using stdlib etree comments are always removed,
        # but lxml doesn't do this by default
        parser = etree.XMLParser(remove_comments=remove_comments)
    try:
        tree = etree.parse(fname, parser=parser)
        root = tree.getroot()
        if strip_whitespace:
            # Normalize text/tail on every element in document order.
            for elem in root.iter('*'):
                if elem.text is not None:
                    elem.text = elem.text.strip()
                if elem.tail is not None:
                    elem.tail = elem.tail.strip()
    except OSError as e:
        if e.errno is None and not os.path.exists(fname):
            # lxml doesn't set errno
            e.errno = errno.ENOENT
        raise
    except etree.ParseError:
        log.exception("Error parsing file %s", fname)
        raise
    return tree
252 | |
253 | |
def parse_xml_string(xml_string, strip_whitespace=True):
    """Parse *xml_string* and return the root element.

    Strings carrying an encoding declaration (rejected by lxml) are retried
    as utf-8 bytes.  With *strip_whitespace*, surrounding whitespace is
    stripped from every element's text and tail.
    """
    try:
        root = etree.fromstring(xml_string)
    except ValueError as e:
        # lxml refuses str input that declares its own encoding; re-encode.
        if 'strings with encoding declaration are not supported' not in unicodify(e):
            raise e
        root = etree.fromstring(xml_string.encode('utf-8'))
    if strip_whitespace:
        for node in root.iter('*'):
            if node.text is not None:
                node.text = node.text.strip()
            if node.tail is not None:
                node.tail = node.tail.strip()
    return root
269 | |
270 | |
def xml_to_string(elem, pretty=False):
    """Serialize an xml element (or comment node) to unicode text.

    ``None`` yields the empty string.  With ``pretty=True`` the output is
    re-indented via minidom and blank/noise-only lines are dropped.
    """
    try:
        xml_str = '' if elem is None else etree.tostring(elem, encoding='unicode')
    except TypeError as e:
        # We assume a TypeError means a comment node; render it by hand.
        if not hasattr(elem, 'text'):
            raise e
        return "<!-- %s -->\n" % elem.text
    if not (xml_str and pretty):
        return xml_str
    pretty_string = xml.dom.minidom.parseString(xml_str).toprettyxml(indent='    ')
    kept_lines = [ln for ln in pretty_string.split('\n') if not re.match(r'^[\s\\nb\']*$', ln)]
    return "\n".join(kept_lines)
290 | |
291 | |
def xml_element_compare(elem1, elem2):
    """Compare two xml elements (or their dict forms) for equality."""
    d1 = elem1 if isinstance(elem1, dict) else xml_element_to_dict(elem1)
    d2 = elem2 if isinstance(elem2, dict) else xml_element_to_dict(elem2)
    return d1 == d2
298 | |
299 | |
300 def xml_element_list_compare(elem_list1, elem_list2): | |
301 return [xml_element_to_dict(elem) for elem in elem_list1] == [xml_element_to_dict(elem) for elem in elem_list2] | |
302 | |
303 | |
def xml_element_to_dict(elem):
    """Convert an xml element into a nested dict keyed by tag.

    Attributes appear as ``@name`` keys, repeated child tags collapse into
    lists, and text content becomes either the value itself or a ``#text``
    entry when it must share the mapping with children or attributes.
    """
    rval = {}
    if elem.attrib:
        rval[elem.tag] = {}
    else:
        rval[elem.tag] = None

    sub_elems = list(elem)
    if sub_elems:
        if rval[elem.tag] is None:
            # Fix: an element with children but no attributes still needs a
            # mapping to hold the child entries (previously raised TypeError
            # because rval[elem.tag] was left as None).
            rval[elem.tag] = {}
        sub_elem_dict = dict()
        for sub_sub_elem_dict in map(xml_element_to_dict, sub_elems):
            for key, value in sub_sub_elem_dict.items():
                sub_elem_dict.setdefault(key, []).append(value)
        for key, value in sub_elem_dict.items():
            # A single occurrence is stored bare; repeats become a list.
            rval[elem.tag][key] = value[0] if len(value) == 1 else value
    if elem.attrib:
        for key, value in elem.attrib.items():
            rval[elem.tag]["@%s" % key] = value

    if elem.text:
        text = elem.text.strip()
        if text and sub_elems or elem.attrib:
            # Text must coexist with other entries under a '#text' key.
            rval[elem.tag]['#text'] = text
        else:
            rval[elem.tag] = text

    return rval
336 | |
337 | |
def pretty_print_xml(elem, level=0):
    """Indent *elem* in place (four spaces per level) and return it."""
    pad = '    '
    indent = "\n" + level * pad
    if len(elem):
        # Container element: push children onto deeper-indented lines.
        if not elem.text or not elem.text.strip():
            elem.text = indent + pad + pad
        if not elem.tail or not elem.tail.strip():
            elem.tail = indent
        for child in elem:
            pretty_print_xml(child, level + 1)
        if not elem.tail or not elem.tail.strip():
            elem.tail = indent
    elif level and (not elem.tail or not elem.tail.strip()):
        # Leaf below the root: line it up one pad deeper than its parent.
        elem.tail = indent + pad
    return elem
354 | |
355 | |
def get_file_size(value, default=None):
    """Best-effort size of *value*: a path, a named file object, or any
    seekable stream; *default* when none of those strategies work."""
    try:
        # Treat value as a filesystem path first.
        return os.path.getsize(value)
    except Exception:
        pass
    try:
        # A file-like object may expose its underlying path via .name.
        return os.path.getsize(value.name)
    except Exception:
        pass
    try:
        # Seek to the end, note the offset, and restore the position.
        position = value.tell()
        value.seek(0, 2)
        size = value.tell()
        value.seek(position)
        return size
    except Exception:
        return default
375 | |
376 | |
def shrink_stream_by_size(value, size, join_by=b"..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
    """
    Shrinks bytes read from `value` to `size`.

    `value` needs to implement tell/seek, so files need to be opened in binary mode.
    Returns unicode text with invalid characters replaced.

    When the stream exceeds `size`, its middle is replaced with `join_by`.
    If `size` cannot even fit `join_by` plus one byte per side, either the
    beginning or the end is returned (per the flags) or ValueError is raised.
    """
    rval = b''
    join_by = smart_str(join_by)
    if get_file_size(value) > size:
        start = value.tell()
        len_join_by = len(join_by)
        min_size = len_join_by + 2
        if size < min_size:
            # Too small for head + join_by + tail; fall back per the flags.
            if beginning_on_size_error:
                rval = value.read(size)
                value.seek(start)
                return rval
            elif end_on_size_error:
                value.seek(-size, 2)
                rval = value.read(size)
                value.seek(start)
                return rval
            raise ValueError('With the provided join_by value (%s), the minimum size value is %i.' % (join_by, min_size))
        # Split the byte budget between the two ends; the odd byte goes to
        # whichever side left_larger selects.
        left_index = right_index = int((size - len_join_by) / 2)
        if left_index + right_index + len_join_by < size:
            if left_larger:
                left_index += 1
            else:
                right_index += 1
        # Head from the current position, tail via a seek from the end.
        rval = value.read(left_index) + join_by
        value.seek(-right_index, 2)
        rval += value.read(right_index)
    else:
        # Stream already fits: read it through in chunks.
        while True:
            data = value.read(CHUNK_SIZE)
            if not data:
                break
            rval += data
    return unicodify(rval)
417 | |
418 | |
def shrink_and_unicodify(stream):
    """Unicodify *stream* (stripping NULs) and clamp it to the database's
    maximum string size, eliding the middle with a '..' marker if needed."""
    text = unicodify(stream, strip_null=True) or ''
    if len(text) <= DATABASE_MAX_STRING_SIZE:
        return text
    return shrink_string_by_size(text,
                                 DATABASE_MAX_STRING_SIZE,
                                 join_by="\n..\n",
                                 left_larger=True,
                                 beginning_on_size_error=True)
428 | |
429 | |
def shrink_string_by_size(value, size, join_by="..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
    """Clamp *value* to *size* characters by replacing its middle with *join_by*.

    When *size* cannot fit *join_by* plus one character on each side, either
    the beginning or the end is returned (per the flags), otherwise a
    ``ValueError`` is raised.
    """
    if len(value) <= size:
        return value
    len_join_by = len(join_by)
    min_size = len_join_by + 2
    if size < min_size:
        if beginning_on_size_error:
            return value[:size]
        if end_on_size_error:
            return value[-size:]
        raise ValueError('With the provided join_by value (%s), the minimum size value is %i.' % (join_by, min_size))
    # Split the character budget between the two ends; the odd character
    # goes to whichever side left_larger selects.
    left_len = right_len = (size - len_join_by) // 2
    if left_len + right_len + len_join_by < size:
        if left_larger:
            left_len += 1
        else:
            right_len += 1
    return "{}{}{}".format(value[:left_len], join_by, value[-right_len:])
448 | |
449 | |
450 def pretty_print_time_interval(time=False, precise=False, utc=False): | |
451 """ | |
452 Get a datetime object or a int() Epoch timestamp and return a | |
453 pretty string like 'an hour ago', 'Yesterday', '3 months ago', | |
454 'just now', etc | |
455 credit: http://stackoverflow.com/questions/1551382/user-friendly-time-format-in-python | |
456 """ | |
457 if utc: | |
458 now = datetime.utcnow() | |
459 else: | |
460 now = datetime.now() | |
461 if type(time) is int: | |
462 diff = now - datetime.fromtimestamp(time) | |
463 elif isinstance(time, datetime): | |
464 diff = now - time | |
465 elif isinstance(time, str): | |
466 try: | |
467 time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%f") | |
468 except ValueError: | |
469 # MySQL may not support microseconds precision | |
470 time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S") | |
471 diff = now - time | |
472 else: | |
473 diff = now - now | |
474 second_diff = diff.seconds | |
475 day_diff = diff.days | |
476 | |
477 if day_diff < 0: | |
478 return '' | |
479 | |
480 if precise: | |
481 if day_diff == 0: | |
482 if second_diff < 10: | |
483 return "just now" | |
484 if second_diff < 60: | |
485 return str(second_diff) + " seconds ago" | |
486 if second_diff < 120: | |
487 return "a minute ago" | |
488 if second_diff < 3600: | |
489 return str(second_diff / 60) + " minutes ago" | |
490 if second_diff < 7200: | |
491 return "an hour ago" | |
492 if second_diff < 86400: | |
493 return str(second_diff / 3600) + " hours ago" | |
494 if day_diff == 1: | |
495 return "yesterday" | |
496 if day_diff < 7: | |
497 return str(day_diff) + " days ago" | |
498 if day_diff < 31: | |
499 return str(day_diff / 7) + " weeks ago" | |
500 if day_diff < 365: | |
501 return str(day_diff / 30) + " months ago" | |
502 return str(day_diff / 365) + " years ago" | |
503 else: | |
504 if day_diff == 0: | |
505 return "today" | |
506 if day_diff == 1: | |
507 return "yesterday" | |
508 if day_diff < 7: | |
509 return "less than a week" | |
510 if day_diff < 31: | |
511 return "less than a month" | |
512 if day_diff < 365: | |
513 return "less than a year" | |
514 return "a few years ago" | |
515 | |
516 | |
def pretty_print_json(json_data, is_json_string=False):
    """Render *json_data* (an object, or JSON text when *is_json_string*)
    as sorted, 4-space-indented JSON."""
    obj = json.loads(json_data) if is_json_string else json_data
    return json.dumps(obj, sort_keys=True, indent=4)
521 | |
522 | |
# characters that are valid in sanitized text and pass through unchanged
valid_chars = set(string.ascii_letters + string.digits + " -=_.()/+*^,:?!")

# characters that are allowed but need to be escaped to a reversible
# token form (see restore_text for the inverse mapping)
mapped_chars = {'>': '__gt__',
                '<': '__lt__',
                "'": '__sq__',
                '"': '__dq__',
                '[': '__ob__',
                ']': '__cb__',
                '{': '__oc__',
                '}': '__cc__',
                '@': '__at__',
                '\n': '__cn__',
                '\r': '__cr__',
                '\t': '__tc__',
                '#': '__pd__'}
540 | |
541 | |
def restore_text(text, character_map=mapped_chars):
    """Reverse the escaping applied by sanitize_text, using *character_map*."""
    if not text:
        return text
    restored = text
    for original, escaped in character_map.items():
        restored = restored.replace(escaped, original)
    return restored
549 | |
550 | |
def sanitize_text(text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """
    Restricts the characters that are allowed in text; accepts both strings
    and lists of strings; non-string entities will be cast to strings.

    Characters outside *valid_characters* are escaped via *character_map*
    when possible, otherwise replaced with *invalid_character*.
    """
    if isinstance(text, list):
        return [sanitize_text(x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character) for x in text]
    if not isinstance(text, str):
        text = smart_str(text)
    # Fix: forward invalid_character — it was previously dropped, so a
    # caller-supplied replacement character was silently ignored for strings.
    return _sanitize_text_helper(text, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
561 | |
562 | |
def _sanitize_text_helper(text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Restricts the characters that are allowed in a string"""
    def transform(ch):
        if ch in valid_characters:
            return ch
        # Escape known-unsafe characters; anything else becomes the
        # invalid_character marker (makes debugging easier).
        return character_map.get(ch, invalid_character)
    return ''.join(transform(ch) for ch in text)
575 | |
576 | |
def sanitize_lists_to_string(values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Sanitize a value; lists are sanitized recursively and comma-joined."""
    if not isinstance(values, list):
        return sanitize_text(values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
    sanitized = [sanitize_lists_to_string(entry,
                                          valid_characters=valid_characters,
                                          character_map=character_map,
                                          invalid_character=invalid_character)
                 for entry in values]
    return ",".join(sanitized)
589 | |
590 | |
def sanitize_param(value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Clean an incoming parameter: a string, or a list of strings.

    Raises ``Exception`` for any other type.
    """
    if isinstance(value, str):
        return sanitize_text(value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
    if isinstance(value, list):
        return [sanitize_text(entry, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character) for entry in value]
    raise Exception('Unknown parameter type (%s)' % (type(value)))
599 | |
600 | |
# characters that may appear unchanged in a sanitized filename portion
valid_filename_chars = set(string.ascii_letters + string.digits + '_.')
# sanitized results that are unusable as a filename portion
invalid_filenames = ['', '.', '..']
603 | |
604 | |
def sanitize_for_filename(text, default=None):
    """
    Restricts the characters that are allowed in a filename portion; Returns default value or a unique id string if result is not a valid name.
    Method is overly aggressive to minimize possible complications, but a maximum length is not considered.
    """
    cleaned = ''.join(ch if ch in valid_filename_chars else '_' for ch in text)
    if cleaned not in invalid_filenames:
        return cleaned
    if default is None:
        # Fall back to a sanitized unique id so callers always get a name.
        return sanitize_for_filename(str(unique_id()))
    return default
622 | |
623 | |
def find_instance_nested(item, instances, match_key=None):
    """
    Recursively find instances from lists, dicts, tuples.

    `instances` should be a tuple of valid instances.
    If match_key is given the key must match for an instance to be added to the list of found instances.
    """
    matches = []

    def visit(path, key, value):
        # Collect matching values (optionally filtered by key); leave the
        # structure itself untouched.
        if isinstance(value, instances) and (match_key is None or match_key == key):
            matches.append(value)
        return key, value

    def enter(path, key, value):
        # Do not descend into a matched instance itself.
        if isinstance(value, instances):
            return None, False
        return default_enter(path, key, value)

    remap(item, visit, reraise_visit=False, enter=enter)

    return matches
648 | |
649 | |
def mask_password_from_url(url):
    """
    Masks out passwords from connection urls like the database connection in galaxy.ini

    >>> mask_password_from_url( 'sqlite+postgresql://user:password@localhost/' )
    'sqlite+postgresql://user:********@localhost/'
    >>> mask_password_from_url( 'amqp://user:amqp@localhost' )
    'amqp://user:********@localhost'
    >>> mask_password_from_url( 'amqp://localhost')
    'amqp://localhost'
    """
    parts = urlsplit(url)
    if not parts.password:
        return url
    if url.count(parts.password) == 1:
        # Plain string replacement leaves the rest of the url untouched.
        return url.replace(parts.password, "********")
    # The password text appears elsewhere too; rebuild just the netloc.
    # This can normalize other parts of the url, so it is only the fallback.
    masked_netloc = parts.netloc.replace(f"{parts.username}:{parts.password}", '%s:********' % parts.username)
    return urlunsplit(parts._replace(netloc=masked_netloc))
672 | |
673 | |
def ready_name_for_url(raw_name):
    """ General method to convert a string (i.e. object name) to a URL-ready
    slug.

    >>> ready_name_for_url( "My Cool Object" )
    'My-Cool-Object'
    >>> ready_name_for_url( "!My Cool Object!" )
    'My-Cool-Object'
    >>> ready_name_for_url( "Hello₩◎ґʟⅾ" )
    'Hello'
    """
    # Whitespace runs become single dashes.
    slug = re.sub(r"\s+", "-", raw_name)
    # Drop everything that is not ASCII alphanumeric or a dash.
    slug = re.sub(r"[^a-zA-Z0-9\-]", "", slug)
    # Trim a single trailing dash, if present.
    return slug[:-1] if slug.endswith('-') else slug
694 | |
695 | |
def which(file):
    """Locate *file* on the PATH and return its full path, or ``None``.

    Portability fix over the original: uses ``os.pathsep`` and
    ``os.path.join`` rather than hard-coded ``:`` and ``/``, and tolerates
    an unset PATH instead of raising KeyError.
    """
    # http://stackoverflow.com/questions/5226958/which-equivalent-function-in-python
    for path in os.environ.get("PATH", "").split(os.pathsep):
        candidate = os.path.join(path, file)
        if os.path.exists(candidate):
            return candidate

    return None
703 | |
704 | |
def in_directory(file, directory, local_path_module=os.path):
    """
    Return true, if the common prefix of both is equal to directory
    e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b.
    This function isn't used exclusively for security checks, but if it is
    used for such checks it is assumed that ``directory`` is a "trusted" path -
    supplied by Galaxy or by the admin and ``file`` is something generated by
    a tool, configuration, external web server, or user supplied input.

    local_path_module is used by Pulsar to check Windows paths while running on
    a POSIX-like system.

    >>> base_dir = tempfile.mkdtemp()
    >>> safe_dir = os.path.join(base_dir, "user")
    >>> os.mkdir(safe_dir)
    >>> good_file = os.path.join(safe_dir, "1")
    >>> with open(good_file, "w") as f: _ = f.write("hello")
    >>> in_directory(good_file, safe_dir)
    True
    >>> in_directory("/other/file/is/here.txt", safe_dir)
    False
    >>> unsafe_link = os.path.join(safe_dir, "2")
    >>> os.symlink("/other/file/bad.fasta", unsafe_link)
    >>> in_directory(unsafe_link, safe_dir)
    False
    """
    if local_path_module != os.path:
        # Pulsar may validate Windows-style paths from a POSIX host; load
        # the safe_contains implementation matching that path flavour.
        flavour = importlib.import_module('galaxy.util.path.%s' % local_path_module.__name__)
        _contains = flavour.safe_contains
    else:
        directory = os.path.realpath(directory)
        _contains = safe_contains
    return _contains(directory, file)
737 | |
738 | |
def merge_sorted_iterables(operator, *iterables):
    """Lazily merge pre-sorted iterables into one sorted stream.

    *operator* extracts the comparison key from each element.

    >>> operator = lambda x: x
    >>> list( merge_sorted_iterables( operator, [1,2,3], [4,5] ) )
    [1, 2, 3, 4, 5]
    >>> list( merge_sorted_iterables( operator, [4, 5], [1,2,3] ) )
    [1, 2, 3, 4, 5]
    >>> list( merge_sorted_iterables( operator, [1, 4, 5], [2], [3] ) )
    [1, 2, 3, 4, 5]
    """
    first = iterables[0]
    if len(iterables) == 1:
        yield from first
    else:
        # Fold right: merge the head with the merge of the remainder.
        remainder = merge_sorted_iterables(operator, *iterables[1:])
        yield from __merge_two_sorted_iterables(operator, iter(first), remainder)
759 | |
760 | |
def __merge_two_sorted_iterables(operator, iterable1, iterable2):
    """Lazily merge two sorted iterators, comparing via key function *operator*.

    Equal keys favor ``iterable1`` (strict ``<`` test below).  Once either
    side is exhausted, the buffered element and the remainder of both sides
    are flushed in order.
    """
    unset = object()  # sentinel distinguishing "no buffered value" from any real value
    continue_merge = True
    next_1 = unset
    next_2 = unset
    while continue_merge:
        try:
            # Refill whichever buffer was consumed on the previous pass.
            if next_1 is unset:
                next_1 = next(iterable1)
            if next_2 is unset:
                next_2 = next(iterable2)
            # Strict '<' so equal keys yield the first iterable's element.
            if operator(next_2) < operator(next_1):
                yield next_2
                next_2 = unset
            else:
                yield next_1
                next_1 = unset
        except StopIteration:
            # One side ran dry; stop interleaving and drain what remains.
            continue_merge = False
    if next_1 is not unset:
        yield next_1
    if next_2 is not unset:
        yield next_2
    yield from iterable1
    yield from iterable2
786 | |
787 | |
class Params:
    """
    Stores and 'sanitizes' parameters. Alphanumeric characters and the
    non-alphanumeric ones that are deemed safe are let to pass through (see L{valid_chars}).
    Some non-safe characters are escaped to safe forms for example C{>} becomes C{__gt__}
    (see L{mapped_chars}). All other characters are replaced with C{X}.

    Operates on string or list values only (HTTP parameters).

    >>> values = { 'status':'on', 'symbols':[ 'alpha', '<>', '$rm&#!' ] }
    >>> par = Params(values)
    >>> par.status
    'on'
    >>> par.value == None # missing attributes return None
    True
    >>> par.get('price', 0)
    0
    >>> par.symbols # replaces unknown symbols with X
    ['alpha', '__lt____gt__', 'XrmX__pd__!']
    >>> sorted(par.flatten()) # flattening to a list
    [('status', 'on'), ('symbols', 'XrmX__pd__!'), ('symbols', '__lt____gt__'), ('symbols', 'alpha')]
    """

    # is NEVER_SANITIZE required now that sanitizing for tool parameters can be controlled on a per parameter basis and occurs via InputValueWrappers?
    NEVER_SANITIZE = ['file_data', 'url_paste', 'URL', 'filesystem_paths']

    def __init__(self, params, sanitize=True):
        """Store *params* as instance attributes, sanitizing values by default."""
        if sanitize:
            for key, value in params.items():
                # sanitize check both ungrouped and grouped parameters by
                # name. Anything relying on NEVER_SANITIZE should be
                # changed to not require this and NEVER_SANITIZE should be
                # removed.
                if (value is not None and key not in self.NEVER_SANITIZE
                        and True not in [key.endswith("|%s" % nonsanitize_parameter) for
                                         nonsanitize_parameter in self.NEVER_SANITIZE]):
                    self.__dict__[key] = sanitize_param(value)
                else:
                    self.__dict__[key] = value
        else:
            self.__dict__.update(params)

    def flatten(self):
        """
        Creates a tuple list from a dict with a tuple/value pair for every value that is a list
        """
        flat = []
        for key, value in self.__dict__.items():
            if isinstance(value, list):
                # One (key, element) pair per list element.
                for v in value:
                    flat.append((key, v))
            else:
                flat.append((key, value))
        return flat

    def __getattr__(self, name):
        """This is here to ensure that we get None for non existing parameters"""
        return None

    def get(self, key, default):
        """Dict-style access with an explicit default."""
        return self.__dict__.get(key, default)

    def __str__(self):
        return '%s' % self.__dict__

    def __len__(self):
        return len(self.__dict__)

    def __iter__(self):
        return iter(self.__dict__)

    def update(self, values):
        """Merge *values* into the stored parameters without sanitizing."""
        self.__dict__.update(values)
861 | |
862 | |
def rst_to_html(s, error=False):
    """Convert a blob of reStructuredText to HTML.

    Warnings emitted by docutils are logged; when *error* is true they are
    raised as exceptions instead.  Raises if docutils is not installed.
    """
    log = get_logger("docutils")

    if docutils_core is None:
        raise Exception("Attempted to use rst_to_html but docutils unavailable.")

    class FakeStream:
        # Stand-in for docutils' warning stream: routes messages to the
        # logger (or raises, when error=True) instead of printing them.
        def write(self, str):
            if len(str) > 0 and not str.isspace():
                if error:
                    raise Exception(str)
                log.warning(str)

    settings_overrides = {
        "embed_stylesheet": False,
        "template": os.path.join(os.path.dirname(__file__), "docutils_template.txt"),
        "warning_stream": FakeStream(),
        "doctitle_xform": False,  # without option, very different rendering depending on
                                  # number of sections in help content.
    }

    return unicodify(docutils_core.publish_string(
        s, writer=docutils_html4css1.Writer(),
        settings_overrides=settings_overrides))
888 | |
889 | |
def xml_text(root, name=None):
    """Return the text of *root* (or of its attribute/child *name*), with
    embedded newlines removed and surrounding whitespace stripped; '' if
    nothing is found."""
    if name is None:
        elem = root
    else:
        # An attribute of the given name wins over a child element.
        attr_val = root.get(name)
        if attr_val:
            return attr_val
        elem = root.find(name)
    if elem is not None and elem.text:
        return ''.join(elem.text.splitlines()).strip()
    # No luck, return empty string
    return ''
906 | |
907 | |
def parse_resource_parameters(resource_param_file):
    """Code shared between jobs and workflows for reading resource parameter configuration files.

    Returns a dict mapping each parameter name to its ``<param>`` element;
    empty when the file does not exist.

    TODO: Allow YAML in addition to XML.
    """
    resource_parameters = {}
    if os.path.exists(resource_param_file):
        root = parse_xml(resource_param_file).getroot()
        resource_parameters = {param.get("name"): param for param in root.findall("param")}
    return resource_parameters
922 | |
923 | |
# asbool implementation pulled from PasteDeploy
truthy = frozenset({'true', 'yes', 'on', 'y', 't', '1'})  # strings asbool() accepts as True
falsy = frozenset({'false', 'no', 'off', 'n', 'f', '0'})  # strings asbool() accepts as False
927 | |
928 | |
def asbool(obj):
    """Coerce a truthy/falsy string ('yes', 'off', '1', ...) or any other
    object to a bool; unrecognized strings raise ``ValueError``."""
    if not isinstance(obj, str):
        return bool(obj)
    normalized = obj.strip().lower()
    if normalized in truthy:
        return True
    if normalized in falsy:
        return False
    raise ValueError("String is not true/false: %r" % obj)
939 | |
940 | |
def string_as_bool(string):
    """Return True when *string* (case-insensitive) is 'true', 'yes', 'on' or '1', else False."""
    return str(string).lower() in ('true', 'yes', 'on', '1')
946 | |
947 | |
def string_as_bool_or_none(string):
    """
    Returns True, None or False based on the argument:
        True if passed True, 'True', 'Yes', or 'On'
        None if passed None, 'None' or 'Null'
        False otherwise

    Note: string comparison is case-insensitive so lowercase versions of those
    function equivalently.
    """
    normalized = str(string).lower()
    if normalized in ('true', 'yes', 'on'):
        return True
    if normalized in ('none', 'null'):
        return None
    return False
965 | |
966 | |
def listify(item, do_strip=False):
    """
    Make a single item a single item list.

    If *item* is a string, it is split on comma (``,``) characters to produce the list. Optionally, if *do_strip* is
    true, any extra whitespace around the split items is stripped.

    If *item* is a list it is returned unchanged. If *item* is a tuple, it is converted to a list and returned. If
    *item* evaluates to False, an empty list is returned.

    :type item: object
    :param item: object to make a list from
    :type do_strip: bool
    :param do_strip: strip whitespaces from around split items, if set to ``True``
    :rtype: list
    :returns: The input as a list
    """
    if not item:
        return []
    if isinstance(item, list):
        return item
    if isinstance(item, tuple):
        return list(item)
    if isinstance(item, str) and ',' in item:
        tokens = item.split(',')
        return [token.strip() for token in tokens] if do_strip else tokens
    return [item]
997 | |
998 | |
def commaify(amount):
    """Insert thousands separators into a numeric string, e.g. '1234567' -> '1,234,567'."""
    # Repeatedly insert one comma from the left until the string stops changing.
    while True:
        formatted = re.sub(r"^(-?\d+)(\d{3})", r'\g<1>,\g<2>', amount)
        if formatted == amount:
            return formatted
        amount = formatted
1006 | |
1007 | |
def roundify(amount, sfs=2):
    """
    Take a number in string form and truncate to 'sfs' significant figures.
    """
    excess = len(amount) - sfs
    if excess <= 0:
        return amount
    return amount[:sfs] + '0' * excess
1016 | |
1017 | |
def unicodify(value, encoding=DEFAULT_ENCODING, error='replace', strip_null=False, log_exception=True):
    """Coerce *value* to a unicode ``str``, or return None unchanged.

    bytes/bytearray values are decoded with *encoding* (decode failures
    handled according to *error*); any other non-string value is converted
    via ``str()`` first.  With *strip_null*, NUL characters are removed
    from the result.

    >>> assert unicodify(None) is None
    >>> assert unicodify('simple string') == 'simple string'
    >>> assert unicodify(3) == '3'
    >>> assert unicodify(bytearray([115, 116, 114, 196, 169, 195, 177, 103])) == 'strĩñg'
    >>> s = 'lâtín strìñg'; assert unicodify(s.encode('latin-1'), 'latin-1') == s
    >>> s = 'lâtín strìñg'; assert unicodify(s.encode('latin-1'), error='ignore') == 'ltn strg'
    """
    if value is None:
        return None
    try:
        if isinstance(value, bytearray):
            value = bytes(value)
        elif not isinstance(value, (str, bytes)):
            value = str(value)
        # value is now str or bytes; decode if necessary.
        if isinstance(value, bytes):
            value = str(value, encoding, error)
    except Exception as e:
        if log_exception:
            msg = "Value '{}' could not be coerced to Unicode: {}('{}')".format(repr(value), type(e).__name__, e)
            log.exception(msg)
        raise
    return value.replace('\0', '') if strip_null else value
1051 | |
1052 | |
def smart_str(s, encoding=DEFAULT_ENCODING, strings_only=False, errors='strict'):
    """
    Returns a bytestring version of 's', encoded as specified in 'encoding'.

    If strings_only is True, don't convert (some) non-string-like objects.

    Adapted from an older, simpler version of django.utils.encoding.smart_str.

    >>> assert smart_str(None) == b'None'
    >>> assert smart_str(None, strings_only=True) is None
    >>> assert smart_str(3) == b'3'
    >>> assert smart_str(3, strings_only=True) == 3
    >>> s = b'a bytes string'; assert smart_str(s) == s
    >>> s = bytearray(b'a bytes string'); assert smart_str(s) == s
    >>> assert smart_str('a simple unicode string') == b'a simple unicode string'
    """
    # None and ints pass through untouched when strings_only is set.
    if strings_only and isinstance(s, (type(None), int)):
        return s
    if not isinstance(s, (str, bytes, bytearray)):
        s = str(s)
    # s is now str, bytes or bytearray.
    if isinstance(s, str):
        return s.encode(encoding, errors)
    # bytes/bytearray already in the default encoding: transcode only when a
    # different target encoding was requested (and the value is non-empty).
    if s and encoding != DEFAULT_ENCODING:
        return s.decode(DEFAULT_ENCODING, errors).encode(encoding, errors)
    return s
1083 | |
1084 | |
def strip_control_characters(s):
    """Strip unicode control ("Cc" category) characters from a string."""
    text = unicodify(s)
    return "".join(ch for ch in text if unicodedata.category(ch) != "Cc")
1088 | |
1089 | |
def object_to_string(obj):
    """Hex-encode a bytes-like object (inverse of string_to_object)."""
    return binascii.b2a_hex(obj)
1092 | |
1093 | |
def string_to_object(s):
    """Decode a hex string produced by object_to_string back to bytes."""
    return binascii.a2b_hex(s)
1096 | |
1097 | |
def clean_multiline_string(multiline_string, sep='\n'):
    """
    Dedent, split, remove first and last empty lines, rejoin.

    The result always ends with a single newline.  BUG FIX: previously an
    empty (or single-empty-line) input raised IndexError because the list
    was indexed again after the only element had been removed.
    """
    multiline_string = textwrap.dedent(multiline_string)
    string_list = multiline_string.split(sep)
    if string_list and not string_list[0]:
        string_list = string_list[1:]
    if string_list and not string_list[-1]:
        string_list = string_list[:-1]
    return '\n'.join(string_list) + '\n'
1109 | |
1110 | |
class ParamsWithSpecs(collections.defaultdict):
    """dict-like parameter container checked against a spec dictionary.

    ``specs`` maps parameter name -> spec dict which may contain:

    * ``map``: callable applied to the raw value; failures are reported via
      ``_param_map_error``
    * ``valid``: predicate; a falsy result is reported via
      ``_param_vaildation_error``
    * ``default``: value returned for parameters that were never set

    Unknown parameter names are reported via ``_param_unknown_error``.
    Subclasses must implement the three ``_param_*_error`` hooks (the base
    implementations raise NotImplementedError).
    """

    def __init__(self, specs=None, params=None):
        self.specs = specs or dict()
        self.params = params or dict()
        for name, value in self.params.items():
            if name not in self.specs:
                self._param_unknown_error(name)
            if 'map' in self.specs[name]:
                try:
                    self.params[name] = self.specs[name]['map'](value)
                except Exception:
                    self._param_map_error(name, value)
            if 'valid' in self.specs[name]:
                # NOTE(review): `value` is the raw (pre-'map') value here, so
                # validation never sees the mapped result — confirm intended.
                if not self.specs[name]['valid'](value):
                    self._param_vaildation_error(name, value)

        self.update(self.params)

    def __missing__(self, name):
        # Unset parameters fall back to the spec-declared default.
        return self.specs[name]['default']

    def __getattr__(self, name):
        # Attribute-style access: params.foo is equivalent to params['foo'].
        return self[name]

    def _param_unknown_error(self, name):
        raise NotImplementedError()

    def _param_map_error(self, name, value):
        raise NotImplementedError()

    # NOTE: the method name keeps its historical misspelling ("vaildation")
    # because subclasses override it by this exact name.
    def _param_vaildation_error(self, name, value):
        raise NotImplementedError()
1146 | |
1147 | |
def compare_urls(url1, url2, compare_scheme=True, compare_hostname=True, compare_path=True):
    """Compare two URLs component-wise.

    A component only counts as a mismatch when the comparison is enabled AND
    both URLs actually define that component with different values.
    """
    parsed1 = urlparse(url1)
    parsed2 = urlparse(url2)
    component_checks = (
        (compare_scheme, parsed1.scheme, parsed2.scheme),
        (compare_hostname, parsed1.hostname, parsed2.hostname),
        (compare_path, parsed1.path, parsed2.path),
    )
    for enabled, left, right in component_checks:
        if enabled and left and right and left != right:
            return False
    return True
1158 | |
1159 | |
def read_build_sites(filename, check_builds=True):
    """Read db-name -> UCSC site mappings from a tab-separated file.

    Each non-comment line contains the site name, the site URL and — when
    *check_builds* is True — a comma-separated list of builds in the third
    column.  Malformed lines are skipped silently; an unreadable file is
    logged and yields an empty list.

    BUG FIX: the file is now opened with a context manager so the handle is
    always closed (previously it leaked).
    """
    build_sites = []
    try:
        with open(filename) as fh:
            for line in fh:
                try:
                    if line.startswith("#"):
                        continue
                    fields = line.replace("\r", "").replace("\n", "").split("\t")
                    site_name = fields[0]
                    site = fields[1]
                    if check_builds:
                        site_builds = fields[2].split(",")
                        site_dict = {'name': site_name, 'url': site, 'builds': site_builds}
                    else:
                        site_dict = {'name': site_name, 'url': site}
                    build_sites.append(site_dict)
                except Exception:
                    # Skip malformed lines (missing columns etc.).
                    continue
    except Exception:
        log.error("ERROR: Unable to read builds for site file %s", filename)
    return build_sites
1182 | |
1183 | |
def relativize_symlinks(path, start=None, followlinks=False):
    """Rewrite every symlink under *path* as a relative symlink.

    Each link target becomes relative to *start* when given, otherwise
    relative to the directory that contains the link.
    """
    for root, _, file_names in os.walk(path, followlinks=followlinks):
        base = root if start is None else start
        for file_name in file_names:
            link_path = os.path.join(root, file_name)
            if not os.path.islink(link_path):
                continue
            target = os.readlink(link_path)
            os.remove(link_path)
            os.symlink(relpath(target, base), link_path)
1199 | |
1200 | |
def stringify_dictionary_keys(in_dict):
    """Return a shallow copy of *in_dict* with every top-level key coerced to str.

    Does not recurse into nested dicts; needed because unicode keys are not
    valid for expansion into keyword arguments on method calls.
    """
    return {str(key): value for key, value in in_dict.items()}
1209 | |
1210 | |
def mkstemp_ln(src, prefix='mkstemp_ln_'):
    """Create a randomly named hard link to *src* in its directory; return the link's absolute path.

    Modeled on tempfile._mkstemp_inner.  Created so we can persist the
    underlying file of a NamedTemporaryFile upon its closure.
    """
    src_dir = os.path.dirname(src)
    candidates = tempfile._get_candidate_names()
    for _ in range(tempfile.TMP_MAX):
        candidate_path = os.path.join(src_dir, prefix + next(candidates))
        try:
            os.link(src, candidate_path)
        except OSError as exc:
            if exc.errno == errno.EEXIST:
                continue  # name already taken, try another
            raise
        return os.path.abspath(candidate_path)
    raise OSError(errno.EEXIST, "No usable temporary file name found")
1230 | |
1231 | |
def umask_fix_perms(path, umask, unmasked_perms, gid=None):
    """Set permissions (and optionally group) on *path* in a umask-friendly way.

    The applied mode is ``unmasked_perms & ~umask``.  All failures are
    logged rather than raised.
    """
    perms = unmasked_perms & ~umask
    try:
        st = os.stat(path)
    except OSError:
        log.exception('Unable to set permissions or group on %s', path)
        return
    # Fix the mode when it differs from the desired umask-derived value.
    if stat.S_IMODE(st.st_mode) != perms:
        try:
            os.chmod(path, perms)
        except Exception as e:
            log.warning(f'Unable to honor umask ({oct(umask)}) for {path}, tried to set: {oct(perms)} but mode remains {oct(stat.S_IMODE(st.st_mode))}, error was: {unicodify(e)}')
    # Fix the group when requested and different from the current one.
    if gid is not None and st.st_gid != gid:
        try:
            os.chown(path, -1, gid)
        except Exception as e:
            # Resolve gids to group entries for a friendlier message when possible.
            try:
                desired_group = grp.getgrgid(gid)
                current_group = grp.getgrgid(st.st_gid)
            except Exception:
                desired_group = gid
                current_group = st.st_gid
            log.warning(f'Unable to honor primary group ({desired_group}) for {path}, group remains {current_group}, error was: {unicodify(e)}')
1267 | |
1268 | |
def docstring_trim(docstring):
    """Normalize a Python docstring's indentation (PEP 257 algorithm).

    Taken from: http://www.python.org/dev/peps/pep-0257/
    """
    if not docstring:
        return ''
    # Convert tabs to spaces (normal Python rules) and split into lines.
    lines = docstring.expandtabs().splitlines()
    # Minimum indentation over all non-blank lines after the first.
    indents = [len(line) - len(line.lstrip()) for line in lines[1:] if line.lstrip()]
    margin = min(indents) if indents else sys.maxsize
    # First line is special: just strip it.
    trimmed = [lines[0].strip()]
    if margin < sys.maxsize:
        trimmed.extend(line[margin:].rstrip() for line in lines[1:])
    # Drop leading and trailing blank lines.
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    return '\n'.join(trimmed)
1294 | |
1295 | |
def nice_size(size):
    """
    Returns a readably formatted string with the size

    >>> nice_size(100)
    '100 bytes'
    >>> nice_size(10000)
    '9.8 KB'
    >>> nice_size(1000000)
    '976.6 KB'
    >>> nice_size(100000000)
    '95.4 MB'
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    try:
        size = float(size)
    except Exception:
        return '??? bytes'
    sign = '-' if size < 0 else ''
    size = abs(size)
    for exponent, unit in enumerate(units):
        # Pick the first unit whose next step exceeds the value.
        if size < 1024 ** (exponent + 1):
            scaled = size / float(1024 ** exponent)
            if unit == 'bytes':  # no decimals for bytes
                return "%s%d bytes" % (sign, scaled)
            return f"{sign}{scaled:.1f} {unit}"
    return '??? bytes'
1326 | |
1327 | |
def size_to_bytes(size):
    """
    Returns a number of bytes (as integer) if given a reasonably formatted string with the size

    >>> size_to_bytes('1024')
    1024
    >>> size_to_bytes('1.0')
    1
    >>> size_to_bytes('10 bytes')
    10
    >>> size_to_bytes('4k')
    4096
    >>> size_to_bytes('2.2 TB')
    2418925581107
    >>> size_to_bytes('.01 TB')
    10995116277
    >>> size_to_bytes('1.b')
    1
    >>> size_to_bytes('1.2E2k')
    122880
    """
    # Number regexp based on https://stackoverflow.com/questions/385558/extract-float-double-value/385597#385597
    size_re = re.compile(r'(?P<number>(\d+(\.\d*)?|\.\d+)(e[+-]?\d+)?)\s*(?P<multiple>[eptgmk]?(b|bytes?)?)?$')
    match = size_re.match(size.lower())
    if match is None:
        raise ValueError("Could not parse string '%s'" % size)
    number = float(match.group("number"))
    multiple = match.group("multiple")
    # Bare numbers and byte suffixes need no scaling.
    if multiple == "" or multiple.startswith('b'):
        return int(number)
    # Map the unit prefix to its power of 1024.
    exponents = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}
    prefix = multiple[0]
    if prefix in exponents:
        return int(number * 1024 ** exponents[prefix])
    raise ValueError(f"Unknown multiplier '{multiple}' in '{size}'")
1372 | |
1373 | |
def send_mail(frm, to, subject, body, config, html=None):
    """
    Sends an email.

    :type frm: str
    :param frm: from address

    :type to: str
    :param to: to address (a single address or comma-separated string/list;
        passed through ``listify``)

    :type subject: str
    :param subject: Subject line

    :type body: str
    :param body: Body text (should be plain text)

    :type config: object
    :param config: Galaxy configuration object; ``smtp_server`` is required,
        ``smtp_ssl``, ``smtp_username`` and ``smtp_password`` are optional

    :type html: str
    :param html: Alternative HTML representation of the body content. If
        provided will convert the message to a MIMEMultipart. (Default 'None')
    """

    to = listify(to)
    if html:
        # multipart/alternative lets the client choose plain text or HTML.
        msg = MIMEMultipart('alternative')
    else:
        msg = MIMEText(body, 'plain', 'utf-8')

    msg['To'] = ', '.join(to)
    msg['From'] = frm
    msg['Subject'] = subject

    if config.smtp_server is None:
        # No SMTP configured: log the message instead of sending.
        log.error("Mail is not configured for this Galaxy instance.")
        log.info(msg)
        return

    if html:
        # The plain part is attached first so HTML is preferred by clients.
        mp_text = MIMEText(body, 'plain', 'utf-8')
        mp_html = MIMEText(html, 'html', 'utf-8')
        msg.attach(mp_text)
        msg.attach(mp_html)

    smtp_ssl = asbool(getattr(config, 'smtp_ssl', False))
    if smtp_ssl:
        s = smtplib.SMTP_SSL(config.smtp_server)
    else:
        s = smtplib.SMTP(config.smtp_server)
    if not smtp_ssl:
        # On plain connections STARTTLS is attempted opportunistically: a
        # missing extension or missing TLS support is tolerated (warning),
        # but a broken HELO exchange aborts the send.
        try:
            s.starttls()
            log.debug('Initiated SSL/TLS connection to SMTP server: %s', config.smtp_server)
        except RuntimeError as e:
            log.warning('SSL/TLS support is not available to your Python interpreter: %s', unicodify(e))
        except smtplib.SMTPHeloError as e:
            log.error("The server didn't reply properly to the HELO greeting: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPException as e:
            log.warning('The server does not support the STARTTLS extension: %s', unicodify(e))
    if config.smtp_username and config.smtp_password:
        # Authenticate only when both credentials are configured; any login
        # failure closes the connection and re-raises.
        try:
            s.login(config.smtp_username, config.smtp_password)
        except smtplib.SMTPHeloError as e:
            log.error("The server didn't reply properly to the HELO greeting: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPAuthenticationError as e:
            log.error("The server didn't accept the username/password combination: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPException as e:
            log.error("No suitable authentication method was found: %s", unicodify(e))
            s.close()
            raise
    s.sendmail(frm, to, msg.as_string())
    s.quit()
1453 | |
1454 | |
def force_symlink(source, link_name):
    """Create symlink *link_name* -> *source*, replacing any existing entry at *link_name*.

    OSErrors other than EEXIST (e.g. permission denied) propagate unchanged.
    """
    try:
        os.symlink(source, link_name)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # Replace the existing entry and retry.
            os.remove(link_name)
            os.symlink(source, link_name)
        else:
            # Bare `raise` preserves the original traceback (was `raise e`).
            raise
1464 | |
1465 | |
def move_merge(source, target):
    """Move *source* to *target*, merging directory contents when both exist.

    Plain shutil.move would nest *source* inside an existing *target*
    directory; this keeps *target* as the final destination by recursively
    moving each entry of *source* into it instead.
    """
    merging_dirs = os.path.isdir(source) and os.path.exists(target) and os.path.isdir(target)
    if not merging_dirs:
        return shutil.move(source, target)
    for entry in os.listdir(source):
        move_merge(os.path.join(source, entry), os.path.join(target, entry))
1477 | |
1478 | |
def safe_str_cmp(a, b):
    """Safely compare two strings in a timing-attack-resistant manner.

    XORs every character pair and ORs the results so the loop always runs
    the full length; only the length check can short-circuit.
    """
    if len(a) != len(b):
        return False
    diff = 0
    for ch_a, ch_b in zip(a, b):
        diff |= ord(ch_a) ^ ord(ch_b)
    return not diff
1488 | |
1489 | |
# Filesystem anchors derived from this package's location (__path__[0]):
# the Galaxy root is three directory levels up from this package, and the
# sample configuration files live under ../config/sample.
galaxy_root_path = os.path.join(__path__[0], os.pardir, os.pardir, os.pardir)  # type: ignore
galaxy_samples_path = os.path.join(__path__[0], os.pardir, 'config', 'sample')  # type: ignore
1492 | |
1493 | |
def galaxy_directory():
    """Return the absolute path of the Galaxy root directory.

    When running from a ``packages/`` checkout, step one level further up.
    """
    absolute_root = os.path.abspath(galaxy_root_path)
    if os.path.basename(absolute_root) == "packages":
        absolute_root = os.path.abspath(os.path.join(absolute_root, ".."))
    return absolute_root
1499 | |
1500 | |
def galaxy_samples_directory():
    """Return the absolute path of Galaxy's sample configuration directory."""
    return os.path.abspath(galaxy_samples_path)
1503 | |
1504 | |
def config_directories_from_setting(directories_setting, galaxy_root=galaxy_root_path):
    """
    Parse the ``directories_setting`` into a list of relative or absolute
    filesystem paths that will be searched to discover plugins.

    :type galaxy_root: string
    :param galaxy_root: the root path of this galaxy installation
    :type directories_setting: string (default: None)
    :param directories_setting: the filesystem path (or paths)
        to search for plugins. Can be CSV string of paths. Will be treated as
        absolute if a path starts with '/', relative otherwise.
    :rtype: list of strings
    :returns: list of filesystem paths
    """
    if not directories_setting:
        return []

    found_directories = []
    for raw_directory in listify(directories_setting):
        directory = raw_directory.strip()
        # Relative paths are resolved against the Galaxy root.
        if not directory.startswith('/'):
            directory = os.path.join(galaxy_root, directory)
        if not os.path.exists(directory):
            log.warning('directory not found: %s', directory)
            continue
        found_directories.append(directory)
    return found_directories
1532 | |
1533 | |
def parse_int(value, min_val=None, max_val=None, default=None, allow_none=False):
    """Parse *value* as an int clamped to [min_val, max_val].

    On a parse failure: return None when *allow_none* is set and either no
    default was given or the value is the literal string "None"; otherwise
    return a truthy *default* if provided, else re-raise the ValueError.
    """
    try:
        parsed = int(value)
    except ValueError:
        if allow_none and (default is None or value == "None"):
            return None
        if default:
            return default
        raise
    if min_val is not None and parsed < min_val:
        return min_val
    if max_val is not None and parsed > max_val:
        return max_val
    return parsed
1550 | |
1551 | |
def parse_non_hex_float(s):
    r"""
    Parse string `s` into a float but throw a `ValueError` if the string is in
    the otherwise acceptable format `\d+e\d+` (e.g. 40000000000000e5.)

    This can be passed into `json.loads` to prevent a hex string in the above
    format from being incorrectly parsed as a float in scientific notation.

    >>> parse_non_hex_float( '123.4' )
    123.4
    >>> parse_non_hex_float( '2.45e+3' )
    2450.0
    >>> parse_non_hex_float( '2.45e-3' )
    0.00245
    """
    parsed = float(s)
    # Parsed fine as a float; now reject an 'e' with no explicit sign,
    # which is how hex-ish identifiers masquerade as scientific notation.
    if 'e' in s and '+' not in s and '-' not in s:
        raise ValueError('could not convert string to float: ' + s)
    return parsed
1576 | |
1577 | |
def build_url(base_url, port=80, scheme='http', pathspec=None, params=None, doseq=False):
    """Construct a URL from parts.

    :param base_url: base URL; may carry a path and a query string (its query
        parameters are merged into *params*)
    :param port: appended to the netloc when != 80
    :param scheme: URL scheme; overrides the parsed scheme when not 'http'
    :param pathspec: list of extra path components appended to the path
    :param params: mapping of query parameters to append
    :param doseq: passed through to urlencode for sequence-valued params
    """
    if params is None:
        params = dict()
    if pathspec is None:
        pathspec = []
    parsed_url = urlparse(base_url)
    if scheme != 'http':
        # BUG FIX: ParseResult is an immutable namedtuple, so the previous
        # `parsed_url.scheme = scheme` raised AttributeError; use _replace().
        parsed_url = parsed_url._replace(scheme=scheme)
    assert parsed_url.scheme in ('http', 'https', 'ftp'), 'Invalid URL scheme: %s' % scheme
    if port != 80:
        url = '%s://%s:%d/%s' % (parsed_url.scheme, parsed_url.netloc.rstrip('/'), int(port), parsed_url.path)
    else:
        url = '{}://{}/{}'.format(parsed_url.scheme, parsed_url.netloc.rstrip('/'), parsed_url.path.lstrip('/'))
    if len(pathspec) > 0:
        url = '{}/{}'.format(url.rstrip('/'), '/'.join(pathspec))
    if parsed_url.query:
        # Merge query parameters already present on base_url into params.
        for query_parameter in parsed_url.query.split('&'):
            key, value = query_parameter.split('=')
            params[key] = value
    if params:
        url += '?%s' % urlencode(params, doseq=doseq)
    return url
1600 | |
1601 | |
def url_get(base_url, auth=None, pathspec=None, params=None, max_retries=5, backoff_factor=1):
    """GET the given URL (retrying on HTTP 429) and return the response body as text.

    Raises requests.HTTPError via raise_for_status() on a bad status code.
    """
    full_url = build_url(base_url, pathspec=pathspec, params=params)
    session = requests.Session()
    retry_config = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=[429])
    session.mount(base_url, HTTPAdapter(max_retries=retry_config))
    response = session.get(full_url, auth=auth)
    response.raise_for_status()
    return response.text
1611 | |
1612 | |
def download_to_file(url, dest_file_path, timeout=30, chunk_size=2 ** 20):
    """Stream *url* into *dest_file_path*, *chunk_size* bytes at a time."""
    with requests.get(url, timeout=timeout, stream=True) as response:
        with open(dest_file_path, 'wb') as out:
            for chunk in response.iter_content(chunk_size):
                if chunk:  # skip keep-alive chunks
                    out.write(chunk)
1619 | |
1620 | |
class classproperty:
    """Descriptor for a read-only property computed from the owning class.

    Unlike ``property``, the wrapped function receives the *class* (not the
    instance), so ``Owner.attr`` works without instantiation.
    """

    def __init__(self, f):
        # f: callable taking the owner class and returning the value.
        self.f = f

    def __get__(self, obj, owner):
        # The instance (obj) is ignored; always evaluate against the class.
        return self.f(owner)
1628 | |
1629 | |
def get_executable():
    """Best-effort path of the Python interpreter to use for subprocesses.

    Under uWSGI, ``sys.executable`` is the uwsgi binary itself, so look up
    the virtualenv (from uWSGI options or $VIRTUAL_ENV) and return its
    python; fall back to a sibling ``python`` or plain ``'python'``.
    """
    exe = sys.executable
    if not exe.endswith('uwsgi'):
        return exe
    virtualenv = None
    if uwsgi is not None:
        for option_name in ('home', 'virtualenv', 'venv', 'pyhome'):
            if option_name in uwsgi.opt:
                virtualenv = unicodify(uwsgi.opt[option_name])
                break
    if virtualenv is None and 'VIRTUAL_ENV' in os.environ:
        virtualenv = os.environ['VIRTUAL_ENV']
    if virtualenv is not None:
        exe = os.path.join(virtualenv, 'bin', 'python')
    else:
        exe = os.path.join(os.path.dirname(exe), 'python')
    if not os.path.exists(exe):
        exe = 'python'
    return exe
1648 | |
1649 | |
class ExecutionTimer:
    """Wall-clock timer started at construction; str() renders the elapsed milliseconds."""

    def __init__(self):
        self.begin = time.time()

    def __str__(self):
        return "(%0.3f ms)" % (self.elapsed * 1000)

    @property
    def elapsed(self):
        """Seconds elapsed since this timer was created."""
        return time.time() - self.begin
1661 | |
1662 | |
class StructuredExecutionTimer:
    """Timer that renders a message template plus the elapsed milliseconds.

    *template* may contain ``$``-placeholders substituted by keyword
    arguments passed to ``to_str``; *tags* are stored for callers to read.
    """

    def __init__(self, timer_id, template, **tags):
        self.begin = time.time()
        self.timer_id = timer_id
        self.template = template
        self.tags = tags

    def __str__(self):
        return self.to_str()

    def to_str(self, **kwd):
        """Render the (optionally substituted) template followed by the elapsed time."""
        if kwd:
            message = string.Template(self.template).safe_substitute(kwd)
        else:
            message = self.template
        return message + " (%0.3f ms)" % (self.elapsed * 1000)

    @property
    def elapsed(self):
        """Seconds elapsed since construction."""
        return time.time() - self.begin
1685 | |
1686 | |
if __name__ == '__main__':
    # Run this module's doctests when executed directly.
    import doctest
    doctest.testmod(sys.modules[__name__], verbose=False)