comparison env/lib/python3.9/site-packages/galaxy/util/__init__.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """
2 Utility functions used systemwide.
3
4 """
5
6 import binascii
7 import collections
8 import errno
9 import importlib
10 import json
11 import os
12 import random
13 import re
14 import shutil
15 import smtplib
16 import stat
17 import string
18 import sys
19 import tempfile
20 import textwrap
21 import threading
22 import time
23 import unicodedata
24 import xml.dom.minidom
25 from datetime import datetime
26 from email.mime.multipart import MIMEMultipart
27 from email.mime.text import MIMEText
28 from functools import partial
29 from hashlib import md5
30 from os.path import relpath
31 from urllib.parse import (
32 urlencode,
33 urlparse,
34 urlsplit,
35 urlunsplit,
36 )
37
38 import requests
39 try:
40 import grp
41 except ImportError:
42 # For Pulsar on Windows (which does not use the function that uses grp)
43 grp = None # type: ignore
44 from boltons.iterutils import (
45 default_enter,
46 remap,
47 )
48 LXML_AVAILABLE = True
49 try:
50 from lxml import etree
51 except ImportError:
52 LXML_AVAILABLE = False
53 import xml.etree.ElementTree as etree # type: ignore
54 from requests.adapters import HTTPAdapter
55 from requests.packages.urllib3.util.retry import Retry
56
57 try:
58 import docutils.core as docutils_core
59 import docutils.writers.html4css1 as docutils_html4css1
60 except ImportError:
61 docutils_core = None
62 docutils_html4css1 = None
63
64 try:
65 import uwsgi
66 except ImportError:
67 uwsgi = None
68
69 from .custom_logging import get_logger
70 from .inflection import Inflector
71 from .path import safe_contains, safe_makedirs, safe_relpath # noqa: F401
72
# Shared inflector instance for pluralize/singularize helpers.
inflector = Inflector()

log = get_logger(__name__)
# Module-level lock backing the @synchronized decorator below.
_lock = threading.RLock()

# Re-exported for convenience as galaxy.util.namedtuple.
namedtuple = collections.namedtuple

CHUNK_SIZE = 65536  # 64k

# Limit for strings headed to bounded database columns (see
# shrink_and_unicodify) and its human-readable label.
DATABASE_MAX_STRING_SIZE = 32768
DATABASE_MAX_STRING_SIZE_PRETTY = '32K'

# Magic bytes identifying gzip- and bzip2-compressed data.
gzip_magic = b'\x1f\x8b'
bz2_magic = b'BZh'
DEFAULT_ENCODING = os.environ.get('GALAXY_DEFAULT_ENCODING', 'utf-8')
NULL_CHAR = b'\x00'
# Byte sequences whose presence marks content as binary (see is_binary).
BINARY_CHARS = [NULL_CHAR]
FILENAME_VALID_CHARS = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Common permission bit combinations (octal 644, 755 and 777 respectively).
RW_R__R__ = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
RWXR_XR_X = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
RWXRWXRWX = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

# XML() from lxml when available, stdlib ElementTree otherwise (see try above).
XML = etree.XML

defaultdict = collections.defaultdict
99
100
def remove_protocol_from_url(url):
    """Strip a leading scheme (``http://``, ``https://``, ...) and any
    trailing slashes from ``url``; ``None`` passes through unchanged.
    """
    if url is None:
        return None
    has_protocol = url.find('://') > 0
    stripped = url.split('://')[1] if has_protocol else url
    return stripped.rstrip('/')
114
115
def is_binary(value):
    """
    Report whether ``value`` looks binary, i.e. contains a null byte
    (matching the default behavior of grep and friends).  May misclassify
    utf-16 content, but so would ASCII-based detection.

    >>> is_binary( string.printable )
    False
    >>> is_binary( b'\\xce\\x94' )
    False
    >>> is_binary( b'\\x00' )
    True
    """
    as_bytes = smart_str(value)
    return any(marker in as_bytes for marker in BINARY_CHARS)
132
133
def is_uuid(value):
    """
    Return True if ``value`` is a lowercase, hyphenated UUID string.

    The match is anchored at both ends, so a valid UUID followed by extra
    characters is rejected (the previous ``re.match`` accepted it).

    >>> is_uuid( "123e4567-e89b-12d3-a456-426655440000" )
    True
    >>> is_uuid( "0x3242340298902834" )
    False
    >>> is_uuid( "123e4567-e89b-12d3-a456-426655440000-tail" )
    False
    """
    uuid_re = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
    # fullmatch (not match) so trailing garbage cannot sneak through.
    return bool(uuid_re.fullmatch(str(value)))
147
148
def directory_hash_id(id):
    """
    Compute the directory path components under which object ``id`` lives.

    >>> directory_hash_id( 100 )
    ['000']
    >>> directory_hash_id( "90000" )
    ['090']
    >>> directory_hash_id("777777777")
    ['000', '777', '777']
    >>> directory_hash_id("135ee48a-4f51-470c-ae2f-ce8bd78799e6")
    ['1', '3', '5']
    """
    as_string = str(id)
    # Shortcut -- ids 0-999 go under ../000/
    if len(as_string) < 4:
        return ["000"]
    if is_uuid(as_string):
        # UUIDs: one single-character level per leading character.
        return [char for char in as_string[:3]]
    # Zero-pad to a multiple of three, then drop the final triplet so each
    # leaf directory holds up to 1000 entries.
    padded = "0" * (3 - len(as_string) % 3) + as_string
    padded = padded[:-3]
    # Break into chunks of three
    return [padded[start:start + 3] for start in range(0, len(padded), 3)]
176
177
def get_charset_from_http_headers(headers, default=None):
    """Extract the charset from a Content-Type header value, or ``default``."""
    content_type = headers.get('content-type', None)
    if content_type and 'charset=' in content_type:
        charset = content_type.split('charset=')[-1].split(';')[0].strip()
        if charset:
            return charset
    return default
185
186
def synchronized(func):
    """This wrapper will serialize access to 'func' to a single thread. Use it as a decorator."""
    def caller(*params, **kparams):
        # ``with`` blocks until the module-level RLock is acquired and
        # guarantees release even if func raises -- clearer than the manual
        # acquire/try/finally it replaces.
        with _lock:
            return func(*params, **kparams)
    return caller
196
197
def iter_start_of_line(fh, chunk_size=None):
    """
    Yield successive ``fh.readline(chunk_size)`` results until EOF ("").
    """
    while True:
        line = fh.readline(chunk_size)
        if line == "":
            break
        yield line
203
204
def file_reader(fp, chunk_size=CHUNK_SIZE):
    """This generator yields the open fileobject in chunks (default 64k).

    Closes the file at the end -- including when the generator is closed or
    garbage-collected before exhaustion (the try/finally guarantees this,
    where the previous trailing close only ran on full consumption).
    """
    try:
        while True:
            data = fp.read(chunk_size)
            if not data:
                break
            yield data
    finally:
        fp.close()
213
214
def unique_id(KEY_SIZE=128):
    """
    Generate a unique hex digest derived from KEY_SIZE random bits.

    >>> ids = [ unique_id() for i in range(1000) ]
    >>> len(set(ids))
    1000
    """
    random_bits = random.getrandbits(KEY_SIZE)
    return md5(str(random_bits).encode("UTF-8")).hexdigest()
225
226
def parse_xml(fname, strip_whitespace=True, remove_comments=True):
    """Returns a parsed xml tree

    Parses with lxml when available, stdlib ElementTree otherwise; with
    ``strip_whitespace`` every element's text/tail is trimmed in place.
    """
    parser = None
    if remove_comments and LXML_AVAILABLE:
        # If using stdlib etree comments are always removed,
        # but lxml doesn't do this by default
        parser = etree.XMLParser(remove_comments=remove_comments)
    try:
        tree = etree.parse(fname, parser=parser)
        root = tree.getroot()
        if strip_whitespace:
            for elem in root.iter('*'):
                if elem.text is not None:
                    elem.text = elem.text.strip()
                if elem.tail is not None:
                    elem.tail = elem.tail.strip()
    except OSError as e:
        # Normalize missing-file errors so callers can test e.errno.
        if e.errno is None and not os.path.exists(fname):
            # lxml doesn't set errno
            e.errno = errno.ENOENT
        raise
    except etree.ParseError:
        log.exception("Error parsing file %s", fname)
        raise
    return tree
252
253
def parse_xml_string(xml_string, strip_whitespace=True):
    """Parse an XML document from a string, optionally stripping whitespace
    around every element's text and tail."""
    try:
        tree = etree.fromstring(xml_string)
    except ValueError as e:
        # lxml refuses str input that carries an encoding declaration;
        # retry with explicitly encoded bytes in that one case.
        if 'strings with encoding declaration are not supported' not in unicodify(e):
            raise e
        tree = etree.fromstring(xml_string.encode('utf-8'))
    if strip_whitespace:
        for node in tree.iter('*'):
            if node.text is not None:
                node.text = node.text.strip()
            if node.tail is not None:
                node.tail = node.tail.strip()
    return tree
269
270
def xml_to_string(elem, pretty=False):
    """
    Serialize an XML element (or comment node) to a unicode string,
    optionally pretty-printed via minidom.
    """
    try:
        xml_str = etree.tostring(elem, encoding='unicode') if elem is not None else ''
    except TypeError as e:
        # we assume this is a comment
        if not hasattr(elem, 'text'):
            raise e
        return "<!-- %s -->\n" % elem.text
    if not (xml_str and pretty):
        return xml_str
    pretty_string = xml.dom.minidom.parseString(xml_str).toprettyxml(indent=' ')
    return "\n".join(line for line in pretty_string.split('\n') if not re.match(r'^[\s\\nb\']*$', line))
290
291
def xml_element_compare(elem1, elem2):
    """Compare two XML elements (or pre-converted dicts) by dict form."""
    d1 = elem1 if isinstance(elem1, dict) else xml_element_to_dict(elem1)
    d2 = elem2 if isinstance(elem2, dict) else xml_element_to_dict(elem2)
    return d1 == d2
298
299
300 def xml_element_list_compare(elem_list1, elem_list2):
301 return [xml_element_to_dict(elem) for elem in elem_list1] == [xml_element_to_dict(elem) for elem in elem_list2]
302
303
def xml_element_to_dict(elem):
    """Convert an XML element tree to a nested dict keyed by tag name.

    Attributes become ``@attr`` keys, text becomes ``#text`` (or the bare
    value for a leaf without attributes/children), and repeated child tags
    collapse into lists.
    """
    rval = {}
    if elem.attrib:
        rval[elem.tag] = {}
    else:
        rval[elem.tag] = None

    sub_elems = list(elem)
    if sub_elems:
        # Group converted children by tag so repeats can become lists.
        sub_elem_dict = dict()
        for sub_sub_elem_dict in map(xml_element_to_dict, sub_elems):
            for key, value in sub_sub_elem_dict.items():
                if key not in sub_elem_dict:
                    sub_elem_dict[key] = []
                sub_elem_dict[key].append(value)
        # Single occurrences stored bare; repeats kept as lists.
        for key, value in sub_elem_dict.items():
            if len(value) == 1:
                rval[elem.tag][key] = value[0]
            else:
                rval[elem.tag][key] = value
    if elem.attrib:
        for key, value in elem.attrib.items():
            rval[elem.tag]["@%s" % key] = value

    if elem.text:
        text = elem.text.strip()
        # NOTE(review): parses as ``(text and sub_elems) or elem.attrib`` --
        # with attributes present, even empty stripped text lands under
        # '#text'; confirm this precedence is intentional.
        if text and sub_elems or elem.attrib:
            rval[elem.tag]['#text'] = text
        else:
            rval[elem.tag] = text

    return rval
336
337
def pretty_print_xml(elem, level=0):
    """Indent ``elem``'s subtree in place (one pad per level) and return it.

    Only text/tail values that are empty or pure whitespace are rewritten,
    so meaningful content is preserved.
    """
    pad = ' '
    i = "\n" + level * pad
    if len(elem):
        # Element has children: open its text onto a new, deeper line ...
        if not elem.text or not elem.text.strip():
            elem.text = i + pad + pad
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        # ... recurse, then re-align the last child's tail with this level.
        for e in elem:
            pretty_print_xml(e, level + 1)
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
    else:
        # Leaf: only adjust the tail, and never at the document root.
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i + pad
    return elem
354
355
def get_file_size(value, default=None):
    """
    Best-effort size of ``value``: a filesystem path, a file-like object
    with a ``.name`` attribute, or any seekable object; falls back to
    ``default`` when none of those approaches work.
    """
    try:
        # try built-in
        return os.path.getsize(value)
    except Exception:
        pass
    try:
        # try built-in one name attribute
        return os.path.getsize(value.name)
    except Exception:
        pass
    try:
        # Measure by seeking to the end, then restore the position.
        here = value.tell()
        value.seek(0, 2)
        size = value.tell()
        value.seek(here)
        return size
    except Exception:
        return default
375
376
def shrink_stream_by_size(value, size, join_by=b"..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
    """
    Shrinks bytes read from `value` to `size`.

    `value` needs to implement tell/seek, so files need to be opened in binary mode.
    Returns unicode text with invalid characters replaced.
    """
    rval = b''
    join_by = smart_str(join_by)
    if get_file_size(value) > size:
        start = value.tell()
        len_join_by = len(join_by)
        min_size = len_join_by + 2
        if size < min_size:
            # Budget too small for both ends plus the separator: fall back
            # to just the beginning or just the end if the caller allowed it.
            if beginning_on_size_error:
                rval = value.read(size)
                value.seek(start)  # restore the caller's stream position
                return rval
            elif end_on_size_error:
                value.seek(-size, 2)  # position `size` bytes before EOF
                rval = value.read(size)
                value.seek(start)
                return rval
            raise ValueError('With the provided join_by value (%s), the minimum size value is %i.' % (join_by, min_size))
        # Split the budget between head and tail; the odd leftover byte
        # goes to the side selected by left_larger.
        left_index = right_index = int((size - len_join_by) / 2)
        if left_index + right_index + len_join_by < size:
            if left_larger:
                left_index += 1
            else:
                right_index += 1
        rval = value.read(left_index) + join_by
        value.seek(-right_index, 2)  # jump to right_index bytes before EOF
        rval += value.read(right_index)
    else:
        # Already small enough: consume the whole stream.
        while True:
            data = value.read(CHUNK_SIZE)
            if not data:
                break
            rval += data
    return unicodify(rval)
417
418
def shrink_and_unicodify(stream):
    """Unicodify ``stream`` (stripping nulls) and clamp the result to
    DATABASE_MAX_STRING_SIZE, keeping both its beginning and end."""
    text = unicodify(stream, strip_null=True) or ''
    if len(text) <= DATABASE_MAX_STRING_SIZE:
        return text
    return shrink_string_by_size(text,
                                 DATABASE_MAX_STRING_SIZE,
                                 join_by="\n..\n",
                                 left_larger=True,
                                 beginning_on_size_error=True)
428
429
def shrink_string_by_size(value, size, join_by="..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
    """
    Clamp ``value`` to at most ``size`` characters by keeping its two ends
    joined with ``join_by``.

    When ``size`` cannot fit ``join_by`` plus one character per side, return
    just the beginning or just the end if the corresponding flag is set;
    otherwise raise ValueError.
    """
    if len(value) <= size:
        return value
    len_join_by = len(join_by)
    min_size = len_join_by + 2
    if size < min_size:
        if beginning_on_size_error:
            return value[:size]
        if end_on_size_error:
            return value[-size:]
        raise ValueError('With the provided join_by value (%s), the minimum size value is %i.' % (join_by, min_size))
    left_index = right_index = (size - len_join_by) // 2
    # Give the leftover character (odd budgets) to the preferred side.
    if left_index + right_index + len_join_by < size:
        if left_larger:
            left_index += 1
        else:
            right_index += 1
    return "{}{}{}".format(value[:left_index], join_by, value[-right_index:])
448
449
def pretty_print_time_interval(time=False, precise=False, utc=False):
    """
    Get a datetime object or a int() Epoch timestamp and return a
    pretty string like 'an hour ago', 'Yesterday', '3 months ago',
    'just now', etc
    credit: http://stackoverflow.com/questions/1551382/user-friendly-time-format-in-python
    """
    if utc:
        now = datetime.utcnow()
    else:
        now = datetime.now()
    if type(time) is int:
        diff = now - datetime.fromtimestamp(time)
    elif isinstance(time, datetime):
        diff = now - time
    elif isinstance(time, str):
        try:
            time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            # MySQL may not support microseconds precision
            time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S")
        diff = now - time
    else:
        diff = now - now
    second_diff = diff.seconds
    day_diff = diff.days

    if day_diff < 0:
        return ''

    if precise:
        if day_diff == 0:
            if second_diff < 10:
                return "just now"
            if second_diff < 60:
                return str(second_diff) + " seconds ago"
            if second_diff < 120:
                return "a minute ago"
            if second_diff < 3600:
                # Floor division: the Python-2-era true division this
                # replaces produced strings like '2.5 minutes ago' on
                # Python 3.
                return str(second_diff // 60) + " minutes ago"
            if second_diff < 7200:
                return "an hour ago"
            if second_diff < 86400:
                return str(second_diff // 3600) + " hours ago"
        if day_diff == 1:
            return "yesterday"
        if day_diff < 7:
            return str(day_diff) + " days ago"
        if day_diff < 31:
            return str(day_diff // 7) + " weeks ago"
        if day_diff < 365:
            return str(day_diff // 30) + " months ago"
        return str(day_diff // 365) + " years ago"
    else:
        if day_diff == 0:
            return "today"
        if day_diff == 1:
            return "yesterday"
        if day_diff < 7:
            return "less than a week"
        if day_diff < 31:
            return "less than a month"
        if day_diff < 365:
            return "less than a year"
        return "a few years ago"
515
516
def pretty_print_json(json_data, is_json_string=False):
    """Render ``json_data`` (an object, or a JSON string when
    ``is_json_string`` is set) as sorted, 4-space-indented JSON."""
    obj = json.loads(json_data) if is_json_string else json_data
    return json.dumps(obj, sort_keys=True, indent=4)
521
522
# characters that are valid
valid_chars = set(string.ascii_letters + string.digits + " -=_.()/+*^,:?!")

# characters that are allowed but need to be escaped.  restore_text() maps
# the escape sequences back, so these values must stay stable over time.
mapped_chars = {'>': '__gt__',
                '<': '__lt__',
                "'": '__sq__',
                '"': '__dq__',
                '[': '__ob__',
                ']': '__cb__',
                '{': '__oc__',
                '}': '__cc__',
                '@': '__at__',
                '\n': '__cn__',
                '\r': '__cr__',
                '\t': '__tc__',
                '#': '__pd__'}
540
541
def restore_text(text, character_map=mapped_chars):
    """Invert the escaping applied by sanitize_text, mapping each escape
    sequence in ``character_map`` back to its original character."""
    if not text:
        return text
    restored = text
    for original, escaped in character_map.items():
        restored = restored.replace(escaped, original)
    return restored
549
550
def sanitize_text(text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """
    Restricts the characters that are allowed in text; accepts both strings
    and lists of strings; non-string entities will be cast to strings.
    """
    if isinstance(text, list):
        return [sanitize_text(x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character) for x in text]
    if not isinstance(text, str):
        text = smart_str(text)
    # Fix: forward invalid_character to the helper -- it was previously
    # dropped, so callers' custom replacement characters were ignored.
    return _sanitize_text_helper(text, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
561
562
def _sanitize_text_helper(text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Restricts the characters that are allowed in a string"""

    def _map_char(c):
        if c in valid_characters:
            return c
        if c in character_map:
            return character_map[c]
        return invalid_character  # makes debugging easier

    return ''.join(_map_char(c) for c in text)
575
576
def sanitize_lists_to_string(values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Sanitize ``values`` (a string or arbitrarily nested lists of strings)
    into a single comma-joined sanitized string."""
    if not isinstance(values, list):
        return sanitize_text(values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
    sanitized = [
        sanitize_lists_to_string(value,
                                 valid_characters=valid_characters,
                                 character_map=character_map,
                                 invalid_character=invalid_character)
        for value in values
    ]
    return ",".join(sanitized)
589
590
def sanitize_param(value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Clean incoming parameters (strings or lists)"""
    if isinstance(value, list):
        return [sanitize_text(x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character) for x in value]
    if isinstance(value, str):
        return sanitize_text(value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
    raise Exception('Unknown parameter type (%s)' % (type(value)))
599
600
# Conservative whitelist for filename fragments (see sanitize_for_filename).
valid_filename_chars = set(string.ascii_letters + string.digits + '_.')
# Sanitized results that would be unusable as a filename component.
invalid_filenames = ['', '.', '..']
603
604
def sanitize_for_filename(text, default=None):
    """
    Restricts the characters that are allowed in a filename portion; Returns default value or a unique id string if result is not a valid name.
    Method is overly aggressive to minimize possible complications, but a maximum length is not considered.
    """
    cleaned = ''.join(c if c in valid_filename_chars else '_' for c in text)
    if cleaned not in invalid_filenames:
        return cleaned
    if default is None:
        # No usable name: fall back to a sanitized unique id.
        return sanitize_for_filename(str(unique_id()))
    return default
622
623
def find_instance_nested(item, instances, match_key=None):
    """
    Recursively find instances from lists, dicts, tuples.

    `instances` should be a tuple of valid instances
    If match_key is given the key must match for an instance to be added to the list of found instances.
    """

    matches = []

    def visit(path, key, value):
        # Collect matching values; return (key, value) unchanged so remap
        # leaves the traversed structure intact.
        if isinstance(value, instances):
            if match_key is None or match_key == key:
                matches.append(value)
        return key, value

    def enter(path, key, value):
        # Do not descend into a matched instance itself.
        if isinstance(value, instances):
            return None, False
        return default_enter(path, key, value)

    # reraise_visit=False: boltons keeps the original item if visit raises.
    remap(item, visit, reraise_visit=False, enter=enter)

    return matches
648
649
def mask_password_from_url(url):
    """
    Masks out passwords from connection urls like the database connection in galaxy.ini

    >>> mask_password_from_url( 'sqlite+postgresql://user:password@localhost/' )
    'sqlite+postgresql://user:********@localhost/'
    >>> mask_password_from_url( 'amqp://user:amqp@localhost' )
    'amqp://user:********@localhost'
    >>> mask_password_from_url( 'amqp://localhost')
    'amqp://localhost'
    """
    parts = urlsplit(url)
    password = parts.password
    if not password:
        return url
    if url.count(password) == 1:
        # Plain replace keeps the rest of the url byte-identical.
        return url.replace(password, "********")
    # This can manipulate the input other than just masking password,
    # so the previous string replace method is preferred when the
    # password doesn't appear twice in the url
    masked_netloc = parts.netloc.replace(f"{parts.username}:{password}", '%s:********' % parts.username)
    return urlunsplit(parts._replace(netloc=masked_netloc))
672
673
def ready_name_for_url(raw_name):
    """ General method to convert a string (i.e. object name) to a URL-ready
    slug.

    >>> ready_name_for_url( "My Cool Object" )
    'My-Cool-Object'
    >>> ready_name_for_url( "!My Cool Object!" )
    'My-Cool-Object'
    >>> ready_name_for_url( "Hello₩◎ґʟⅾ" )
    'Hello'
    """
    # Whitespace runs become single dashes, anything outside [a-zA-Z0-9-]
    # is dropped, then at most one trailing dash is removed.
    slug = re.sub(r"\s+", "-", raw_name)
    slug = re.sub(r"[^a-zA-Z0-9\-]", "", slug)
    return slug[:-1] if slug.endswith('-') else slug
694
695
def which(file):
    # Minimal PATH search; returns the first '<dir>/<file>' that exists,
    # else None.  (From
    # http://stackoverflow.com/questions/5226958/which-equivalent-function-in-python)
    for directory in os.environ["PATH"].split(":"):
        candidate = directory + "/" + file
        if os.path.exists(candidate):
            return candidate
    return None
703
704
def in_directory(file, directory, local_path_module=os.path):
    """
    Return true, if the common prefix of both is equal to directory
    e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b.
    This function isn't used exclusively for security checks, but if it is
    used for such checks it is assumed that ``directory`` is a "trusted" path -
    supplied by Galaxy or by the admin and ``file`` is something generated by
    a tool, configuration, external web server, or user supplied input.

    local_path_module is used by Pulsar to check Windows paths while running on
    a POSIX-like system.

    >>> base_dir = tempfile.mkdtemp()
    >>> safe_dir = os.path.join(base_dir, "user")
    >>> os.mkdir(safe_dir)
    >>> good_file = os.path.join(safe_dir, "1")
    >>> with open(good_file, "w") as f: _ = f.write("hello")
    >>> in_directory(good_file, safe_dir)
    True
    >>> in_directory("/other/file/is/here.txt", safe_dir)
    False
    >>> unsafe_link = os.path.join(safe_dir, "2")
    >>> os.symlink("/other/file/bad.fasta", unsafe_link)
    >>> in_directory(unsafe_link, safe_dir)
    False
    """
    if local_path_module != os.path:
        # Foreign path flavor (e.g. ntpath on POSIX for Pulsar): load the
        # matching galaxy.util.path variant by the module's own name.
        _safe_contains = importlib.import_module('galaxy.util.path.%s' % local_path_module.__name__).safe_contains
    else:
        # Resolve symlinks in the trusted directory before containment check.
        directory = os.path.realpath(directory)
        _safe_contains = safe_contains
    return _safe_contains(directory, file)
737
738
def merge_sorted_iterables(operator, *iterables):
    """
    Merge any number of pre-sorted iterables into one sorted stream, using
    ``operator`` to extract each item's sort key.

    >>> operator = lambda x: x
    >>> list( merge_sorted_iterables( operator, [1,2,3], [4,5] ) )
    [1, 2, 3, 4, 5]
    >>> list( merge_sorted_iterables( operator, [4, 5], [1,2,3] ) )
    [1, 2, 3, 4, 5]
    >>> list( merge_sorted_iterables( operator, [1, 4, 5], [2], [3] ) )
    [1, 2, 3, 4, 5]
    """
    head, *tail = iterables
    if not tail:
        yield from head
        return
    # Two-way merge of the head against the recursive merge of the rest.
    yield from __merge_two_sorted_iterables(
        operator,
        iter(head),
        merge_sorted_iterables(operator, *tail)
    )
759
760
def __merge_two_sorted_iterables(operator, iterable1, iterable2):
    """Two-way merge of a pair of sorted iterators, yielding items in order."""
    unset = object()
    pending1 = pending2 = unset
    try:
        while True:
            if pending1 is unset:
                pending1 = next(iterable1)
            if pending2 is unset:
                pending2 = next(iterable2)
            if operator(pending2) < operator(pending1):
                yield pending2
                pending2 = unset
            else:
                yield pending1
                pending1 = unset
    except StopIteration:
        pass
    # One side is exhausted: flush any held item, then drain the survivor.
    if pending1 is not unset:
        yield pending1
    if pending2 is not unset:
        yield pending2
    yield from iterable1
    yield from iterable2
786
787
class Params:
    """
    Stores and 'sanitizes' parameters. Alphanumeric characters and the
    non-alphanumeric ones that are deemed safe are let to pass through (see L{valid_chars}).
    Some non-safe characters are escaped to safe forms for example C{>} becomes C{__lt__}
    (see L{mapped_chars}). All other characters are replaced with C{X}.

    Operates on string or list values only (HTTP parameters).

    >>> values = { 'status':'on', 'symbols':[ 'alpha', '<>', '$rm&#!' ] }
    >>> par = Params(values)
    >>> par.status
    'on'
    >>> par.value == None # missing attributes return None
    True
    >>> par.get('price', 0)
    0
    >>> par.symbols # replaces unknown symbols with X
    ['alpha', '__lt____gt__', 'XrmX__pd__!']
    >>> sorted(par.flatten()) # flattening to a list
    [('status', 'on'), ('symbols', 'XrmX__pd__!'), ('symbols', '__lt____gt__'), ('symbols', 'alpha')]
    """

    # is NEVER_SANITIZE required now that sanitizing for tool parameters can be controlled on a per parameter basis and occurs via InputValueWrappers?
    NEVER_SANITIZE = ['file_data', 'url_paste', 'URL', 'filesystem_paths']

    def __init__(self, params, sanitize=True):
        # Parameters are stored directly as instance attributes (__dict__).
        if sanitize:
            for key, value in params.items():
                # sanitize check both ungrouped and grouped parameters by
                # name. Anything relying on NEVER_SANITIZE should be
                # changed to not require this and NEVER_SANITIZE should be
                # removed.
                if (value is not None and key not in self.NEVER_SANITIZE
                        and True not in [key.endswith("|%s" % nonsanitize_parameter) for
                                         nonsanitize_parameter in self.NEVER_SANITIZE]):
                    self.__dict__[key] = sanitize_param(value)
                else:
                    self.__dict__[key] = value
        else:
            self.__dict__.update(params)

    def flatten(self):
        """
        Creates a tuple list from a dict with a tuple/value pair for every value that is a list
        """
        flat = []
        for key, value in self.__dict__.items():
            if isinstance(value, list):
                for v in value:
                    flat.append((key, v))
            else:
                flat.append((key, value))
        return flat

    def __getattr__(self, name):
        """This is here to ensure that we get None for non existing parameters"""
        return None

    def get(self, key, default):
        # Dict-style lookup over the parameter namespace.
        return self.__dict__.get(key, default)

    def __str__(self):
        return '%s' % self.__dict__

    def __len__(self):
        return len(self.__dict__)

    def __iter__(self):
        return iter(self.__dict__)

    def update(self, values):
        # Merge ``values`` into the parameter namespace (no sanitization).
        self.__dict__.update(values)
861
862
def rst_to_html(s, error=False):
    """Convert a blob of reStructuredText to HTML

    Raises if docutils is not importable; with ``error`` set, docutils
    warnings are raised as exceptions instead of being logged.
    """
    log = get_logger("docutils")

    if docutils_core is None:
        raise Exception("Attempted to use rst_to_html but docutils unavailable.")

    class FakeStream:
        # Captures docutils warning output: raise or log each message.
        def write(self, str):
            if len(str) > 0 and not str.isspace():
                if error:
                    raise Exception(str)
                log.warning(str)

    settings_overrides = {
        "embed_stylesheet": False,
        "template": os.path.join(os.path.dirname(__file__), "docutils_template.txt"),
        "warning_stream": FakeStream(),
        "doctitle_xform": False,  # without option, very different rendering depending on
                                  # number of sections in help content.
    }

    return unicodify(docutils_core.publish_string(
        s, writer=docutils_html4css1.Writer(),
        settings_overrides=settings_overrides))
888
889
def xml_text(root, name=None):
    """Returns the text inside an element"""
    if name is None:
        elem = root
    else:
        # An attribute called ``name`` wins over a child element of that name.
        attr_val = root.get(name)
        if attr_val:
            return attr_val
        elem = root.find(name)
    if elem is None or not elem.text:
        # No luck, return empty string
        return ''
    return ''.join(elem.text.splitlines()).strip()
906
907
def parse_resource_parameters(resource_param_file):
    """Code shared between jobs and workflows for reading resource parameter configuration files.

    TODO: Allow YAML in addition to XML.
    """
    resource_parameters = {}
    if not os.path.exists(resource_param_file):
        return resource_parameters
    root = parse_xml(resource_param_file).getroot()
    for param in root.findall("param"):
        resource_parameters[param.get("name")] = param
    return resource_parameters
922
923
# asbool implementation pulled from PasteDeploy
# Case-insensitive string forms accepted as booleans by asbool().
truthy = frozenset({'true', 'yes', 'on', 'y', 't', '1'})
falsy = frozenset({'false', 'no', 'off', 'n', 'f', '0'})
927
928
def asbool(obj):
    """Coerce ``obj`` to bool, accepting the PasteDeploy string spellings
    (``truthy``/``falsy`` sets, case-insensitive); other strings raise."""
    if not isinstance(obj, str):
        return bool(obj)
    normalized = obj.strip().lower()
    if normalized in truthy:
        return True
    if normalized in falsy:
        return False
    raise ValueError("String is not true/false: %r" % normalized)
939
940
def string_as_bool(string):
    """True iff ``string`` (case-insensitively, after str()) reads
    'true', 'yes', 'on' or '1'."""
    return str(string).lower() in ('true', 'yes', 'on', '1')
946
947
def string_as_bool_or_none(string):
    """
    Returns True, None or False based on the argument:
    True if passed True, 'True', 'Yes', or 'On'
    None if passed None or 'None'
    False otherwise

    Note: string comparison is case-insensitive so lowercase versions of those
    function equivalently.
    """
    normalized = str(string).lower()
    if normalized in ('none', 'null'):
        return None
    return normalized in ('true', 'yes', 'on')
965
966
def listify(item, do_strip=False):
    """
    Make a single item a single item list.

    Strings containing commas are split on them (optionally stripping
    whitespace around the parts when *do_strip* is set), lists pass through
    unchanged, tuples become lists, falsy values yield an empty list, and
    anything else is wrapped in a one-element list.

    :type item: object
    :param item: object to make a list from
    :type do_strip: bool
    :param do_strip: strip whitespaces from around split items, if set to ``True``
    :rtype: list
    :returns: The input as a list
    """
    if not item:
        return []
    if isinstance(item, list):
        return item
    if isinstance(item, tuple):
        return list(item)
    if isinstance(item, str) and ',' in item:
        parts = item.split(',')
        return [part.strip() for part in parts] if do_strip else parts
    return [item]
997
998
def commaify(amount):
    """Insert thousands separators into a numeric string,
    e.g. '1234567' -> '1,234,567'."""
    grouped = re.sub(r"^(-?\d+)(\d{3})", r'\g<1>,\g<2>', amount)
    # One group is split off per pass; recurse until the pattern no longer
    # applies.
    return grouped if grouped == amount else commaify(grouped)
1006
1007
def roundify(amount, sfs=2):
    """
    Take a number in string form and truncate to 'sfs' significant figures.
    """
    if len(amount) <= sfs:
        return amount
    zeros = '0' * (len(amount) - sfs)
    return amount[:sfs] + zeros
1016
1017
def unicodify(value, encoding=DEFAULT_ENCODING, error='replace', strip_null=False, log_exception=True):
    """
    Coerce ``value`` to a unicode string; ``None`` passes through.

    >>> assert unicodify(None) is None
    >>> assert unicodify('simple string') == 'simple string'
    >>> assert unicodify(3) == '3'
    >>> assert unicodify(bytearray([115, 116, 114, 196, 169, 195, 177, 103])) == 'strĩñg'
    >>> assert unicodify(Exception('strĩñg')) == 'strĩñg'
    >>> s = 'lâtín strìñg'; assert unicodify(s.encode('latin-1'), 'latin-1') == s
    >>> s = 'lâtín strìñg'; assert unicodify(s.encode('latin-1'), error='ignore') == 'ltn strg'
    """
    if value is None:
        return None
    try:
        if isinstance(value, bytearray):
            value = bytes(value)
        elif not isinstance(value, (str, bytes)):
            value = str(value)
        # value is now str or bytes; decode the latter.
        if isinstance(value, bytes):
            value = str(value, encoding, error)
    except Exception as e:
        if log_exception:
            msg = "Value '{}' could not be coerced to Unicode: {}('{}')".format(repr(value), type(e).__name__, e)
            log.exception(msg)
        raise
    return value.replace('\0', '') if strip_null else value
1051
1052
def smart_str(s, encoding=DEFAULT_ENCODING, strings_only=False, errors='strict'):
    """
    Returns a bytestring version of 's', encoded as specified in 'encoding'.

    If strings_only is True, don't convert (some) non-string-like objects.

    Adapted from an older, simpler version of django.utils.encoding.smart_str.

    >>> assert smart_str(None) == b'None'
    >>> assert smart_str(None, strings_only=True) is None
    >>> assert smart_str(3) == b'3'
    >>> assert smart_str(3, strings_only=True) == 3
    >>> s = b'a bytes string'; assert smart_str(s) == s
    >>> s = bytearray(b'a bytes string'); assert smart_str(s) == s
    >>> assert smart_str('a simple unicode string') == b'a simple unicode string'
    >>> assert smart_str('à strange ünicode ڃtring') == b'\\xc3\\xa0 strange \\xc3\\xbcnicode \\xda\\x83tring'
    >>> assert smart_str(b'\\xc3\\xa0n \\xc3\\xabncoded utf-8 string', encoding='latin-1') == b'\\xe0n \\xebncoded utf-8 string'
    >>> assert smart_str(bytearray(b'\\xc3\\xa0n \\xc3\\xabncoded utf-8 string'), encoding='latin-1') == b'\\xe0n \\xebncoded utf-8 string'
    """
    if strings_only and isinstance(s, (type(None), int)):
        return s
    if not isinstance(s, (str, bytes, bytearray)):
        s = str(s)
    if isinstance(s, str):
        return s.encode(encoding, errors)
    # s is bytes/bytearray: transcode only when a different target encoding
    # was requested and s is non-empty; otherwise return it as-is.
    if s and encoding != DEFAULT_ENCODING:
        return s.decode(DEFAULT_ENCODING, errors).encode(encoding, errors)
    return s
1083
1084
def strip_control_characters(s):
    """Return *s* as text with every Unicode "Cc" (control) character removed."""
    text = unicodify(s)
    return "".join(filter(lambda ch: unicodedata.category(ch) != "Cc", text))
1088
1089
def object_to_string(obj):
    """Hex-encode a bytes-like object for string transport (inverse of string_to_object)."""
    # b2a_hex is the documented alias of hexlify.
    return binascii.b2a_hex(obj)
1092
1093
def string_to_object(s):
    """Decode a hex string produced by object_to_string back into bytes."""
    # a2b_hex is the documented alias of unhexlify.
    return binascii.a2b_hex(s)
1096
1097
def clean_multiline_string(multiline_string, sep='\n'):
    """
    Dedent, split, remove first and last empty lines, rejoin.
    """
    lines = textwrap.dedent(multiline_string).split(sep)
    if not lines[0]:
        del lines[0]
    if not lines[-1]:
        del lines[-1]
    # Always rejoin with '\n' (not sep) and re-add a trailing newline.
    return '\n'.join(lines) + '\n'
1109
1110
class ParamsWithSpecs(collections.defaultdict):
    """A dict of parameter values validated against a spec table.

    Unknown keys fall back to the spec's 'default' entry (via __missing__);
    incoming values may be transformed through the spec's 'map' callable and
    checked with its 'valid' predicate.  Subclasses define how errors are
    reported by overriding the ``_param_*_error`` hooks.
    """

    def __init__(self, specs=None, params=None):
        self.specs = specs or dict()
        self.params = params or dict()
        for name, value in self.params.items():
            if name not in self.specs:
                self._param_unknown_error(name)
            spec = self.specs[name]
            if 'map' in spec:
                try:
                    self.params[name] = spec['map'](value)
                except Exception:
                    self._param_map_error(name, value)
            # NOTE: validation runs against the original (un-mapped) value.
            if 'valid' in spec and not spec['valid'](value):
                self._param_vaildation_error(name, value)
        self.update(self.params)

    def __missing__(self, name):
        # defaultdict hook: unknown keys resolve to the spec's default.
        return self.specs[name]['default']

    def __getattr__(self, name):
        # Attribute-style access: params.foo == params['foo'].
        return self[name]

    def _param_unknown_error(self, name):
        # Subclasses decide how to report a parameter without a spec.
        raise NotImplementedError()

    def _param_map_error(self, name, value):
        # Subclasses decide how to report a failed 'map' conversion.
        raise NotImplementedError()

    def _param_vaildation_error(self, name, value):
        # (sic - name kept for backward compatibility) failed 'valid' check.
        raise NotImplementedError()
1146
1147
def compare_urls(url1, url2, compare_scheme=True, compare_hostname=True, compare_path=True):
    """Compare two URLs component-wise.

    A component (scheme, hostname, path) only counts as a mismatch when its
    comparison is enabled AND it is present (non-empty) in both URLs.
    """
    parsed1 = urlparse(url1)
    parsed2 = urlparse(url2)
    checks = (
        (compare_scheme, parsed1.scheme, parsed2.scheme),
        (compare_hostname, parsed1.hostname, parsed2.hostname),
        (compare_path, parsed1.path, parsed2.path),
    )
    for enabled, first, second in checks:
        if enabled and first and second and first != second:
            return False
    return True
1158
1159
def read_build_sites(filename, check_builds=True):
    """Read display-site definitions from a tab-separated file.

    Each non-comment line must contain at least a site name and URL; when
    ``check_builds`` is true a third, comma-separated column of builds is
    also required.  Malformed lines are skipped silently; an unreadable
    file is logged and yields an empty list.

    :returns: list of dicts with keys 'name', 'url' and (optionally) 'builds'
    """
    build_sites = []
    try:
        # Use a context manager so the file handle is always closed
        # (the previous implementation leaked the open handle).
        with open(filename) as fh:
            for line in fh:
                try:
                    if line[0:1] == "#":
                        continue
                    fields = line.replace("\r", "").replace("\n", "").split("\t")
                    site_name = fields[0]
                    site = fields[1]
                    if check_builds:
                        site_builds = fields[2].split(",")
                        site_dict = {'name': site_name, 'url': site, 'builds': site_builds}
                    else:
                        site_dict = {'name': site_name, 'url': site}
                    build_sites.append(site_dict)
                except Exception:
                    # Skip malformed lines (e.g. too few columns).
                    continue
    except Exception:
        log.error("ERROR: Unable to read builds for site file %s", filename)
    return build_sites
1182
1183
def relativize_symlinks(path, start=None, followlinks=False):
    """Rewrite every symlink under ``path`` as a link relative to ``start``
    (or to the link's own directory when ``start`` is None)."""
    for root, _, file_names in os.walk(path, followlinks=followlinks):
        # Base directory for relpath: explicit 'start' wins, else this root.
        base = root if start is None else start
        for file_name in file_names:
            link_path = os.path.join(root, file_name)
            if not os.path.islink(link_path):
                continue
            target = os.readlink(link_path)
            # Replace the link in place with a relative equivalent.
            os.remove(link_path)
            os.symlink(relpath(target, base), link_path)
1199
1200
def stringify_dictionary_keys(in_dict):
    """Return a shallow copy of ``in_dict`` with every top-level key coerced
    to ``str`` (does not recurse).

    Needed because non-str keys are not valid for expansion into keyword
    arguments on method calls.
    """
    return {str(key): value for key, value in in_dict.items()}
1209
1210
def mkstemp_ln(src, prefix='mkstemp_ln_'):
    """
    Create a hard link to ``src`` under a random name in the same directory
    and return its absolute path (modelled on tempfile._mkstemp_inner).
    Created so we can persist the underlying file of a NamedTemporaryFile
    upon its closure.
    """
    src_dir = os.path.dirname(src)
    candidates = tempfile._get_candidate_names()
    for _ in range(tempfile.TMP_MAX):
        link_path = os.path.join(src_dir, prefix + next(candidates))
        try:
            os.link(src, link_path)
        except OSError as e:
            if e.errno == errno.EEXIST:
                continue  # name collision - try the next candidate
            raise
        return os.path.abspath(link_path)
    raise OSError(errno.EEXIST, "No usable temporary file name found")
1230
1231
def umask_fix_perms(path, umask, unmasked_perms, gid=None):
    """
    umask-friendly permissions fixing
    """
    # Target mode: the requested permissions with the umask bits removed.
    wanted_perms = unmasked_perms & ~umask
    try:
        st = os.stat(path)
    except OSError:
        log.exception('Unable to set permissions or group on %s', path)
        return
    current_perms = stat.S_IMODE(st.st_mode)
    # Fix the mode bits if they differ from the umask-adjusted target.
    if current_perms != wanted_perms:
        try:
            os.chmod(path, wanted_perms)
        except Exception as e:
            log.warning('Unable to honor umask ({}) for {}, tried to set: {} but mode remains {}, error was: {}'.format(
                oct(umask), path, oct(wanted_perms), oct(current_perms), unicodify(e)))
    # Fix the group if one was requested and it differs.
    if gid is not None and st.st_gid != gid:
        try:
            os.chown(path, -1, gid)
        except Exception as e:
            try:
                # Resolve gids to group entries for a friendlier message.
                desired_group = grp.getgrgid(gid)
                current_group = grp.getgrgid(st.st_gid)
            except Exception:
                desired_group = gid
                current_group = st.st_gid
            log.warning('Unable to honor primary group ({}) for {}, group remains {}, error was: {}'.format(
                desired_group, path, current_group, unicodify(e)))
1267
1268
def docstring_trim(docstring):
    """Trimming python doc strings. Taken from: http://www.python.org/dev/peps/pep-0257/"""
    if not docstring:
        return ''
    # Convert tabs to spaces (following the normal Python rules) and split.
    lines = docstring.expandtabs().splitlines()
    # Determine the minimum indentation of the continuation lines; the first
    # line sits right after the opening quotes and doesn't count.
    indents = []
    for line in lines[1:]:
        stripped = line.lstrip()
        if stripped:
            indents.append(len(line) - len(stripped))
    indent = min(indents, default=sys.maxsize)
    # Remove the common indentation (first line is special):
    trimmed = [lines[0].strip()]
    if indent < sys.maxsize:
        trimmed.extend(line[indent:].rstrip() for line in lines[1:])
    # Strip off trailing and leading blank lines:
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    return '\n'.join(trimmed)
1294
1295
def nice_size(size):
    """
    Returns a readably formatted string with the size

    >>> nice_size(100)
    '100 bytes'
    >>> nice_size(10000)
    '9.8 KB'
    >>> nice_size(1000000)
    '976.6 KB'
    >>> nice_size(100000000)
    '95.4 MB'
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    try:
        size = float(size)
    except Exception:
        return '??? bytes'
    # Remember the sign, then work with the magnitude.
    sign = '-' if size < 0 else ''
    size = abs(size)
    for exponent, unit in enumerate(units):
        if size < 1024 ** (exponent + 1):
            scaled = size / float(1024 ** exponent)
            if unit == 'bytes':  # no decimals for bytes
                return "%s%d bytes" % (sign, scaled)
            return f"{sign}{scaled:.1f} {unit}"
    return '??? bytes'
1326
1327
def size_to_bytes(size):
    """
    Returns a number of bytes (as integer) if given a reasonably formatted string with the size

    >>> size_to_bytes('1024')
    1024
    >>> size_to_bytes('1.0')
    1
    >>> size_to_bytes('10 bytes')
    10
    >>> size_to_bytes('4k')
    4096
    >>> size_to_bytes('2.2 TB')
    2418925581107
    >>> size_to_bytes('.01 TB')
    10995116277
    >>> size_to_bytes('1.b')
    1
    >>> size_to_bytes('1.2E2k')
    122880
    """
    # The following number regexp is based on https://stackoverflow.com/questions/385558/extract-float-double-value/385597#385597
    size_re = re.compile(r'(?P<number>(\d+(\.\d*)?|\.\d+)(e[+-]?\d+)?)\s*(?P<multiple>[eptgmk]?(b|bytes?)?)?$')
    size_match = size_re.match(size.lower())
    if size_match is None:
        raise ValueError("Could not parse string '%s'" % size)
    number = float(size_match.group("number"))
    multiple = size_match.group("multiple")
    # Bare numbers and plain byte units need no scaling.
    if multiple == "" or multiple.startswith('b'):
        return int(number)
    # Map the unit prefix to its power of 1024.
    exponents = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}
    exponent = exponents.get(multiple[0])
    if exponent is None:
        raise ValueError(f"Unknown multiplier '{multiple}' in '{size}'")
    return int(number * 1024 ** exponent)
1372
1373
def send_mail(frm, to, subject, body, config, html=None):
    """
    Sends an email.

    :type frm: str
    :param frm: from address

    :type to: str
    :param to: to address

    :type subject: str
    :param subject: Subject line

    :type body: str
    :param body: Body text (should be plain text)

    :type config: object
    :param config: Galaxy configuration object

    :type html: str
    :param html: Alternative HTML representation of the body content. If
                 provided will convert the message to a MIMEMultipart. (Default 'None')
    """

    # Accept a single address or a list of addresses.
    to = listify(to)
    if html:
        # multipart/alternative: clients pick the richest part they support.
        msg = MIMEMultipart('alternative')
    else:
        msg = MIMEText(body, 'plain', 'utf-8')

    msg['To'] = ', '.join(to)
    msg['From'] = frm
    msg['Subject'] = subject

    # Without an SMTP server configured, log the message instead of sending.
    if config.smtp_server is None:
        log.error("Mail is not configured for this Galaxy instance.")
        log.info(msg)
        return

    if html:
        # Attach the plain-text part first so the HTML part is preferred.
        mp_text = MIMEText(body, 'plain', 'utf-8')
        mp_html = MIMEText(html, 'html', 'utf-8')
        msg.attach(mp_text)
        msg.attach(mp_html)

    smtp_ssl = asbool(getattr(config, 'smtp_ssl', False))
    if smtp_ssl:
        s = smtplib.SMTP_SSL(config.smtp_server)
    else:
        s = smtplib.SMTP(config.smtp_server)
    if not smtp_ssl:
        # Opportunistically upgrade plain connections via STARTTLS:
        # a missing extension or missing TLS support is tolerated with a
        # warning, but a broken HELO exchange is fatal.
        try:
            s.starttls()
            log.debug('Initiated SSL/TLS connection to SMTP server: %s', config.smtp_server)
        except RuntimeError as e:
            log.warning('SSL/TLS support is not available to your Python interpreter: %s', unicodify(e))
        except smtplib.SMTPHeloError as e:
            log.error("The server didn't reply properly to the HELO greeting: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPException as e:
            log.warning('The server does not support the STARTTLS extension: %s', unicodify(e))
    if config.smtp_username and config.smtp_password:
        # Authenticate only when both credentials are configured; any
        # login failure closes the connection and propagates.
        try:
            s.login(config.smtp_username, config.smtp_password)
        except smtplib.SMTPHeloError as e:
            log.error("The server didn't reply properly to the HELO greeting: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPAuthenticationError as e:
            log.error("The server didn't accept the username/password combination: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPException as e:
            log.error("No suitable authentication method was found: %s", unicodify(e))
            s.close()
            raise
    s.sendmail(frm, to, msg.as_string())
    s.quit()
1453
1454
def force_symlink(source, link_name):
    """Create a symlink, replacing any file already present at ``link_name``."""
    try:
        os.symlink(source, link_name)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
        # A file already exists at link_name: remove it and retry once.
        os.remove(link_name)
        os.symlink(source, link_name)
1464
1465
def move_merge(source, target):
    """Move ``source`` to ``target`` so that ``target`` is always the final path.

    shutil.move would nest a moved directory inside an existing target
    directory; instead, when both are directories, merge the contents of
    ``source`` into ``target`` recursively.
    """
    source_is_dir = os.path.isdir(source)
    target_is_dir = os.path.exists(target) and os.path.isdir(target)
    if not (source_is_dir and target_is_dir):
        return shutil.move(source, target)
    for entry in os.listdir(source):
        move_merge(os.path.join(source, entry), os.path.join(target, entry))
1477
1478
def safe_str_cmp(a, b):
    """Compare two strings in constant time (timing-attack resistant).

    Every character pair is examined even after a mismatch is found, so the
    running time does not reveal where the strings diverge.  Length
    inequality still returns early, as the lengths are not secret here.
    """
    if len(a) != len(b):
        return False
    mismatch = 0
    for ch_a, ch_b in zip(a, b):
        # Accumulate differences without branching on them.
        mismatch |= ord(ch_a) ^ ord(ch_b)
    return mismatch == 0
1488
1489
# Root of the Galaxy source tree: three directory levels above this package.
galaxy_root_path = os.path.join(__path__[0], os.pardir, os.pardir, os.pardir)  # type: ignore
# Directory holding the sample configuration files shipped with Galaxy.
galaxy_samples_path = os.path.join(__path__[0], os.pardir, 'config', 'sample')  # type: ignore
1492
1493
def galaxy_directory():
    """Return the absolute path of the Galaxy root directory.

    When running from a ``packages`` sub-checkout, step up one more level so
    the real project root is returned.
    """
    root = os.path.abspath(galaxy_root_path)
    if os.path.basename(root) == "packages":
        return os.path.abspath(os.path.join(root, os.pardir))
    return root
1499
1500
def galaxy_samples_directory():
    """Return the absolute path of Galaxy's sample-configuration directory."""
    samples_path = galaxy_samples_path
    return os.path.abspath(samples_path)
1503
1504
def config_directories_from_setting(directories_setting, galaxy_root=galaxy_root_path):
    """
    Parse the ``directories_setting`` into a list of relative or absolute
    filesystem paths that will be searched to discover plugins.

    :type galaxy_root: string
    :param galaxy_root: the root path of this galaxy installation
    :type directories_setting: string (default: None)
    :param directories_setting: the filesystem path (or paths)
        to search for plugins. Can be CSV string of paths. Will be treated as
        absolute if a path starts with '/', relative otherwise.
    :rtype: list of strings
    :returns: list of filesystem paths
    """
    if not directories_setting:
        return []
    directories = []
    for raw_directory in listify(directories_setting):
        directory = raw_directory.strip()
        # Relative paths are resolved against the Galaxy root.
        if not directory.startswith('/'):
            directory = os.path.join(galaxy_root, directory)
        if os.path.exists(directory):
            directories.append(directory)
        else:
            # Missing directories are skipped with a warning, not fatal.
            log.warning('directory not found: %s', directory)
    return directories
1532
1533
def parse_int(value, min_val=None, max_val=None, default=None, allow_none=False):
    """Parse ``value`` as an int, clamped to ``[min_val, max_val]``.

    On a failed conversion (ValueError): return None when ``allow_none`` is
    set and either no default was given or the value is the string "None";
    otherwise return ``default`` when one was provided, else re-raise.
    Note that only ValueError is handled; e.g. ``value=None`` raises TypeError.
    """
    try:
        value = int(value)
        if min_val is not None and value < min_val:
            return min_val
        if max_val is not None and value > max_val:
            return max_val
        return value
    except ValueError:
        if allow_none:
            if default is None or value == "None":
                return None
        if default is not None:
            # Test against None explicitly: the previous truthiness check
            # (`if default:`) silently discarded falsy defaults such as 0
            # and raised instead of returning them.
            return default
        else:
            raise
1550
1551
def parse_non_hex_float(s):
    r"""
    Parse string `s` into a float but throw a `ValueError` if the string is in
    the otherwise acceptable format `\d+e\d+` (e.g. 40000000000000e5.)

    This can be passed into `json.loads` to prevent a hex string in the above
    format from being incorrectly parsed as a float in scientific notation.

    >>> parse_non_hex_float( '123.4' )
    123.4
    >>> parse_non_hex_float( '2.45e+3' )
    2450.0
    >>> parse_non_hex_float( '2.45e-3' )
    0.00245
    >>> parse_non_hex_float( '40000000000000e5' )
    Traceback (most recent call last):
    ...
    ValueError: could not convert string to float: 40000000000000e5
    """
    parsed = float(s)
    # float() accepted the string; now reject exponents written without an
    # explicit sign, which are more likely hex-like identifiers here.
    bare_exponent = 'e' in s and '+' not in s and '-' not in s
    if bare_exponent:
        raise ValueError('could not convert string to float: ' + s)
    return parsed
1576
1577
def build_url(base_url, port=80, scheme='http', pathspec=None, params=None, doseq=False):
    """Assemble a URL from ``base_url`` plus optional port, scheme override,
    extra path components and query parameters.

    :param pathspec: list of path components appended to the base path
    :param params: dict of query parameters; merged with (and overridden by)
        any query string already present in ``base_url``
    :param doseq: passed through to ``urllib.parse.urlencode``
    """
    if params is None:
        params = dict()
    if pathspec is None:
        pathspec = []
    parsed_url = urlparse(base_url)
    if scheme != 'http':
        # ParseResult is an immutable namedtuple: direct attribute
        # assignment (`parsed_url.scheme = scheme`) raises AttributeError,
        # so build a modified copy with _replace().
        parsed_url = parsed_url._replace(scheme=scheme)
    assert parsed_url.scheme in ('http', 'https', 'ftp'), 'Invalid URL scheme: %s' % scheme
    if port != 80:
        url = '%s://%s:%d/%s' % (parsed_url.scheme, parsed_url.netloc.rstrip('/'), int(port), parsed_url.path)
    else:
        url = '{}://{}/{}'.format(parsed_url.scheme, parsed_url.netloc.rstrip('/'), parsed_url.path.lstrip('/'))
    if len(pathspec) > 0:
        url = '{}/{}'.format(url.rstrip('/'), '/'.join(pathspec))
    if parsed_url.query:
        # Fold the base URL's own query string into the params dict.
        for query_parameter in parsed_url.query.split('&'):
            key, value = query_parameter.split('=')
            params[key] = value
    if params:
        url += '?%s' % urlencode(params, doseq=doseq)
    return url
1600
1601
def url_get(base_url, auth=None, pathspec=None, params=None, max_retries=5, backoff_factor=1):
    """Make contact with the uri provided and return any contents."""
    full_url = build_url(base_url, pathspec=pathspec, params=params)
    session = requests.Session()
    # Retry (with exponential backoff) only on HTTP 429 Too Many Requests.
    retry_policy = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=[429])
    session.mount(base_url, HTTPAdapter(max_retries=retry_policy))
    response = session.get(full_url, auth=auth)
    response.raise_for_status()
    return response.text
1611
1612
def download_to_file(url, dest_file_path, timeout=30, chunk_size=2 ** 20):
    """Download a URL to a file in chunks."""
    with requests.get(url, timeout=timeout, stream=True) as response:
        with open(dest_file_path, 'wb') as out:
            for chunk in response.iter_content(chunk_size):
                if chunk:  # skip keep-alive chunks
                    out.write(chunk)
1619
1620
class classproperty:
    """Descriptor implementing a read-only property computed on the class:
    the wrapped callable receives the owner class, not the instance."""

    def __init__(self, f):
        # Keep the attribute name 'f' so any external introspection is stable.
        self.f = f

    def __get__(self, obj, owner):
        # Invoked for both class and instance access; always pass the class.
        return self.f(owner)
1628
1629
def get_executable():
    """Return the path of the Python interpreter to use for subprocesses.

    Under uWSGI, ``sys.executable`` is the uwsgi binary itself, so look up
    the configured virtualenv (or $VIRTUAL_ENV) to find the real Python.
    """
    exe = sys.executable
    if exe.endswith('uwsgi'):
        virtualenv = None
        if uwsgi is not None:
            # Any of these uWSGI option names may hold the virtualenv path.
            for name in ('home', 'virtualenv', 'venv', 'pyhome'):
                if name in uwsgi.opt:
                    virtualenv = unicodify(uwsgi.opt[name])
                    break
        # Fall back to the activated virtualenv, if any.
        if virtualenv is None and 'VIRTUAL_ENV' in os.environ:
            virtualenv = os.environ['VIRTUAL_ENV']
        if virtualenv is not None:
            exe = os.path.join(virtualenv, 'bin', 'python')
        else:
            # No virtualenv found: try a 'python' binary installed next to
            # the uwsgi binary, else rely on 'python' from $PATH.
            exe = os.path.join(os.path.dirname(exe), 'python')
            if not os.path.exists(exe):
                exe = 'python'
    return exe
1648
1649
class ExecutionTimer:
    """Wall-clock stopwatch that starts at construction time."""

    def __init__(self):
        # Start time, in seconds since the epoch.
        self.begin = time.time()

    def __str__(self):
        return "(%0.3f ms)" % (self.elapsed * 1000)

    @property
    def elapsed(self):
        """Seconds elapsed since the timer was created."""
        return time.time() - self.begin
1661
1662
class StructuredExecutionTimer:
    """Stopwatch that renders its elapsed time into a ``string.Template``
    message, carrying an id and arbitrary tags for structured logging."""

    def __init__(self, timer_id, template, **tags):
        # Start time, in seconds since the epoch.
        self.begin = time.time()
        self.timer_id = timer_id
        self.template = template
        self.tags = tags

    def __str__(self):
        return self.to_str()

    def to_str(self, **kwd):
        """Render the template (substituting ``kwd`` when given) and append
        the elapsed time in milliseconds."""
        if kwd:
            message = string.Template(self.template).safe_substitute(kwd)
        else:
            message = self.template
        return message + " (%0.3f ms)" % (self.elapsed * 1000)

    @property
    def elapsed(self):
        """Seconds elapsed since the timer was created."""
        return time.time() - self.begin
1685
1686
if __name__ == '__main__':
    # Allow running this module directly to execute its doctests.
    import doctest
    doctest.testmod(sys.modules[__name__], verbose=False)