Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/galaxy/util/__init__.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """ | |
| 3 Utility functions used systemwide. | |
| 4 | |
| 5 """ | |
| 6 from __future__ import absolute_import | |
| 7 | |
| 8 import binascii | |
| 9 import collections | |
| 10 import errno | |
| 11 import importlib | |
| 12 import json | |
| 13 import os | |
| 14 import random | |
| 15 import re | |
| 16 import shutil | |
| 17 import smtplib | |
| 18 import stat | |
| 19 import string | |
| 20 import sys | |
| 21 import tempfile | |
| 22 import threading | |
| 23 import time | |
| 24 import unicodedata | |
| 25 import xml.dom.minidom | |
| 26 from datetime import datetime | |
| 27 from email.mime.multipart import MIMEMultipart | |
| 28 from email.mime.text import MIMEText | |
| 29 from functools import partial | |
| 30 from hashlib import md5 | |
| 31 from os.path import relpath | |
| 32 | |
| 33 import requests | |
| 34 try: | |
| 35 import grp | |
| 36 except ImportError: | |
| 37 # For Pulsar on Windows (which does not use the function that uses grp) | |
| 38 grp = None | |
| 39 from boltons.iterutils import ( | |
| 40 default_enter, | |
| 41 remap, | |
| 42 ) | |
| 43 LXML_AVAILABLE = True | |
| 44 try: | |
| 45 from lxml import etree | |
| 46 except ImportError: | |
| 47 LXML_AVAILABLE = False | |
| 48 import xml.etree.ElementTree as etree | |
| 49 from requests.adapters import HTTPAdapter | |
| 50 from requests.packages.urllib3.util.retry import Retry | |
| 51 from six import binary_type, iteritems, PY2, string_types, text_type | |
| 52 from six.moves import ( | |
| 53 xrange, | |
| 54 zip | |
| 55 ) | |
| 56 from six.moves.urllib import parse as urlparse | |
| 57 | |
| 58 try: | |
| 59 import docutils.core as docutils_core | |
| 60 import docutils.writers.html4css1 as docutils_html4css1 | |
| 61 except ImportError: | |
| 62 docutils_core = None | |
| 63 docutils_html4css1 = None | |
| 64 | |
| 65 try: | |
| 66 import uwsgi | |
| 67 except ImportError: | |
| 68 uwsgi = None | |
| 69 | |
| 70 from .custom_logging import get_logger | |
| 71 from .inflection import English, Inflector | |
| 72 from .path import safe_contains, safe_makedirs, safe_relpath # noqa: F401 | |
| 73 | |
inflector = Inflector(English)

log = get_logger(__name__)
# Module-wide re-entrant lock used by the @synchronized decorator below.
_lock = threading.RLock()

# Re-exported for the convenience of importers of this module.
namedtuple = collections.namedtuple

CHUNK_SIZE = 65536  # 64k

# Maximum string length stored in the database, and its human-readable form.
DATABASE_MAX_STRING_SIZE = 32768
DATABASE_MAX_STRING_SIZE_PRETTY = '32K'

# Magic numbers identifying compressed file types.
gzip_magic = b'\x1f\x8b'
bz2_magic = b'BZh'
DEFAULT_ENCODING = os.environ.get('GALAXY_DEFAULT_ENCODING', 'utf-8')
NULL_CHAR = b'\x00'
# Byte markers whose presence makes is_binary() classify content as binary.
BINARY_CHARS = [NULL_CHAR]
FILENAME_VALID_CHARS = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Common permission bit combinations (octal 644, 755 and 777 respectively).
RW_R__R__ = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
RWXR_XR_X = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
RWXRWXRWX = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

# XML factory from whichever etree implementation (lxml or stdlib) was imported.
XML = etree.XML

defaultdict = collections.defaultdict
| 101 | |
def remove_protocol_from_url(url):
    """Strip a leading ``scheme://`` (http, https, ...) and any trailing
    slashes from ``url``. A None input is passed through unchanged.
    """
    if url is None:
        return url

    has_protocol = url.find('://') > 0
    trimmed = url.split('://')[1] if has_protocol else url
    return trimmed.rstrip('/')
| 115 | |
| 116 | |
def is_binary(value):
    """
    File is binary if it contains a null-byte by default (e.g. behavior of grep, etc.).
    This may fail for utf-16 files, but so would ASCII encoding.
    >>> is_binary( string.printable )
    False
    >>> is_binary( b'\\xce\\x94' )
    False
    >>> is_binary( b'\\x00' )
    True
    """
    data = smart_str(value)
    return any(marker in data for marker in BINARY_CHARS)
| 133 | |
| 134 | |
# Compiled once at import time instead of on every call. The \Z anchor is
# required: re.match only anchors the start, so without it any string that
# merely *begins* with a UUID (e.g. trailing garbage appended) matched.
_UUID_RE = re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z")


def is_uuid(value):
    """
    This method returns True if value is a UUID, otherwise False.

    >>> is_uuid( "123e4567-e89b-12d3-a456-426655440000" )
    True
    >>> is_uuid( "0x3242340298902834" )
    False
    >>> is_uuid( "123e4567-e89b-12d3-a456-426655440000junk" )
    False
    """
    # str() allows uuid.UUID instances (and other objects) to be checked too.
    if _UUID_RE.match(str(value)):
        return True
    else:
        return False
| 148 | |
| 149 | |
def directory_hash_id(id):
    """
    Map an object id onto a list of nested directory names.

    >>> directory_hash_id( 100 )
    ['000']
    >>> directory_hash_id( "90000" )
    ['090']
    >>> directory_hash_id("777777777")
    ['000', '777', '777']
    >>> directory_hash_id("135ee48a-4f51-470c-ae2f-ce8bd78799e6")
    ['1', '3', '5']
    """
    as_str = str(id)
    # Shortcut: ids 0-999 all live under ../000/
    if len(as_str) < 4:
        return ["000"]
    if is_uuid(as_str):
        # UUIDs: one directory level per leading character
        return [char for char in as_str[:3]]
    # Zero-pad up to a multiple of three, then drop the last three digits
    # so that each leaf directory holds at most 1000 entries.
    padded = ((3 - len(as_str) % 3) * "0") + as_str
    padded = padded[:-3]
    return [padded[start:start + 3] for start in range(0, len(padded), 3)]
| 177 | |
| 178 | |
def get_charset_from_http_headers(headers, default=None):
    """Extract the charset value from a Content-Type header, or `default`."""
    content_type = headers.get('content-type', None)
    if content_type and 'charset=' in content_type:
        charset = content_type.split('charset=')[-1].split(';')[0].strip()
        if charset:
            return charset
    return default
| 186 | |
| 187 | |
def synchronized(func):
    """This wrapper will serialize access to 'func' to a single thread. Use it as a decorator."""
    def caller(*params, **kparams):
        # 'with' performs the same blocking acquire as acquire(True) and
        # guarantees release even when func raises.
        with _lock:
            return func(*params, **kparams)
    return caller
| 197 | |
| 198 | |
def iter_start_of_line(fh, chunk_size=None):
    """
    Iterate over fh, yielding the result of readline(chunk_size) until EOF.
    """
    read_piece = partial(fh.readline, chunk_size)
    for piece in iter(read_piece, ""):
        yield piece
| 205 | |
| 206 | |
def file_reader(fp, chunk_size=CHUNK_SIZE):
    """This generator yields the open fileobject in chunks (default 64k). Closes the file at the end"""
    while True:
        block = fp.read(chunk_size)
        if not block:
            # EOF: close before finishing iteration, as the contract promises
            fp.close()
            return
        yield block
| 215 | |
| 216 | |
def unique_id(KEY_SIZE=128):
    """
    Generates an unique id

    >>> ids = [ unique_id() for i in range(1000) ]
    >>> len(set(ids))
    1000
    """
    bits = random.getrandbits(KEY_SIZE)
    return md5(text_type(bits).encode("UTF-8")).hexdigest()
| 227 | |
| 228 | |
def parse_xml(fname, strip_whitespace=True, remove_comments=True):
    """Parse the XML file ``fname`` and return the parsed tree.

    :param fname: path of the XML file to parse
    :param strip_whitespace: strip leading/trailing whitespace from every
        element's text and tail
    :param remove_comments: drop comment nodes when lxml is in use
        (stdlib etree always drops them regardless)
    :raises IOError: when the file cannot be read; errno is filled in for
        lxml, which leaves it unset
    """
    parser = None
    if remove_comments and LXML_AVAILABLE:
        # If using stdlib etree comments are always removed,
        # but lxml doesn't do this by default
        parser = etree.XMLParser(remove_comments=remove_comments)
    try:
        # parser=None lets stdlib etree fall back to its default parser
        tree = etree.parse(fname, parser=parser)
        root = tree.getroot()
        if strip_whitespace:
            for elem in root.iter('*'):
                if elem.text is not None:
                    elem.text = elem.text.strip()
                if elem.tail is not None:
                    elem.tail = elem.tail.strip()
    except IOError as e:
        if e.errno is None and not os.path.exists(fname):
            # lxml doesn't set errno
            e.errno = errno.ENOENT
        raise
    except etree.ParseError:
        # NOTE(review): when lxml is the active etree, `etree.ParseError`
        # may not exist (lxml raises XMLSyntaxError) -- confirm the intended
        # exception type here.
        log.exception("Error parsing file %s", fname)
        raise
    return tree
| 254 | |
| 255 | |
def parse_xml_string(xml_string, strip_whitespace=True):
    """Parse XML from a string and return the root element; optionally strip
    surrounding whitespace from each element's text and tail.
    """
    try:
        tree = etree.fromstring(xml_string)
    except ValueError as e:
        # lxml refuses unicode strings that carry an encoding declaration;
        # retry with the utf-8 encoded bytes in that case only.
        if 'strings with encoding declaration are not supported' not in unicodify(e):
            raise e
        tree = etree.fromstring(xml_string.encode('utf-8'))
    if strip_whitespace:
        for node in tree.iter('*'):
            if node.text is not None:
                node.text = node.text.strip()
            if node.tail is not None:
                node.tail = node.tail.strip()
    return tree
| 271 | |
| 272 | |
def xml_to_string(elem, pretty=False):
    """
    Returns a string from an xml tree.

    :param elem: Element (or comment node) to serialize; None yields ''
    :param pretty: re-parse and indent the serialized output
    """
    try:
        if elem is not None:
            if PY2:
                xml_str = etree.tostring(elem, encoding='utf-8')
            else:
                # encoding='unicode' makes tostring return str, not bytes, on Python 3
                xml_str = etree.tostring(elem, encoding='unicode')
        else:
            xml_str = ''
    except TypeError as e:
        # we assume this is a comment
        if hasattr(elem, 'text'):
            return u"<!-- %s -->\n" % elem.text
        else:
            raise e
    if xml_str and pretty:
        pretty_string = xml.dom.minidom.parseString(xml_str).toprettyxml(indent=' ')
        # drop the whitespace-only lines that toprettyxml introduces
        return "\n".join(line for line in pretty_string.split('\n') if not re.match(r'^[\s\\nb\']*$', line))
    return xml_str
| 295 | |
| 296 | |
def xml_element_compare(elem1, elem2):
    """Return True when the two elements (or pre-converted dict forms) are
    structurally equal, comparing their dictionary representations.
    """
    d1 = elem1 if isinstance(elem1, dict) else xml_element_to_dict(elem1)
    d2 = elem2 if isinstance(elem2, dict) else xml_element_to_dict(elem2)
    return d1 == d2
| 303 | |
| 304 | |
| 305 def xml_element_list_compare(elem_list1, elem_list2): | |
| 306 return [xml_element_to_dict(elem) for elem in elem_list1] == [xml_element_to_dict(elem) for elem in elem_list2] | |
| 307 | |
| 308 | |
def xml_element_to_dict(elem):
    """Recursively convert an XML element into a nested dictionary.

    Attributes become '@name' keys, text becomes the value itself (or a
    '#text' key when attributes/children are present), and repeated child
    tags are collected into lists.
    """
    rval = {}
    sub_elems = list(elem)
    # Use a dict container whenever there is anything to put in it
    # (attributes OR children). Previously the container was only created
    # for elements with attributes, so an element with children but no
    # attributes was initialized to None and crashed with
    # "TypeError: 'NoneType' object does not support item assignment".
    if elem.attrib or sub_elems:
        rval[elem.tag] = {}
    else:
        rval[elem.tag] = None

    if sub_elems:
        sub_elem_dict = dict()
        for child_dict in map(xml_element_to_dict, sub_elems):
            for key, value in child_dict.items():
                sub_elem_dict.setdefault(key, []).append(value)
        for key, value in sub_elem_dict.items():
            # collapse single-occurrence children; keep lists for repeats
            if len(value) == 1:
                rval[elem.tag][key] = value[0]
            else:
                rval[elem.tag][key] = value
    if elem.attrib:
        for key, value in elem.attrib.items():
            rval[elem.tag]["@%s" % key] = value

    if elem.text:
        text = elem.text.strip()
        # precedence is (text and sub_elems) or elem.attrib -- preserved
        if text and sub_elems or elem.attrib:
            rval[elem.tag]['#text'] = text
        else:
            rval[elem.tag] = text

    return rval
| 341 | |
| 342 | |
def pretty_print_xml(elem, level=0):
    """Indent an element tree in place (mutates text/tail); returns elem."""
    # NOTE(review): pad appears here as a single space; the page this was
    # extracted from may have collapsed a multi-space literal -- confirm.
    pad = ' '
    # newline plus the indentation prefix for the current nesting level
    i = "\n" + level * pad
    if len(elem):
        # element has children: indent its text, recurse, indent its tail
        if not elem.text or not elem.text.strip():
            elem.text = i + pad + pad
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for e in elem:
            pretty_print_xml(e, level + 1)
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
    else:
        # leaf element: only adjust the tail, and never for the root (level 0)
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i + pad
    return elem
| 359 | |
| 360 | |
def get_file_size(value, default=None):
    """Best-effort size lookup for a path, an object with a .name path,
    or a seekable file-like object; falls back to `default`.
    """
    try:
        # value may be a path (or fd) usable directly
        return os.path.getsize(value)
    except Exception:
        pass
    try:
        # value may be a file object exposing its path via .name
        return os.path.getsize(value.name)
    except Exception:
        pass
    try:
        # last resort: seek to the end, note the offset, restore position
        current = value.tell()
        value.seek(0, 2)
        size = value.tell()
        value.seek(current)
        return size
    except Exception:
        return default
| 380 | |
| 381 | |
def shrink_stream_by_size(value, size, join_by=b"..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
    """
    Shrinks bytes read from `value` to `size`.

    `value` needs to implement tell/seek, so files need to be opened in binary mode.
    Returns unicode text with invalid characters replaced.

    :param value: seekable binary stream to read from
    :param size: maximum number of bytes to keep
    :param join_by: marker spliced between the kept head and tail
    :param left_larger: give the head the extra byte when the halves are uneven
    :param beginning_on_size_error: if size < len(join_by) + 2, return the first `size` bytes
    :param end_on_size_error: if size < len(join_by) + 2, return the last `size` bytes
    :raises ValueError: size too small and neither *_on_size_error flag set
    """
    rval = b''
    join_by = smart_str(join_by)
    if get_file_size(value) > size:
        start = value.tell()
        len_join_by = len(join_by)
        # need at least one byte on each side of the join marker
        min_size = len_join_by + 2
        if size < min_size:
            # NOTE(review): these early returns yield raw bytes, not the
            # unicode promised by the docstring -- confirm intended behavior.
            if beginning_on_size_error:
                rval = value.read(size)
                value.seek(start)
                return rval
            elif end_on_size_error:
                value.seek(-size, 2)
                rval = value.read(size)
                value.seek(start)
                return rval
            raise ValueError('With the provided join_by value (%s), the minimum size value is %i.' % (join_by, min_size))
        left_index = right_index = int((size - len_join_by) / 2)
        # distribute the leftover byte (odd budgets) to head or tail
        if left_index + right_index + len_join_by < size:
            if left_larger:
                left_index += 1
            else:
                right_index += 1
        rval = value.read(left_index) + join_by
        # read the tail directly from the end of the stream
        value.seek(-right_index, 2)
        rval += value.read(right_index)
    else:
        # small enough: return the entire stream contents
        while True:
            data = value.read(CHUNK_SIZE)
            if not data:
                break
            rval += data
    return unicodify(rval)
| 422 | |
| 423 | |
def shrink_and_unicodify(stream):
    """Unicodify `stream` (dropping nulls) and cap it at the database string limit."""
    text = unicodify(stream, strip_null=True) or u''
    if len(text) <= DATABASE_MAX_STRING_SIZE:
        return text
    return shrink_string_by_size(text,
                                 DATABASE_MAX_STRING_SIZE,
                                 join_by="\n..\n",
                                 left_larger=True,
                                 beginning_on_size_error=True)
| 433 | |
| 434 | |
def shrink_string_by_size(value, size, join_by="..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
    """Truncate `value` to at most `size` characters, splicing `join_by`
    between the kept head and tail. See the *_on_size_error flags for
    behavior when `size` is too small to hold the marker plus one
    character on each side.
    """
    if len(value) <= size:
        return value
    len_join_by = len(join_by)
    min_size = len_join_by + 2
    if size < min_size:
        if beginning_on_size_error:
            return value[:size]
        if end_on_size_error:
            return value[-size:]
        raise ValueError('With the provided join_by value (%s), the minimum size value is %i.' % (join_by, min_size))
    # split the remaining budget between head and tail; odd leftover byte
    # goes to whichever side left_larger selects
    right_len = (size - len_join_by) // 2
    left_len = right_len
    if left_len + right_len + len_join_by < size:
        if left_larger:
            left_len += 1
        else:
            right_len += 1
    return "%s%s%s" % (value[:left_len], join_by, value[-right_len:])
| 453 | |
| 454 | |
def pretty_print_time_interval(time=False, precise=False, utc=False):
    """
    Get a datetime object or a int() Epoch timestamp and return a
    pretty string like 'an hour ago', 'Yesterday', '3 months ago',
    'just now', etc

    :param time: datetime, int epoch timestamp, ISO-format string, or
        anything else (treated as "now")
    :param precise: use finer-grained wording ('3 hours ago' vs 'today')
    :param utc: compare against UTC now instead of local now
    credit: http://stackoverflow.com/questions/1551382/user-friendly-time-format-in-python
    """
    if utc:
        now = datetime.utcnow()
    else:
        now = datetime.now()
    if type(time) is int:
        diff = now - datetime.fromtimestamp(time)
    elif isinstance(time, datetime):
        diff = now - time
    elif isinstance(time, string_types):
        try:
            time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            # MySQL may not support microseconds precision
            time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S")
        diff = now - time
    else:
        diff = now - now
    second_diff = diff.seconds
    day_diff = diff.days

    if day_diff < 0:
        return ''

    if precise:
        if day_diff == 0:
            if second_diff < 10:
                return "just now"
            if second_diff < 60:
                return str(second_diff) + " seconds ago"
            if second_diff < 120:
                return "a minute ago"
            if second_diff < 3600:
                # integer division: plain '/' produced e.g. '2.5 minutes ago'
                # under Python 3's true division
                return str(second_diff // 60) + " minutes ago"
            if second_diff < 7200:
                return "an hour ago"
            if second_diff < 86400:
                return str(second_diff // 3600) + " hours ago"
        if day_diff == 1:
            return "yesterday"
        if day_diff < 7:
            return str(day_diff) + " days ago"
        if day_diff < 31:
            return str(day_diff // 7) + " weeks ago"
        if day_diff < 365:
            return str(day_diff // 30) + " months ago"
        return str(day_diff // 365) + " years ago"
    else:
        if day_diff == 0:
            return "today"
        if day_diff == 1:
            return "yesterday"
        if day_diff < 7:
            return "less than a week"
        if day_diff < 31:
            return "less than a month"
        if day_diff < 365:
            return "less than a year"
        return "a few years ago"
| 520 | |
| 521 | |
def pretty_print_json(json_data, is_json_string=False):
    """Serialize `json_data` as a sorted, 4-space-indented JSON string;
    `is_json_string` indicates the input is itself a JSON string to reformat.
    """
    data = json.loads(json_data) if is_json_string else json_data
    return json.dumps(data, sort_keys=True, indent=4)
| 526 | |
| 527 | |
# characters that are valid in sanitized text; anything outside this set is
# either escaped via mapped_chars or replaced with the invalid character
valid_chars = set(string.ascii_letters + string.digits + " -=_.()/+*^,:?!")

# characters that are allowed but need to be escaped
# (restore_text() reverses this mapping)
mapped_chars = {'>': '__gt__',
                '<': '__lt__',
                "'": '__sq__',
                '"': '__dq__',
                '[': '__ob__',
                ']': '__cb__',
                '{': '__oc__',
                '}': '__cc__',
                '@': '__at__',
                '\n': '__cn__',
                '\r': '__cr__',
                '\t': '__tc__',
                '#': '__pd__'}
| 545 | |
| 546 | |
def restore_text(text, character_map=mapped_chars):
    """Restores sanitized text"""
    if not text:
        return text
    restored = text
    for original_char, escaped_form in character_map.items():
        restored = restored.replace(escaped_form, original_char)
    return restored
| 554 | |
| 555 | |
def sanitize_text(text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """
    Restricts the characters that are allowed in text; accepts both strings
    and lists of strings; non-string entities will be cast to strings.

    :param invalid_character: replacement for characters that are neither
        valid nor mapped
    """
    if isinstance(text, list):
        return [sanitize_text(x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character) for x in text]
    if not isinstance(text, string_types):
        text = smart_str(text)
    # Forward invalid_character to the helper: it was previously dropped,
    # so a caller-supplied replacement character was silently ignored for
    # plain string inputs (the helper fell back to its own 'X' default).
    return _sanitize_text_helper(text, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
| 566 | |
| 567 | |
def _sanitize_text_helper(text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Restricts the characters that are allowed in a string"""
    def _translate(c):
        if c in valid_characters:
            return c
        # escaped form if known, otherwise the replacement char
        # (keeping invalid characters visible makes debugging easier)
        return character_map.get(c, invalid_character)
    return ''.join(_translate(c) for c in text)
| 580 | |
| 581 | |
def sanitize_lists_to_string(values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Sanitize `values`; lists are sanitized element-wise (recursively) and joined with commas."""
    if not isinstance(values, list):
        return sanitize_text(values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
    sanitized = [sanitize_lists_to_string(value,
                                          valid_characters=valid_characters,
                                          character_map=character_map,
                                          invalid_character=invalid_character)
                 for value in values]
    return ",".join(sanitized)
| 594 | |
| 595 | |
def sanitize_param(value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Clean incoming parameters (strings or lists)"""
    if isinstance(value, list):
        return [sanitize_text(x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character) for x in value]
    if isinstance(value, string_types):
        return sanitize_text(value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
    raise Exception('Unknown parameter type (%s)' % (type(value)))
| 604 | |
| 605 | |
# characters permitted in a sanitized filename (everything else becomes '_')
valid_filename_chars = set(string.ascii_letters + string.digits + '_.')
# sanitized results that can never be used as a filename
invalid_filenames = ['', '.', '..']
| 608 | |
| 609 | |
def sanitize_for_filename(text, default=None):
    """
    Restricts the characters that are allowed in a filename portion; Returns default value or a unique id string if result is not a valid name.
    Method is overly aggressive to minimize possible complications, but a maximum length is not considered.
    """
    cleaned = ''.join(c if c in valid_filename_chars else '_' for c in text)
    if cleaned not in invalid_filenames:
        return cleaned
    if default is None:
        # fall back to a freshly generated (and itself sanitized) unique id
        return sanitize_for_filename(str(unique_id()))
    return default
| 627 | |
| 628 | |
def find_instance_nested(item, instances, match_key=None):
    """
    Recursively find instances from lists, dicts, tuples.

    `instances` should be a tuple of valid instances
    If match_key is given the key must match for an instance to be added to the list of found instances.
    """

    matches = []

    def visit(path, key, value):
        # collect matching values; always return the (key, value) pair
        # unchanged so remap() rebuilds the structure identically
        if isinstance(value, instances):
            if match_key is None or match_key == key:
                matches.append(value)
        return key, value

    def enter(path, key, value):
        # do not descend into values that are themselves matches
        if isinstance(value, instances):
            return None, False
        return default_enter(path, key, value)

    remap(item, visit, reraise_visit=False, enter=enter)

    return matches
| 653 | |
| 654 | |
def mask_password_from_url(url):
    """
    Masks out passwords from connection urls like the database connection in galaxy.ini

    >>> mask_password_from_url( 'sqlite+postgresql://user:password@localhost/' )
    'sqlite+postgresql://user:********@localhost/'
    >>> mask_password_from_url( 'amqp://user:amqp@localhost' )
    'amqp://user:********@localhost'
    >>> mask_password_from_url( 'amqp://localhost')
    'amqp://localhost'
    """
    parts = urlparse.urlsplit(url)
    password = parts.password
    if not password:
        return url
    if url.count(password) == 1:
        # simple string replace keeps the rest of the url byte-identical
        return url.replace(password, "********")
    # The password substring occurs more than once (e.g. it equals the
    # scheme), so rebuild only the netloc to avoid clobbering other parts.
    masked_netloc = parts.netloc.replace("%s:%s" % (parts.username, parts.password), '%s:********' % parts.username)
    return urlparse.urlunsplit(parts._replace(netloc=masked_netloc))
| 677 | |
| 678 | |
def ready_name_for_url(raw_name):
    u""" General method to convert a string (i.e. object name) to a URL-ready
    slug.

    >>> ready_name_for_url( "My Cool Object" )
    'My-Cool-Object'
    >>> ready_name_for_url( "!My Cool Object!" )
    'My-Cool-Object'
    >>> ready_name_for_url( "Hello₩◎ґʟⅾ" )
    'Hello'
    """
    # whitespace runs become single dashes
    slug = re.sub(r"\s+", "-", raw_name)
    # drop everything that is not alphanumeric or a dash
    slug = re.sub(r"[^a-zA-Z0-9\-]", "", slug)
    # trim a single trailing dash, matching the original behavior
    return slug[:-1] if slug.endswith('-') else slug
| 699 | |
| 700 | |
def which(file):
    """Return the full path of `file` found on $PATH, or None.

    Uses os.pathsep and os.path.join instead of the previous hard-coded
    ':' and '/', so the lookup also behaves on Windows; a missing PATH
    variable no longer raises KeyError.
    http://stackoverflow.com/questions/5226958/which-equivalent-function-in-python
    """
    for path in os.environ.get("PATH", "").split(os.pathsep):
        candidate = os.path.join(path, file)
        if os.path.exists(candidate):
            return candidate

    return None
| 708 | |
| 709 | |
def in_directory(file, directory, local_path_module=os.path):
    """
    Return true, if the common prefix of both is equal to directory
    e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b.
    This function isn't used exclusively for security checks, but if it is
    used for such checks it is assumed that ``directory`` is a "trusted" path -
    supplied by Galaxy or by the admin and ``file`` is something generated by
    a tool, configuration, external web server, or user supplied input.

    local_path_module is used by Pulsar to check Windows paths while running on
    a POSIX-like system.

    >>> base_dir = tempfile.mkdtemp()
    >>> safe_dir = os.path.join(base_dir, "user")
    >>> os.mkdir(safe_dir)
    >>> good_file = os.path.join(safe_dir, "1")
    >>> with open(good_file, "w") as f: _ = f.write("hello")
    >>> in_directory(good_file, safe_dir)
    True
    >>> in_directory("/other/file/is/here.txt", safe_dir)
    False
    >>> unsafe_link = os.path.join(safe_dir, "2")
    >>> os.symlink("/other/file/bad.fasta", unsafe_link)
    >>> in_directory(unsafe_link, safe_dir)
    False
    """
    if local_path_module != os.path:
        # foreign path flavor (e.g. ntpath): load the matching safe_contains
        # implementation from galaxy.util.path by module name
        _safe_contains = importlib.import_module('galaxy.util.path.%s' % local_path_module.__name__).safe_contains
    else:
        # resolve symlinks in the trusted directory before comparing
        directory = os.path.realpath(directory)
        _safe_contains = safe_contains
    return _safe_contains(directory, file)
| 742 | |
| 743 | |
def merge_sorted_iterables(operator, *iterables):
    """Yield the items of several sorted iterables in merged (sorted) order.

    `operator` extracts the comparison key from each item.

    >>> operator = lambda x: x
    >>> list( merge_sorted_iterables( operator, [1,2,3], [4,5] ) )
    [1, 2, 3, 4, 5]
    >>> list( merge_sorted_iterables( operator, [4, 5], [1,2,3] ) )
    [1, 2, 3, 4, 5]
    >>> list( merge_sorted_iterables( operator, [1, 4, 5], [2], [3] ) )
    [1, 2, 3, 4, 5]
    """
    head = iterables[0]
    if len(iterables) == 1:
        for item in head:
            yield item
        return
    # merge the first iterable against the recursive merge of the rest
    rest_merged = merge_sorted_iterables(operator, *iterables[1:])
    for item in __merge_two_sorted_iterables(operator, iter(head), rest_merged):
        yield item
| 766 | |
| 767 | |
def __merge_two_sorted_iterables(operator, iterable1, iterable2):
    """Yield the items of two sorted iterators in merged (sorted) order.

    `operator` extracts the comparison key. When either iterator is
    exhausted, the pending (already-read) item is flushed, then whichever
    iterator still has items is drained.
    """
    unset = object()  # sentinel: no item currently buffered from that side
    continue_merge = True
    next_1 = unset
    next_2 = unset
    while continue_merge:
        try:
            # refill whichever buffer was consumed on the previous pass
            if next_1 is unset:
                next_1 = next(iterable1)
            if next_2 is unset:
                next_2 = next(iterable2)
            if operator(next_2) < operator(next_1):
                yield next_2
                next_2 = unset
            else:
                yield next_1
                next_1 = unset
        except StopIteration:
            continue_merge = False
    # flush the item that was read but never yielded
    if next_1 is not unset:
        yield next_1
    if next_2 is not unset:
        yield next_2
    # drain the iterator that is not yet exhausted
    for el in iterable1:
        yield el
    for el in iterable2:
        yield el
| 795 | |
| 796 | |
class Params(object):
    """
    Stores and 'sanitizes' parameters. Alphanumeric characters and the
    non-alphanumeric ones that are deemed safe are let to pass through (see L{valid_chars}).
    Some non-safe characters are escaped to safe forms for example C{>} becomes C{__lt__}
    (see L{mapped_chars}). All other characters are replaced with C{X}.

    Operates on string or list values only (HTTP parameters).

    >>> values = { 'status':'on', 'symbols':[ 'alpha', '<>', '$rm&#!' ] }
    >>> par = Params(values)
    >>> par.status
    'on'
    >>> par.value == None # missing attributes return None
    True
    >>> par.get('price', 0)
    0
    >>> par.symbols # replaces unknown symbols with X
    ['alpha', '__lt____gt__', 'XrmX__pd__!']
    >>> sorted(par.flatten()) # flattening to a list
    [('status', 'on'), ('symbols', 'XrmX__pd__!'), ('symbols', '__lt____gt__'), ('symbols', 'alpha')]
    """

    # is NEVER_SANITIZE required now that sanitizing for tool parameters can be controlled on a per parameter basis and occurs via InputValueWrappers?
    NEVER_SANITIZE = ['file_data', 'url_paste', 'URL', 'filesystem_paths']

    def __init__(self, params, sanitize=True):
        """Copy `params` into the instance __dict__, sanitizing each value
        unless its key (or grouped "...|key" suffix) is in NEVER_SANITIZE.
        """
        if sanitize:
            for key, value in params.items():
                # sanitize check both ungrouped and grouped parameters by
                # name. Anything relying on NEVER_SANITIZE should be
                # changed to not require this and NEVER_SANITIZE should be
                # removed.
                if (value is not None and
                        key not in self.NEVER_SANITIZE and
                        True not in [key.endswith("|%s" % nonsanitize_parameter) for
                                     nonsanitize_parameter in self.NEVER_SANITIZE]):
                    self.__dict__[key] = sanitize_param(value)
                else:
                    self.__dict__[key] = value
        else:
            self.__dict__.update(params)

    def flatten(self):
        """
        Creates a tuple list from a dict with a tuple/value pair for every value that is a list
        """
        flat = []
        for key, value in self.__dict__.items():
            if isinstance(value, list):
                # one (key, element) pair per list element
                for v in value:
                    flat.append((key, v))
            else:
                flat.append((key, value))
        return flat

    def __getattr__(self, name):
        """This is here to ensure that we get None for non existing parameters"""
        return None

    def get(self, key, default):
        """Dict-style lookup with an explicit (required) default."""
        return self.__dict__.get(key, default)

    def __str__(self):
        return '%s' % self.__dict__

    def __len__(self):
        return len(self.__dict__)

    def __iter__(self):
        return iter(self.__dict__)

    def update(self, values):
        """Merge `values` into the stored parameters WITHOUT sanitizing them."""
        self.__dict__.update(values)
| 871 | |
| 872 | |
def rst_to_html(s, error=False):
    """Convert a blob of reStructuredText to HTML.

    :param s: reStructuredText source
    :param error: raise on any docutils warning instead of just logging it
    :raises Exception: when docutils is not installed, or (with error=True)
        on the first docutils warning
    """
    log = get_logger("docutils")

    if docutils_core is None:
        raise Exception("Attempted to use rst_to_html but docutils unavailable.")

    class FakeStream(object):
        # stand-in for docutils' warning stream: log (or raise) any
        # non-blank text docutils writes to it
        def write(self, str):
            if len(str) > 0 and not str.isspace():
                if error:
                    raise Exception(str)
                log.warning(str)

    settings_overrides = {
        "embed_stylesheet": False,
        "template": os.path.join(os.path.dirname(__file__), "docutils_template.txt"),
        "warning_stream": FakeStream(),
        "doctitle_xform": False,  # without option, very different rendering depending on
        # number of sections in help content.
    }

    return unicodify(docutils_core.publish_string(
        s, writer=docutils_html4css1.Writer(),
        settings_overrides=settings_overrides))
| 898 | |
| 899 | |
def xml_text(root, name=None):
    """Returns the text inside an element.

    When *name* is given, an attribute of that name on *root* wins over a
    child element of that name; with no *name*, *root* itself is used.
    Returns '' when nothing is found.
    """
    elem = root
    if name is not None:
        # Try attribute first
        attr_value = root.get(name)
        if attr_value:
            return attr_value
        # Then try as element
        elem = root.find(name)
    if elem is None or not elem.text:
        # No luck, return empty string
        return ''
    return ''.join(elem.text.splitlines()).strip()
| 916 | |
| 917 | |
def parse_resource_parameters(resource_param_file):
    """Code shared between jobs and workflows for reading resource parameter configuration files.

    Returns a dict mapping each <param> element's name to the element itself.

    TODO: Allow YAML in addition to XML.
    """
    parameters = {}
    if os.path.exists(resource_param_file):
        root = parse_xml(resource_param_file).getroot()
        for param_elem in root.findall("param"):
            parameters[param_elem.get("name")] = param_elem
    return parameters
| 932 | |
| 933 | |
# asbool implementation pulled from PasteDeploy
# Canonical truthy/falsy string spellings recognized by asbool() below.
truthy = frozenset({'true', 'yes', 'on', 'y', 't', '1'})
falsy = frozenset({'false', 'no', 'off', 'n', 'f', '0'})
| 937 | |
| 938 | |
def asbool(obj):
    """Coerce *obj* to bool, accepting PasteDeploy-style strings.

    Strings are stripped, lowercased and matched against the module-level
    ``truthy``/``falsy`` sets; unrecognized strings raise ValueError.
    Non-strings fall back to ``bool()``.
    """
    if not isinstance(obj, string_types):
        return bool(obj)
    normalized = obj.strip().lower()
    if normalized in truthy:
        return True
    if normalized in falsy:
        return False
    raise ValueError("String is not true/false: %r" % normalized)
| 949 | |
| 950 | |
def string_as_bool(string):
    """Return True if *string* is an affirmative value.

    Accepts (case-insensitively) 'true', 'yes', 'on' and '1'; anything
    else — including None — yields False.
    """
    # `if cond: return True else: return False` collapsed to the condition.
    return str(string).lower() in ('true', 'yes', 'on', '1')
| 956 | |
| 957 | |
def string_as_bool_or_none(string):
    """
    Returns True, None or False based on the argument:
        True if passed True, 'True', 'Yes', or 'On'
        None if passed None or 'None'
        False otherwise

    Note: string comparison is case-insensitive so lowercase versions of those
    function equivalently.
    """
    lowered = str(string).lower()
    if lowered == 'none':
        return None
    return lowered in ('true', 'yes', 'on')
| 975 | |
| 976 | |
def listify(item, do_strip=False):
    """
    Make a single item a single item list.

    If *item* is a string, it is split on comma (``,``) characters to produce the list. Optionally, if *do_strip* is
    true, any extra whitespace around the split items is stripped.

    If *item* is a list it is returned unchanged. If *item* is a tuple, it is converted to a list and returned. If
    *item* evaluates to False, an empty list is returned.

    :type item: object
    :param item: object to make a list from
    :type do_strip: bool
    :param do_strip: strip whitespaces from around split items, if set to ``True``
    :rtype: list
    :returns: The input as a list
    """
    if not item:
        return []
    if isinstance(item, list):
        return item
    if isinstance(item, tuple):
        return list(item)
    if isinstance(item, string_types) and ',' in item:
        tokens = item.split(',')
        return [token.strip() for token in tokens] if do_strip else tokens
    return [item]
| 1007 | |
| 1008 | |
def commaify(amount):
    """Insert thousands separators into the numeric string *amount*."""
    # Iterative form of the original recursion: keep inserting a comma
    # before the last ungrouped block of three digits until stable.
    while True:
        grouped = re.sub(r"^(-?\d+)(\d{3})", r'\g<1>,\g<2>', amount)
        if grouped == amount:
            return grouped
        amount = grouped
| 1016 | |
| 1017 | |
def roundify(amount, sfs=2):
    """
    Take a number in string form and truncate to 'sfs' significant figures.
    """
    surplus = len(amount) - sfs
    if surplus <= 0:
        return amount
    # Keep the leading digits, zero-fill the rest.
    return amount[:sfs] + '0' * surplus
| 1026 | |
| 1027 | |
def unicodify(value, encoding=DEFAULT_ENCODING, error='replace', strip_null=False):
    u"""
    Returns a Unicode string or None.

    Non-string objects are first coerced with ``text_type()`` (falling back to
    ``str()``); byte strings are then decoded using *encoding* with the *error*
    handler ('replace' by default). Set *strip_null* to drop NUL characters
    from the result. Any failure is re-raised as a plain ``Exception``
    carrying the offending value.

    >>> assert unicodify(None) is None
    >>> assert unicodify('simple string') == u'simple string'
    >>> assert unicodify(3) == u'3'
    >>> assert unicodify(bytearray([115, 116, 114, 196, 169, 195, 177, 103])) == u'strĩñg'
    >>> assert unicodify(Exception(u'strĩñg')) == u'strĩñg'
    >>> assert unicodify('cómplǐcḁtëd strĩñg') == u'cómplǐcḁtëd strĩñg'
    >>> s = u'cómplǐcḁtëd strĩñg'; assert unicodify(s) == s
    >>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1'), 'latin-1') == s
    >>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1')) == u'l\ufffdt\ufffdn str\ufffd\ufffdg'
    >>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1'), error='ignore') == u'ltn strg'
    >>> if PY2: assert unicodify(Exception(u'¼ cup of flour'.encode('latin-1')), error='ignore') == ' cup of flour'
    """
    if value is None:
        return value
    try:
        if isinstance(value, bytearray):
            # bytearray is mutable/unhashable; normalize to bytes first.
            value = bytes(value)
        elif not isinstance(value, string_types) and not isinstance(value, binary_type):
            # In Python 2, value is not an instance of basestring (i.e. str or unicode)
            # In Python 3, value is not an instance of bytes or str
            try:
                value = text_type(value)
            except Exception:
                value = str(value)
        # Now in Python 2, value is an instance of basestring, but may be not unicode
        # Now in Python 3, value is an instance of bytes or str
        if not isinstance(value, text_type):
            value = text_type(value, encoding, error)
    except Exception as e:
        msg = "Value '%s' could not be coerced to Unicode: %s('%s')" % (value, type(e).__name__, e)
        raise Exception(msg)
    if strip_null:
        return value.replace('\0', '')
    return value
| 1066 | |
| 1067 | |
def smart_str(s, encoding=DEFAULT_ENCODING, strings_only=False, errors='strict'):
    u"""
    Returns a bytestring version of 's', encoded as specified in 'encoding'.

    If strings_only is True, don't convert (some) non-string-like objects.

    Adapted from an older, simpler version of django.utils.encoding.smart_str.

    >>> assert smart_str(None) == b'None'
    >>> assert smart_str(None, strings_only=True) is None
    >>> assert smart_str(3) == b'3'
    >>> assert smart_str(3, strings_only=True) == 3
    >>> s = b'a bytes string'; assert smart_str(s) == s
    >>> s = bytearray(b'a bytes string'); assert smart_str(s) == s
    >>> assert smart_str(u'a simple unicode string') == b'a simple unicode string'
    >>> assert smart_str(u'à strange ünicode ڃtring') == b'\\xc3\\xa0 strange \\xc3\\xbcnicode \\xda\\x83tring'
    >>> assert smart_str(b'\\xc3\\xa0n \\xc3\\xabncoded utf-8 string', encoding='latin-1') == b'\\xe0n \\xebncoded utf-8 string'
    >>> assert smart_str(bytearray(b'\\xc3\\xa0n \\xc3\\xabncoded utf-8 string'), encoding='latin-1') == b'\\xe0n \\xebncoded utf-8 string'
    """
    # `strings_only` skips None and ints (matching the old Django behavior).
    if strings_only and isinstance(s, (type(None), int)):
        return s
    if not isinstance(s, string_types) and not isinstance(s, (binary_type, bytearray)):
        # In Python 2, s is not an instance of basestring or bytearray
        # In Python 3, s is not an instance of str, bytes or bytearray
        s = str(s)
    # Now in Python 2, value is an instance of basestring or bytearray
    # Now in Python 3, value is an instance of str, bytes or bytearray
    if not isinstance(s, (binary_type, bytearray)):
        # text -> encode directly to the target encoding
        return s.encode(encoding, errors)
    elif s and encoding != DEFAULT_ENCODING:
        # bytes-like in the default encoding -> transcode to the target encoding
        return s.decode(DEFAULT_ENCODING, errors).encode(encoding, errors)
    else:
        # already bytes-like in the requested encoding (or empty) -> pass through
        return s
| 1101 | |
| 1102 | |
def strip_control_characters(s):
    """Strip unicode control characters (category "Cc") from a string."""
    return "".join(ch for ch in unicodify(s) if unicodedata.category(ch) != "Cc")
| 1106 | |
| 1107 | |
def strip_control_characters_nested(item):
    """Recursively strips control characters from lists, dicts, tuples."""

    def _clean(value):
        # Only strings can carry control characters; leave other types alone.
        if isinstance(value, string_types):
            return strip_control_characters(value)
        return value

    def visit(path, key, value):
        return _clean(key), _clean(value)

    return remap(item, visit)
| 1119 | |
| 1120 | |
def object_to_string(obj):
    """Hex-encode a bytes-like object for safe textual transport."""
    return binascii.hexlify(obj)
| 1123 | |
| 1124 | |
def string_to_object(s):
    """Inverse of object_to_string(): decode a hex string back to bytes."""
    return binascii.unhexlify(s)
| 1127 | |
| 1128 | |
class ParamsWithSpecs(collections.defaultdict):
    """defaultdict of parameter values mapped/validated against a spec.

    ``specs`` maps parameter names to spec dicts which may contain:

    - ``map``: callable applied to the incoming value
    - ``valid``: predicate the (original, unmapped) value must satisfy
    - ``default``: value returned for parameters never supplied

    Subclasses decide how problems are reported by overriding the
    ``_param_*_error`` hooks (which raise NotImplementedError here).
    """

    def __init__(self, specs=None, params=None):
        self.specs = specs or dict()
        self.params = params or dict()
        for name, value in self.params.items():
            if name not in self.specs:
                # Hook is expected to raise; if it returns, the spec
                # lookup below fails loudly instead.
                self._param_unknown_error(name)
            spec = self.specs[name]
            if 'map' in spec:
                try:
                    self.params[name] = spec['map'](value)
                except Exception:
                    self._param_map_error(name, value)
            # NOTE: validation sees the original value, not the mapped one.
            if 'valid' in spec and not spec['valid'](value):
                self._param_vaildation_error(name, value)

        self.update(self.params)

    def __missing__(self, name):
        # Unset parameters fall back to their spec default.
        return self.specs[name]['default']

    def __getattr__(self, name):
        # Attribute access is an alias for item access.
        return self[name]

    def _param_unknown_error(self, name):
        raise NotImplementedError()

    def _param_map_error(self, name, value):
        raise NotImplementedError()

    def _param_vaildation_error(self, name, value):  # sic: historic typo kept for subclasses
        raise NotImplementedError()
| 1164 | |
| 1165 | |
def compare_urls(url1, url2, compare_scheme=True, compare_hostname=True, compare_path=True):
    """Compare two URLs component-wise.

    A component is only compared when its flag is set AND both URLs
    actually provide it; a missing component never causes a mismatch.
    """
    parsed1 = urlparse.urlparse(url1)
    parsed2 = urlparse.urlparse(url2)
    checks = (
        (compare_scheme, parsed1.scheme, parsed2.scheme),
        (compare_hostname, parsed1.hostname, parsed2.hostname),
        (compare_path, parsed1.path, parsed2.path),
    )
    for enabled, left, right in checks:
        if enabled and left and right and left != right:
            return False
    return True
| 1176 | |
| 1177 | |
def read_dbnames(filename):
    """Read build names from *filename*.

    Returns a DBNames list of ``(db_key, display_name)`` tuples: the
    unspecified ("?") entry first, then UCSC builds grouped per species
    with the newest revision first, then manual (integer-keyed) builds.
    """
    class DBNames(list):
        default_value = "?"
        default_name = "unspecified (?)"
    db_names = DBNames()
    try:
        ucsc_builds = {}
        man_builds = []  # assume these are integers
        name_to_db_base = {}
        if filename is None:
            # Should only be happening with the galaxy.tools.parameters.basic:GenomeBuildParameter docstring unit test
            filename = os.path.join('tool-data', 'shared', 'ucsc', 'builds.txt.sample')
        for line in open(filename):
            try:
                if line.startswith("#"):
                    continue
                fields = line.replace("\r", "").replace("\n", "").split("\t")
                if fields[0] == "?":
                    # Special case of unspecified build is at top of list
                    db_names.insert(0, (fields[0], fields[1]))
                    continue
                try:
                    # manual build (i.e. microbes): key is an integer
                    int(fields[0])
                    man_builds.append((fields[1], fields[0]))
                except Exception:
                    # UCSC build: group by the alphabetic species prefix
                    db_base = fields[0].rstrip('0123456789')
                    if db_base not in ucsc_builds:
                        ucsc_builds[db_base] = []
                        name_to_db_base[fields[1]] = db_base
                    # we want to sort within a species numerically by revision number
                    try:
                        revision = int(re.compile(r'\d+$').findall(fields[0])[0])
                    except Exception:
                        revision = 0
                    ucsc_builds[db_base].append((revision, fields[0], fields[1]))
            except Exception:
                # skip malformed lines
                continue
        for name in sorted(name_to_db_base.keys()):
            db_base = name_to_db_base[name]
            # newest revision first
            species_builds = sorted(ucsc_builds[db_base], reverse=True)
            ucsc_builds[db_base] = [(build, build_name) for _, build, build_name in species_builds]
            db_names = DBNames(db_names + ucsc_builds[db_base])
        if len(db_names) > 1 and len(man_builds) > 0:
            db_names.append((db_names.default_value, '----- Additional Species Are Below -----'))
            man_builds.sort()
            man_builds = [(build, build_name) for build_name, build in man_builds]
            db_names = DBNames(db_names + man_builds)
    except Exception as e:
        log.error("ERROR: Unable to read builds file: %s", unicodify(e))
    if len(db_names) < 1:
        db_names = DBNames([(db_names.default_value, db_names.default_name)])
    return db_names
| 1234 | |
| 1235 | |
def read_build_sites(filename, check_builds=True):
    """ read db names to ucsc mappings from file, this file should probably be merged with the one above """
    sites = []
    try:
        for line in open(filename):
            try:
                if line.startswith("#"):
                    continue
                fields = line.replace("\r", "").replace("\n", "").split("\t")
                entry = {'name': fields[0], 'url': fields[1]}
                if check_builds:
                    entry['builds'] = fields[2].split(",")
                sites.append(entry)
            except Exception:
                # skip malformed lines
                continue
    except Exception:
        log.error("ERROR: Unable to read builds for site file %s", filename)
    return sites
| 1258 | |
| 1259 | |
def relativize_symlinks(path, start=None, followlinks=False):
    """Rewrite symlinks under *path* as relative links.

    Targets become relative to *start* when given, otherwise to the
    directory containing each link.
    """
    for root, _dirs, files in os.walk(path, followlinks=followlinks):
        for file_name in files:
            link_path = os.path.join(root, file_name)
            if not os.path.islink(link_path):
                continue
            target = os.readlink(link_path)
            base = start if start is not None else root
            # Replace the link with its relative equivalent.
            rel_target = relpath(target, base)
            os.remove(link_path)
            os.symlink(rel_target, link_path)
| 1275 | |
| 1276 | |
def stringify_dictionary_keys(in_dict):
    """Return a shallow copy of *in_dict* with each key passed through str().

    Only the top level is converted (no recursion); unicode keys are not
    valid for expansion into keyword arguments on method calls.
    """
    # dict.items() works on both Python 2 and 3, dropping the six.iteritems
    # dependency; a dict comprehension replaces the manual build loop.
    return {str(key): value for key, value in in_dict.items()}
| 1285 | |
| 1286 | |
def mkstemp_ln(src, prefix='mkstemp_ln_'):
    """
    From tempfile._mkstemp_inner, generate a hard link in the same dir with a
    random name. Created so we can persist the underlying file of a
    NamedTemporaryFile upon its closure.
    """
    directory = os.path.dirname(src)
    candidates = tempfile._get_candidate_names()
    for _ in xrange(tempfile.TMP_MAX):
        candidate = os.path.join(directory, prefix + next(candidates))
        try:
            os.link(src, candidate)
        except OSError as e:
            if e.errno == errno.EEXIST:
                continue  # name collision, try the next candidate
            raise
        return os.path.abspath(candidate)
    raise IOError(errno.EEXIST, "No usable temporary file name found")
| 1306 | |
| 1307 | |
def umask_fix_perms(path, umask, unmasked_perms, gid=None):
    """
    umask-friendly permissions fixing

    Applies ``unmasked_perms & ~umask`` to *path* and, when *gid* is
    given, attempts to change the file's group; failures are logged as
    warnings rather than raised.
    """
    perms = unmasked_perms & ~umask
    try:
        st = os.stat(path)
    except OSError:
        log.exception('Unable to set permissions or group on %s', path)
        return
    # fix modes
    if stat.S_IMODE(st.st_mode) != perms:
        try:
            os.chmod(path, perms)
        except Exception as e:
            log.warning('Unable to honor umask (%s) for %s, tried to set: %s but mode remains %s, error was: %s' % (
                oct(umask), path, oct(perms), oct(stat.S_IMODE(st.st_mode)), unicodify(e)))
    # fix group
    if gid is not None and st.st_gid != gid:
        try:
            os.chown(path, -1, gid)
        except Exception as e:
            try:
                desired_group = grp.getgrgid(gid)
                current_group = grp.getgrgid(st.st_gid)
            except Exception:
                # grp may be unavailable (Windows) or the gid unknown;
                # fall back to raw numeric ids for the message.
                desired_group = gid
                current_group = st.st_gid
            log.warning('Unable to honor primary group (%s) for %s, group remains %s, error was: %s' % (
                desired_group, path, current_group, unicodify(e)))
| 1343 | |
| 1344 | |
def docstring_trim(docstring):
    """Trimming python doc strings. Taken from: http://www.python.org/dev/peps/pep-0257/"""
    if not docstring:
        return ''
    # Convert tabs to spaces (following the normal Python rules)
    # and split into a list of lines:
    lines = docstring.expandtabs().splitlines()
    # Determine minimum indentation (first line doesn't count):
    margin = sys.maxsize
    for line in lines[1:]:
        content = line.lstrip()
        if content:
            margin = min(margin, len(line) - len(content))
    # Remove indentation (first line is special):
    trimmed = [lines[0].strip()]
    if margin < sys.maxsize:
        trimmed.extend(line[margin:].rstrip() for line in lines[1:])
    # Strip off trailing and leading blank lines:
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    # Return a single string:
    return '\n'.join(trimmed)
| 1370 | |
| 1371 | |
def nice_size(size):
    """
    Returns a readably formatted string with the size

    >>> nice_size(100)
    '100 bytes'
    >>> nice_size(10000)
    '9.8 KB'
    >>> nice_size(1000000)
    '976.6 KB'
    >>> nice_size(100000000)
    '95.4 MB'
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    try:
        size = float(size)
    except Exception:
        return '??? bytes'
    sign = ''
    if size < 0:
        sign, size = '-', abs(size)
    for exponent, unit in enumerate(units):
        if size < 1024 ** (exponent + 1):
            scaled = size / float(1024 ** exponent)
            if unit == 'bytes':
                # No decimals for plain byte counts
                return "%s%d bytes" % (sign, scaled)
            return "%s%.1f %s" % (sign, scaled, unit)
    return '??? bytes'
| 1402 | |
| 1403 | |
def size_to_bytes(size):
    """
    Returns a number of bytes (as integer) if given a reasonably formatted string with the size

    >>> size_to_bytes('1024')
    1024
    >>> size_to_bytes('1.0')
    1
    >>> size_to_bytes('10 bytes')
    10
    >>> size_to_bytes('4k')
    4096
    >>> size_to_bytes('2.2 TB')
    2418925581107
    >>> size_to_bytes('.01 TB')
    10995116277
    >>> size_to_bytes('1.b')
    1
    >>> size_to_bytes('1.2E2k')
    122880
    """
    # The following number regexp is based on https://stackoverflow.com/questions/385558/extract-float-double-value/385597#385597
    size_re = re.compile(r'(?P<number>(\d+(\.\d*)?|\.\d+)(e[+-]?\d+)?)\s*(?P<multiple>[eptgmk]?(b|bytes?)?)?$')
    match = size_re.match(size.lower())
    if match is None:
        raise ValueError("Could not parse string '%s'" % size)
    number = float(match.group("number"))
    multiple = match.group("multiple")
    if not multiple or multiple.startswith('b'):
        # plain bytes (no unit, 'b', or 'bytes')
        return int(number)
    # first letter of the unit determines the binary exponent
    exponents = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}
    try:
        return int(number * 1024 ** exponents[multiple[0]])
    except KeyError:
        raise ValueError("Unknown multiplier '%s' in '%s'" % (multiple, size))
| 1448 | |
| 1449 | |
def send_mail(frm, to, subject, body, config, html=None):
    """
    Sends an email.

    :type frm: str
    :param frm: from address

    :type to: str
    :param to: to address

    :type subject: str
    :param subject: Subject line

    :type body: str
    :param body: Body text (should be plain text)

    :type config: object
    :param config: Galaxy configuration object

    :type html: str
    :param html: Alternative HTML representation of the body content. If
                 provided will convert the message to a MIMEMultipart. (Default 'None')
    """
    recipients = listify(to)
    if html:
        msg = MIMEMultipart('alternative')
    else:
        msg = MIMEText(body, 'plain', 'utf-8')

    msg['To'] = ', '.join(recipients)
    msg['From'] = frm
    msg['Subject'] = subject

    if config.smtp_server is None:
        # No SMTP server configured: log the message instead of sending.
        log.error("Mail is not configured for this Galaxy instance.")
        log.info(msg)
        return

    if html:
        # Attach both representations; mail clients render the best one.
        msg.attach(MIMEText(body, 'plain', 'utf-8'))
        msg.attach(MIMEText(html, 'html', 'utf-8'))

    smtp_ssl = asbool(getattr(config, 'smtp_ssl', False))
    if smtp_ssl:
        s = smtplib.SMTP_SSL(config.smtp_server)
    else:
        s = smtplib.SMTP(config.smtp_server)
        # Opportunistically upgrade the plain connection to TLS.
        try:
            s.starttls()
            log.debug('Initiated SSL/TLS connection to SMTP server: %s', config.smtp_server)
        except RuntimeError as e:
            log.warning('SSL/TLS support is not available to your Python interpreter: %s', unicodify(e))
        except smtplib.SMTPHeloError as e:
            log.error("The server didn't reply properly to the HELO greeting: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPException as e:
            log.warning('The server does not support the STARTTLS extension: %s', unicodify(e))
    if config.smtp_username and config.smtp_password:
        try:
            s.login(config.smtp_username, config.smtp_password)
        except smtplib.SMTPHeloError as e:
            log.error("The server didn't reply properly to the HELO greeting: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPAuthenticationError as e:
            log.error("The server didn't accept the username/password combination: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPException as e:
            log.error("No suitable authentication method was found: %s", unicodify(e))
            s.close()
            raise
    s.sendmail(frm, recipients, msg.as_string())
    s.quit()
| 1529 | |
| 1530 | |
def force_symlink(source, link_name):
    """Create a symlink, replacing any existing file at *link_name*."""
    try:
        os.symlink(source, link_name)
    except OSError as e:
        # Only "already exists" is recoverable: replace and retry.
        if e.errno != errno.EEXIST:
            raise e
        os.remove(link_name)
        os.symlink(source, link_name)
| 1540 | |
| 1541 | |
def move_merge(source, target):
    """Move *source* to *target*, merging directory contents.

    Plain shutil.move nests a moved directory INSIDE an existing target
    directory; here an existing target directory instead receives the
    *contents* of the source directory, recursively, so the target path
    is always the final location.
    """
    if os.path.isdir(source) and os.path.isdir(target):
        for entry in os.listdir(source):
            move_merge(os.path.join(source, entry), os.path.join(target, entry))
    else:
        return shutil.move(source, target)
| 1553 | |
| 1554 | |
def safe_str_cmp(a, b):
    """safely compare two strings in a timing-attack-resistant manner

    Accumulates the XOR of every character pair instead of returning at
    the first mismatch, so the comparison time does not leak the position
    of a difference.
    """
    if len(a) != len(b):
        return False
    diff = 0
    for ch_a, ch_b in zip(a, b):
        diff |= ord(ch_a) ^ ord(ch_b)
    return diff == 0
| 1564 | |
| 1565 | |
# Root of the Galaxy source tree and the bundled sample-config directory,
# both derived from this package's on-disk location.
galaxy_root_path = os.path.join(__path__[0], os.pardir, os.pardir, os.pardir)
galaxy_samples_path = os.path.join(__path__[0], os.pardir, 'config', 'sample')
| 1568 | |
| 1569 | |
def galaxy_directory():
    """Return the absolute path of the Galaxy root directory."""
    root = os.path.abspath(galaxy_root_path)
    # When running from a packages/ checkout, the real root is one level up.
    if os.path.basename(root) == "packages":
        root = os.path.dirname(root)
    return root
| 1575 | |
| 1576 | |
def galaxy_samples_directory():
    """Return the absolute path of the bundled sample-config directory."""
    return os.path.abspath(galaxy_samples_path)
| 1579 | |
| 1580 | |
def config_directories_from_setting(directories_setting, galaxy_root=galaxy_root_path):
    """
    Parse the ``directories_setting`` into a list of relative or absolute
    filesystem paths that will be searched to discover plugins.

    :type galaxy_root: string
    :param galaxy_root: the root path of this galaxy installation
    :type directories_setting: string (default: None)
    :param directories_setting: the filesystem path (or paths)
        to search for plugins. Can be CSV string of paths. Will be treated as
        absolute if a path starts with '/', relative otherwise.
    :rtype: list of strings
    :returns: list of filesystem paths
    """
    if not directories_setting:
        return []
    found = []
    for raw_path in listify(directories_setting):
        candidate = raw_path.strip()
        if not candidate.startswith('/'):
            # Relative paths are anchored at the Galaxy root.
            candidate = os.path.join(galaxy_root, candidate)
        if os.path.exists(candidate):
            found.append(candidate)
        else:
            log.warning('directory not found: %s', candidate)
    return found
| 1608 | |
| 1609 | |
def parse_int(value, min_val=None, max_val=None, default=None, allow_none=False):
    """Parse *value* as an int, clamped to ``[min_val, max_val]``.

    On a ValueError from ``int()``: returns None when *allow_none* is set
    and either no default was given or the value is the string "None";
    otherwise returns *default* when one was supplied, else re-raises.
    """
    try:
        value = int(value)
        if min_val is not None and value < min_val:
            return min_val
        if max_val is not None and value > max_val:
            return max_val
        return value
    except ValueError:
        if allow_none:
            if default is None or value == "None":
                return None
        # `is not None` (not truthiness): a falsy default like 0 must be honored.
        if default is not None:
            return default
        raise
| 1626 | |
| 1627 | |
def parse_non_hex_float(s):
    r"""
    Parse string `s` into a float but throw a `ValueError` if the string is in
    the otherwise acceptable format `\d+e\d+` (e.g. 40000000000000e5.)

    This can be passed into `json.loads` to prevent a hex string in the above
    format from being incorrectly parsed as a float in scientific notation.

    >>> parse_non_hex_float( '123.4' )
    123.4
    >>> parse_non_hex_float( '2.45e+3' )
    2450.0
    >>> parse_non_hex_float( '2.45e-3' )
    0.00245
    >>> parse_non_hex_float( '40000000000000e5' )
    Traceback (most recent call last):
    ...
    ValueError: could not convert string to float: 40000000000000e5
    """
    parsed = float(s)
    # float() accepted it; reject bare-exponent forms ('e' with no explicit
    # sign), which are more likely hex identifiers than scientific notation.
    if 'e' in s and not ('+' in s or '-' in s):
        raise ValueError('could not convert string to float: ' + s)
    return parsed
| 1652 | |
| 1653 | |
def build_url(base_url, port=80, scheme='http', pathspec=None, params=None, doseq=False):
    """Assemble a URL from its parts.

    Query parameters already present in *base_url* are merged into
    *params* (overriding duplicate keys) and re-encoded at the end.

    :param base_url: base URL, e.g. ``http://host/path``
    :param port: appended as ``:port`` unless it is 80
    :param scheme: overrides the scheme parsed from *base_url* when not 'http'
    :param pathspec: list of extra path segments to append
    :param params: dict of query parameters to encode
    :param doseq: passed through to ``urlencode`` for sequence values
    """
    if params is None:
        params = dict()
    if pathspec is None:
        pathspec = []
    parsed_url = urlparse.urlparse(base_url)
    if scheme != 'http':
        # BUG FIX: ParseResult is an immutable namedtuple; attribute
        # assignment (`parsed_url.scheme = scheme`) raises AttributeError.
        parsed_url = parsed_url._replace(scheme=scheme)
    assert parsed_url.scheme in ('http', 'https', 'ftp'), 'Invalid URL scheme: %s' % scheme
    if port != 80:
        url = '%s://%s:%d/%s' % (parsed_url.scheme, parsed_url.netloc.rstrip('/'), int(port), parsed_url.path)
    else:
        url = '%s://%s/%s' % (parsed_url.scheme, parsed_url.netloc.rstrip('/'), parsed_url.path.lstrip('/'))
    if len(pathspec) > 0:
        url = '%s/%s' % (url.rstrip('/'), '/'.join(pathspec))
    if parsed_url.query:
        # Fold pre-existing query parameters into params before encoding.
        for query_parameter in parsed_url.query.split('&'):
            key, value = query_parameter.split('=')
            params[key] = value
    if params:
        url += '?%s' % urlparse.urlencode(params, doseq=doseq)
    return url
| 1676 | |
| 1677 | |
def url_get(base_url, auth=None, pathspec=None, params=None, max_retries=5, backoff_factor=1):
    """Make contact with the uri provided and return any contents."""
    full_url = build_url(base_url, pathspec=pathspec, params=params)
    session = requests.Session()
    # Retry (with exponential backoff) only on HTTP 429 Too Many Requests.
    retry_policy = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=[429])
    session.mount(base_url, HTTPAdapter(max_retries=retry_policy))
    response = session.get(full_url, auth=auth)
    response.raise_for_status()
    return response.text
| 1687 | |
| 1688 | |
def download_to_file(url, dest_file_path, timeout=30, chunk_size=2 ** 20):
    """Stream the resource at ``url`` into ``dest_file_path``, ``chunk_size`` bytes at a time."""
    with requests.get(url, timeout=timeout, stream=True) as response:
        with open(dest_file_path, 'wb') as out:
            for block in response.iter_content(chunk_size):
                # Skip keep-alive chunks, which arrive as empty bytes.
                if block:
                    out.write(block)
| 1695 | |
| 1696 | |
def get_executable():
    """Return the path of a Python interpreter suitable for spawning subprocesses.

    Under uWSGI, ``sys.executable`` is the uwsgi binary itself, so the
    interpreter is instead located inside the active virtualenv (taken from
    uWSGI options or ``$VIRTUAL_ENV``) or next to the uwsgi binary. Falls
    back to the bare name ``python`` when the chosen path does not exist.
    """
    exe = sys.executable
    if exe.endswith('uwsgi'):
        venv = None
        if uwsgi is not None:
            # uWSGI exposes the virtualenv under several option aliases;
            # the first one set wins.
            for option_name in ('home', 'virtualenv', 'venv', 'pyhome'):
                if option_name in uwsgi.opt:
                    venv = unicodify(uwsgi.opt[option_name])
                    break
        if venv is None:
            venv = os.environ.get('VIRTUAL_ENV')
        if venv is not None:
            exe = os.path.join(venv, 'bin', 'python')
        else:
            exe = os.path.join(os.path.dirname(exe), 'python')
    if not os.path.exists(exe):
        exe = 'python'
    return exe
| 1715 | |
| 1716 | |
class ExecutionTimer(object):
    """Measure wall-clock time elapsed since instantiation."""

    def __init__(self):
        # Capture the start timestamp immediately on construction.
        self.begin = time.time()

    def __str__(self):
        elapsed_ms = self.elapsed * 1000
        return "(%0.3f ms)" % elapsed_ms

    @property
    def elapsed(self):
        """Seconds elapsed since this timer was created."""
        return time.time() - self.begin
| 1728 | |
| 1729 | |
class StructuredExecutionTimer(object):
    """Timer that renders an identified, tagged log message with elapsed time appended."""

    def __init__(self, timer_id, template, **tags):
        # Start timing immediately; template is a string.Template-style
        # message with optional $placeholders filled in by to_str().
        self.begin = time.time()
        self.timer_id = timer_id
        self.template = template
        self.tags = tags

    def __str__(self):
        return self.to_str()

    def to_str(self, **kwd):
        # Substitute any provided keyword values into the template;
        # unknown placeholders are left intact (safe_substitute).
        if kwd:
            message = string.Template(self.template).safe_substitute(kwd)
        else:
            message = self.template
        return message + " (%0.3f ms)" % (self.elapsed * 1000)

    @property
    def elapsed(self):
        """Seconds elapsed since this timer was created."""
        return time.time() - self.begin
| 1752 | |
| 1753 | |
if __name__ == '__main__':
    # Run this module's doctests when executed directly as a script.
    import doctest
    doctest.testmod(sys.modules[__name__], verbose=False)
