Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/cwltool/expression.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 """Parse CWL expressions.""" | |
| 2 | |
| 3 import copy | |
| 4 import json | |
| 5 import re | |
| 6 from typing import ( | |
| 7 Any, | |
| 8 Dict, | |
| 9 List, | |
| 10 Mapping, | |
| 11 MutableMapping, | |
| 12 MutableSequence, | |
| 13 Optional, | |
| 14 Tuple, | |
| 15 Union, | |
| 16 cast, | |
| 17 ) | |
| 18 | |
| 19 from schema_salad.utils import json_dumps | |
| 20 | |
| 21 from .errors import WorkflowException | |
| 22 from .loghandler import _logger | |
| 23 from .sandboxjs import JavascriptException, default_timeout, execjs | |
| 24 from .utils import ( | |
| 25 CWLObjectType, | |
| 26 CWLOutputType, | |
| 27 bytes2str_in_dicts, | |
| 28 docker_windows_path_adjust, | |
| 29 ) | |
| 30 | |
| 31 | |
def jshead(engine_config: List[str], rootvars: CWLObjectType) -> str:
    """
    Build the JavaScript prelude placed in front of an expression.

    The prelude is the lines of ``engine_config`` followed by one
    ``var <name> = <json>;`` declaration per entry of ``rootvars``, all
    joined with newlines.

    No bytes-to-str conversion happens here; ``do_eval`` passes its root
    variables through ``bytes2str_in_dicts`` before calling this function.

    :param engine_config: JavaScript source lines to emit first, verbatim.
    :param rootvars: mapping of variable name to the value to serialize.
    :return: the assembled JavaScript source.
    """
    declarations = []
    for name, value in rootvars.items():
        declarations.append(f"var {name} = {json_dumps(value, indent=4)};")
    return "\n".join(engine_config + declarations)
| 43 | |
| 44 | |
# Regular expressions describing a parameter reference such as
# (inputs.file['key']["other"][0]).
seg_symbol = r"\w+"  # bare identifier: foo
seg_single = r"\['([^']|\\')+'\]"  # single-quoted subscript: ['foo']
seg_double = r'\["([^"]|\\")+"\]'  # double-quoted subscript: ["foo"]
seg_index = r"\[[0-9]+\]"  # numeric subscript: [0]

# One path segment: ".symbol" or any of the three subscript forms.
segments = "(" + "|".join((r"\." + seg_symbol, seg_single, seg_double, seg_index)) + ")"
segment_re = re.compile(segments, re.UNICODE)

# A whole reference: "(" + leading symbol + any number of segments + ")".
param_str = r"\((" + seg_symbol + ")" + segments + r"*\)$"
param_re = re.compile(param_str, re.UNICODE)
| 54 | |
| 55 | |
class SubstitutionError(Exception):
    """Raised by ``scanner`` when a ``$(...)``/``${...}`` block is left unfinished."""
| 58 | |
| 59 | |
def scanner(scan: str) -> Optional[Tuple[int, int]]:
    """
    Locate the next expression or backslash escape in ``scan``.

    A stack of states tracks nesting of parentheses, braces and quoted
    strings so that the closing delimiter of ``$(...)`` / ``${...}`` is
    matched correctly.

    :param scan: the text to search.
    :return: ``(start, end)`` such that ``scan[start:end]`` is either a
             complete ``$(...)`` / ``${...}`` block or a backslash plus the
             character following it (outside of any block); ``None`` when
             the text contains neither.
    :raises SubstitutionError: if the text ends inside an unfinished block.
    """
    # The numeric values appear in the error message below; keep them stable.
    DEFAULT, DOLLAR, PAREN, BRACE, SINGLE_QUOTE, DOUBLE_QUOTE, BACKSLASH = range(7)

    block_for_opener = {"(": PAREN, "{": BRACE}
    delimiters = {PAREN: ("(", ")"), BRACE: ("{", "}")}
    quote_state = {"'": SINGLE_QUOTE, '"': DOUBLE_QUOTE}
    quote_char = {SINGLE_QUOTE: "'", DOUBLE_QUOTE: '"'}

    states = [DEFAULT]
    begin = 0
    pos = 0
    while pos < len(scan):
        top = states[-1]
        ch = scan[pos]

        if top == BACKSLASH:
            # The escaped character is consumed; only a top-level escape is
            # reported to the caller.
            states.pop()
            if states[-1] == DEFAULT:
                return (pos - 1, pos + 1)
        elif top == DEFAULT:
            if ch == "$":
                states.append(DOLLAR)
            elif ch == "\\":
                states.append(BACKSLASH)
        elif top == DOLLAR:
            if ch in block_for_opener:
                begin = pos - 1
                states.append(block_for_opener[ch])
            else:
                # A lone "$": drop the state and look at this character
                # again in the previous state (no advance).
                states.pop()
                continue
        elif top in delimiters:
            opener, closer = delimiters[top]
            if ch == opener:
                states.append(top)
            elif ch == closer:
                states.pop()
                if states[-1] == DOLLAR:
                    return (begin, pos + 1)
            elif ch in quote_state:
                states.append(quote_state[ch])
        elif top in quote_char:
            if ch == quote_char[top]:
                states.pop()
            elif ch == "\\":
                states.append(BACKSLASH)
        pos += 1

    # A single trailing "$" or "\" is tolerated; anything deeper is an
    # unterminated block.
    dangling_ok = len(states) == 2 and states[1] in (BACKSLASH, DOLLAR)
    if len(states) > 1 and not dangling_ok:
        raise SubstitutionError(
            "Substitution error, unfinished block starting at position {}: '{}' stack was {}".format(
                begin, scan[begin:], states
            )
        )
    return None
| 136 | |
| 137 | |
def next_seg(
    parsed_string: str, remaining_string: str, current_value: CWLOutputType
) -> CWLOutputType:
    """
    Resolve a parameter-reference path against a value, one segment at a time.

    The first segment of ``remaining_string`` (``.name``, ``['name']``,
    ``["name"]`` or ``[index]``) is applied to ``current_value`` and the
    function recurses on the rest of the path.

    :param parsed_string: the part of the reference already resolved; used
                          only to build error messages.
    :param remaining_string: the path segments still to be applied.
    :param current_value: the value reached so far.
    :return: the value at the end of the path. ``current_value`` is returned
             unchanged when ``remaining_string`` is empty or does not start
             with a recognizable segment. A trailing ``length`` key on a
             sequence yields its length.
    :raises WorkflowException: on a missing key, an out-of-range index, or
                               an attempt to index a value of the wrong kind.
    """
    if not remaining_string:
        return current_value

    m = segment_re.match(remaining_string)
    if not m:
        return current_value
    next_segment_str = m.group(0)
    rest = remaining_string[m.end(0) :]
    # Path of the value we are about to descend into. Only the consumed
    # segment is appended; appending the whole remainder would duplicate
    # segments in error messages from deeper levels.
    child_path = parsed_string + next_segment_str

    key = None  # type: Optional[Union[str, int]]
    if next_segment_str[0] == ".":
        key = next_segment_str[1:]
    elif next_segment_str[1] in ("'", '"'):
        # Strip the [' '] / [" "] wrapper and undo quote escaping.
        key = next_segment_str[2:-2].replace("\\'", "'").replace('\\"', '"')

    if key is not None:
        if (
            isinstance(current_value, MutableSequence)
            and key == "length"
            and not rest
        ):
            return len(current_value)
        if not isinstance(current_value, MutableMapping):
            raise WorkflowException(
                "%s is a %s, cannot index on string '%s'"
                % (parsed_string, type(current_value).__name__, key)
            )
        if key not in current_value:
            raise WorkflowException(f"{parsed_string} does not contain key '{key}'")
    else:
        try:
            key = int(next_segment_str[1:-1])
        except ValueError as v:
            raise WorkflowException(str(v)) from v
        if not isinstance(current_value, MutableSequence):
            raise WorkflowException(
                "%s is a %s, cannot index on int '%s'"
                % (parsed_string, type(current_value).__name__, key)
            )
        # Index 0 must be checked too: it is out of range for an empty list.
        if key >= len(current_value):
            raise WorkflowException(
                "%s list index %i out of range" % (parsed_string, key)
            )

    if isinstance(current_value, Mapping):
        try:
            return next_seg(
                child_path,
                rest,
                cast(CWLOutputType, current_value[cast(str, key)]),
            )
        except KeyError as err:
            raise WorkflowException(
                f"{parsed_string} doesn't have property {key}"
            ) from err
    elif isinstance(current_value, MutableSequence) and isinstance(key, int):
        try:
            return next_seg(child_path, rest, current_value[key])
        except (IndexError, KeyError) as err:
            raise WorkflowException(
                f"{parsed_string} doesn't have property {key}"
            ) from err
    else:
        raise WorkflowException(f"{parsed_string} doesn't have property {key}")
| 204 | |
| 205 | |
def evaluator(
    ex: str,
    jslib: str,
    obj: CWLObjectType,
    timeout: float,
    fullJS: bool = False,
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
) -> Optional[CWLOutputType]:
    """
    Evaluate a single expression, given without its leading ``$``.

    If ``ex`` has the shape of a simple parameter reference it is resolved
    directly against ``obj`` with ``next_seg``. When that is not possible,
    the expression is handed to ``execjs`` provided ``fullJS`` is set.

    :param ex: the expression text, e.g. ``(inputs.file.path)``.
    :param jslib: JavaScript source passed on to ``execjs``.
    :param obj: root variables that parameter references are resolved against.
    :param timeout: passed on to ``execjs``.
    :param fullJS: whether falling back to JavaScript evaluation is allowed.
    :param force_docker_pull: passed on to ``execjs``.
    :param debug: passed on to ``execjs``.
    :param js_console: passed on to ``execjs``.
    :return: the resolved value; ``None`` for the literal reference ``(null)``.
    :raises JavascriptException: if ``ex`` cannot be resolved as a parameter
                                 reference and ``fullJS`` is false.
    """
    match = param_re.match(ex)

    expression_parse_exception = None  # type: Optional[WorkflowException]

    if match is not None:
        first_symbol = match.group(1)
        first_symbol_end = match.end(1)

        # "(null)": the symbol is followed only by the closing parenthesis.
        if first_symbol_end + 1 == len(ex) and first_symbol == "null":
            return None
        try:
            if obj.get(first_symbol) is None:
                raise WorkflowException("%s is not defined" % first_symbol)

            return next_seg(
                first_symbol,
                ex[first_symbol_end:-1],
                cast(CWLOutputType, obj[first_symbol]),
            )
        except WorkflowException as werr:
            # Remember why direct resolution failed; JavaScript may still
            # be able to evaluate the expression.
            expression_parse_exception = werr

    # Reaching this point means direct resolution did not produce a value
    # (the try block above either returns or raises).
    if fullJS:
        return execjs(
            ex,
            jslib,
            timeout,
            force_docker_pull=force_docker_pull,
            debug=debug,
            js_console=js_console,
        )

    if expression_parse_exception is not None:
        raise JavascriptException(
            "Syntax error in parameter reference '%s': %s. This could be "
            "due to using Javascript code without specifying "
            "InlineJavascriptRequirement." % (ex[1:-1], expression_parse_exception)
        ) from expression_parse_exception
    raise JavascriptException(
        "Syntax error in parameter reference '%s'. This could be due "
        "to using Javascript code without specifying "
        "InlineJavascriptRequirement." % ex
    )
| 263 | |
| 264 | |
def _convert_dumper(string: str) -> str:
    """Return ``string`` as a JSON string literal followed by `` + ``."""
    return json.dumps(string) + " + "
| 267 | |
| 268 | |
def interpolate(
    scan: str,
    rootvars: CWLObjectType,
    timeout: float = default_timeout,
    fullJS: bool = False,
    jslib: str = "",
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
    strip_whitespace: bool = True,
    escaping_behavior: int = 2,
    convert_to_expression: bool = False,
) -> Optional[CWLOutputType]:
    """
    Interpolate and evaluate.

    ``scan`` is walked with ``scanner``; each ``$(...)`` / ``${...}`` block
    is evaluated with ``evaluator`` and each backslash escape is resolved
    according to ``escaping_behavior``.

    When the whole text is one expression its evaluated value is returned
    as-is. Otherwise the results are joined with the surrounding text into a
    string, with non-string values serialized as JSON.

    With ``convert_to_expression=True`` nothing is evaluated. Instead a
    ``${return ...;}`` JavaScript expression is returned that performs the
    same concatenation. Literal text is emitted as JSON string literals so
    that quotes and newlines in it cannot break the generated code.

    Note: only call with convert_to_expression=True on CWL Expressions in $()
    form that need interpolation.

    :param scan: the text to interpolate.
    :param rootvars: root variables available to parameter references.
    :param timeout: passed on to ``evaluator``.
    :param fullJS: passed on to ``evaluator``.
    :param jslib: passed on to ``evaluator``.
    :param force_docker_pull: passed on to ``evaluator``.
    :param debug: passed on to ``evaluator``.
    :param js_console: passed on to ``evaluator``.
    :param strip_whitespace: strip ``scan`` before processing.
    :param escaping_behavior: 1 keeps only the character after a backslash;
                              2 drops the backslash before ``$(`` / ``${``,
                              collapses a double backslash, and keeps any
                              other backslash sequence unchanged.
    :param convert_to_expression: build a JavaScript expression instead of
                                  evaluating.
    :raises ValueError: if ``escaping_behavior`` is neither 1 nor 2 and a
                        backslash escape is encountered.
    """

    def _identity(text: str) -> str:
        return text

    if strip_whitespace:
        scan = scan.strip()
    parts = []  # type: List[str]
    if convert_to_expression:
        dump = _convert_dumper
        parts.append("${return ")
    else:
        dump = _identity
    w = scanner(scan)
    while w:
        # Literal text in front of the match. In expression mode this goes
        # through json.dumps so that it is a valid JavaScript string literal.
        parts.append(dump(scan[0 : w[0]]))

        if scan[w[0]] == "$":
            if not convert_to_expression:
                value = evaluator(
                    scan[w[0] + 1 : w[1]],
                    jslib,
                    rootvars,
                    timeout,
                    fullJS=fullJS,
                    force_docker_pull=force_docker_pull,
                    debug=debug,
                    js_console=js_console,
                )
                # The expression is the entire text: return its value
                # without converting it to a string.
                if w[0] == 0 and w[1] == len(scan) and len(parts) <= 1:
                    return value

                leaf = json_dumps(value, sort_keys=True)
                if leaf[0] == '"':
                    # String results are inserted without JSON quoting.
                    leaf = json.loads(leaf)
                parts.append(leaf)
            else:
                parts.append(
                    "function(){var item ="
                    + scan[w[0] : w[1]][2:-1]
                    + '; if (typeof(item) === "string"){ return item; } else { return JSON.stringify(item); }}() + '
                )
        elif scan[w[0]] == "\\":
            if escaping_behavior == 1:
                # Old behavior. Just skip the next character.
                escaped = scan[w[1] - 1]
                parts.append(dump(escaped))
            elif escaping_behavior == 2:
                # Backslash quoting requires a three character lookahead.
                escaped = scan[w[0] : w[1] + 1]
                if escaped in ("\\$(", "\\${"):
                    # Suppress start of a parameter reference, drop the
                    # backslash.
                    parts.append(dump(escaped[1:]))
                    w = (w[0], w[1] + 1)
                elif escaped[1] == "\\":
                    # Double backslash, becomes a single backslash
                    parts.append(dump("\\"))
                else:
                    # Some other text, add it as-is (including the
                    # backslash) and resume scanning.
                    parts.append(dump(escaped[:2]))
            else:
                raise ValueError("Unknown escaping behavior %s" % escaping_behavior)
        scan = scan[w[1] :]
        w = scanner(scan)
    if convert_to_expression:
        parts.append(json.dumps(scan))
        parts.append(";}")
    else:
        parts.append(scan)
    return "".join(parts)
| 358 | |
| 359 | |
def needs_parsing(snippet: Any) -> bool:
    """Return True if ``snippet`` is a string containing ``$(`` or ``${``."""
    if not isinstance(snippet, str):
        return False
    return any(marker in snippet for marker in ("$(", "${"))
| 362 | |
| 363 | |
def do_eval(
    ex: Optional[CWLOutputType],
    jobinput: CWLObjectType,
    requirements: List[CWLObjectType],
    outdir: Optional[str],
    tmpdir: Optional[str],
    resources: Dict[str, Union[float, int, str]],
    context: Optional[CWLOutputType] = None,
    timeout: float = default_timeout,
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
    strip_whitespace: bool = True,
    cwlVersion: str = "",
) -> Optional[CWLOutputType]:
    """
    Evaluate ``ex`` if it is a string containing an expression.

    The root variables ``inputs`` (``jobinput``), ``self`` (``context``) and
    ``runtime`` (a copy of ``resources`` plus ``tmpdir`` and ``outdir``) are
    made available to the expression. JavaScript evaluation is enabled when
    ``requirements`` contains an ``InlineJavascriptRequirement``; the last
    such entry in the list is the one used.

    :param ex: the value to evaluate; anything that is not a string
               containing ``$(`` or ``${`` is returned unchanged.
    :param jobinput: value of the ``inputs`` root variable.
    :param requirements: requirement objects, each with a ``class`` key.
    :param outdir: value of ``runtime.outdir`` (``None`` if empty).
    :param tmpdir: value of ``runtime.tmpdir`` (``None`` if empty).
    :param resources: base content of the ``runtime`` root variable.
    :param context: value of the ``self`` root variable.
    :param timeout: passed on to ``interpolate``.
    :param force_docker_pull: passed on to ``interpolate``.
    :param debug: passed on to ``interpolate``.
    :param js_console: passed on to ``interpolate``.
    :param strip_whitespace: passed on to ``interpolate``.
    :param cwlVersion: selects the backslash escaping behavior.
    :raises WorkflowException: wrapping any error raised during interpolation.
    """
    # Versions that use the old backslash handling (escaping_behavior 1).
    old_escaping_versions = (
        "v1.0",
        "v1.1.0-dev1",
        "v1.1",
        "v1.2.0-dev1",
        "v1.2.0-dev2",
        "v1.2.0-dev3",
    )

    runtime = cast(MutableMapping[str, Union[int, str, None]], copy.deepcopy(resources))
    runtime["tmpdir"] = docker_windows_path_adjust(tmpdir) if tmpdir else None
    runtime["outdir"] = docker_windows_path_adjust(outdir) if outdir else None

    root_values = {"inputs": jobinput, "self": context, "runtime": runtime}
    rootvars = cast(CWLObjectType, bytes2str_in_dicts(root_values))

    if not (isinstance(ex, str) and needs_parsing(ex)):
        return ex

    # Search from the end of the list for the JavaScript requirement.
    js_requirement = next(
        (r for r in reversed(requirements) if r["class"] == "InlineJavascriptRequirement"),
        None,
    )
    if js_requirement is None:
        fullJS = False
        jslib = ""
    else:
        fullJS = True
        jslib = jshead(
            cast(List[str], js_requirement.get("expressionLib", [])), rootvars
        )

    escaping = 1 if cwlVersion in old_escaping_versions else 2

    try:
        return interpolate(
            ex,
            rootvars,
            timeout=timeout,
            fullJS=fullJS,
            jslib=jslib,
            force_docker_pull=force_docker_pull,
            debug=debug,
            js_console=js_console,
            strip_whitespace=strip_whitespace,
            escaping_behavior=escaping,
        )
    except Exception as e:
        _logger.exception(e)
        raise WorkflowException("Expression evaluation error:\n%s" % str(e)) from e
