comparison env/lib/python3.9/site-packages/cwltool/expression.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Parse CWL expressions."""
2
3 import copy
4 import json
5 import re
6 from typing import (
7 Any,
8 Dict,
9 List,
10 Mapping,
11 MutableMapping,
12 MutableSequence,
13 Optional,
14 Tuple,
15 Union,
16 cast,
17 )
18
19 from schema_salad.utils import json_dumps
20
21 from .errors import WorkflowException
22 from .loghandler import _logger
23 from .sandboxjs import JavascriptException, default_timeout, execjs
24 from .utils import (
25 CWLObjectType,
26 CWLOutputType,
27 bytes2str_in_dicts,
28 docker_windows_path_adjust,
29 )
30
31
def jshead(engine_config: List[str], rootvars: CWLObjectType) -> str:
    """
    Build the JavaScript preamble placed in front of an expression.

    The result is the lines of ``engine_config`` followed by one
    ``var <name> = <json>;`` declaration per entry of ``rootvars``, all
    joined with newlines.

    No bytes-to-str conversion is performed here; ``rootvars`` is handed to
    ``json_dumps`` as-is.
    """
    declarations = []
    for name, value in rootvars.items():
        serialized = json_dumps(value, indent=4)
        declarations.append("var {} = {};".format(name, serialized))
    return "\n".join(engine_config + declarations)
43
44
# Regular-expression building blocks for CWL parameter references such as
# ``(inputs.reads['left']["name"][0])``.
seg_symbol = r"\w+"  # bare identifier
seg_single = r"\['([^']|\\')+'\]"  # ['key'], allowing \' inside the quotes
seg_double = r'\["([^"]|\\")+"\]'  # ["key"], allowing \" inside the quotes
seg_index = r"\[[0-9]+\]"  # [123]
# One path segment: ``.symbol`` or any of the three bracketed forms.
segments = "(" + "|".join((r"\." + seg_symbol, seg_single, seg_double, seg_index)) + ")"
segment_re = re.compile(segments, flags=re.UNICODE)
# A whole reference: "(" + leading symbol + zero or more segments + ")".
param_str = r"\((" + seg_symbol + ")" + segments + r"*\)$"
param_re = re.compile(param_str, flags=re.UNICODE)
54
55
class SubstitutionError(Exception):
    """Raised by ``scanner`` when a ``$(`` or ``${`` block is left unfinished."""
58
59
def scanner(scan: str) -> Optional[Tuple[int, int]]:
    """
    Locate the first expression or backslash escape in ``scan``.

    A small stack-based state machine walks the string once.

    Returns a half-open ``(start, end)`` index pair for either

    * a backslash met outside any expression together with the character
      that follows it, or
    * a complete ``$(...)`` / ``${...}`` block, from the ``$`` through the
      matching closing bracket (nested brackets of the same kind and quoted
      strings inside the block are skipped over),

    or ``None`` when the string contains neither.

    Raises ``SubstitutionError`` when the input ends inside a block or a
    quoted string.  A lone trailing ``$`` or backslash is not an error.
    """
    # Plain ints on purpose: the state stack is printed in the error message.
    DEFAULT, DOLLAR, PAREN, BRACE, SINGLE_QUOTE, DOUBLE_QUOTE, BACKSLASH = range(7)
    brackets = {PAREN: ("(", ")"), BRACE: ("{", "}")}
    quote_char = {SINGLE_QUOTE: "'", DOUBLE_QUOTE: '"'}

    states = [DEFAULT]
    begin = 0
    pos = 0
    length = len(scan)
    while pos < length:
        top = states[-1]
        ch = scan[pos]

        if top == DEFAULT:
            if ch == "$":
                states.append(DOLLAR)
            elif ch == "\\":
                states.append(BACKSLASH)
        elif top == BACKSLASH:
            states.pop()
            if states[-1] == DEFAULT:
                # Top-level escape: report the backslash and this character.
                return (pos - 1, pos + 1)
        elif top == DOLLAR:
            if ch == "(":
                begin = pos - 1
                states.append(PAREN)
            elif ch == "{":
                begin = pos - 1
                states.append(BRACE)
            else:
                # A "$" that opens nothing: drop the state and look at this
                # same character again in the enclosing state.
                states.pop()
                continue
        elif top in brackets:
            opener, closer = brackets[top]
            if ch == opener:
                states.append(top)
            elif ch == closer:
                states.pop()
                if states[-1] == DOLLAR:
                    return (begin, pos + 1)
            elif ch == "'":
                states.append(SINGLE_QUOTE)
            elif ch == '"':
                states.append(DOUBLE_QUOTE)
        elif top in quote_char:
            if ch == quote_char[top]:
                states.pop()
            elif ch == "\\":
                states.append(BACKSLASH)
        pos += 1

    dangling_ok = len(states) == 2 and states[1] in (BACKSLASH, DOLLAR)
    if len(states) > 1 and not dangling_ok:
        raise SubstitutionError(
            "Substitution error, unfinished block starting at position {}: '{}' stack was {}".format(
                begin, scan[begin:], states
            )
        )
    return None
136
137
def next_seg(
    parsed_string: str, remaining_string: str, current_value: CWLOutputType
) -> CWLOutputType:
    """
    Resolve the rest of a parameter reference against ``current_value``.

    One segment (``.name``, ``['name']``, ``["name"]`` or ``[index]``) is
    taken from the front of ``remaining_string``, applied to
    ``current_value``, and the function recurses on what is left.

    :param parsed_string: the part of the reference already handled; only
        used to build error messages.
    :param remaining_string: the segments still to be applied.
    :param current_value: the value reached so far.
    :returns: the value the reference points at.  ``current_value`` itself is
        returned when ``remaining_string`` is empty or does not start with a
        recognised segment.  A final ``length`` segment on a sequence yields
        the sequence's length.
    :raises WorkflowException: on a missing key, an out-of-range index, or
        when the kind of segment does not fit the kind of value.
    """
    if not remaining_string:
        return current_value
    m = segment_re.match(remaining_string)
    if not m:
        return current_value
    next_segment_str = m.group(0)
    rest = remaining_string[m.end(0) :]

    key = None  # type: Optional[Union[str, int]]
    if next_segment_str[0] == ".":
        key = next_segment_str[1:]
    elif next_segment_str[1] in ("'", '"'):
        # Strip the [' '] / [" "] wrapper and undo backslash-escaped quotes.
        key = next_segment_str[2:-2].replace("\\'", "'").replace('\\"', '"')

    if key is not None:
        if isinstance(current_value, MutableSequence) and key == "length" and not rest:
            return len(current_value)
        if not isinstance(current_value, MutableMapping):
            raise WorkflowException(
                "%s is a %s, cannot index on string '%s'"
                % (parsed_string, type(current_value).__name__, key)
            )
        if key not in current_value:
            raise WorkflowException(f"{parsed_string} does not contain key '{key}'")
    else:
        try:
            key = int(next_segment_str[1:-1])
        except ValueError as v:
            raise WorkflowException(str(v)) from v
        if not isinstance(current_value, MutableSequence):
            raise WorkflowException(
                "%s is a %s, cannot index on int '%s'"
                % (parsed_string, type(current_value).__name__, key)
            )
        # Index 0 must be range-checked too, otherwise ``[0]`` on an empty
        # list escapes as a bare IndexError.
        if key >= len(current_value):
            raise WorkflowException(
                "%s list index %i out of range" % (parsed_string, key)
            )

    if isinstance(current_value, Mapping):
        try:
            child = current_value[cast(str, key)]
        except KeyError as err:
            raise WorkflowException(
                f"{parsed_string} doesn't have property {key}"
            ) from err
    elif isinstance(current_value, MutableSequence) and isinstance(key, int):
        try:
            child = current_value[key]
        except IndexError as err:
            raise WorkflowException(
                f"{parsed_string} doesn't have property {key}"
            ) from err
    else:
        raise WorkflowException(f"{parsed_string} doesn't have property {key}")

    return next_seg(
        parsed_string + remaining_string,
        rest,
        cast(CWLOutputType, child),
    )
204
205
def evaluator(
    ex: str,
    jslib: str,
    obj: CWLObjectType,
    timeout: float,
    fullJS: bool = False,
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
) -> Optional[CWLOutputType]:
    """
    Evaluate a single expression, bracket characters included.

    ``ex`` is first tried as a plain parameter reference (it must match
    ``param_re``) and resolved against ``obj`` with ``next_seg``.  The literal
    reference ``(null)`` yields ``None``.  If that fails, or ``ex`` is not a
    parameter reference, the expression is passed to ``execjs`` when
    ``fullJS`` is true.

    A leading symbol counts as defined when it is a key of ``obj``, even if
    its value is ``None``, so a reference to a null value resolves to
    ``None`` instead of being reported as undefined.

    :raises JavascriptException: when the expression cannot be resolved as a
        parameter reference and ``fullJS`` is false.
    """
    match = param_re.match(ex)
    expression_parse_exception = None  # type: Optional[WorkflowException]

    if match is not None:
        first_symbol = match.group(1)
        first_symbol_end = match.end(1)

        if first_symbol_end + 1 == len(ex) and first_symbol == "null":
            return None
        try:
            if first_symbol not in obj:
                raise WorkflowException("%s is not defined" % first_symbol)

            return next_seg(
                first_symbol,
                ex[first_symbol_end:-1],
                cast(CWLOutputType, obj[first_symbol]),
            )
        except WorkflowException as werr:
            # Keep the failure: it is either superseded by the JavaScript
            # fallback or reported below.
            expression_parse_exception = werr

    if fullJS:
        return execjs(
            ex,
            jslib,
            timeout,
            force_docker_pull=force_docker_pull,
            debug=debug,
            js_console=js_console,
        )

    if expression_parse_exception is not None:
        raise JavascriptException(
            "Syntax error in parameter reference '%s': %s. This could be "
            "due to using Javascript code without specifying "
            "InlineJavascriptRequirement." % (ex[1:-1], expression_parse_exception)
        ) from expression_parse_exception
    raise JavascriptException(
        "Syntax error in parameter reference '%s'. This could be due "
        "to using Javascript code without specifying "
        "InlineJavascriptRequirement." % ex
    )
263
264
265 def _convert_dumper(string: str) -> str:
266 return "{} + ".format(json.dumps(string))
267
268
def interpolate(
    scan: str,
    rootvars: CWLObjectType,
    timeout: float = default_timeout,
    fullJS: bool = False,
    jslib: str = "",
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
    strip_whitespace: bool = True,
    escaping_behavior: int = 2,
    convert_to_expression: bool = False,
) -> Optional[CWLOutputType]:
    """
    Interpolate and evaluate.

    The string is walked with ``scanner``; each ``$(...)`` / ``${...}`` block
    is evaluated with ``evaluator`` and each backslash escape is resolved
    according to ``escaping_behavior`` (1: keep only the character after the
    backslash; 2: ``\\$(`` and ``\\${`` lose the backslash, ``\\\\`` becomes a
    single backslash, anything else is kept as-is).

    When the whole string is one expression its evaluated value is returned
    unchanged.  Otherwise the pieces are joined into a string; evaluated
    values that are not strings are inserted in their JSON form.

    With ``convert_to_expression=True`` nothing is evaluated.  Instead a
    ``${return ... ;}`` JavaScript expression is returned that concatenates
    the literal text, emitted as JSON string literals so that quotes, newlines
    and backslashes stay valid JavaScript, with the embedded expressions.

    Note: only call with convert_to_expression=True on CWL Expressions in $()
    form that need interpolation.

    :raises ValueError: if a backslash escape is met and ``escaping_behavior``
        is neither 1 nor 2.
    """
    if strip_whitespace:
        scan = scan.strip()
    parts = []
    if convert_to_expression:
        dump = _convert_dumper
        parts.append("${return ")
    else:

        def dump(string: str) -> str:
            return string

    w = scanner(scan)
    while w:
        # Literal text in front of the match.
        parts.append(dump(scan[0 : w[0]]))

        if scan[w[0]] == "$":
            if not convert_to_expression:
                e = evaluator(
                    scan[w[0] + 1 : w[1]],
                    jslib,
                    rootvars,
                    timeout,
                    fullJS=fullJS,
                    force_docker_pull=force_docker_pull,
                    debug=debug,
                    js_console=js_console,
                )
                # The expression is the entire input: hand back the value
                # itself rather than a string rendering of it.
                if w[0] == 0 and w[1] == len(scan) and len(parts) <= 1:
                    return e

                leaf = json_dumps(e, sort_keys=True)
                if leaf[0] == '"':
                    # String results are inserted without JSON quoting.
                    leaf = json.loads(leaf)
                parts.append(leaf)
            else:
                parts.append(
                    "function(){var item ="
                    + scan[w[0] : w[1]][2:-1]
                    + '; if (typeof(item) === "string"){ return item; } else { return JSON.stringify(item); }}() + '
                )
        elif scan[w[0]] == "\\":
            if escaping_behavior == 1:
                # Old behavior. Just skip the next character.
                parts.append(dump(scan[w[1] - 1]))
            elif escaping_behavior == 2:
                # Backslash quoting requires a three character lookahead.
                escaped = scan[w[0] : w[1] + 1]
                if escaped in ("\\$(", "\\${"):
                    # Suppress start of a parameter reference, drop the
                    # backslash.
                    parts.append(dump(escaped[1:]))
                    w = (w[0], w[1] + 1)
                elif escaped[1] == "\\":
                    # Double backslash, becomes a single backslash
                    parts.append(dump("\\"))
                else:
                    # Some other text, add it as-is (including the
                    # backslash) and resume scanning.
                    parts.append(dump(escaped[:2]))
            else:
                raise ValueError("Unknown escaping behavior %s" % escaping_behavior)
        scan = scan[w[1] :]
        w = scanner(scan)
    if convert_to_expression:
        parts.append(json.dumps(scan))
        parts.append(";}")
    else:
        parts.append(scan)
    return "".join(parts)
358
359
def needs_parsing(snippet: Any) -> bool:
    """Tell whether ``snippet`` is a string containing ``$(`` or ``${``."""
    if not isinstance(snippet, str):
        return False
    return any(marker in snippet for marker in ("$(", "${"))
362
363
def do_eval(
    ex: Optional[CWLOutputType],
    jobinput: CWLObjectType,
    requirements: List[CWLObjectType],
    outdir: Optional[str],
    tmpdir: Optional[str],
    resources: Dict[str, Union[float, int, str]],
    context: Optional[CWLOutputType] = None,
    timeout: float = default_timeout,
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
    strip_whitespace: bool = True,
    cwlVersion: str = "",
) -> Optional[CWLOutputType]:
    """
    Evaluate ``ex`` if it is a string containing ``$(`` or ``${``.

    The evaluation context exposes ``inputs`` (``jobinput``), ``self``
    (``context``) and ``runtime`` (a deep copy of ``resources`` plus
    ``tmpdir`` and ``outdir``, each passed through
    ``docker_windows_path_adjust`` or set to ``None`` when empty).

    JavaScript evaluation is enabled when ``requirements`` contains an
    ``InlineJavascriptRequirement``; the last such entry supplies the
    ``expressionLib``.  The CWL versions listed below select escaping
    behavior 1, every other value of ``cwlVersion`` selects 2.

    Anything that is not such a string is returned unchanged.

    :raises WorkflowException: wrapping any exception raised during
        interpolation, after logging it.
    """
    runtime = cast(MutableMapping[str, Union[int, str, None]], copy.deepcopy(resources))
    runtime["tmpdir"] = None if not tmpdir else docker_windows_path_adjust(tmpdir)
    runtime["outdir"] = None if not outdir else docker_windows_path_adjust(outdir)

    root_context = {"inputs": jobinput, "self": context, "runtime": runtime}
    rootvars = cast(CWLObjectType, bytes2str_in_dicts(root_context))

    if not (isinstance(ex, str) and needs_parsing(ex)):
        return ex

    # Later requirements take precedence, hence the reversed search.
    js_requirement = next(
        (
            req
            for req in reversed(requirements)
            if req["class"] == "InlineJavascriptRequirement"
        ),
        None,
    )
    fullJS = js_requirement is not None
    jslib = ""
    if js_requirement is not None:
        jslib = jshead(
            cast(List[str], js_requirement.get("expressionLib", [])), rootvars
        )

    old_escaping_versions = (
        "v1.0",
        "v1.1.0-dev1",
        "v1.1",
        "v1.2.0-dev1",
        "v1.2.0-dev2",
        "v1.2.0-dev3",
    )
    escaping = 1 if cwlVersion in old_escaping_versions else 2

    try:
        return interpolate(
            ex,
            rootvars,
            timeout=timeout,
            fullJS=fullJS,
            jslib=jslib,
            force_docker_pull=force_docker_pull,
            debug=debug,
            js_console=js_console,
            strip_whitespace=strip_whitespace,
            escaping_behavior=escaping,
        )
    except Exception as e:
        _logger.exception(e)
        raise WorkflowException("Expression evaluation error:\n%s" % str(e)) from e