Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/cwltool/expression.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """Parse CWL expressions.""" | |
2 | |
3 import copy | |
4 import json | |
5 import re | |
6 from typing import ( | |
7 Any, | |
8 Dict, | |
9 List, | |
10 Mapping, | |
11 MutableMapping, | |
12 MutableSequence, | |
13 Optional, | |
14 Tuple, | |
15 Union, | |
16 cast, | |
17 ) | |
18 | |
19 from schema_salad.utils import json_dumps | |
20 | |
21 from .errors import WorkflowException | |
22 from .loghandler import _logger | |
23 from .sandboxjs import JavascriptException, default_timeout, execjs | |
24 from .utils import ( | |
25 CWLObjectType, | |
26 CWLOutputType, | |
27 bytes2str_in_dicts, | |
28 docker_windows_path_adjust, | |
29 ) | |
30 | |
31 | |
def jshead(engine_config: List[str], rootvars: CWLObjectType) -> str:
    """
    Build the JavaScript preamble placed in front of an expression.

    The result is the lines of ``engine_config`` followed by one
    ``var NAME = JSON;`` declaration per entry of ``rootvars``, all joined
    with newlines.

    Values are serialized as given; this function performs no bytes-to-str
    conversion of its own.
    """
    declarations = [
        f"var {name} = {json_dumps(value, indent=4)};"
        for name, value in rootvars.items()
    ]
    return "\n".join(engine_config + declarations)
43 | |
44 | |
# Grammar of a CWL parameter reference, e.g. ``(inputs.file['key'][0])``.
# One path segment is either ``.symbol``, ``['quoted']``, ``["quoted"]`` or
# ``[index]``; a full reference is a leading symbol followed by any number of
# segments, wrapped in parentheses.
seg_symbol = r"\w+"
seg_single = r"\['([^']|\\')+'\]"
seg_double = r'\["([^"]|\\")+"\]'
seg_index = r"\[[0-9]+\]"
segments = (
    "(" + "|".join((r"\." + seg_symbol, seg_single, seg_double, seg_index)) + ")"
)
segment_re = re.compile(segments, flags=re.UNICODE)
param_str = r"\((" + seg_symbol + ")" + segments + r"*\)$"
param_re = re.compile(param_str, flags=re.UNICODE)
54 | |
55 | |
class SubstitutionError(Exception):
    """Signal that scanning ended inside an unfinished expression block."""
58 | |
59 | |
def scanner(scan: str) -> Optional[Tuple[int, int]]:
    """
    Locate the first expression or backslash escape in ``scan``.

    Returns a ``(start, end)`` pair of string offsets (end exclusive):

    * for ``$(...)`` or ``${...}`` the pair spans the whole block, with
      nested brackets of the same kind and quoted strings skipped over;
    * for a backslash seen outside any block the pair spans the backslash
      and the character that follows it.

    Returns ``None`` when neither occurs. Raises ``SubstitutionError`` when
    the input ends while a block or quoted string is still open.
    """
    DEFAULT, DOLLAR, PAREN, BRACE, SINGLE_QUOTE, DOUBLE_QUOTE, BACKSLASH = range(7)
    # Opening/closing character for each bracket state, and the terminator
    # for each quote state.
    brackets = {PAREN: ("(", ")"), BRACE: ("{", "}")}
    quotes = {SINGLE_QUOTE: "'", DOUBLE_QUOTE: '"'}

    stack = [DEFAULT]
    start = 0
    pos = 0
    length = len(scan)
    while pos < length:
        state = stack[-1]
        char = scan[pos]

        if state == BACKSLASH:
            stack.pop()
            if stack[-1] == DEFAULT:
                return (pos - 1, pos + 1)
        elif state == DEFAULT:
            if char == "$":
                stack.append(DOLLAR)
            elif char == "\\":
                stack.append(BACKSLASH)
        elif state == DOLLAR:
            if char == "(":
                start = pos - 1
                stack.append(PAREN)
            elif char == "{":
                start = pos - 1
                stack.append(BRACE)
            else:
                # A lone "$": drop the state and look at this same
                # character again from the enclosing state.
                stack.pop()
                continue
        elif state in brackets:
            opener, closer = brackets[state]
            if char == opener:
                stack.append(state)
            elif char == closer:
                stack.pop()
                if stack[-1] == DOLLAR:
                    return (start, pos + 1)
            elif char == "'":
                stack.append(SINGLE_QUOTE)
            elif char == '"':
                stack.append(DOUBLE_QUOTE)
        elif state in quotes:
            if char == quotes[state]:
                stack.pop()
            elif char == "\\":
                stack.append(BACKSLASH)
        pos += 1

    # A trailing lone "$" or "\" is tolerated; anything else left open is not.
    trailing_marker_only = len(stack) == 2 and stack[1] in (BACKSLASH, DOLLAR)
    if len(stack) > 1 and not trailing_marker_only:
        raise SubstitutionError(
            "Substitution error, unfinished block starting at position {}: '{}' stack was {}".format(
                start, scan[start:], stack
            )
        )
    return None
136 | |
137 | |
def next_seg(
    parsed_string: str, remaining_string: str, current_value: CWLOutputType
) -> CWLOutputType:
    """
    Resolve the remaining segments of a parameter reference against a value.

    :param parsed_string: the part of the reference consumed so far; used
        only to build error messages.
    :param remaining_string: the unparsed tail, e.g. ``.foo['bar'][0]``.
    :param current_value: the value reached by ``parsed_string``.
    :returns: the value addressed by the full path. ``current_value`` is
        returned unchanged when ``remaining_string`` is empty or does not
        start with a recognisable segment. A trailing ``.length`` on a
        sequence yields its length.
    :raises WorkflowException: when a key is missing, an index is out of
        range, or the segment kind does not fit the type of the value.
    """
    if not remaining_string:
        return current_value

    m = segment_re.match(remaining_string)
    if not m:
        return current_value
    next_segment_str = m.group(0)
    rest = remaining_string[m.end(0) :]

    key: Optional[Union[str, int]] = None
    if next_segment_str[0] == ".":
        key = next_segment_str[1:]
    elif next_segment_str[1] in ("'", '"'):
        # Strip the [' '] / [" "] wrapper and un-escape embedded quotes.
        key = next_segment_str[2:-2].replace("\\'", "'").replace('\\"', '"')

    if key is not None:
        if (
            isinstance(current_value, MutableSequence)
            and key == "length"
            and not rest
        ):
            return len(current_value)
        if not isinstance(current_value, MutableMapping):
            raise WorkflowException(
                "%s is a %s, cannot index on string '%s'"
                % (parsed_string, type(current_value).__name__, key)
            )
        if key not in current_value:
            raise WorkflowException(f"{parsed_string} does not contain key '{key}'")
    else:
        try:
            key = int(next_segment_str[1:-1])
        except ValueError as v:
            raise WorkflowException(str(v)) from v
        if not isinstance(current_value, MutableSequence):
            raise WorkflowException(
                "%s is a %s, cannot index on int '%s'"
                % (parsed_string, type(current_value).__name__, key)
            )
        # The bounds check must also run for index 0: "[0]" on an empty
        # list is out of range too.
        if key >= len(current_value):
            raise WorkflowException(
                "%s list index %i out of range" % (parsed_string, key)
            )

    # Extend the consumed path by exactly the segment just parsed so that
    # error messages from deeper levels name the right location.
    consumed = parsed_string + next_segment_str

    if isinstance(current_value, Mapping):
        try:
            return next_seg(
                consumed,
                rest,
                cast(CWLOutputType, current_value[cast(str, key)]),
            )
        except KeyError as err:
            raise WorkflowException(
                f"{parsed_string} doesn't have property {key}"
            ) from err
    elif isinstance(current_value, list) and isinstance(key, int):
        try:
            return next_seg(consumed, rest, current_value[key])
        except (IndexError, KeyError) as err:
            # List indexing signals a bad position with IndexError.
            raise WorkflowException(
                f"{parsed_string} doesn't have property {key}"
            ) from err
    else:
        raise WorkflowException(f"{parsed_string} doesn't have property {key}")
204 | |
205 | |
def evaluator(
    ex: str,
    jslib: str,
    obj: CWLObjectType,
    timeout: float,
    fullJS: bool = False,
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
) -> Optional[CWLOutputType]:
    """
    Evaluate one expression body (the text after the leading ``$``).

    If ``ex`` matches the parameter-reference grammar it is resolved against
    ``obj`` directly; the literal ``(null)`` yields ``None``. When it does
    not match, or resolving it fails, the expression is handed to ``execjs``
    if ``fullJS`` is set.

    :raises JavascriptException: when the expression could not be resolved
        as a parameter reference and ``fullJS`` is not set.
    """
    parse_error = None
    match = param_re.match(ex)

    if match is not None:
        first_symbol = match.group(1)
        first_symbol_end = match.end(1)

        if first_symbol == "null" and first_symbol_end + 1 == len(ex):
            return None

        if obj.get(first_symbol) is None:
            parse_error = WorkflowException("%s is not defined" % first_symbol)
        else:
            try:
                return next_seg(
                    first_symbol,
                    ex[first_symbol_end:-1],
                    cast(CWLOutputType, obj[first_symbol]),
                )
            except WorkflowException as werr:
                parse_error = werr

    # Reaching this point means the reference was not resolved above.
    if fullJS:
        return execjs(
            ex,
            jslib,
            timeout,
            force_docker_pull=force_docker_pull,
            debug=debug,
            js_console=js_console,
        )

    if parse_error is not None:
        raise JavascriptException(
            "Syntax error in parameter reference '%s': %s. This could be "
            "due to using Javascript code without specifying "
            "InlineJavascriptRequirement." % (ex[1:-1], parse_error)
        )
    raise JavascriptException(
        "Syntax error in parameter reference '%s'. This could be due "
        "to using Javascript code without specifying "
        "InlineJavascriptRequirement." % ex
    )
263 | |
264 | |
def _convert_dumper(string: str) -> str:
    """Return ``string`` as a JSON string literal followed by ``" + "``."""
    quoted = json.dumps(string)
    return quoted + " + "
267 | |
268 | |
def interpolate(
    scan: str,
    rootvars: CWLObjectType,
    timeout: float = default_timeout,
    fullJS: bool = False,
    jslib: str = "",
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
    strip_whitespace: bool = True,
    escaping_behavior: int = 2,
    convert_to_expression: bool = False,
) -> Optional[CWLOutputType]:
    """
    Expand every expression and backslash escape found in ``scan``.

    In the default mode each ``$(...)`` / ``${...}`` block is evaluated and
    its result spliced into the surrounding text; when the whole input is a
    single block, the evaluated value itself is returned rather than a
    string.

    With ``convert_to_expression=True`` nothing is evaluated. Instead the
    input is rewritten into one ``${return ...;}`` JavaScript expression that
    concatenates the literal pieces and the embedded expressions. Only use
    that mode on CWL Expressions in ``$()`` form that need interpolation.

    ``escaping_behavior`` selects how a backslash outside a block is treated:
    ``1`` keeps only the character after it; ``2`` looks one character
    further ahead to recognise ``\\$(``, ``\\${`` and a double backslash.
    Any other value raises ``Exception``.
    """
    if strip_whitespace:
        scan = scan.strip()

    pieces = []
    if convert_to_expression:
        dump = _convert_dumper
        pieces.append("${return ")
    else:

        def dump(text):
            return text

    span = scanner(scan)
    while span:
        begin, end = span
        literal = scan[0:begin]
        if convert_to_expression:
            pieces.append('"{}" + '.format(literal))
        else:
            pieces.append(literal)

        marker = scan[begin]
        if marker == "$":
            if convert_to_expression:
                pieces.append(
                    "function(){var item ="
                    + scan[begin:end][2:-1]
                    + '; if (typeof(item) === "string"){ return item; } else { return JSON.stringify(item); }}() + '
                )
            else:
                value = evaluator(
                    scan[begin + 1 : end],
                    jslib,
                    rootvars,
                    timeout,
                    fullJS=fullJS,
                    force_docker_pull=force_docker_pull,
                    debug=debug,
                    js_console=js_console,
                )
                # The whole input is one expression: hand back the value
                # itself instead of its string form.
                if begin == 0 and end == len(scan) and len(pieces) <= 1:
                    return value

                leaf = json_dumps(value, sort_keys=True)
                if leaf[0] == '"':
                    # String results are spliced in without their quotes.
                    leaf = json.loads(leaf)
                pieces.append(leaf)
        elif marker == "\\":
            if escaping_behavior == 1:
                # Old behavior. Just skip the backslash and keep the
                # character after it.
                pieces.append(dump(scan[end - 1]))
            elif escaping_behavior == 2:
                # Backslash quoting requires a three character lookahead.
                lookahead = scan[begin : end + 1]
                if lookahead in ("\\$(", "\\${"):
                    # Suppressed start of a parameter reference: drop the
                    # backslash and consume the bracket as well.
                    pieces.append(dump(lookahead[1:]))
                    end += 1
                elif lookahead[1] == "\\":
                    # Double backslash, becomes a single backslash.
                    pieces.append(dump("\\"))
                else:
                    # Some other text: keep it as-is (including the
                    # backslash) and resume scanning.
                    pieces.append(dump(lookahead[:2]))
            else:
                raise Exception("Unknown escaping behavior %s" % escaping_behavior)

        scan = scan[end:]
        span = scanner(scan)

    if convert_to_expression:
        pieces.append(f'"{scan}"')
        pieces.append(";}")
    else:
        pieces.append(scan)
    return "".join(pieces)
358 | |
359 | |
def needs_parsing(snippet: Any) -> bool:
    """Tell whether ``snippet`` is a string containing ``$(`` or ``${``."""
    if not isinstance(snippet, str):
        return False
    return any(opener in snippet for opener in ("$(", "${"))
362 | |
363 | |
def do_eval(
    ex: Optional[CWLOutputType],
    jobinput: CWLObjectType,
    requirements: List[CWLObjectType],
    outdir: Optional[str],
    tmpdir: Optional[str],
    resources: Dict[str, Union[float, int, str]],
    context: Optional[CWLOutputType] = None,
    timeout: float = default_timeout,
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
    strip_whitespace: bool = True,
    cwlVersion: str = "",
) -> Optional[CWLOutputType]:
    """
    Evaluate ``ex`` if it is a string containing an expression.

    The expression sees three root variables: ``inputs`` (``jobinput``),
    ``self`` (``context``) and ``runtime`` (a copy of ``resources`` extended
    with ``tmpdir`` and ``outdir``). Full JavaScript evaluation is enabled
    when ``requirements`` contains an ``InlineJavascriptRequirement``; the
    last such entry wins and supplies the ``expressionLib``.

    Anything that is not a string needing parsing is returned unchanged.

    :raises WorkflowException: wrapping any exception raised during
        interpolation, after logging it.
    """
    runtime = cast(MutableMapping[str, Union[int, str, None]], copy.deepcopy(resources))
    runtime["tmpdir"] = None if not tmpdir else docker_windows_path_adjust(tmpdir)
    runtime["outdir"] = None if not outdir else docker_windows_path_adjust(outdir)

    # Built before the early return below on purpose: the helper is applied
    # to the caller's objects regardless of whether ``ex`` gets evaluated.
    rootvars = cast(
        CWLObjectType,
        bytes2str_in_dicts({"inputs": jobinput, "self": context, "runtime": runtime}),
    )

    if not (isinstance(ex, str) and needs_parsing(ex)):
        return ex

    # Search from the end so the last matching requirement takes precedence.
    js_requirement = next(
        (
            requirement
            for requirement in reversed(requirements)
            if requirement["class"] == "InlineJavascriptRequirement"
        ),
        None,
    )
    if js_requirement is None:
        fullJS = False
        jslib = ""
    else:
        fullJS = True
        jslib = jshead(
            cast(List[str], js_requirement.get("expressionLib", [])), rootvars
        )

    # These CWL versions use the old backslash handling.
    old_escaping_versions = (
        "v1.0",
        "v1.1.0-dev1",
        "v1.1",
        "v1.2.0-dev1",
        "v1.2.0-dev2",
        "v1.2.0-dev3",
    )
    escaping_behavior = 1 if cwlVersion in old_escaping_versions else 2

    try:
        return interpolate(
            ex,
            rootvars,
            timeout=timeout,
            fullJS=fullJS,
            jslib=jslib,
            force_docker_pull=force_docker_pull,
            debug=debug,
            js_console=js_console,
            strip_whitespace=strip_whitespace,
            escaping_behavior=escaping_behavior,
        )
    except Exception as e:
        _logger.exception(e)
        raise WorkflowException("Expression evaluation error:\n%s" % str(e)) from e