Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/galaxy/tool_util/parser/xml.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 import json | |
2 import logging | |
3 import re | |
4 import uuid | |
5 from math import isinf | |
6 | |
7 import packaging.version | |
8 | |
9 from galaxy.tool_util.deps import requirements | |
10 from galaxy.tool_util.parser.util import ( | |
11 DEFAULT_DELTA, | |
12 DEFAULT_DELTA_FRAC | |
13 ) | |
14 from galaxy.util import ( | |
15 string_as_bool, | |
16 xml_text, | |
17 xml_to_string | |
18 ) | |
19 from .interface import ( | |
20 InputSource, | |
21 PageSource, | |
22 PagesSource, | |
23 TestCollectionDef, | |
24 TestCollectionOutputDef, | |
25 ToolSource, | |
26 ) | |
27 from .output_actions import ToolOutputActionGroup | |
28 from .output_collection_def import dataset_collector_descriptions_from_elem | |
29 from .output_objects import ( | |
30 ToolExpressionOutput, | |
31 ToolOutput, | |
32 ToolOutputCollection, | |
33 ToolOutputCollectionStructure | |
34 ) | |
35 from .stdio import ( | |
36 aggressive_error_checks, | |
37 error_on_exit_code, | |
38 StdioErrorLevel, | |
39 ToolStdioExitCode, | |
40 ToolStdioRegex, | |
41 ) | |
42 | |
43 | |
44 log = logging.getLogger(__name__) | |
45 | |
46 | |
class XmlToolSource(ToolSource):
    """ Responsible for parsing a tool from classic Galaxy representation.
    """

    def __init__(self, xml_tree, source_path=None, macro_paths=None):
        self.xml_tree = xml_tree
        self.root = xml_tree.getroot()
        self._source_path = source_path
        self._macro_paths = macro_paths or []
        # Profile "16.01" is the pre-16.04 default and switches on several
        # legacy behaviors (see parse_profile, parse_interpreter, parse_stdio).
        self.legacy_defaults = self.parse_profile() == "16.01"

    def to_string(self):
        """Serialize the tool's XML tree back into a string."""
        return xml_to_string(self.root)

    def parse_version(self):
        """Return the tool's version attribute, or None if unset."""
        return self.root.get("version", None)

    def parse_id(self):
        """Return the tool's id attribute."""
        return self.root.get("id")

    def parse_tool_module(self):
        """Return (module, class) from a <type> element, or None if absent."""
        root = self.root
        if root.find("type") is not None:
            type_elem = root.find("type")
            module = type_elem.get('module', 'galaxy.tools')
            cls = type_elem.get('class')
            return module, cls

        return None

    def parse_action_module(self):
        """Return (module, class) from an <action> element, or None if absent."""
        root = self.root
        action_elem = root.find("action")
        if action_elem is not None:
            module = action_elem.get('module')
            cls = action_elem.get('class')
            return module, cls
        else:
            return None

    def parse_tool_type(self):
        """Return the tool_type attribute if present (implicitly None otherwise)."""
        root = self.root
        if root.get('tool_type', None) is not None:
            return root.get('tool_type')

    def parse_name(self):
        """Return the tool's name attribute."""
        return self.root.get("name")

    def parse_edam_operations(self):
        """Return the list of EDAM operation term texts declared by the tool."""
        edam_ops = self.root.find("edam_operations")
        if edam_ops is None:
            return []
        return [edam_op.text for edam_op in edam_ops.findall("edam_operation")]

    def parse_edam_topics(self):
        """Return the list of EDAM topic term texts declared by the tool."""
        edam_topics = self.root.find("edam_topics")
        if edam_topics is None:
            return []
        return [edam_topic.text for edam_topic in edam_topics.findall("edam_topic")]

    def parse_xrefs(self):
        """Return cross-references as dicts with 'value' and 'reftype' keys.

        <xref> elements lacking a type attribute are skipped.
        """
        xrefs = self.root.find("xrefs")
        if xrefs is None:
            return []
        return [dict(value=xref.text.strip(), reftype=xref.attrib['type']) for xref in xrefs.findall("xref") if xref.get("type")]

    def parse_description(self):
        """Return the text of the <description> element."""
        return xml_text(self.root, "description")

    def parse_is_multi_byte(self):
        return self._get_attribute_as_bool("is_multi_byte", self.default_is_multi_byte)

    def parse_display_interface(self, default):
        return self._get_attribute_as_bool("display_interface", default)

    def parse_require_login(self, default):
        return self._get_attribute_as_bool("require_login", default)

    def parse_request_param_translation_elem(self):
        return self.root.find("request_param_translation")

    def parse_command(self):
        """Return the text of the <command> element, or None if absent/empty."""
        command_el = self._command_el
        return ((command_el is not None) and command_el.text) or None

    def parse_expression(self):
        """Return the text of the <expression> element, or None if absent.

        Only the "ecma5.1" expression type is supported; any other declared
        type raises.
        """
        expression_el = self.root.find("expression")
        if expression_el is not None:
            expression_type = expression_el.get("type")
            if expression_type != "ecma5.1":
                raise Exception("Unknown expression type [%s] encountered" % expression_type)
            return expression_el.text
        return None

    def parse_environment_variables(self):
        """Return a list of environment variable definition dicts.

        Each definition has keys: name, template, inject, strip.
        template and inject are mutually exclusive.
        """
        environment_variables_el = self.root.find("environment_variables")
        if environment_variables_el is None:
            return []

        environment_variables = []
        for environment_variable_el in environment_variables_el.findall("environment_variable"):
            template = environment_variable_el.text
            inject = environment_variable_el.get("inject")
            if inject:
                assert not template, "Cannot specify inject and environment variable template."
                # "api_key" is the only supported inject target here.
                assert inject in ["api_key"]
            if template:
                assert not inject, "Cannot specify inject and environment variable template."
            definition = {
                "name": environment_variable_el.get("name"),
                "template": template,
                "inject": inject,
                "strip": string_as_bool(environment_variable_el.get("strip", False)),
            }
            environment_variables.append(
                definition
            )
        return environment_variables

    def parse_home_target(self):
        """Return the HOME-directory strategy for jobs.

        Defaults to "job_home" on profile >= 18.01, else "shared_home"; a
        legacy use_shared_home attribute on <command> overrides either way.
        """
        target = "job_home" if self.parse_profile() >= "18.01" else "shared_home"
        command_el = self._command_el
        command_legacy = (command_el is not None) and command_el.get("use_shared_home", None)
        if command_legacy is not None:
            target = "shared_home" if string_as_bool(command_legacy) else "job_home"
        return target

    def parse_tmp_target(self):
        # Default to not touching TMPDIR et. al. but if job_tmp is set
        # in job_conf then do. This is a very conservative approach that shouldn't
        # break or modify any configurations by default.
        return "job_tmp_if_explicit"

    def parse_interpreter(self):
        """Return the deprecated interpreter attribute of <command>.

        Only honored for legacy (16.01) profiles; otherwise a warning is
        logged and None is returned.
        """
        interpreter = None
        command_el = self._command_el
        if command_el is not None:
            interpreter = command_el.get("interpreter", None)
        if interpreter and not self.legacy_defaults:
            log.warning("Deprecated interpreter attribute on command element is now ignored.")
            interpreter = None
        return interpreter

    def parse_version_command(self):
        """Return the text of the <version_command> element, or None."""
        version_cmd = self.root.find("version_command")
        if version_cmd is not None:
            return version_cmd.text
        else:
            return None

    def parse_version_command_interpreter(self):
        """Return the interpreter attribute of <version_command>, or None."""
        if self.parse_version_command() is not None:
            version_cmd = self.root.find("version_command")
            version_cmd_interpreter = version_cmd.get("interpreter", None)
            if version_cmd_interpreter:
                return version_cmd_interpreter
        return None

    def parse_parallelism(self):
        """Return a ParallelismInfo for a <parallelism> element with a method, or None."""
        parallelism = self.root.find("parallelism")
        parallelism_info = None
        if parallelism is not None and parallelism.get("method"):
            return ParallelismInfo(parallelism)
        return parallelism_info

    def parse_interactivetool(self):
        """Return entry point definitions for InteractiveTools.

        Each entry is a dict with port, url, name and requires_domain; a
        missing <port> child is an error.
        """
        interactivetool_el = self.root.find("entry_points")
        rtt = []
        if interactivetool_el is None:
            return rtt
        for ep_el in interactivetool_el.findall("entry_point"):
            port = ep_el.find("port")
            assert port is not None, ValueError('A port is required for InteractiveTools')
            port = port.text.strip()
            url = ep_el.find("url")
            if url is not None:
                url = url.text.strip()
            name = ep_el.get('name', None)
            if name:
                name = name.strip()
            requires_domain = string_as_bool(ep_el.attrib.get("requires_domain", False))
            rtt.append(dict(port=port, url=url, name=name, requires_domain=requires_domain))
        return rtt

    def parse_hidden(self):
        """Return the boolean value of the <hidden> element (falsy if absent)."""
        hidden = xml_text(self.root, "hidden")
        if hidden:
            hidden = string_as_bool(hidden)
        return hidden

    def parse_redirect_url_params_elem(self):
        return self.root.find("redirect_url_params")

    def parse_sanitize(self):
        return self._get_option_value("sanitize", True)

    def parse_refresh(self):
        return self._get_option_value("refresh", False)

    def _get_option_value(self, key, default):
        """Return the boolean value of an <options> attribute, or default."""
        root = self.root
        for option_elem in root.findall("options"):
            if key in option_elem.attrib:
                return string_as_bool(option_elem.get(key))
        return default

    @property
    def _command_el(self):
        # The tool's <command> element (or None).
        return self.root.find("command")

    def _get_attribute_as_bool(self, attribute, default, elem=None):
        """Return a root (or elem) attribute coerced to bool, or default."""
        if elem is None:
            elem = self.root
        return string_as_bool(elem.get(attribute, default))

    def parse_requirements_and_containers(self):
        return requirements.parse_requirements_from_xml(self.root)

    def parse_input_pages(self):
        return XmlPagesSource(self.root)

    def parse_provided_metadata_style(self):
        """Return the provided-metadata style: "legacy" or "default".

        Explicit <outputs provided_metadata_style="..."> wins; otherwise
        profiles before 17.09 default to "legacy".
        """
        style = None
        out_elem = self.root.find("outputs")
        if out_elem is not None and "provided_metadata_style" in out_elem.attrib:
            style = out_elem.attrib["provided_metadata_style"]

        if style is None:
            style = "legacy" if self.parse_profile() < "17.09" else "default"

        assert style in ["legacy", "default"]
        return style

    def parse_provided_metadata_file(self):
        """Return the provided-metadata filename (default "galaxy.json")."""
        provided_metadata_file = "galaxy.json"
        out_elem = self.root.find("outputs")
        if out_elem is not None and "provided_metadata_file" in out_elem.attrib:
            provided_metadata_file = out_elem.attrib["provided_metadata_file"]

        return provided_metadata_file

    def parse_outputs(self, tool):
        """Parse the <outputs> section.

        Returns (outputs, output_collections); outputs also contains an
        entry per collection, with collection members removed from the
        top-level data dict once claimed by their collection.
        """
        out_elem = self.root.find("outputs")
        outputs = {}
        output_collections = {}
        if out_elem is None:
            return outputs, output_collections

        data_dict = {}

        def _parse(data_elem, **kwds):
            # Parse a <data> output and register it by name.
            output_def = self._parse_output(data_elem, tool, **kwds)
            data_dict[output_def.name] = output_def
            return output_def

        for _ in out_elem.findall("data"):
            _parse(_)

        def _parse_expression(output_elem, **kwds):
            # Parse an expression-typed <output> and register it by name.
            output_def = self._parse_expression_output(output_elem, tool, **kwds)
            output_def.filters = output_elem.findall('filter')
            data_dict[output_def.name] = output_def
            return output_def

        def _parse_collection(collection_elem):
            # Parse a <collection> output, including its nested <data> members.
            name = collection_elem.get("name")
            label = xml_text(collection_elem, "label")
            default_format = collection_elem.get("format", "data")
            collection_type = collection_elem.get("type", None)
            collection_type_source = collection_elem.get("type_source", None)
            collection_type_from_rules = collection_elem.get("type_from_rules", None)
            structured_like = collection_elem.get("structured_like", None)
            inherit_format = False
            inherit_metadata = False
            if structured_like:
                inherit_format = string_as_bool(collection_elem.get("inherit_format", None))
                inherit_metadata = string_as_bool(collection_elem.get("inherit_metadata", None))
            default_format_source = collection_elem.get("format_source", None)
            default_metadata_source = collection_elem.get("metadata_source", "")
            filters = collection_elem.findall('filter')

            dataset_collector_descriptions = None
            if collection_elem.find("discover_datasets") is not None:
                dataset_collector_descriptions = dataset_collector_descriptions_from_elem(collection_elem, legacy=False)
            structure = ToolOutputCollectionStructure(
                collection_type=collection_type,
                collection_type_source=collection_type_source,
                collection_type_from_rules=collection_type_from_rules,
                structured_like=structured_like,
                dataset_collector_descriptions=dataset_collector_descriptions,
            )
            output_collection = ToolOutputCollection(
                name,
                structure,
                label=label,
                filters=filters,
                default_format=default_format,
                inherit_format=inherit_format,
                inherit_metadata=inherit_metadata,
                default_format_source=default_format_source,
                default_metadata_source=default_metadata_source,
            )
            outputs[output_collection.name] = output_collection

            for data_elem in collection_elem.findall("data"):
                _parse(
                    data_elem,
                    default_format=default_format,
                    default_format_source=default_format_source,
                    default_metadata_source=default_metadata_source,
                )

            # Claim each member for the collection and remove it from the
            # flat data_dict so it is not also emitted as a standalone output.
            for data_elem in collection_elem.findall("data"):
                output_name = data_elem.get("name")
                data = data_dict[output_name]
                assert data
                del data_dict[output_name]
                output_collection.outputs[output_name] = data
            output_collections[name] = output_collection

        for out_child in out_elem:
            if out_child.tag == "data":
                _parse(out_child)
            elif out_child.tag == "collection":
                _parse_collection(out_child)
            elif out_child.tag == "output":
                output_type = out_child.get("type")
                if output_type == "data":
                    _parse(out_child)
                elif output_type == "collection":
                    # Normalize generic <output type="collection"> attributes
                    # to the names _parse_collection expects.
                    out_child.attrib["type"] = out_child.get("collection_type")
                    out_child.attrib["type_source"] = out_child.get("collection_type_source")
                    _parse_collection(out_child)
                else:
                    _parse_expression(out_child)
            else:
                log.warning("Unknown output tag encountered [%s]" % out_child.tag)

        for output_def in data_dict.values():
            outputs[output_def.name] = output_def
        return outputs, output_collections

    def _parse_output(
        self,
        data_elem,
        tool,
        default_format="data",
        default_format_source=None,
        default_metadata_source="",
        expression_type=None,
    ):
        """Build a ToolOutput from a <data> element.

        format and auto_format are mutually exclusive; auto_format maps the
        format to the "_sniff_" sentinel.
        """
        from_expression = data_elem.get("from")
        output = ToolOutput(data_elem.get("name"), from_expression=from_expression)
        output_format = data_elem.get("format", default_format)
        auto_format = string_as_bool(data_elem.get("auto_format", "false"))
        if auto_format and output_format != "data":
            raise ValueError("Setting format and auto_format is not supported at this time.")
        elif auto_format:
            output_format = "_sniff_"
        output.format = output_format
        output.change_format = data_elem.findall("change_format")
        output.format_source = data_elem.get("format_source", default_format_source)
        output.default_identifier_source = data_elem.get("default_identifier_source", 'None')
        output.metadata_source = data_elem.get("metadata_source", default_metadata_source)
        output.parent = data_elem.get("parent", None)
        output.label = xml_text(data_elem, "label")
        output.count = int(data_elem.get("count", 1))
        output.filters = data_elem.findall('filter')
        output.tool = tool
        output.from_work_dir = data_elem.get("from_work_dir", None)
        output.hidden = string_as_bool(data_elem.get("hidden", ""))
        output.actions = ToolOutputActionGroup(output, data_elem.find('actions'))
        output.dataset_collector_descriptions = dataset_collector_descriptions_from_elem(data_elem, legacy=self.legacy_defaults)
        return output

    def _parse_expression_output(self, output_elem, tool, **kwds):
        """Build a ToolExpressionOutput from an expression-typed <output> element."""
        output_type = output_elem.get("type")
        from_expression = output_elem.get("from")
        output = ToolExpressionOutput(
            output_elem.get("name"),
            output_type,
            from_expression,
        )
        output.path = output_elem.get("value")
        output.label = xml_text(output_elem, "label")

        output.hidden = string_as_bool(output_elem.get("hidden", ""))
        output.actions = ToolOutputActionGroup(output, output_elem.find('actions'))
        output.dataset_collector_descriptions = []
        return output

    def parse_stdio(self):
        """
        parse error handling from command and stdio tag

        returns list of exit codes, list of regexes

        - exit_codes contain all non-zero exit codes (:-1 and 1:) if
          detect_errors is default (if not legacy), exit_code, or aggressive
        - the oom_exit_code if given and detect_errors is exit_code
        - exit codes and regexes from the stdio tag
          these are prepended to the list, i.e. are evaluated prior to regexes
          and exit codes derived from the properties of the command tag.
          thus more specific regexes of the same or more severe error level
          are triggered first.

        """

        command_el = self._command_el
        detect_errors = None
        if command_el is not None:
            detect_errors = command_el.get("detect_errors")

        if detect_errors and detect_errors != "default":
            if detect_errors == "exit_code":
                oom_exit_code = None
                if command_el is not None:
                    oom_exit_code = command_el.get("oom_exit_code", None)
                if oom_exit_code is not None:
                    # Validation only: raises ValueError for non-integer values.
                    int(oom_exit_code)
                exit_codes, regexes = error_on_exit_code(out_of_memory_exit_code=oom_exit_code)
            elif detect_errors == "aggressive":
                exit_codes, regexes = aggressive_error_checks()
            else:
                raise ValueError("Unknown detect_errors value encountered [%s]" % detect_errors)
        elif len(self.root.findall('stdio')) == 0 and not self.legacy_defaults:
            exit_codes, regexes = error_on_exit_code()
        else:
            exit_codes = []
            regexes = []

        if len(self.root.findall('stdio')) > 0:
            # stdio-declared checks take precedence over derived ones.
            parser = StdioParser(self.root)
            exit_codes = parser.stdio_exit_codes + exit_codes
            regexes = parser.stdio_regexes + regexes

        return exit_codes, regexes

    def parse_strict_shell(self):
        """Return whether the command runs under a strict shell (set -e).

        Defaults to False before profile 20.09 and True from 20.09 on; the
        strict attribute on <command> overrides the default.
        """
        command_el = self._command_el
        if packaging.version.parse(self.parse_profile()) < packaging.version.parse('20.09'):
            default = "False"
        else:
            default = "True"
        if command_el is not None:
            return string_as_bool(command_el.get("strict", default))
        else:
            return string_as_bool(default)

    def parse_help(self):
        """Return the text of the <help> element, or None if absent."""
        help_elem = self.root.find('help')
        return help_elem.text if help_elem is not None else None

    @property
    def macro_paths(self):
        return self._macro_paths

    @property
    def source_path(self):
        return self._source_path

    def parse_tests_to_dict(self):
        """Return the tool's tests as {"tests": [test_dict, ...]}."""
        tests_elem = self.root.find("tests")
        tests = []
        rval = dict(
            tests=tests
        )

        if tests_elem is not None:
            for i, test_elem in enumerate(tests_elem.findall("test")):
                profile = self.parse_profile()
                tests.append(_test_elem_to_dict(test_elem, i, profile))

        return rval

    def parse_profile(self):
        # Pre-16.04 or default XML defaults
        # - Use standard error for error detection.
        # - Don't run shells with -e
        # - Auto-check for implicit multiple outputs.
        # - Auto-check for $param_file.
        # - Enable buggy interpreter attribute.
        return self.root.get("profile", "16.01")

    def parse_license(self):
        return self.root.get("license")

    def parse_python_template_version(self):
        """Return the declared python_template_version as a parsed Version, or None."""
        python_template_version = self.root.get("python_template_version", None)
        if python_template_version is not None:
            python_template_version = packaging.version.parse(python_template_version)
        return python_template_version

    def parse_creator(self):
        """Return schema.org-style creator dicts from the <creator> element.

        <person> maps to class "Person", <organization> to "Organization";
        other child tags are ignored. Returns None if no <creator> element.
        """
        creators_el = self.root.find("creator")
        if creators_el is None:
            return None

        creators = []
        for creator_el in creators_el:
            creator_as_dict = {}
            if creator_el.tag == "person":
                clazz = "Person"
            elif creator_el.tag == "organization":
                clazz = "Organization"
            else:
                continue
            creator_as_dict["class"] = clazz
            creator_as_dict.update(creator_el.attrib)
            creators.append(creator_as_dict)
        return creators
560 | |
561 | |
562 def _test_elem_to_dict(test_elem, i, profile=None): | |
563 rval = dict( | |
564 outputs=__parse_output_elems(test_elem), | |
565 output_collections=__parse_output_collection_elems(test_elem, profile=profile), | |
566 inputs=__parse_input_elems(test_elem, i), | |
567 expect_num_outputs=test_elem.get("expect_num_outputs"), | |
568 command=__parse_assert_list_from_elem(test_elem.find("assert_command")), | |
569 command_version=__parse_assert_list_from_elem(test_elem.find("assert_command_version")), | |
570 stdout=__parse_assert_list_from_elem(test_elem.find("assert_stdout")), | |
571 stderr=__parse_assert_list_from_elem(test_elem.find("assert_stderr")), | |
572 expect_exit_code=test_elem.get("expect_exit_code"), | |
573 expect_failure=string_as_bool(test_elem.get("expect_failure", False)), | |
574 maxseconds=test_elem.get("maxseconds", None), | |
575 ) | |
576 _copy_to_dict_if_present(test_elem, rval, ["num_outputs"]) | |
577 return rval | |
578 | |
579 | |
def __parse_input_elems(test_elem, i):
    """Collect raw test inputs after flattening nested structures in-place."""
    # Rewrite nested repeat/conditional/section params to flat, prefixed names.
    __expand_input_elems(test_elem)
    parsed_inputs = __parse_inputs_elems(test_elem, i)
    return parsed_inputs
583 | |
584 | |
def __parse_output_elems(test_elem):
    """Parse every <output> child of a test element into a list of dicts."""
    parsed = (__parse_output_elem(elem) for elem in test_elem.findall("output"))
    return [
        {"name": name, "value": file, "attributes": attributes}
        for name, file, attributes in parsed
    ]
591 | |
592 | |
def __parse_output_elem(output_elem):
    """Parse one test <output> element; returns (name, file, attributes)."""
    remaining = dict(output_elem.attrib)
    output_name = remaining.pop('name', None)
    if output_name is None:
        raise Exception("Test output does not have a 'name'")

    target_file, attributes = __parse_test_attributes(
        output_elem, remaining, parse_discovered_datasets=True
    )
    return output_name, target_file, attributes
601 | |
602 | |
def __parse_command_elem(test_elem):
    """Parse assertions attached to the test's <command> element, if any."""
    return __parse_assert_list_from_elem(test_elem.find("command"))
606 | |
607 | |
def __parse_output_collection_elems(test_elem, profile=None):
    """Parse every <output_collection> child of a test element."""
    return [
        __parse_output_collection_elem(collection_elem, profile=profile)
        for collection_elem in test_elem.findall("output_collection")
    ]
614 | |
615 | |
def __parse_output_collection_elem(output_collection_elem, profile=None):
    """Build a TestCollectionOutputDef dict from an <output_collection> element."""
    remaining = dict(output_collection_elem.attrib)
    collection_name = remaining.pop('name', None)
    if collection_name is None:
        raise Exception("Test output collection does not have a 'name'")
    element_tests = __parse_element_tests(output_collection_elem, profile=profile)
    definition = TestCollectionOutputDef(collection_name, remaining, element_tests)
    return definition.to_dict()
623 | |
624 | |
def __parse_element_tests(parent_element, profile=None):
    """Parse <element> test descriptions under parent_element.

    Returns a dict mapping element identifier to the (file, attributes)
    tuple produced by __parse_test_attributes.
    """
    element_tests = {}
    for idx, element in enumerate(parent_element.findall("element")):
        element_attrib = dict(element.attrib)
        identifier = element_attrib.pop('name', None)
        if identifier is None:
            raise Exception("Test primary dataset does not have a 'identifier'")
        element_tests[identifier] = __parse_test_attributes(element, element_attrib, parse_elements=True, profile=profile)
        # From profile 20.09 on, element order is asserted too: record the
        # expected position in the element's attributes dict (tuple slot 1).
        # NOTE(review): lexicographic string comparison of profile versions —
        # consistent with the rest of this module.
        if profile and profile >= "20.09":
            element_tests[identifier][1]["expected_sort_order"] = idx

    return element_tests
637 | |
638 | |
def __parse_test_attributes(output_elem, attrib, parse_elements=False, parse_discovered_datasets=False, profile=None):
    """Extract comparison attributes for a test output element.

    Mutates attrib (pops consumed keys) and returns (file, attributes).
    Raises if the element declares nothing at all to check.
    """
    assert_list = __parse_assert_list(output_elem)

    # Allow either file or value to specify a target file to compare result with
    # file was traditionally used by outputs and value by extra files.
    file = attrib.pop('file', attrib.pop('value', None))

    # File no longer required if an list of assertions was present.
    attributes = {}

    if 'value_json' in attrib:
        attributes['object'] = json.loads(attrib.pop('value_json'))

    # Method of comparison
    attributes['compare'] = attrib.pop('compare', 'diff').lower()
    # Number of lines to allow to vary in logs (for dates, etc)
    attributes['lines_diff'] = int(attrib.pop('lines_diff', '0'))
    # Allow a file size to vary if sim_size compare
    attributes['delta'] = int(attrib.pop('delta', DEFAULT_DELTA))
    attributes['delta_frac'] = float(attrib['delta_frac']) if 'delta_frac' in attrib else DEFAULT_DELTA_FRAC
    attributes['sort'] = string_as_bool(attrib.pop('sort', False))
    attributes['decompress'] = string_as_bool(attrib.pop('decompress', False))
    extra_files = []
    # NOTE(review): ftype/md5/checksum use get (not pop), so they remain in
    # attrib after parsing — presumably intentional for downstream consumers.
    if 'ftype' in attrib:
        attributes['ftype'] = attrib['ftype']
    for extra in output_elem.findall('extra_files'):
        extra_files.append(__parse_extra_files_elem(extra))
    metadata = {}
    for metadata_elem in output_elem.findall('metadata'):
        metadata[metadata_elem.get('name')] = metadata_elem.get('value')
    md5sum = attrib.get("md5", None)
    checksum = attrib.get("checksum", None)
    element_tests = {}
    if parse_elements:
        element_tests = __parse_element_tests(output_elem, profile=profile)

    primary_datasets = {}
    if parse_discovered_datasets:
        for primary_elem in (output_elem.findall("discovered_dataset") or []):
            primary_attrib = dict(primary_elem.attrib)
            designation = primary_attrib.pop('designation', None)
            if designation is None:
                raise Exception("Test primary dataset does not have a 'designation'")
            primary_datasets[designation] = __parse_test_attributes(primary_elem, primary_attrib)

    # At least one concrete check must have been declared for this output.
    has_checksum = md5sum or checksum
    has_nested_tests = extra_files or element_tests or primary_datasets
    has_object = 'object' in attributes
    if not (assert_list or file or metadata or has_checksum or has_nested_tests or has_object):
        raise Exception("Test output defines nothing to check (e.g. must have a 'file' check against, assertions to check, metadata or checksum tests, etc...)")
    attributes['assert_list'] = assert_list
    attributes['extra_files'] = extra_files
    attributes['metadata'] = metadata
    attributes['md5'] = md5sum
    attributes['checksum'] = checksum
    attributes['elements'] = element_tests
    attributes['primary_datasets'] = primary_datasets
    return file, attributes
697 | |
698 | |
def __parse_assert_list(output_elem):
    """Parse <assert_contents> assertions for an output element, if present."""
    return __parse_assert_list_from_elem(output_elem.find("assert_contents"))
702 | |
703 | |
def __parse_assert_list_from_elem(assert_elem):
    """Convert an assertion container element to a list of assertion dicts.

    Returns None when no element is supplied, distinguishing "no assertions
    declared" from an empty assertion list.
    """
    if assert_elem is None:
        return None

    def as_dict(node):
        # Recursively convert an XML element to the dictionary format used
        # by the assertion checking code.
        return {
            "tag": node.tag,
            "attributes": dict(node.attrib),
            "children": [as_dict(child) for child in node],
        }

    return [as_dict(child) for child in assert_elem]
721 | |
722 | |
def __parse_extra_files_elem(extra):
    """Parse an <extra_files> element describing a file or directory check."""
    remaining = dict(extra.attrib)
    # File or directory, when directory, compare basename
    # by basename
    extra_kind = remaining.pop('type', 'file')
    extra_name = remaining.pop('name', None)
    # Only directory entries may omit an explicit name.
    assert extra_kind == 'directory' or extra_name is not None, \
        'extra_files type (%s) requires a name attribute' % extra_kind
    extra_value, extra_attributes = __parse_test_attributes(extra, remaining)
    return {
        "value": extra_value,
        "name": extra_name,
        "type": extra_kind,
        "attributes": extra_attributes
    }
738 | |
739 | |
def __expand_input_elems(root_elem, prefix=""):
    """Flatten nested test input structure in-place.

    Recursively walks repeat/conditional/section children, rewriting each
    nested <param> name to a "|"-joined flat path (repeats additionally get
    a numeric index suffix) and hoisting the params up to root_elem.
    """
    __append_prefix_to_params(root_elem, prefix)

    repeat_elems = root_elem.findall('repeat')
    # Track a separate running index per repeat name so repeated blocks get
    # _0, _1, ... suffixes in document order.
    indices = {}
    for repeat_elem in repeat_elems:
        name = repeat_elem.get("name")
        if name not in indices:
            indices[name] = 0
            index = 0
        else:
            index = indices[name] + 1
            indices[name] = index

        new_prefix = __prefix_join(prefix, name, index=index)
        __expand_input_elems(repeat_elem, new_prefix)
        __pull_up_params(root_elem, repeat_elem)

    cond_elems = root_elem.findall('conditional')
    for cond_elem in cond_elems:
        new_prefix = __prefix_join(prefix, cond_elem.get("name"))
        __expand_input_elems(cond_elem, new_prefix)
        __pull_up_params(root_elem, cond_elem)

    section_elems = root_elem.findall('section')
    for section_elem in section_elems:
        new_prefix = __prefix_join(prefix, section_elem.get("name"))
        __expand_input_elems(section_elem, new_prefix)
        __pull_up_params(root_elem, section_elem)
769 | |
770 | |
def __append_prefix_to_params(elem, prefix):
    """Prefix the name of every direct <param> child with the given flat path."""
    for child in elem.findall('param'):
        prefixed_name = __prefix_join(prefix, child.get("name"))
        child.set("name", prefixed_name)
774 | |
775 | |
def __pull_up_params(parent_elem, child_elem):
    """Append each <param> child of child_elem to parent_elem as well."""
    params = child_elem.findall('param')
    for param in params:
        parent_elem.append(param)
779 | |
780 | |
def __prefix_join(prefix, name, index=None):
    """Join a flattened input path: optional prefix, name, optional index suffix."""
    if index is not None:
        name = "%s_%d" % (name, index)
    if not prefix:
        return name
    return f"{prefix}|{name}"
784 | |
785 | |
def _copy_to_dict_if_present(elem, rval, attributes):
    """Copy each listed attribute from elem into rval when the element defines it."""
    present = (attr for attr in attributes if attr in elem.attrib)
    for attribute in present:
        rval[attribute] = elem.get(attribute)
    return rval
791 | |
792 | |
def __parse_inputs_elems(test_elem, i):
    """Parse every direct <param> child of a test element into raw input dicts."""
    return [__parse_param_elem(param_elem, i) for param_elem in test_elem.findall("param")]
799 | |
800 | |
def __parse_param_elem(param_elem, i=0):
    """Parse one test <param> element into a {name, value, attributes} dict.

    The value comes from values (comma-split list), value, or value_json
    (JSON-decoded), in that order of precedence. Child elements contribute
    metadata, composite_data, edit_attributes and collection entries to the
    attributes dict; i disambiguates composite-dataset renames per test.
    """
    attrib = dict(param_elem.attrib)
    if 'values' in attrib:
        value = attrib['values'].split(',')
    elif 'value' in attrib:
        value = attrib['value']
    elif 'value_json' in attrib:
        value = json.loads(attrib['value_json'])
    else:
        value = None

    children_elem = param_elem
    # NOTE(review): children_elem is param_elem itself, so this guard is
    # always True — vestigial, kept for byte-compatibility.
    if children_elem is not None:
        # At this time, we can assume having children only
        # occurs on DataToolParameter test items but this could
        # change and would cause the below parsing to change
        # based upon differences in children items
        attrib['metadata'] = {}
        attrib['composite_data'] = []
        attrib['edit_attributes'] = []
        # Composite datasets need to be renamed uniquely
        composite_data_name = None
        for child in children_elem:
            if child.tag == 'composite_data':
                file_name = child.get("value")
                attrib['composite_data'].append(file_name)
                if composite_data_name is None:
                    # Generate a unique name; each test uses a
                    # fresh history.
                    composite_data_name = '_COMPOSITE_RENAMED_t%d_%s' \
                        % (i, uuid.uuid1().hex)
            elif child.tag == 'metadata':
                attrib['metadata'][child.get("name")] = child.get("value")
            elif child.tag == 'edit_attributes':
                attrib['edit_attributes'].append(child)
            elif child.tag == 'collection':
                attrib['collection'] = TestCollectionDef.from_xml(child, __parse_param_elem)
        if composite_data_name:
            # Composite datasets need implicit renaming;
            # inserted at front of list so explicit declarations
            # take precedence
            attrib['edit_attributes'].insert(0, {'type': 'name', 'value': composite_data_name})
    name = attrib.pop('name')
    return {
        "name": name,
        "value": value,
        "attributes": attrib
    }
849 | |
850 | |
class StdioParser:
    """Parse a tool's <stdio> element(s) into exit-code and regex checks.

    Builds two lists consumed by the tool error-detection machinery:

    * ``stdio_exit_codes`` - ToolStdioExitCode instances describing which
      exit-code ranges indicate an error (and at what severity).
    * ``stdio_regexes`` - ToolStdioRegex instances describing patterns to
      scan for in the tool's stdout and/or stderr.
    """

    def __init__(self, root):
        try:
            self.stdio_exit_codes = list()
            self.stdio_regexes = list()

            # We should have a single <stdio> element, but handle the case for
            # multiples.
            # For every stdio element, add all of the exit_code and regex
            # subelements that we find:
            for stdio_elem in root.findall('stdio'):
                self.parse_stdio_exit_codes(stdio_elem)
                self.parse_stdio_regexes(stdio_elem)
        except Exception:
            log.exception("Exception in parse_stdio!")

    def parse_stdio_exit_codes(self, stdio_elem):
        """
        Parse the tool's <stdio> element's <exit_code> subelements.
        This will add all of those elements, if any, to self.stdio_exit_codes.
        """
        try:
            # Look for all <exit_code> elements. Each exit_code element must
            # have a range/value.
            # Exit-code ranges have precedence over a single exit code.
            # So if there are value and range attributes, we use the range
            # attribute. If there is neither a range nor a value, then print
            # a warning and skip to the next.
            for exit_code_elem in stdio_elem.findall("exit_code"):
                exit_code = ToolStdioExitCode()
                # Each exit code has an optional description that can be
                # part of the "desc" or "description" attributes:
                exit_code.desc = exit_code_elem.get("desc")
                if exit_code.desc is None:
                    exit_code.desc = exit_code_elem.get("description")
                # Parse the error level:
                exit_code.error_level = (
                    self.parse_error_level(exit_code_elem.get("level")))
                # BUGFIX: previously .get("range", "") was used, which can
                # never return None, so the "value" fallback below was dead
                # code and <exit_code value="..."/> was effectively ignored
                # (an empty string fell through to int('') and was skipped).
                code_range = exit_code_elem.get("range")
                if code_range is None:
                    code_range = exit_code_elem.get("value")
                if code_range is None:
                    log.warning("Tool stdio exit codes must have a range or value")
                    continue
                # Parse the range. We look for:
                # :Y
                # X:
                # X:Y   - Split on the colon. We do not allow a colon
                #         without a beginning or end, though we could.
                # Also note that whitespace is eliminated.
                # TODO: Turn this into a single match - it should be
                # more efficient.
                code_range = re.sub(r"\s", "", code_range)
                code_ranges = re.split(r":", code_range)
                if len(code_ranges) == 2:
                    # Open start/end default to -inf/+inf respectively.
                    if code_ranges[0] is None or '' == code_ranges[0]:
                        exit_code.range_start = float("-inf")
                    else:
                        exit_code.range_start = int(code_ranges[0])
                    if code_ranges[1] is None or '' == code_ranges[1]:
                        exit_code.range_end = float("inf")
                    else:
                        exit_code.range_end = int(code_ranges[1])
                # If we got more than one colon, then ignore the exit code.
                elif len(code_ranges) > 2:
                    log.warning("Invalid tool exit_code range %s - ignored"
                                % code_range)
                    continue
                # Else we have a singular value. If it's not an integer, then
                # we'll just write a log message and skip this exit_code.
                else:
                    try:
                        exit_code.range_start = int(code_range)
                    except Exception:
                        log.error(code_range)
                        log.warning("Invalid range start for tool's exit_code %s: exit_code ignored" % code_range)
                        continue
                    exit_code.range_end = exit_code.range_start
                # TODO: Check if we got ">", ">=", "<", or "<=":
                # Check that the range, regardless of how we got it,
                # isn't bogus. If we have two infinite values, then
                # the start must be -inf and the end must be +inf.
                # So at least warn about this situation:
                if isinf(exit_code.range_start) and isinf(exit_code.range_end):
                    log.warning("Tool exit_code range %s will match on all exit codes" % code_range)
                self.stdio_exit_codes.append(exit_code)
        except Exception:
            log.exception("Exception in parse_stdio_exit_codes!")

    def parse_stdio_regexes(self, stdio_elem):
        """
        Look in the tool's <stdio> elem for all <regex> subelements
        that define how to look for warnings and fatal errors in
        stdout and stderr. This will add all such regex elements
        to the Tool's stdio_regexes list.
        """
        try:
            # Look for every <regex> subelement. The regular expression
            # will have "match" and "source" (or "src") attributes.
            for regex_elem in stdio_elem.findall("regex"):
                regex = ToolStdioRegex()
                # Each regex has an optional description that can be
                # part of the "desc" or "description" attributes:
                regex.desc = regex_elem.get("desc")
                if regex.desc is None:
                    regex.desc = regex_elem.get("description")
                # Parse the error level
                regex.error_level = (
                    self.parse_error_level(regex_elem.get("level")))
                # BUGFIX: previously .get("match", "") was used, which can
                # never return None, so a <regex> with no match attribute got
                # an empty (match-everything) pattern instead of being skipped.
                regex.match = regex_elem.get("match")
                if regex.match is None:
                    # TODO: Convert the offending XML element to a string
                    log.warning("Ignoring tool's stdio regex element %s - "
                                "the 'match' attribute must exist")
                    continue
                # Parse the output sources. We look for the "src", "source",
                # and "sources" attributes, in that order. If there is no
                # such source, then the source defaults to stderr & stdout.
                # Look for a comma and then look for "err", "error", "out",
                # and "output":
                output_srcs = regex_elem.get("src")
                if output_srcs is None:
                    output_srcs = regex_elem.get("source")
                if output_srcs is None:
                    output_srcs = regex_elem.get("sources")
                if output_srcs is None:
                    output_srcs = "output,error"
                output_srcs = re.sub(r"\s", "", output_srcs)
                src_list = re.split(r",", output_srcs)
                # Just put together anything to do with "out", including
                # "stdout", "output", etc. Repeat for "stderr", "error",
                # and anything to do with "err". If neither stdout nor
                # stderr were specified, then raise a warning and scan both.
                for src in src_list:
                    if re.search("both", src, re.IGNORECASE):
                        regex.stdout_match = True
                        regex.stderr_match = True
                    if re.search("out", src, re.IGNORECASE):
                        regex.stdout_match = True
                    if re.search("err", src, re.IGNORECASE):
                        regex.stderr_match = True
                if not regex.stdout_match and not regex.stderr_match:
                    # BUGFIX: StdioParser defines no "id" attribute, so the
                    # old direct self.id reference raised AttributeError here,
                    # which the enclosing except swallowed - aborting parsing
                    # of all remaining <regex> elements. getattr keeps the
                    # message while surviving the missing attribute.
                    log.warning("Tool id %s: unable to determine if tool "
                                "stream source scanning is output, error, "
                                "or both. Defaulting to use both." % getattr(self, "id", None))
                    regex.stdout_match = True
                    regex.stderr_match = True
                self.stdio_regexes.append(regex)
        except Exception:
            # BUGFIX: message previously said "parse_stdio_exit_codes"
            # (copy-paste error), mislabeling the failing method in logs.
            log.exception("Exception in parse_stdio_regexes!")

    # TODO: This method doesn't have to be part of the Tool class.
    def parse_error_level(self, err_level):
        """
        Parses error level and returns error level enumeration. If
        unparsable, returns 'fatal'
        """
        return_level = StdioErrorLevel.FATAL
        try:
            if err_level:
                if re.search("log", err_level, re.IGNORECASE):
                    return_level = StdioErrorLevel.LOG
                elif re.search("qc", err_level, re.IGNORECASE):
                    return_level = StdioErrorLevel.QC
                elif re.search("warning", err_level, re.IGNORECASE):
                    return_level = StdioErrorLevel.WARNING
                elif re.search("fatal_oom", err_level, re.IGNORECASE):
                    return_level = StdioErrorLevel.FATAL_OOM
                elif re.search("fatal", err_level, re.IGNORECASE):
                    return_level = StdioErrorLevel.FATAL
                else:
                    # Same getattr guard as above: no "id" attribute exists.
                    log.debug("Tool %s: error level %s did not match log/warning/fatal" %
                              (getattr(self, "id", None), err_level))
        except Exception:
            log.exception("Exception in parse_error_level")
        return return_level
1029 | |
1030 | |
class XmlPagesSource(PagesSource):
    """PagesSource backed by a tool's <inputs> element.

    Each explicit <page> child becomes a page; with no <page> children the
    whole <inputs> element acts as a single implicit page.
    """

    def __init__(self, root):
        self.input_elem = root.find("inputs")
        if self.input_elem is None:
            page_sources = []
        else:
            page_elems = self.input_elem.findall("page") or [self.input_elem]
            page_sources = [XmlPageSource(page_elem) for page_elem in page_elems]
        super().__init__(page_sources)

    @property
    def inputs_defined(self):
        # True when the tool XML actually declared an <inputs> element.
        return self.input_elem is not None
1045 | |
1046 | |
class XmlPageSource(PageSource):
    """PageSource wrapping one page-like XML element (<inputs>, <page>, <when>, ...)."""

    def __init__(self, parent_elem):
        self.parent_elem = parent_elem

    def parse_display(self):
        # Serialize the optional <display> child back to an XML string.
        display_elem = self.parent_elem.find("display")
        return None if display_elem is None else xml_to_string(display_elem)

    def parse_input_sources(self):
        # Every direct child element is treated as an input definition.
        return map(XmlInputSource, self.parent_elem)
1062 | |
1063 | |
class XmlInputSource(InputSource):
    """InputSource implementation backed by a single tool-input XML element."""

    def __init__(self, input_elem):
        self.input_elem = input_elem
        self.input_type = input_elem.tag

    def parse_input_type(self):
        # The element tag (param, repeat, conditional, section, ...) is the type.
        return self.input_type

    def elem(self):
        return self.input_elem

    def get(self, key, value=None):
        return self.input_elem.get(key, value)

    def get_bool(self, key, default):
        return string_as_bool(self.get(key, default))

    def parse_label(self):
        return xml_text(self.input_elem, "label")

    def parse_help(self):
        return xml_text(self.input_elem, "help")

    def parse_sanitizer_elem(self):
        return self.input_elem.find("sanitizer")

    def parse_validator_elems(self):
        return self.input_elem.findall("validator")

    def parse_dynamic_options_elem(self):
        """ Return a galaxy.tools.parameters.dynamic_options.DynamicOptions
        if appropriate.
        """
        return self.input_elem.find('options')

    def parse_static_options(self):
        static_options = []
        for option_elem in self.input_elem.findall("option"):
            option_value = option_elem.get("value")
            is_selected = string_as_bool(option_elem.get("selected", False))
            static_options.append((option_elem.text or option_value, option_value, is_selected))
        return static_options

    def parse_optional(self, default=None):
        """ Return boolean indicating whether parameter is optional. """
        if self.get('type') == "data_column":
            # Allow specifing force_select for backward compat., but probably
            # should use optional going forward for consistency with other
            # parameters.
            attributes = self.input_elem.attrib
            if "force_select" in attributes:
                force_select = string_as_bool(self.input_elem.get("force_select"))
            else:
                force_select = not string_as_bool(self.input_elem.get("optional", False))
            return not force_select

        if default is None:
            default = self.default_optional
        return self.get_bool("optional", default)

    def parse_conversion_tuples(self):
        # (commandline substitution name, target datatype extension) pairs.
        return [
            (conv_elem.get("name"), conv_elem.get("type"))
            for conv_elem in self.input_elem.findall("conversion")
        ]

    def parse_nested_inputs_source(self):
        return XmlPageSource(self.input_elem)

    def parse_test_input_source(self):
        test_elem = self.input_elem.find("param")
        assert test_elem is not None, "<conditional> must have a child <param>"
        return XmlInputSource(test_elem)

    def parse_when_input_sources(self):
        return [
            (when_elem.get("value"), XmlPageSource(when_elem))
            for when_elem in self.input_elem.findall("when")
        ]
1155 | |
1156 | |
1157 class ParallelismInfo: | |
1158 """ | |
1159 Stores the information (if any) for running multiple instances of the tool in parallel | |
1160 on the same set of inputs. | |
1161 """ | |
1162 | |
1163 def __init__(self, tag): | |
1164 self.method = tag.get('method') | |
1165 if isinstance(tag, dict): | |
1166 items = tag.items() | |
1167 else: | |
1168 items = tag.attrib.items() | |
1169 self.attributes = dict([item for item in items if item[0] != 'method']) | |
1170 if len(self.attributes) == 0: | |
1171 # legacy basic mode - provide compatible defaults | |
1172 self.attributes['split_size'] = 20 | |
1173 self.attributes['split_mode'] = 'number_of_parts' |