comparison env/lib/python3.9/site-packages/galaxy/tool_util/parser/xml.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
1 import json
2 import logging
3 import re
4 import uuid
5 from math import isinf
6
7 import packaging.version
8
9 from galaxy.tool_util.deps import requirements
10 from galaxy.tool_util.parser.util import (
11 DEFAULT_DELTA,
12 DEFAULT_DELTA_FRAC
13 )
14 from galaxy.util import (
15 string_as_bool,
16 xml_text,
17 xml_to_string
18 )
19 from .interface import (
20 InputSource,
21 PageSource,
22 PagesSource,
23 TestCollectionDef,
24 TestCollectionOutputDef,
25 ToolSource,
26 )
27 from .output_actions import ToolOutputActionGroup
28 from .output_collection_def import dataset_collector_descriptions_from_elem
29 from .output_objects import (
30 ToolExpressionOutput,
31 ToolOutput,
32 ToolOutputCollection,
33 ToolOutputCollectionStructure
34 )
35 from .stdio import (
36 aggressive_error_checks,
37 error_on_exit_code,
38 StdioErrorLevel,
39 ToolStdioExitCode,
40 ToolStdioRegex,
41 )
42
43
44 log = logging.getLogger(__name__)
45
46
47 class XmlToolSource(ToolSource):
48 """ Responsible for parsing a tool from classic Galaxy representation.
49 """
50
51 def __init__(self, xml_tree, source_path=None, macro_paths=None):
52 self.xml_tree = xml_tree
53 self.root = xml_tree.getroot()
54 self._source_path = source_path
55 self._macro_paths = macro_paths or []
56 self.legacy_defaults = self.parse_profile() == "16.01"
57
58 def to_string(self):
59 return xml_to_string(self.root)
60
61 def parse_version(self):
62 return self.root.get("version", None)
63
64 def parse_id(self):
65 return self.root.get("id")
66
67 def parse_tool_module(self):
68 root = self.root
69 if root.find("type") is not None:
70 type_elem = root.find("type")
71 module = type_elem.get('module', 'galaxy.tools')
72 cls = type_elem.get('class')
73 return module, cls
74
75 return None
76
77 def parse_action_module(self):
78 root = self.root
79 action_elem = root.find("action")
80 if action_elem is not None:
81 module = action_elem.get('module')
82 cls = action_elem.get('class')
83 return module, cls
84 else:
85 return None
86
87 def parse_tool_type(self):
88 root = self.root
89 if root.get('tool_type', None) is not None:
90 return root.get('tool_type')
91
92 def parse_name(self):
93 return self.root.get("name")
94
95 def parse_edam_operations(self):
96 edam_ops = self.root.find("edam_operations")
97 if edam_ops is None:
98 return []
99 return [edam_op.text for edam_op in edam_ops.findall("edam_operation")]
100
101 def parse_edam_topics(self):
102 edam_topics = self.root.find("edam_topics")
103 if edam_topics is None:
104 return []
105 return [edam_topic.text for edam_topic in edam_topics.findall("edam_topic")]
106
107 def parse_xrefs(self):
108 xrefs = self.root.find("xrefs")
109 if xrefs is None:
110 return []
111 return [dict(value=xref.text.strip(), reftype=xref.attrib['type']) for xref in xrefs.findall("xref") if xref.get("type")]
112
113 def parse_description(self):
114 return xml_text(self.root, "description")
115
116 def parse_is_multi_byte(self):
117 return self._get_attribute_as_bool("is_multi_byte", self.default_is_multi_byte)
118
119 def parse_display_interface(self, default):
120 return self._get_attribute_as_bool("display_interface", default)
121
122 def parse_require_login(self, default):
123 return self._get_attribute_as_bool("require_login", default)
124
125 def parse_request_param_translation_elem(self):
126 return self.root.find("request_param_translation")
127
128 def parse_command(self):
129 command_el = self._command_el
130 return ((command_el is not None) and command_el.text) or None
131
132 def parse_expression(self):
133 """ Return string containing command to run.
134 """
135 expression_el = self.root.find("expression")
136 if expression_el is not None:
137 expression_type = expression_el.get("type")
138 if expression_type != "ecma5.1":
139 raise Exception("Unknown expression type [%s] encountered" % expression_type)
140 return expression_el.text
141 return None
142
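# Illustrative example (assumed XML, not from the original source): an
# expression tool would declare something like
#     <expression type="ecma5.1">/* ECMAScript body */</expression>
# and parse_expression() returns the element's text; any type other than
# "ecma5.1" raises an exception.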
143 def parse_environment_variables(self):
144 environment_variables_el = self.root.find("environment_variables")
145 if environment_variables_el is None:
146 return []
147
148 environment_variables = []
149 for environment_variable_el in environment_variables_el.findall("environment_variable"):
150 template = environment_variable_el.text
151 inject = environment_variable_el.get("inject")
152 if inject:
153 assert not template, "Cannot specify inject and environment variable template."
154 assert inject in ["api_key"]
155 if template:
156 assert not inject, "Cannot specify inject and environment variable template."
157 definition = {
158 "name": environment_variable_el.get("name"),
159 "template": template,
160 "inject": inject,
161 "strip": string_as_bool(environment_variable_el.get("strip", False)),
162 }
163 environment_variables.append(
164 definition
165 )
166 return environment_variables
167
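# Illustrative example (assumed XML, not from the original source): the parser
# above turns
#     <environment_variables>
#         <environment_variable name="FOO">$input.ext</environment_variable>
#         <environment_variable name="GALAXY_API_KEY" inject="api_key" />
#     </environment_variables>
# into two definition dicts, one templated and one injected; a single
# environment_variable may set a template or inject="api_key", but not both.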
168 def parse_home_target(self):
169 target = "job_home" if self.parse_profile() >= "18.01" else "shared_home"
170 command_el = self._command_el
171 command_legacy = (command_el is not None) and command_el.get("use_shared_home", None)
172 if command_legacy is not None:
173 target = "shared_home" if string_as_bool(command_legacy) else "job_home"
174 return target
175
176 def parse_tmp_target(self):
177 # Default to not touching TMPDIR et al., but if job_tmp is set
178 # in job_conf then do. This is a very conservative approach that shouldn't
179 # break or modify any configurations by default.
180 return "job_tmp_if_explicit"
181
182 def parse_interpreter(self):
183 interpreter = None
184 command_el = self._command_el
185 if command_el is not None:
186 interpreter = command_el.get("interpreter", None)
187 if interpreter and not self.legacy_defaults:
188 log.warning("Deprecated interpreter attribute on command element is now ignored.")
189 interpreter = None
190 return interpreter
191
192 def parse_version_command(self):
193 version_cmd = self.root.find("version_command")
194 if version_cmd is not None:
195 return version_cmd.text
196 else:
197 return None
198
199 def parse_version_command_interpreter(self):
200 if self.parse_version_command() is not None:
201 version_cmd = self.root.find("version_command")
202 version_cmd_interpreter = version_cmd.get("interpreter", None)
203 if version_cmd_interpreter:
204 return version_cmd_interpreter
205 return None
206
207 def parse_parallelism(self):
208 parallelism = self.root.find("parallelism")
209 parallelism_info = None
210 if parallelism is not None and parallelism.get("method"):
211 return ParallelismInfo(parallelism)
212 return parallelism_info
213
214 def parse_interactivetool(self):
215 interactivetool_el = self.root.find("entry_points")
216 rtt = []
217 if interactivetool_el is None:
218 return rtt
219 for ep_el in interactivetool_el.findall("entry_point"):
220 port = ep_el.find("port")
221 assert port is not None, ValueError('A port is required for InteractiveTools')
222 port = port.text.strip()
223 url = ep_el.find("url")
224 if url is not None:
225 url = url.text.strip()
226 name = ep_el.get('name', None)
227 if name:
228 name = name.strip()
229 requires_domain = string_as_bool(ep_el.attrib.get("requires_domain", False))
230 rtt.append(dict(port=port, url=url, name=name, requires_domain=requires_domain))
231 return rtt
232
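# Illustrative example (assumed XML): an InteractiveTool entry point such as
#     <entry_points>
#         <entry_point name="Notebook" requires_domain="True">
#             <port>8888</port>
#             <url>ipython/lab</url>
#         </entry_point>
#     </entry_points>
# is parsed into dicts like
#     {"port": "8888", "url": "ipython/lab", "name": "Notebook", "requires_domain": True}
# and a missing <port> raises an assertion error.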
233 def parse_hidden(self):
234 hidden = xml_text(self.root, "hidden")
235 if hidden:
236 hidden = string_as_bool(hidden)
237 return hidden
238
239 def parse_redirect_url_params_elem(self):
240 return self.root.find("redirect_url_params")
241
242 def parse_sanitize(self):
243 return self._get_option_value("sanitize", True)
244
245 def parse_refresh(self):
246 return self._get_option_value("refresh", False)
247
248 def _get_option_value(self, key, default):
249 root = self.root
250 for option_elem in root.findall("options"):
251 if key in option_elem.attrib:
252 return string_as_bool(option_elem.get(key))
253 return default
254
255 @property
256 def _command_el(self):
257 return self.root.find("command")
258
259 def _get_attribute_as_bool(self, attribute, default, elem=None):
260 if elem is None:
261 elem = self.root
262 return string_as_bool(elem.get(attribute, default))
263
264 def parse_requirements_and_containers(self):
265 return requirements.parse_requirements_from_xml(self.root)
266
267 def parse_input_pages(self):
268 return XmlPagesSource(self.root)
269
270 def parse_provided_metadata_style(self):
271 style = None
272 out_elem = self.root.find("outputs")
273 if out_elem is not None and "provided_metadata_style" in out_elem.attrib:
274 style = out_elem.attrib["provided_metadata_style"]
275
276 if style is None:
277 style = "legacy" if self.parse_profile() < "17.09" else "default"
278
279 assert style in ["legacy", "default"]
280 return style
281
282 def parse_provided_metadata_file(self):
283 provided_metadata_file = "galaxy.json"
284 out_elem = self.root.find("outputs")
285 if out_elem is not None and "provided_metadata_file" in out_elem.attrib:
286 provided_metadata_file = out_elem.attrib["provided_metadata_file"]
287
288 return provided_metadata_file
289
290 def parse_outputs(self, tool):
291 out_elem = self.root.find("outputs")
292 outputs = {}
293 output_collections = {}
294 if out_elem is None:
295 return outputs, output_collections
296
297 data_dict = {}
298
299 def _parse(data_elem, **kwds):
300 output_def = self._parse_output(data_elem, tool, **kwds)
301 data_dict[output_def.name] = output_def
302 return output_def
303
304 for _ in out_elem.findall("data"):
305 _parse(_)
306
307 def _parse_expression(output_elem, **kwds):
308 output_def = self._parse_expression_output(output_elem, tool, **kwds)
309 output_def.filters = output_elem.findall('filter')
310 data_dict[output_def.name] = output_def
311 return output_def
312
313 def _parse_collection(collection_elem):
314 name = collection_elem.get("name")
315 label = xml_text(collection_elem, "label")
316 default_format = collection_elem.get("format", "data")
317 collection_type = collection_elem.get("type", None)
318 collection_type_source = collection_elem.get("type_source", None)
319 collection_type_from_rules = collection_elem.get("type_from_rules", None)
320 structured_like = collection_elem.get("structured_like", None)
321 inherit_format = False
322 inherit_metadata = False
323 if structured_like:
324 inherit_format = string_as_bool(collection_elem.get("inherit_format", None))
325 inherit_metadata = string_as_bool(collection_elem.get("inherit_metadata", None))
326 default_format_source = collection_elem.get("format_source", None)
327 default_metadata_source = collection_elem.get("metadata_source", "")
328 filters = collection_elem.findall('filter')
329
330 dataset_collector_descriptions = None
331 if collection_elem.find("discover_datasets") is not None:
332 dataset_collector_descriptions = dataset_collector_descriptions_from_elem(collection_elem, legacy=False)
333 structure = ToolOutputCollectionStructure(
334 collection_type=collection_type,
335 collection_type_source=collection_type_source,
336 collection_type_from_rules=collection_type_from_rules,
337 structured_like=structured_like,
338 dataset_collector_descriptions=dataset_collector_descriptions,
339 )
340 output_collection = ToolOutputCollection(
341 name,
342 structure,
343 label=label,
344 filters=filters,
345 default_format=default_format,
346 inherit_format=inherit_format,
347 inherit_metadata=inherit_metadata,
348 default_format_source=default_format_source,
349 default_metadata_source=default_metadata_source,
350 )
351 outputs[output_collection.name] = output_collection
352
353 for data_elem in collection_elem.findall("data"):
354 _parse(
355 data_elem,
356 default_format=default_format,
357 default_format_source=default_format_source,
358 default_metadata_source=default_metadata_source,
359 )
360
361 for data_elem in collection_elem.findall("data"):
362 output_name = data_elem.get("name")
363 data = data_dict[output_name]
364 assert data
365 del data_dict[output_name]
366 output_collection.outputs[output_name] = data
367 output_collections[name] = output_collection
368
369 for out_child in out_elem:
370 if out_child.tag == "data":
371 _parse(out_child)
372 elif out_child.tag == "collection":
373 _parse_collection(out_child)
374 elif out_child.tag == "output":
375 output_type = out_child.get("type")
376 if output_type == "data":
377 _parse(out_child)
378 elif output_type == "collection":
379 out_child.attrib["type"] = out_child.get("collection_type")
380 out_child.attrib["type_source"] = out_child.get("collection_type_source")
381 _parse_collection(out_child)
382 else:
383 _parse_expression(out_child)
384 else:
385 log.warning("Unknown output tag encountered [%s]" % out_child.tag)
386
387 for output_def in data_dict.values():
388 outputs[output_def.name] = output_def
389 return outputs, output_collections
390
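# Illustrative example (assumed XML): parse_outputs() handles both plain
# datasets and collections, e.g.
#     <outputs>
#         <data name="out_file" format="tabular" label="Result" />
#         <collection name="split" type="list" label="Per-sample output">
#             <discover_datasets pattern="__name_and_ext__" />
#         </collection>
#     </outputs>
# returning an (outputs, output_collections) pair keyed by the name attributes.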
391 def _parse_output(
392 self,
393 data_elem,
394 tool,
395 default_format="data",
396 default_format_source=None,
397 default_metadata_source="",
398 expression_type=None,
399 ):
400 from_expression = data_elem.get("from")
401 output = ToolOutput(data_elem.get("name"), from_expression=from_expression)
402 output_format = data_elem.get("format", default_format)
403 auto_format = string_as_bool(data_elem.get("auto_format", "false"))
404 if auto_format and output_format != "data":
405 raise ValueError("Setting format and auto_format is not supported at this time.")
406 elif auto_format:
407 output_format = "_sniff_"
408 output.format = output_format
409 output.change_format = data_elem.findall("change_format")
410 output.format_source = data_elem.get("format_source", default_format_source)
411 output.default_identifier_source = data_elem.get("default_identifier_source", 'None')
412 output.metadata_source = data_elem.get("metadata_source", default_metadata_source)
413 output.parent = data_elem.get("parent", None)
414 output.label = xml_text(data_elem, "label")
415 output.count = int(data_elem.get("count", 1))
416 output.filters = data_elem.findall('filter')
417 output.tool = tool
418 output.from_work_dir = data_elem.get("from_work_dir", None)
419 output.hidden = string_as_bool(data_elem.get("hidden", ""))
420 output.actions = ToolOutputActionGroup(output, data_elem.find('actions'))
421 output.dataset_collector_descriptions = dataset_collector_descriptions_from_elem(data_elem, legacy=self.legacy_defaults)
422 return output
423
424 def _parse_expression_output(self, output_elem, tool, **kwds):
425 output_type = output_elem.get("type")
426 from_expression = output_elem.get("from")
427 output = ToolExpressionOutput(
428 output_elem.get("name"),
429 output_type,
430 from_expression,
431 )
432 output.path = output_elem.get("value")
433 output.label = xml_text(output_elem, "label")
434
435 output.hidden = string_as_bool(output_elem.get("hidden", ""))
436 output.actions = ToolOutputActionGroup(output, output_elem.find('actions'))
437 output.dataset_collector_descriptions = []
438 return output
439
440 def parse_stdio(self):
441 """
442 parse error handling from command and stdio tag
443
444 returns a list of exit codes and a list of regexes
445
446 - exit_codes contain all non-zero exit codes (:-1 and 1:) if
447 detect_errors is default (if not legacy), exit_code, or aggressive
448 - the oom_exit_code if given and detect_errors is exit_code
449 - exit codes and regexes from the stdio tag
450 these are prepended to the list, i.e. they are evaluated prior to regexes
451 and exit codes derived from the properties of the command tag.
452 Thus more specific regexes of the same or a more severe error level
453 are triggered first.
454
455 """
456
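# Illustrative example (assumed XML): with
#     <command detect_errors="exit_code" oom_exit_code="137">...</command>
# every non-zero exit code is treated as an error and 137 is flagged as
# out-of-memory, while any <stdio> exit_code/regex definitions are prepended
# so they are evaluated first.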
457 command_el = self._command_el
458 detect_errors = None
459 if command_el is not None:
460 detect_errors = command_el.get("detect_errors")
461
462 if detect_errors and detect_errors != "default":
463 if detect_errors == "exit_code":
464 oom_exit_code = None
465 if command_el is not None:
466 oom_exit_code = command_el.get("oom_exit_code", None)
467 if oom_exit_code is not None:
468 int(oom_exit_code)
469 exit_codes, regexes = error_on_exit_code(out_of_memory_exit_code=oom_exit_code)
470 elif detect_errors == "aggressive":
471 exit_codes, regexes = aggressive_error_checks()
472 else:
473 raise ValueError("Unknown detect_errors value encountered [%s]" % detect_errors)
474 elif len(self.root.findall('stdio')) == 0 and not self.legacy_defaults:
475 exit_codes, regexes = error_on_exit_code()
476 else:
477 exit_codes = []
478 regexes = []
479
480 if len(self.root.findall('stdio')) > 0:
481 parser = StdioParser(self.root)
482 exit_codes = parser.stdio_exit_codes + exit_codes
483 regexes = parser.stdio_regexes + regexes
484
485 return exit_codes, regexes
486
487 def parse_strict_shell(self):
488 command_el = self._command_el
489 if packaging.version.parse(self.parse_profile()) < packaging.version.parse('20.09'):
490 default = "False"
491 else:
492 default = "True"
493 if command_el is not None:
494 return string_as_bool(command_el.get("strict", default))
495 else:
496 return string_as_bool(default)
497
498 def parse_help(self):
499 help_elem = self.root.find('help')
500 return help_elem.text if help_elem is not None else None
501
502 @property
503 def macro_paths(self):
504 return self._macro_paths
505
506 @property
507 def source_path(self):
508 return self._source_path
509
510 def parse_tests_to_dict(self):
511 tests_elem = self.root.find("tests")
512 tests = []
513 rval = dict(
514 tests=tests
515 )
516
517 if tests_elem is not None:
518 for i, test_elem in enumerate(tests_elem.findall("test")):
519 profile = self.parse_profile()
520 tests.append(_test_elem_to_dict(test_elem, i, profile))
521
522 return rval
523
524 def parse_profile(self):
525 # Pre-16.04 or default XML defaults
526 # - Use standard error for error detection.
527 # - Don't run shells with -e
528 # - Auto-check for implicit multiple outputs.
529 # - Auto-check for $param_file.
530 # - Enable buggy interpreter attribute.
531 return self.root.get("profile", "16.01")
532
533 def parse_license(self):
534 return self.root.get("license")
535
536 def parse_python_template_version(self):
537 python_template_version = self.root.get("python_template_version", None)
538 if python_template_version is not None:
539 python_template_version = packaging.version.parse(python_template_version)
540 return python_template_version
541
542 def parse_creator(self):
543 creators_el = self.root.find("creator")
544 if creators_el is None:
545 return None
546
547 creators = []
548 for creator_el in creators_el:
549 creator_as_dict = {}
550 if creator_el.tag == "person":
551 clazz = "Person"
552 elif creator_el.tag == "organization":
553 clazz = "Organization"
554 else:
555 continue
556 creator_as_dict["class"] = clazz
557 creator_as_dict.update(creator_el.attrib)
558 creators.append(creator_as_dict)
559 return creators
560
561
562 def _test_elem_to_dict(test_elem, i, profile=None):
563 rval = dict(
564 outputs=__parse_output_elems(test_elem),
565 output_collections=__parse_output_collection_elems(test_elem, profile=profile),
566 inputs=__parse_input_elems(test_elem, i),
567 expect_num_outputs=test_elem.get("expect_num_outputs"),
568 command=__parse_assert_list_from_elem(test_elem.find("assert_command")),
569 command_version=__parse_assert_list_from_elem(test_elem.find("assert_command_version")),
570 stdout=__parse_assert_list_from_elem(test_elem.find("assert_stdout")),
571 stderr=__parse_assert_list_from_elem(test_elem.find("assert_stderr")),
572 expect_exit_code=test_elem.get("expect_exit_code"),
573 expect_failure=string_as_bool(test_elem.get("expect_failure", False)),
574 maxseconds=test_elem.get("maxseconds", None),
575 )
576 _copy_to_dict_if_present(test_elem, rval, ["num_outputs"])
577 return rval
578
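# Illustrative example (assumed XML): a <test> element such as
#     <test expect_num_outputs="1" expect_exit_code="0">
#         <param name="times" value="2" />
#         <output name="out_file" file="expected.txt" />
#     </test>
# is flattened by _test_elem_to_dict into a dict with "inputs", "outputs",
# assertion lists and the expect_* attributes shown above.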
579
580 def __parse_input_elems(test_elem, i):
581 __expand_input_elems(test_elem)
582 return __parse_inputs_elems(test_elem, i)
583
584
585 def __parse_output_elems(test_elem):
586 outputs = []
587 for output_elem in test_elem.findall("output"):
588 name, file, attributes = __parse_output_elem(output_elem)
589 outputs.append({"name": name, "value": file, "attributes": attributes})
590 return outputs
591
592
593 def __parse_output_elem(output_elem):
594 attrib = dict(output_elem.attrib)
595 name = attrib.pop('name', None)
596 if name is None:
597 raise Exception("Test output does not have a 'name'")
598
599 file, attributes = __parse_test_attributes(output_elem, attrib, parse_discovered_datasets=True)
600 return name, file, attributes
601
602
603 def __parse_command_elem(test_elem):
604 assert_elem = test_elem.find("command")
605 return __parse_assert_list_from_elem(assert_elem)
606
607
608 def __parse_output_collection_elems(test_elem, profile=None):
609 output_collections = []
610 for output_collection_elem in test_elem.findall("output_collection"):
611 output_collection_def = __parse_output_collection_elem(output_collection_elem, profile=profile)
612 output_collections.append(output_collection_def)
613 return output_collections
614
615
616 def __parse_output_collection_elem(output_collection_elem, profile=None):
617 attrib = dict(output_collection_elem.attrib)
618 name = attrib.pop('name', None)
619 if name is None:
620 raise Exception("Test output collection does not have a 'name'")
621 element_tests = __parse_element_tests(output_collection_elem, profile=profile)
622 return TestCollectionOutputDef(name, attrib, element_tests).to_dict()
623
624
625 def __parse_element_tests(parent_element, profile=None):
626 element_tests = {}
627 for idx, element in enumerate(parent_element.findall("element")):
628 element_attrib = dict(element.attrib)
629 identifier = element_attrib.pop('name', None)
630 if identifier is None:
631 raise Exception("Test primary dataset does not have a 'identifier'")
632 element_tests[identifier] = __parse_test_attributes(element, element_attrib, parse_elements=True, profile=profile)
633 if profile and profile >= "20.09":
634 element_tests[identifier][1]["expected_sort_order"] = idx
635
636 return element_tests
637
638
639 def __parse_test_attributes(output_elem, attrib, parse_elements=False, parse_discovered_datasets=False, profile=None):
640 assert_list = __parse_assert_list(output_elem)
641
642 # Allow either file or value to specify a target file to compare result with
643 # file was traditionally used by outputs and value by extra files.
644 file = attrib.pop('file', attrib.pop('value', None))
645
646 # A file is no longer required if a list of assertions is present.
647 attributes = {}
648
649 if 'value_json' in attrib:
650 attributes['object'] = json.loads(attrib.pop('value_json'))
651
652 # Method of comparison
653 attributes['compare'] = attrib.pop('compare', 'diff').lower()
654 # Number of lines to allow to vary in logs (for dates, etc)
655 attributes['lines_diff'] = int(attrib.pop('lines_diff', '0'))
656 # Allow a file size to vary if sim_size compare
657 attributes['delta'] = int(attrib.pop('delta', DEFAULT_DELTA))
658 attributes['delta_frac'] = float(attrib['delta_frac']) if 'delta_frac' in attrib else DEFAULT_DELTA_FRAC
659 attributes['sort'] = string_as_bool(attrib.pop('sort', False))
660 attributes['decompress'] = string_as_bool(attrib.pop('decompress', False))
661 extra_files = []
662 if 'ftype' in attrib:
663 attributes['ftype'] = attrib['ftype']
664 for extra in output_elem.findall('extra_files'):
665 extra_files.append(__parse_extra_files_elem(extra))
666 metadata = {}
667 for metadata_elem in output_elem.findall('metadata'):
668 metadata[metadata_elem.get('name')] = metadata_elem.get('value')
669 md5sum = attrib.get("md5", None)
670 checksum = attrib.get("checksum", None)
671 element_tests = {}
672 if parse_elements:
673 element_tests = __parse_element_tests(output_elem, profile=profile)
674
675 primary_datasets = {}
676 if parse_discovered_datasets:
677 for primary_elem in (output_elem.findall("discovered_dataset") or []):
678 primary_attrib = dict(primary_elem.attrib)
679 designation = primary_attrib.pop('designation', None)
680 if designation is None:
681 raise Exception("Test primary dataset does not have a 'designation'")
682 primary_datasets[designation] = __parse_test_attributes(primary_elem, primary_attrib)
683
684 has_checksum = md5sum or checksum
685 has_nested_tests = extra_files or element_tests or primary_datasets
686 has_object = 'object' in attributes
687 if not (assert_list or file or metadata or has_checksum or has_nested_tests or has_object):
688 raise Exception("Test output defines nothing to check (e.g. must have a 'file' check against, assertions to check, metadata or checksum tests, etc...)")
689 attributes['assert_list'] = assert_list
690 attributes['extra_files'] = extra_files
691 attributes['metadata'] = metadata
692 attributes['md5'] = md5sum
693 attributes['checksum'] = checksum
694 attributes['elements'] = element_tests
695 attributes['primary_datasets'] = primary_datasets
696 return file, attributes
697
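# Illustrative example (assumed XML): for an output check like
#     <output name="out_file" file="expected.tsv" compare="sim_size" delta="1000" lines_diff="2" sort="true" />
# __parse_test_attributes returns ("expected.tsv", attributes) where attributes
# carries the compare method, delta/delta_frac, lines_diff, sort flag and any
# nested assert_contents, extra_files or metadata checks.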
698
699 def __parse_assert_list(output_elem):
700 assert_elem = output_elem.find("assert_contents")
701 return __parse_assert_list_from_elem(assert_elem)
702
703
704 def __parse_assert_list_from_elem(assert_elem):
705 assert_list = None
706
707 def convert_elem(elem):
708 """ Converts and XML element to a dictionary format, used by assertion checking code. """
709 tag = elem.tag
710 attributes = dict(elem.attrib)
711 converted_children = []
712 for child_elem in elem:
713 converted_children.append(convert_elem(child_elem))
714 return {"tag": tag, "attributes": attributes, "children": converted_children}
715 if assert_elem is not None:
716 assert_list = []
717 for assert_child in list(assert_elem):
718 assert_list.append(convert_elem(assert_child))
719
720 return assert_list
721
722
723 def __parse_extra_files_elem(extra):
724 # File or directory; when a directory, compare basename
725 # by basename
726 attrib = dict(extra.attrib)
727 extra_type = attrib.pop('type', 'file')
728 extra_name = attrib.pop('name', None)
729 assert extra_type == 'directory' or extra_name is not None, \
730 'extra_files type (%s) requires a name attribute' % extra_type
731 extra_value, extra_attributes = __parse_test_attributes(extra, attrib)
732 return {
733 "value": extra_value,
734 "name": extra_name,
735 "type": extra_type,
736 "attributes": extra_attributes
737 }
738
739
740 def __expand_input_elems(root_elem, prefix=""):
741 __append_prefix_to_params(root_elem, prefix)
742
743 repeat_elems = root_elem.findall('repeat')
744 indices = {}
745 for repeat_elem in repeat_elems:
746 name = repeat_elem.get("name")
747 if name not in indices:
748 indices[name] = 0
749 index = 0
750 else:
751 index = indices[name] + 1
752 indices[name] = index
753
754 new_prefix = __prefix_join(prefix, name, index=index)
755 __expand_input_elems(repeat_elem, new_prefix)
756 __pull_up_params(root_elem, repeat_elem)
757
758 cond_elems = root_elem.findall('conditional')
759 for cond_elem in cond_elems:
760 new_prefix = __prefix_join(prefix, cond_elem.get("name"))
761 __expand_input_elems(cond_elem, new_prefix)
762 __pull_up_params(root_elem, cond_elem)
763
764 section_elems = root_elem.findall('section')
765 for section_elem in section_elems:
766 new_prefix = __prefix_join(prefix, section_elem.get("name"))
767 __expand_input_elems(section_elem, new_prefix)
768 __pull_up_params(root_elem, section_elem)
769
770
771 def __append_prefix_to_params(elem, prefix):
772 for param_elem in elem.findall('param'):
773 param_elem.set("name", __prefix_join(prefix, param_elem.get("name")))
774
775
776 def __pull_up_params(parent_elem, child_elem):
777 for param_elem in child_elem.findall('param'):
778 parent_elem.append(param_elem)
779
780
781 def __prefix_join(prefix, name, index=None):
782 name = name if index is None else "%s_%d" % (name, index)
783 return name if not prefix else f"{prefix}|{name}"
784
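# Illustrative worked example (assumed names): a test <param name="n" /> nested
# in the second <repeat name="reads"> inside <conditional name="mode"> is
# flattened by __expand_input_elems/__prefix_join into a single flat parameter
# named "mode|reads_1|n", matching how Galaxy addresses nested tool inputs.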
785
786 def _copy_to_dict_if_present(elem, rval, attributes):
787 for attribute in attributes:
788 if attribute in elem.attrib:
789 rval[attribute] = elem.get(attribute)
790 return rval
791
792
793 def __parse_inputs_elems(test_elem, i):
794 raw_inputs = []
795 for param_elem in test_elem.findall("param"):
796 raw_inputs.append(__parse_param_elem(param_elem, i))
797
798 return raw_inputs
799
800
801 def __parse_param_elem(param_elem, i=0):
802 attrib = dict(param_elem.attrib)
803 if 'values' in attrib:
804 value = attrib['values'].split(',')
805 elif 'value' in attrib:
806 value = attrib['value']
807 elif 'value_json' in attrib:
808 value = json.loads(attrib['value_json'])
809 else:
810 value = None
811
812 children_elem = param_elem
813 if children_elem is not None:
814 # At this time, we can assume having children only
815 # occurs on DataToolParameter test items, but this could
816 # change and would cause the parsing below to change
817 # based upon differences in the child items.
818 attrib['metadata'] = {}
819 attrib['composite_data'] = []
820 attrib['edit_attributes'] = []
821 # Composite datasets need to be renamed uniquely
822 composite_data_name = None
823 for child in children_elem:
824 if child.tag == 'composite_data':
825 file_name = child.get("value")
826 attrib['composite_data'].append(file_name)
827 if composite_data_name is None:
828 # Generate a unique name; each test uses a
829 # fresh history.
830 composite_data_name = '_COMPOSITE_RENAMED_t%d_%s' \
831 % (i, uuid.uuid1().hex)
832 elif child.tag == 'metadata':
833 attrib['metadata'][child.get("name")] = child.get("value")
834 elif child.tag == 'edit_attributes':
835 attrib['edit_attributes'].append(child)
836 elif child.tag == 'collection':
837 attrib['collection'] = TestCollectionDef.from_xml(child, __parse_param_elem)
838 if composite_data_name:
839 # Composite datasets need implicit renaming;
840 # inserted at front of list so explicit declarations
841 # take precedence
842 attrib['edit_attributes'].insert(0, {'type': 'name', 'value': composite_data_name})
843 name = attrib.pop('name')
844 return {
845 "name": name,
846 "value": value,
847 "attributes": attrib
848 }
849
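# Illustrative example (assumed XML): a test parameter such as
#     <param name="input1" value="1.bed" ftype="bed">
#         <metadata name="dbkey" value="hg19" />
#     </param>
# becomes {"name": "input1", "value": "1.bed", "attributes": {...}} with the
# child metadata, composite_data and edit_attributes collected into attributes.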
850
851 class StdioParser:
852
853 def __init__(self, root):
854 try:
855 self.stdio_exit_codes = list()
856 self.stdio_regexes = list()
857
858 # We should have a single <stdio> element, but handle the case for
859 # multiples.
860 # For every stdio element, add all of the exit_code and regex
861 # subelements that we find:
862 for stdio_elem in (root.findall('stdio')):
863 self.parse_stdio_exit_codes(stdio_elem)
864 self.parse_stdio_regexes(stdio_elem)
865 except Exception:
866 log.exception("Exception in parse_stdio!")
867
868 def parse_stdio_exit_codes(self, stdio_elem):
869 """
870 Parse the tool's <stdio> element's <exit_code> subelements.
871 This will add all of those elements, if any, to self.stdio_exit_codes.
872 """
873 try:
874 # Look for all <exit_code> elements. Each exit_code element must
875 # have a range/value.
876 # Exit-code ranges have precedence over a single exit code.
877 # So if there are value and range attributes, we use the range
878 # attribute. If there is neither a range nor a value, then print
879 # a warning and skip to the next.
880 for exit_code_elem in (stdio_elem.findall("exit_code")):
881 exit_code = ToolStdioExitCode()
882 # Each exit code has an optional description that can be
883 # part of the "desc" or "description" attributes:
884 exit_code.desc = exit_code_elem.get("desc")
885 if exit_code.desc is None:
886 exit_code.desc = exit_code_elem.get("description")
887 # Parse the error level:
888 exit_code.error_level = (
889 self.parse_error_level(exit_code_elem.get("level")))
890 code_range = exit_code_elem.get("range", "")
891 if not code_range:
892 code_range = exit_code_elem.get("value", "")
893 if not code_range:
894 log.warning("Tool stdio exit codes must have a range or value")
895 continue
896 # Parse the range. We look for:
897 # :Y
898 # X:
899 # X:Y - Split on the colon. We do not allow a colon
900 # without a beginning or end, though we could.
901 # Also note that whitespace is eliminated.
902 # TODO: Turn this into a single match - it should be
903 # more efficient.
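# Worked examples (illustrative, not from the original comments):
#   range=":-1"  ->  range_start = -inf, range_end = -1
#   range="1:"   ->  range_start = 1,    range_end = +inf
#   range="3:5"  ->  range_start = 3,    range_end = 5
#   value="42"   ->  range_start = 42,   range_end = 42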
904 code_range = re.sub(r"\s", "", code_range)
905 code_ranges = re.split(r":", code_range)
906 if (len(code_ranges) == 2):
907 if (code_ranges[0] is None or '' == code_ranges[0]):
908 exit_code.range_start = float("-inf")
909 else:
910 exit_code.range_start = int(code_ranges[0])
911 if (code_ranges[1] is None or '' == code_ranges[1]):
912 exit_code.range_end = float("inf")
913 else:
914 exit_code.range_end = int(code_ranges[1])
915 # If we got more than one colon, then ignore the exit code.
916 elif (len(code_ranges) > 2):
917 log.warning("Invalid tool exit_code range %s - ignored"
918 % code_range)
919 continue
920 # Else we have a singular value. If it's not an integer, then
921 # we'll just write a log message and skip this exit_code.
922 else:
923 try:
924 exit_code.range_start = int(code_range)
925 except Exception:
926 log.error(code_range)
927 log.warning("Invalid range start for tool's exit_code %s: exit_code ignored" % code_range)
928 continue
929 exit_code.range_end = exit_code.range_start
930 # TODO: Check if we got ">", ">=", "<", or "<=":
931 # Check that the range, regardless of how we got it,
932 # isn't bogus. If we have two infinite values, then
933 # the start must be -inf and the end must be +inf.
934 # So at least warn about this situation:
935 if isinf(exit_code.range_start) and isinf(exit_code.range_end):
936 log.warning("Tool exit_code range %s will match on all exit codes" % code_range)
937 self.stdio_exit_codes.append(exit_code)
938 except Exception:
939 log.exception("Exception in parse_stdio_exit_codes!")
940
941 def parse_stdio_regexes(self, stdio_elem):
942 """
943 Look in the tool's <stdio> elem for all <regex> subelements
944 that define how to look for warnings and fatal errors in
945 stdout and stderr. This will add all such regex elements
946 to the tool's stdio_regexes list.
947 """
948 try:
949 # Look for every <regex> subelement. The regular expression
950 # will have "match" and "source" (or "src") attributes.
951 for regex_elem in (stdio_elem.findall("regex")):
952 # TODO: Fill in ToolStdioRegex
953 regex = ToolStdioRegex()
954 # Each regex has an optional description that can be
955 # part of the "desc" or "description" attributes:
956 regex.desc = regex_elem.get("desc")
957 if regex.desc is None:
958 regex.desc = regex_elem.get("description")
959 # Parse the error level
960 regex.error_level = (
961 self.parse_error_level(regex_elem.get("level")))
962 regex.match = regex_elem.get("match", "")
963 if not regex.match:
964 # TODO: Convert the offending XML element to a string
965 log.warning("Ignoring tool's stdio regex element %s - "
966 "the 'match' attribute must exist")
967 continue
968 # Parse the output sources. We look for the "src", "source",
969 # and "sources" attributes, in that order. If there is no
970 # such source, then the source defaults to stderr & stdout.
971 # Look for a comma and then look for "err", "error", "out",
972 # and "output":
973 output_srcs = regex_elem.get("src")
974 if output_srcs is None:
975 output_srcs = regex_elem.get("source")
976 if output_srcs is None:
977 output_srcs = regex_elem.get("sources")
978 if output_srcs is None:
979 output_srcs = "output,error"
980 output_srcs = re.sub(r"\s", "", output_srcs)
981 src_list = re.split(r",", output_srcs)
982 # Just put together anything to do with "out", including
983 # "stdout", "output", etc. Repeat for "stderr", "error",
984 # and anything to do with "err". If neither stdout nor
985 # stderr were specified, then raise a warning and scan both.
986 for src in src_list:
987 if re.search("both", src, re.IGNORECASE):
988 regex.stdout_match = True
989 regex.stderr_match = True
990 if re.search("out", src, re.IGNORECASE):
991 regex.stdout_match = True
992 if re.search("err", src, re.IGNORECASE):
993 regex.stderr_match = True
994 if (not regex.stdout_match and not regex.stderr_match):
995 log.warning("Tool id %s: unable to determine if tool "
996 "stream source scanning is output, error, "
997 "or both. Defaulting to use both." % self.id)
998 regex.stdout_match = True
999 regex.stderr_match = True
1000 self.stdio_regexes.append(regex)
1001 except Exception:
1002 log.exception("Exception in parse_stdio_exit_codes!")
1003
1004 # TODO: This method doesn't have to be part of the StdioParser class.
1005 def parse_error_level(self, err_level):
1006 """
1007 Parses error level and returns error level enumeration. If
1008 unparsable, returns 'fatal'
1009 """
1010 return_level = StdioErrorLevel.FATAL
1011 try:
1012 if err_level:
1013 if (re.search("log", err_level, re.IGNORECASE)):
1014 return_level = StdioErrorLevel.LOG
1015 elif (re.search("qc", err_level, re.IGNORECASE)):
1016 return_level = StdioErrorLevel.QC
1017 elif (re.search("warning", err_level, re.IGNORECASE)):
1018 return_level = StdioErrorLevel.WARNING
1019 elif (re.search("fatal_oom", err_level, re.IGNORECASE)):
1020 return_level = StdioErrorLevel.FATAL_OOM
1021 elif (re.search("fatal", err_level, re.IGNORECASE)):
1022 return_level = StdioErrorLevel.FATAL
1023 else:
1024 log.debug("Tool %s: error level %s did not match log/warning/fatal" %
1025 (self.id, err_level))
1026 except Exception:
1027 log.exception("Exception in parse_error_level")
1028 return return_level
1029
1030
1031 class XmlPagesSource(PagesSource):
1032
1033 def __init__(self, root):
1034 self.input_elem = root.find("inputs")
1035 page_sources = []
1036 if self.input_elem is not None:
1037 pages_elem = self.input_elem.findall("page")
1038 for page in (pages_elem or [self.input_elem]):
1039 page_sources.append(XmlPageSource(page))
1040 super().__init__(page_sources)
1041
1042 @property
1043 def inputs_defined(self):
1044 return self.input_elem is not None
1045
1046
1047 class XmlPageSource(PageSource):
1048
1049 def __init__(self, parent_elem):
1050 self.parent_elem = parent_elem
1051
1052 def parse_display(self):
1053 display_elem = self.parent_elem.find("display")
1054 if display_elem is not None:
1055 display = xml_to_string(display_elem)
1056 else:
1057 display = None
1058 return display
1059
1060 def parse_input_sources(self):
1061 return map(XmlInputSource, self.parent_elem)
1062
1063
1064 class XmlInputSource(InputSource):
1065
1066 def __init__(self, input_elem):
1067 self.input_elem = input_elem
1068 self.input_type = self.input_elem.tag
1069
1070 def parse_input_type(self):
1071 return self.input_type
1072
1073 def elem(self):
1074 return self.input_elem
1075
1076 def get(self, key, value=None):
1077 return self.input_elem.get(key, value)
1078
1079 def get_bool(self, key, default):
1080 return string_as_bool(self.get(key, default))
1081
1082 def parse_label(self):
1083 return xml_text(self.input_elem, "label")
1084
1085 def parse_help(self):
1086 return xml_text(self.input_elem, "help")
1087
1088 def parse_sanitizer_elem(self):
1089 return self.input_elem.find("sanitizer")
1090
1091 def parse_validator_elems(self):
1092 return self.input_elem.findall("validator")
1093
1094 def parse_dynamic_options_elem(self):
1095 """ Return a galaxy.tools.parameters.dynamic_options.DynamicOptions
1096 if appropriate.
1097 """
1098 options_elem = self.input_elem.find('options')
1099 return options_elem
1100
1101 def parse_static_options(self):
1102 static_options = list()
1103 elem = self.input_elem
1104 for option in elem.findall("option"):
1105 value = option.get("value")
1106 selected = string_as_bool(option.get("selected", False))
1107 static_options.append((option.text or value, value, selected))
1108 return static_options
1109
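# Illustrative example (assumed XML): for
#     <option value="hg19" selected="true">Human (hg19)</option>
#     <option value="mm10">Mouse (mm10)</option>
# parse_static_options() returns
#     [("Human (hg19)", "hg19", True), ("Mouse (mm10)", "mm10", False)].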
1110 def parse_optional(self, default=None):
1111 """ Return boolean indicating whether parameter is optional. """
1112 elem = self.input_elem
1113 if self.get('type') == "data_column":
1114 # Allow specifying force_select for backward compatibility, but probably
1115 # should use optional going forward for consistency with other
1116 # parameters.
1117 if "force_select" in elem.attrib:
1118 force_select = string_as_bool(elem.get("force_select"))
1119 else:
1120 force_select = not string_as_bool(elem.get("optional", False))
1121 return not force_select
1122
1123 if default is None:
1124 default = self.default_optional
1125 return self.get_bool("optional", default)
1126
1127 def parse_conversion_tuples(self):
1128 elem = self.input_elem
1129 conversions = []
1130 for conv_elem in elem.findall("conversion"):
1131 name = conv_elem.get("name") # name for commandline substitution
1132 conv_extensions = conv_elem.get("type") # target datatype extension
1133 conversions.append((name, conv_extensions))
1134 return conversions
1135
1136 def parse_nested_inputs_source(self):
1137 elem = self.input_elem
1138 return XmlPageSource(elem)
1139
1140 def parse_test_input_source(self):
1141 elem = self.input_elem
1142 input_elem = elem.find("param")
1143 assert input_elem is not None, "<conditional> must have a child <param>"
1144 return XmlInputSource(input_elem)
1145
1146 def parse_when_input_sources(self):
1147 elem = self.input_elem
1148
1149 sources = []
1150 for case_elem in elem.findall("when"):
1151 value = case_elem.get("value")
1152 case_page_source = XmlPageSource(case_elem)
1153 sources.append((value, case_page_source))
1154 return sources
1155
1156
1157 class ParallelismInfo:
1158 """
1159 Stores the information (if any) for running multiple instances of the tool in parallel
1160 on the same set of inputs.
1161 """
1162
1163 def __init__(self, tag):
1164 self.method = tag.get('method')
1165 if isinstance(tag, dict):
1166 items = tag.items()
1167 else:
1168 items = tag.attrib.items()
1169 self.attributes = dict([item for item in items if item[0] != 'method'])
1170 if len(self.attributes) == 0:
1171 # legacy basic mode - provide compatible defaults
1172 self.attributes['split_size'] = 20
1173 self.attributes['split_mode'] = 'number_of_parts'
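# Minimal runnable sketch (an assumption for illustration, not part of the
# original module): exercising a few XmlToolSource entry points on an
# in-memory tool definition. Guarded so import-time behaviour is unchanged;
# run via "python -m galaxy.tool_util.parser.xml" in an environment with
# Galaxy installed.
if __name__ == "__main__":  # pragma: no cover
    from xml.etree import ElementTree

    _example_xml = (
        '<tool id="example" name="Example" version="0.1.0" profile="20.09">'
        '<command detect_errors="exit_code">echo "hello"</command>'
        '<inputs><param name="times" type="integer" value="1"/></inputs>'
        '<outputs><data name="out_file" format="txt"/></outputs>'
        '</tool>'
    )
    _source = XmlToolSource(ElementTree.ElementTree(ElementTree.fromstring(_example_xml)))
    print(_source.parse_id(), _source.parse_name(), _source.parse_version())
    print(_source.parse_profile(), _source.parse_strict_shell())
    exit_codes, regexes = _source.parse_stdio()
    print(len(exit_codes), "exit code rules,", len(regexes), "regexes")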