comparison env/lib/python3.9/site-packages/galaxy/tool_util/parser/output_collection_def.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """ This module define an abstract class for reasoning about Galaxy's
2 dataset collection after jobs are finished.
3 """
4
5 from galaxy.util import asbool
6 from .util import is_dict
7
8 DEFAULT_EXTRA_FILENAME_PATTERN = r"primary_DATASET_ID_(?P<designation>[^_]+)_(?P<visible>[^_]+)_(?P<ext>[^_]+)(_(?P<dbkey>[^_]+))?"
9 DEFAULT_SORT_BY = "filename"
10 DEFAULT_SORT_COMP = "lexical"
11
12
13 # XML can describe custom patterns, but these literals describe named
14 # patterns that will be replaced.
15 NAMED_PATTERNS = {
16 "__default__": DEFAULT_EXTRA_FILENAME_PATTERN,
17 "__name__": r"(?P<name>.*)",
18 "__designation__": r"(?P<designation>.*)",
19 "__name_and_ext__": r"(?P<name>.*)\.(?P<ext>[^\.]+)?",
20 "__designation_and_ext__": r"(?P<designation>.*)\.(?P<ext>[^\._]+)?",
21 }
22
23 INPUT_DBKEY_TOKEN = "__input__"
24 LEGACY_DEFAULT_DBKEY = None # don't use __input__ for legacy default collection
25
26
def dataset_collector_descriptions_from_elem(elem, legacy=True):
    """Build dataset collector descriptions from an XML output element.

    Parses every ``<discover_datasets>`` child of ``elem``. When none are
    present and ``legacy`` is set, falls back to the implicit default
    collector description.
    """
    discover_elems = elem.findall("discover_datasets")
    if discover_elems:
        descriptions = [dataset_collection_description(**discover_elem.attrib) for discover_elem in discover_elems]
    elif legacy:
        descriptions = [DEFAULT_DATASET_COLLECTOR_DESCRIPTION]
    else:
        descriptions = []
    return _validate_collectors(descriptions)
36
37
def dataset_collector_descriptions_from_output_dict(as_dict):
    """Build dataset collector descriptions from a dictionary output description.

    The ``discover_datasets`` key may hold either a single mapping or a list
    of mappings; a single mapping is normalized to a one-element list.
    """
    discover_datasets = as_dict.get("discover_datasets", [])
    if is_dict(discover_datasets):
        discover_datasets = [discover_datasets]
    descriptions = dataset_collector_descriptions_from_list(discover_datasets)
    return _validate_collectors(descriptions)
44
45
46 def _validate_collectors(collectors):
47 num_discover_dataset_blocks = len(collectors)
48 if num_discover_dataset_blocks > 1:
49 for collector in collectors:
50 if collector.discover_via == "tool_provided_metadata":
51 raise Exception("Cannot specify more than one discover dataset condition if any of them specify tool_provided_metadata.")
52
53 return collectors
54
55
def dataset_collector_descriptions_from_list(discover_datasets_dicts):
    """Build one collector description per dict of ``discover_datasets`` attributes."""
    # List comprehension instead of list(map(lambda ...)) — same result, idiomatic.
    return [dataset_collection_description(**kwds) for kwds in discover_datasets_dicts]
58
59
def dataset_collection_description(**kwargs):
    """Instantiate the appropriate collector description from keyword attributes.

    Selects tool-provided-metadata discovery when ``discover_via`` (or the
    legacy ``from_provided_metadata`` flag) requests it; otherwise builds a
    file-pattern based description.
    """
    legacy_flag = asbool(kwargs.get("from_provided_metadata", False))
    default_via = "tool_provided_metadata" if legacy_flag else "pattern"
    discover_via = kwargs.get("discover_via", default_via)
    if discover_via != "tool_provided_metadata":
        return FilePatternDatasetCollectionDescription(**kwargs)
    # Pattern/sort attributes only make sense for pattern-based discovery.
    for forbidden in ("pattern", "sort_by"):
        if kwargs.get(forbidden):
            raise Exception("Cannot specify attribute [%s] if from_provided_metadata is True" % forbidden)
    return ToolProvidedMetadataDatasetCollection(**kwargs)
70
71
class DatasetCollectionDescription:
    """Base description of how discovered datasets should be configured.

    Holds the defaults (dbkey, extension, visibility, directory) applied to
    each discovered dataset; concrete subclasses supply ``discover_via``.
    """

    def __init__(self, **kwargs):
        # Default to the __input__ token so the dbkey follows the tool's
        # input datasets unless explicitly overridden.
        self.default_dbkey = kwargs.get("dbkey", INPUT_DBKEY_TOKEN)
        # "ext" wins; "format" is accepted as a synonym when "ext" is absent.
        default_ext = kwargs.get("ext")
        if default_ext is None and "format" in kwargs:
            default_ext = kwargs.get("format")
        self.default_ext = default_ext
        self.default_visible = asbool(kwargs.get("visible", None))
        self.assign_primary_output = asbool(kwargs.get('assign_primary_output', False))
        self.directory = kwargs.get("directory", None)
        self.recurse = False

    def to_dict(self):
        """Serialize the description; ``discover_via`` is provided by the subclass."""
        return {
            'discover_via': self.discover_via,
            'dbkey': self.default_dbkey,
            'format': self.default_ext,
            'visible': self.default_visible,
            'assign_primary_output': self.assign_primary_output,
            'directory': self.directory,
            'recurse': self.recurse,
        }
94
95
class ToolProvidedMetadataDatasetCollection(DatasetCollectionDescription):
    """Collector description whose datasets are discovered via tool provided metadata."""

    discover_via = "tool_provided_metadata"
99
100
class FilePatternDatasetCollectionDescription(DatasetCollectionDescription):
    """Collector description that discovers datasets by matching filename patterns.

    ``pattern`` is a regular expression (or a named shortcut resolved via
    ``NAMED_PATTERNS``) whose groups supply dataset properties such as
    ``designation`` and ``ext``. ``sort_by`` controls ordering of discovered
    datasets and has the shape ``[reverse_][lexical_|numeric_]<key>``.
    """

    discover_via = "pattern"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Resolve named shortcut patterns (e.g. "__default__") to their regex;
        # anything else is taken as a literal pattern.
        pattern = kwargs.get("pattern", "__default__")
        self.pattern = NAMED_PATTERNS.get(pattern, pattern)
        self.recurse = asbool(kwargs.get("recurse", False))
        sort_by = kwargs.get("sort_by", DEFAULT_SORT_BY)
        self.sort_reverse = sort_by.startswith("reverse_")
        if self.sort_reverse:
            sort_by = sort_by[len("reverse_"):]
        if "_" in sort_by:
            sort_comp, sort_by = sort_by.split("_", 1)
        else:
            sort_comp = DEFAULT_SORT_COMP
        # Validate with explicit raises (matching this module's error style)
        # instead of assert, which is stripped when Python runs with -O.
        if sort_comp not in ("lexical", "numeric"):
            raise Exception("Unknown sort comparator [%s] in sort_by [%s]" % (sort_comp, kwargs.get("sort_by")))
        if sort_by not in ("filename", "name", "designation", "dbkey"):
            raise Exception("Unknown sort key [%s] in sort_by [%s]" % (sort_by, kwargs.get("sort_by")))
        self.sort_key = sort_by
        self.sort_comp = sort_comp

    def to_dict(self):
        """Serialize, extending the base description with pattern/sort settings."""
        as_dict = super().to_dict()
        as_dict.update({
            "sort_key": self.sort_key,
            "sort_comp": self.sort_comp,
            "pattern": self.pattern,
            "recurse": self.recurse,
        })
        return as_dict
141
142
# Implicit collector applied to tools that predate explicit discover_datasets
# declarations. Pass "dbkey" — the keyword DatasetCollectionDescription.__init__
# actually reads; the previous "default_dbkey" keyword was silently ignored,
# leaving the legacy collector with the __input__ dbkey token contrary to the
# stated intent of LEGACY_DEFAULT_DBKEY.
DEFAULT_DATASET_COLLECTOR_DESCRIPTION = FilePatternDatasetCollectionDescription(
    dbkey=LEGACY_DEFAULT_DBKEY,
)