Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/galaxy/tool_util/parser/output_collection_def.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
""" This module defines an abstract class for reasoning about Galaxy's
dataset collection after jobs are finished.
"""
4 | |
5 from galaxy.util import asbool | |
6 from .util import is_dict | |
7 | |
# Default regex for legacy "primary_<id>_<designation>_<visible>_<ext>[_<dbkey>]"
# style output filenames; the named groups feed the discovered dataset's metadata.
DEFAULT_EXTRA_FILENAME_PATTERN = r"primary_DATASET_ID_(?P<designation>[^_]+)_(?P<visible>[^_]+)_(?P<ext>[^_]+)(_(?P<dbkey>[^_]+))?"
# Default sort key and comparator for ordering discovered datasets.
DEFAULT_SORT_BY = "filename"
DEFAULT_SORT_COMP = "lexical"


# XML can describe custom patterns, but these literals describe named
# patterns that will be replaced.
NAMED_PATTERNS = {
    "__default__": DEFAULT_EXTRA_FILENAME_PATTERN,
    "__name__": r"(?P<name>.*)",
    "__designation__": r"(?P<designation>.*)",
    "__name_and_ext__": r"(?P<name>.*)\.(?P<ext>[^\.]+)?",
    "__designation_and_ext__": r"(?P<designation>.*)\.(?P<ext>[^\._]+)?",
}

# Token meaning "inherit the dbkey from the job's input datasets".
INPUT_DBKEY_TOKEN = "__input__"
LEGACY_DEFAULT_DBKEY = None  # don't use __input__ for legacy default collection
25 | |
26 | |
def dataset_collector_descriptions_from_elem(elem, legacy=True):
    """Parse ``<discover_datasets>`` children of ``elem`` into descriptions.

    When no such elements are present and ``legacy`` is set, fall back to
    the implicit default collector used by older tools.
    """
    discover_elems = elem.findall("discover_datasets")
    if discover_elems or not legacy:
        descriptions = [dataset_collection_description(**e.attrib) for e in discover_elems]
    else:
        descriptions = [DEFAULT_DATASET_COLLECTOR_DESCRIPTION]
    return _validate_collectors(descriptions)
36 | |
37 | |
def dataset_collector_descriptions_from_output_dict(as_dict):
    """Build collector descriptions from an output's dictified representation."""
    discover = as_dict.get("discover_datasets", [])
    if is_dict(discover):
        # A single discover_datasets mapping is normalized to a one-element list.
        discover = [discover]
    descriptions = dataset_collector_descriptions_from_list(discover)
    return _validate_collectors(descriptions)
44 | |
45 | |
46 def _validate_collectors(collectors): | |
47 num_discover_dataset_blocks = len(collectors) | |
48 if num_discover_dataset_blocks > 1: | |
49 for collector in collectors: | |
50 if collector.discover_via == "tool_provided_metadata": | |
51 raise Exception("Cannot specify more than one discover dataset condition if any of them specify tool_provided_metadata.") | |
52 | |
53 return collectors | |
54 | |
55 | |
def dataset_collector_descriptions_from_list(discover_datasets_dicts):
    """Build one description per dict of discover_datasets attributes."""
    return [dataset_collection_description(**kwds) for kwds in discover_datasets_dicts]
58 | |
59 | |
def dataset_collection_description(**kwargs):
    """Factory returning the description class matching the discovery mode.

    Raises an Exception when ``pattern``/``sort_by`` attributes are combined
    with tool-provided-metadata discovery, which does not support them.
    """
    from_provided_metadata = asbool(kwargs.get("from_provided_metadata", False))
    default_via = "tool_provided_metadata" if from_provided_metadata else "pattern"
    discover_via = kwargs.get("discover_via", default_via)
    if discover_via != "tool_provided_metadata":
        return FilePatternDatasetCollectionDescription(**kwargs)
    for key in ["pattern", "sort_by"]:
        if kwargs.get(key):
            raise Exception("Cannot specify attribute [%s] if from_provided_metadata is True" % key)
    return ToolProvidedMetadataDatasetCollection(**kwargs)
70 | |
71 | |
class DatasetCollectionDescription:
    """Shared configuration describing how discovered datasets are registered."""

    def __init__(self, **kwargs):
        # dbkey defaults to the token meaning "inherit from the input datasets".
        self.default_dbkey = kwargs.get("dbkey", INPUT_DBKEY_TOKEN)
        default_ext = kwargs.get("ext", None)
        if default_ext is None:
            # "format" is accepted as a synonym for "ext".
            default_ext = kwargs.get("format", None)
        self.default_ext = default_ext
        self.default_visible = asbool(kwargs.get("visible", None))
        self.assign_primary_output = asbool(kwargs.get('assign_primary_output', False))
        self.directory = kwargs.get("directory", None)
        # Pattern-based subclasses may enable recursive directory discovery.
        self.recurse = False

    def to_dict(self):
        """Serialize the description to a plain dictionary."""
        return dict(
            discover_via=self.discover_via,
            dbkey=self.default_dbkey,
            format=self.default_ext,
            visible=self.default_visible,
            assign_primary_output=self.assign_primary_output,
            directory=self.directory,
            recurse=self.recurse,
        )
94 | |
95 | |
class ToolProvidedMetadataDatasetCollection(DatasetCollectionDescription):
    """Description for datasets declared via tool-provided metadata (galaxy.json)."""

    discover_via = "tool_provided_metadata"
99 | |
100 | |
class FilePatternDatasetCollectionDescription(DatasetCollectionDescription):
    """Description for datasets discovered by matching filenames to a regex."""

    discover_via = "pattern"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.recurse = asbool(kwargs.get("recurse", False))
        # Resolve named shortcut patterns (e.g. "__default__") to their regex;
        # anything else is treated as a literal regex pattern.
        raw_pattern = kwargs.get("pattern", "__default__")
        self.pattern = NAMED_PATTERNS.get(raw_pattern, raw_pattern)
        # sort_by syntax: [reverse_][lexical_|numeric_]<key>
        sort_spec = kwargs.get("sort_by", DEFAULT_SORT_BY)
        self.sort_reverse = sort_spec.startswith("reverse_")
        if self.sort_reverse:
            sort_spec = sort_spec[len("reverse_"):]
        if "_" in sort_spec:
            sort_comp, sort_key = sort_spec.split("_", 1)
            assert sort_comp in ["lexical", "numeric"]
        else:
            sort_comp, sort_key = DEFAULT_SORT_COMP, sort_spec
        assert sort_key in [
            "filename",
            "name",
            "designation",
            "dbkey"
        ]
        self.sort_key = sort_key
        self.sort_comp = sort_comp

    def to_dict(self):
        """Serialize, extending the base dictionary with pattern/sort fields."""
        as_dict = super().to_dict()
        as_dict["sort_key"] = self.sort_key
        as_dict["sort_comp"] = self.sort_comp
        as_dict["pattern"] = self.pattern
        as_dict["recurse"] = self.recurse
        return as_dict
141 | |
142 | |
# Collector used when a legacy tool declares no <discover_datasets> elements.
# BUG FIX: the constructor reads the "dbkey" kwarg (DatasetCollectionDescription
# uses kwargs.get("dbkey", INPUT_DBKEY_TOKEN)); the previous "default_dbkey="
# keyword was silently ignored, so the legacy collector got "__input__" —
# exactly what the LEGACY_DEFAULT_DBKEY comment says it must not use.
DEFAULT_DATASET_COLLECTOR_DESCRIPTION = FilePatternDatasetCollectionDescription(
    dbkey=LEGACY_DEFAULT_DBKEY,
)