Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/galaxy/tool_util/cwl/representation.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """ This module is responsible for converting between Galaxy's tool | |
2 input description and the CWL description for a job json. """ | |
3 | |
4 import json | |
5 import logging | |
6 import os | |
7 from enum import Enum | |
8 from typing import Any, NamedTuple, Optional | |
9 | |
10 from galaxy.exceptions import RequestParameterInvalidException | |
11 from galaxy.util import safe_makedirs, string_as_bool | |
12 from .util import set_basename_and_derived_properties | |
13 | |
14 | |
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Sentinel meaning "no entry for this input in the job dictionary"; it lets
# to_galaxy_parameters tell a missing key apart from an explicit None value.
NOT_PRESENT = object()

# Sentinel used as the galaxy_param_type of a type representation that has no
# corresponding Galaxy input (the "null" representation).
NO_GALAXY_INPUT = object()
20 | |
21 | |
class INPUT_TYPE(str, Enum):
    """Galaxy parameter types that a CWL input may be mapped onto.

    Members are also ``str`` instances, so they compare equal to their raw
    string value (``INPUT_TYPE.DATA == "data"``).
    """

    DATA = "data"
    INTEGER = "integer"
    FLOAT = "float"
    TEXT = "text"
    BOOLEAN = "boolean"
    SELECT = "select"
    FIELD = "field"
    CONDITIONAL = "conditional"
    # The misspelled name is kept as the canonical member because existing
    # code refers to it.
    DATA_COLLECTON = "data_collection"
    # Correctly spelled alias; shares its value with DATA_COLLECTON, so it is
    # the same member object and does not show up when iterating the enum.
    DATA_COLLECTION = "data_collection"
32 | |
33 | |
# There are two approaches to mapping CWL tool state to Galaxy tool state:
# one is to map CWL types to compound Galaxy tool parameter combinations
# with conditionals and the other is to use a new Galaxy parameter type that
# allows unions, optional specifications, etc.... The problem with the former
# is that it doesn't work with the workflow parameters for instance and is
# very complex on the backend. The problem with the latter is that the GUI
# for this parameter type is currently undefined.
USE_FIELD_TYPES = True

# There are two approaches to mapping CWL workflow inputs to Galaxy workflow
# steps. The first is to simply map everything to expressions and stick them into
# files and use data inputs - the second is to use parameter_input steps with
# field types. We are dispatching on USE_FIELD_TYPES for now to choose, but
# the two settings may diverge later.
# There are open issues with each approach:
# - Mapping everything to files makes the GUI harder to imagine but the backend
#   easier to manage in some ways.
USE_STEP_PARAMETERS = USE_FIELD_TYPES
52 | |
53 | |
class TypeRepresentation(NamedTuple):
    """One way of representing a CWL-typed value as Galaxy tool state."""

    # Identifier of the representation (e.g. "file", "integer", "field").
    name: str
    # An INPUT_TYPE member, or the NO_GALAXY_INPUT sentinel.
    galaxy_param_type: Any
    # Human-readable description of the representation.
    label: str
    # Galaxy collection type for collection-backed representations, else None.
    collection_type: Optional[str]

    @property
    def uses_param(self):
        """Whether this representation is backed by a Galaxy parameter."""
        if self.galaxy_param_type is NO_GALAXY_INPUT:
            return False
        return True
63 | |
64 | |
# Every known representation, as (name, galaxy_param_type, label,
# collection_type) rows. Order matters: lookups that filter this list return
# matches in this order, and the last row must stay the "field" entry.
TYPE_REPRESENTATIONS = [
    TypeRepresentation(*row)
    for row in (
        ("null", NO_GALAXY_INPUT, "no input", None),
        ("integer", INPUT_TYPE.INTEGER, "an integer", None),
        ("float", INPUT_TYPE.FLOAT, "a decimal number", None),
        ("double", INPUT_TYPE.FLOAT, "a decimal number", None),
        ("file", INPUT_TYPE.DATA, "a dataset", None),
        ("directory", INPUT_TYPE.DATA, "a directory", None),
        ("boolean", INPUT_TYPE.BOOLEAN, "a boolean", None),
        ("text", INPUT_TYPE.TEXT, "a simple text field", None),
        ("record", INPUT_TYPE.DATA_COLLECTON, "record as a dataset collection", "record"),
        ("json", INPUT_TYPE.TEXT, "arbitrary JSON structure", None),
        ("array", INPUT_TYPE.DATA_COLLECTON, "as a dataset list", "list"),
        ("enum", INPUT_TYPE.TEXT, "enum value", None),  # TODO: make this a select...
        ("field", INPUT_TYPE.FIELD, "arbitrary JSON structure", None),
    )
]
# The generic "field" representation (the final entry of the table above).
FIELD_TYPE_REPRESENTATION = TYPE_REPRESENTATIONS[-1]
81 | |
# Maps a CWL type name to the names of the type representations that can hold
# it. Only the "Any" entry and the presence of "enum"/"double" depend on
# USE_FIELD_TYPES; everything else is shared between the two modes.
CWL_TYPE_TO_REPRESENTATIONS = {
    "Any": ["field"] if USE_FIELD_TYPES else ["integer", "float", "file", "boolean", "text", "record", "json"],
    "array": ["array"],
    "string": ["text"],
    "boolean": ["boolean"],
    "int": ["integer"],
    "float": ["float"],
    "File": ["file"],
    "Directory": ["directory"],
    "null": ["null"],
    "record": ["record"],
}
if USE_FIELD_TYPES:
    # These two CWL types are only mapped when field types are in use.
    CWL_TYPE_TO_REPRESENTATIONS["enum"] = ["enum"]
    CWL_TYPE_TO_REPRESENTATIONS["double"] = ["double"]
110 | |
111 | |
def type_representation_from_name(type_representation_name):
    """Return the entry of TYPE_REPRESENTATIONS with the given ``name``.

    Raises:
        AssertionError: if no known type representation has that name.
    """
    for type_representation in TYPE_REPRESENTATIONS:
        if type_representation.name == type_representation_name:
            return type_representation

    # Raised explicitly instead of ``assert False``: assert statements are
    # removed when Python runs with -O, which would make an unknown name
    # silently return None. The exception type is unchanged for callers.
    raise AssertionError(f"Unknown type representation name [{type_representation_name}]")
118 | |
119 | |
def type_descriptions_for_field_types(field_types):
    """Return the type representations usable for any of ``field_types``.

    Each element is a CWL type name, or a dict whose truthy "type" entry is
    used in its place. The result follows the order of TYPE_REPRESENTATIONS
    and contains each representation at most once.

    Raises:
        Exception: if a field type is unhashable or has no known mapping.
    """
    wanted_names = set()
    for field_type in field_types:
        if isinstance(field_type, dict) and field_type.get("type"):
            field_type = field_type.get("type")

        try:
            names_for_type = CWL_TYPE_TO_REPRESENTATIONS.get(field_type)
        except TypeError:
            # Unhashable field types (e.g. a list or dict) cannot be looked up.
            raise Exception("Failed to convert field_type %s" % field_type)
        if names_for_type is None:
            raise Exception("Failed to convert type %s" % field_type)
        wanted_names.update(names_for_type)

    return [candidate for candidate in TYPE_REPRESENTATIONS if candidate.name in wanted_names]
138 | |
139 | |
def dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper):
    """Convert a wrapped Galaxy dataset into its CWL job JSON value.

    - "expression.json" datasets yield the JSON parsed from the dataset file.
    - "directory" datasets are delegated to dataset_wrapper_to_directory_json.
    - Anything else yields a CWL ``File`` object. When the dataset has a
      ``__secondary_files__`` directory under its extra files path, the
      dataset and each secondary file are symlinked into ``inputs_dir`` and
      the File object points at the symlinked copies.
    """
    if dataset_wrapper.ext == "expression.json":
        with open(dataset_wrapper.file_name) as handle:
            return json.load(handle)

    if dataset_wrapper.ext == "directory":
        return dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper)

    secondary_files_path = os.path.join(dataset_wrapper.extra_files_path, "__secondary_files__")
    location = str(dataset_wrapper)
    file_json = {"class": "File"}

    if os.path.exists(secondary_files_path):
        safe_makedirs(inputs_dir)
        staged_path = os.path.join(inputs_dir, os.path.basename(location))
        os.symlink(location, staged_path)

        secondary_files = []
        for secondary_file_name in os.listdir(secondary_files_path):
            secondary_file_path = os.path.join(secondary_files_path, secondary_file_name)
            target = os.path.join(inputs_dir, secondary_file_name)
            log.info(f"linking [{secondary_file_path}] to [{target}]")
            os.symlink(secondary_file_path, target)
            # Resolve links first so a symlinked directory is still a Directory.
            if os.path.isdir(os.path.realpath(secondary_file_path)):
                cwl_class = "Directory"
            else:
                cwl_class = "File"
            secondary_files.append({"class": cwl_class, "location": target})

        file_json["secondaryFiles"] = secondary_files
        # From here on the File refers to the symlink next to its secondary files.
        location = staged_path

    file_json["location"] = location

    # Verify it isn't a NoneDataset
    if dataset_wrapper.unsanitized:
        file_json["size"] = int(dataset_wrapper.get_size())

    basename = str(dataset_wrapper.created_from_basename or dataset_wrapper.name)
    set_basename_and_derived_properties(file_json, basename)
    return file_json
178 | |
179 | |
def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper):
    """Build the CWL ``Directory`` JSON object for a "directory" dataset.

    The directory name is the dataset's ``created_from_basename`` (falling
    back to ``name``) with its last extension removed; ``location`` is the
    dataset's extra files path. ``archive_location`` is None when the
    underlying file name cannot be read. ``inputs_dir`` is accepted for
    signature parity with dataset_wrapper_to_file_json and is not used.
    """
    assert dataset_wrapper.ext == "directory"

    # The archive file name is assumed to contain the directory name.
    archive_name = str(dataset_wrapper.created_from_basename or dataset_wrapper.name)
    root, extension = os.path.splitext(archive_name)

    # Best effort: any failure to reach the archive path leaves it as None.
    try:
        archive_location = dataset_wrapper.unsanitized.file_name
    except Exception:
        archive_location = None

    return {
        "location": dataset_wrapper.extra_files_path,
        "class": "Directory",
        "name": root,
        "archive_location": archive_location,
        "archive_nameext": extension,
        "archive_nameroot": root,
    }
202 | |
203 | |
def collection_wrapper_to_array(inputs_dir, wrapped_value):
    """Convert each element of a wrapped collection with dataset_wrapper_to_file_json.

    Returns the converted values as a list, in iteration order.
    """
    return [dataset_wrapper_to_file_json(inputs_dir, element) for element in wrapped_value]
209 | |
210 | |
def collection_wrapper_to_record(inputs_dir, wrapped_value):
    """Convert each value of a wrapped record with dataset_wrapper_to_file_json.

    Returns a dict with the same keys as ``wrapped_value.items()`` yields.
    """
    return {
        field_name: dataset_wrapper_to_file_json(inputs_dir, element)
        for field_name, element in wrapped_value.items()
    }
216 | |
217 | |
def to_cwl_job(tool, param_dict, local_working_directory):
    """Build the CWL job JSON dictionary from Galaxy tool state.

    tool is Galaxy's representation of the tool and param_dict is the
    parameter dictionary with wrapped values.

    Returns a dict mapping input names to plain CWL values (numbers, strings,
    booleans, parsed JSON, or File/Directory objects). ``inputs_dir``
    (``<local_working_directory>/_inputs``) is handed to the dataset helpers,
    which symlink files into it when a dataset has secondary files.
    """
    tool_proxy = tool._cwl_tool_proxy
    input_fields = tool_proxy.input_fields()
    inputs = tool.inputs
    input_json = {}

    inputs_dir = os.path.join(local_working_directory, "_inputs")

    def simple_value(input, param_dict_value, type_representation_name=None):
        # Convert one wrapped Galaxy value into its CWL value, dispatching on
        # the named type representation. ``input`` is not used in the body.
        type_representation = type_representation_from_name(type_representation_name)
        # Hmm... cwl_type isn't really the cwl type in every case,
        # like in the case of json for instance.

        if type_representation.galaxy_param_type == NO_GALAXY_INPUT:
            assert param_dict_value is None
            return None

        if type_representation.name == "file":
            dataset_wrapper = param_dict_value
            return dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper)
        elif type_representation.name == "directory":
            dataset_wrapper = param_dict_value
            return dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper)
        elif type_representation.name == "integer":
            return int(str(param_dict_value))
        elif type_representation.name == "long":
            # NOTE(review): no entry named "long" exists in TYPE_REPRESENTATIONS,
            # so this branch looks unreachable - confirm.
            return int(str(param_dict_value))
        elif type_representation.name in ["float", "double"]:
            return float(str(param_dict_value))
        elif type_representation.name == "boolean":
            return string_as_bool(param_dict_value)
        elif type_representation.name == "text":
            return str(param_dict_value)
        elif type_representation.name == "enum":
            return str(param_dict_value)
        elif type_representation.name == "json":
            raw_value = param_dict_value.value
            return json.loads(raw_value)
        elif type_representation.name == "field":
            if param_dict_value is None:
                return None
            if hasattr(param_dict_value, "value"):
                # Is InputValueWrapper
                rval = param_dict_value.value
                if isinstance(rval, dict) and "src" in rval and rval["src"] == "json":
                    # needed for wf_step_connect_undeclared_param, so non-file defaults?
                    return rval["value"]
                return rval
            elif not param_dict_value.is_collection:
                # Is DatasetFilenameWrapper
                return dataset_wrapper_to_file_json(inputs_dir, param_dict_value)
            else:
                # Is DatasetCollectionWrapper
                hdca_wrapper = param_dict_value
                # NOTE(review): the "list" test reads collection_type directly
                # while the "record" test reads collection_type.collection_type;
                # one of the two is presumably wrong - verify against the wrapper.
                if hdca_wrapper.collection_type == "list":
                    # TODO: generalize to lists of lists and lists of non-files...
                    return collection_wrapper_to_array(inputs_dir, hdca_wrapper)
                elif hdca_wrapper.collection_type.collection_type == "record":
                    return collection_wrapper_to_record(inputs_dir, hdca_wrapper)
                # Any other collection type falls through and returns None.

        elif type_representation.name == "array":
            # TODO: generalize to lists of lists and lists of non-files...
            return collection_wrapper_to_array(inputs_dir, param_dict_value)
        elif type_representation.name == "record":
            return collection_wrapper_to_record(inputs_dir, param_dict_value)
        else:
            return str(param_dict_value)

    for input_name, input in inputs.items():
        if input.type == "repeat":
            # The repeat is named "<cwl name>_repeat" and wraps a single input.
            only_input = next(iter(input.inputs.values()))
            array_value = []
            for instance in param_dict[input_name]:
                # NOTE(review): no type representation name is passed here, so
                # simple_value looks up the name None, which
                # type_representation_from_name rejects - confirm whether this
                # branch is still exercised.
                array_value.append(simple_value(only_input, instance[input_name[:-len("_repeat")]]))
            input_json[input_name[:-len("_repeat")]] = array_value
        elif input.type == "conditional":
            assert input_name in param_dict, f"No value for {input_name} in {param_dict}"
            # The selected case names the type representation of the value.
            current_case = param_dict[input_name]["_cwl__type_"]
            if str(current_case) != "null":  # str because it is a wrapped...
                case_index = input.get_current_case(current_case)
                case_input = input.cases[case_index].inputs["_cwl__value_"]
                case_value = param_dict[input_name]["_cwl__value_"]
                input_json[input_name] = simple_value(case_input, case_value, current_case)
            # A "null" case leaves the input out of the job JSON entirely.
        else:
            # Find the CWL field declaration for this input; the last field
            # with a matching name wins.
            matched_field = None
            for field in input_fields:
                if field["name"] == input_name:
                    matched_field = field
            # NOTE(review): matched_field stays None when no field matches, and
            # field_to_field_type subscripts it immediately - confirm that a
            # match is guaranteed by the caller.
            field_type = field_to_field_type(matched_field)
            if isinstance(field_type, list):
                # A multi-type (union) field is represented with the generic
                # "field" representation.
                assert USE_FIELD_TYPES
                type_descriptions = [FIELD_TYPE_REPRESENTATION]
            else:
                type_descriptions = type_descriptions_for_field_types([field_type])
            assert len(type_descriptions) == 1
            type_description_name = type_descriptions[0].name
            input_json[input_name] = simple_value(input, param_dict[input_name], type_description_name)

    log.debug("Galaxy Tool State is CWL State is %s" % input_json)
    return input_json
321 | |
322 | |
def to_galaxy_parameters(tool, as_dict):
    """Translate a Galaxified CWL job into a flat Galaxy tool request dict.

    Tool is Galaxy's representation of the tool and as_dict is a Galaxified
    representation of the input json (no paths, HDA references for instance).

    Returns:
        dict mapping Galaxy request keys to values. Conditional inputs emit
        ``<name>|_cwl__type_`` and, unless the type is "null",
        ``<name>|_cwl__value_``. Repeat inputs emit one indexed key per
        element. Other inputs missing from ``as_dict`` are skipped.

    Raises:
        RequestParameterInvalidException: if a conditional input has no value
            and no "null" case, or its value matches none of the cases.
    """
    inputs = tool.inputs
    galaxy_request = {}

    def from_simple_value(input, param_dict_value, type_representation_name=None):
        # Only the "json" representation needs encoding; every other value is
        # passed through unchanged. ``input`` is not used.
        if type_representation_name == "json":
            return json.dumps(param_dict_value)
        else:
            return param_dict_value

    for input_name, input in inputs.items():
        as_dict_value = as_dict.get(input_name, NOT_PRESENT)
        galaxy_input_type = input.type

        if galaxy_input_type == "repeat":
            if input_name not in as_dict:
                continue

            only_input = next(iter(input.inputs.values()))
            for index, value in enumerate(as_dict_value):
                # Each element gets its own indexed key. The index used to be
                # hard-coded to 0, so every element overwrote the previous one
                # and only the last value reached the request.
                key = f"{input_name}_repeat_{index}|{only_input.name}"
                galaxy_value = from_simple_value(only_input, value)
                galaxy_request[key] = galaxy_value
        elif galaxy_input_type == "conditional":
            case_strings = input.case_strings
            value_missing = as_dict_value is NOT_PRESENT or as_dict_value is None
            # Pick the conditional case from the Python type of the value.
            # Order matters: bool is tested before int because bool is a
            # subclass of int.
            # TODO: less crazy handling of defaults...
            if value_missing and "null" in case_strings:
                type_representation_name = "null"
            elif value_missing:
                raise RequestParameterInvalidException(
                    "Cannot translate CWL datatype - value [{}] of type [{}] with case_strings [{}]. Non-null property must be set.".format(
                        as_dict_value, type(as_dict_value), case_strings
                    )
                )
            elif isinstance(as_dict_value, bool) and "boolean" in case_strings:
                type_representation_name = "boolean"
            elif isinstance(as_dict_value, int) and "integer" in case_strings:
                type_representation_name = "integer"
            elif isinstance(as_dict_value, int) and "long" in case_strings:
                type_representation_name = "long"
            elif isinstance(as_dict_value, (int, float)) and "float" in case_strings:
                type_representation_name = "float"
            elif isinstance(as_dict_value, (int, float)) and "double" in case_strings:
                type_representation_name = "double"
            elif isinstance(as_dict_value, str) and "string" in case_strings:
                # NOTE(review): the text representation is named "text" in
                # TYPE_REPRESENTATIONS; confirm "string" is a real case string.
                type_representation_name = "string"
            elif isinstance(as_dict_value, dict) and "src" in as_dict_value and "id" in as_dict_value and "file" in case_strings:
                type_representation_name = "file"
            elif isinstance(as_dict_value, dict) and "src" in as_dict_value and "id" in as_dict_value and "directory" in case_strings:
                # TODO: can't disambiguate with above if both are available...
                type_representation_name = "directory"
            elif "field" in case_strings:
                type_representation_name = "field"
            elif "json" in case_strings and as_dict_value is not None:
                type_representation_name = "json"
            else:
                raise RequestParameterInvalidException(
                    "Cannot translate CWL datatype - value [{}] of type [{}] with case_strings [{}].".format(
                        as_dict_value, type(as_dict_value), case_strings
                    )
                )
            galaxy_request["%s|_cwl__type_" % input_name] = type_representation_name
            if type_representation_name != "null":
                current_case_index = input.get_current_case(type_representation_name)
                current_case_inputs = input.cases[current_case_index].inputs
                current_case_input = current_case_inputs["_cwl__value_"]
                galaxy_value = from_simple_value(current_case_input, as_dict_value, type_representation_name)
                galaxy_request["%s|_cwl__value_" % input_name] = galaxy_value
        elif as_dict_value is NOT_PRESENT:
            continue
        else:
            galaxy_value = from_simple_value(input, as_dict_value)
            galaxy_request[input_name] = galaxy_value

    # Lazy %-style argument: the message is only formatted if it is emitted.
    log.info("Converted galaxy_request is %s", galaxy_request)
    return galaxy_request
402 | |
403 | |
def field_to_field_type(field):
    """Return the effective CWL type of a field declaration.

    A dict-valued ``field["type"]`` is unwrapped to its own "type" entry. A
    single-element type list collapses to that element; a longer list is
    returned unchanged.

    Raises:
        Exception: if the type is an empty list.
    """
    declared = field["type"]
    if isinstance(declared, dict):
        declared = declared["type"]

    if not isinstance(declared, list):
        return declared

    if not declared:
        raise Exception("Zero-length type list encountered, invalid CWL?")
    return declared[0] if len(declared) == 1 else declared