Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/gxformat2/abstract.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 """Module for exporting Galaxy workflows to CWL abstract interface.""" | |
2 import argparse | |
3 import sys | |
4 | |
5 from gxformat2._scripts import ensure_format2 | |
6 from gxformat2._yaml import ordered_dump, ordered_load | |
7 from gxformat2.converter import steps_as_list | |
8 from gxformat2.normalize import ensure_implicit_step_outs, walk_id_list_or_dict | |
9 | |
# CWL specification version written into the top-level exported workflow.
CWL_VERSION = "v1.2.0-dev5"

# Long description shown by the command-line parser for this script.
SCRIPT_DESCRIPTION = """
This script converts an executable Galaxy workflow (in either format -
Format 2 or native .ga) into an abstract CWL representation.

In order to represent Galaxy tool executions in the Common Workflow Language
workflow language, they are serialized as v1.2+ abstract 'Operation' classes.
Because abstract 'Operation' classes are used, the resulting CWL workflow is
not executable - either in Galaxy or by CWL implementations. The resulting CWL
file should be thought of more as a common metadata specification describing
the workflow structure.
"""
23 | |
24 | |
def from_dict(workflow_dict, subworkflow=False):
    """Build an abstract CWL ``Workflow`` dict from a dictified Galaxy workflow.

    The workflow is passed through ``ensure_format2`` and
    ``ensure_implicit_step_outs`` before conversion. ``cwlVersion`` is only
    emitted when ``subworkflow`` is false, and a ``requirements`` entry is
    only added when at least one step registered a requirement.
    """
    # TODO: pass some sort of flag to ensure_format2 to make sure information
    # about step outputs that may be present in native format is not lost when
    # converting to Format2.
    format2_dict = ensure_format2(workflow_dict)
    ensure_implicit_step_outs(format2_dict)

    # Filled in by the step conversion as a side effect.
    requirements = {}
    abstract_dict = {'class': 'Workflow'}
    if not subworkflow:
        abstract_dict["cwlVersion"] = CWL_VERSION

    # inputs and outputs already mostly in CWL format...
    format2_inputs = format2_dict.get("inputs", {})
    abstract_dict["inputs"] = _format2_inputs_to_abstract(format2_inputs)
    format2_outputs = format2_dict.get("outputs", {})
    abstract_dict["outputs"] = _format2_outputs_to_abstract(format2_outputs)

    abstract_steps = {}
    for step in steps_as_list(format2_dict):
        converted = _format2_step_to_abstract(step, requirements=requirements)
        abstract_steps[step["label"]] = converted
    abstract_dict["steps"] = abstract_steps

    if requirements:
        abstract_dict['requirements'] = requirements
    return abstract_dict
50 | |
51 | |
52 def _format2_step_to_abstract(format2_step, requirements): | |
53 """Convert Format2 step CWL 1.2+ abstract operation.""" | |
54 abstract_step = {} | |
55 if "run" in format2_step: | |
56 # probably encountered in subworkflow. | |
57 format2_run = format2_step["run"] | |
58 format2_run_class = format2_run["class"] | |
59 requirements["SubworkflowFeatureRequirement"] = {} | |
60 if format2_run_class == "GalaxyWorkflow": | |
61 # preprocess to ensure it has outs - should the original call be recursive? | |
62 ensure_implicit_step_outs(format2_run) | |
63 step_run = from_dict(format2_run, subworkflow=True) | |
64 abstract_step["run"] = step_run | |
65 else: | |
66 raise NotImplementedError("Unknown runnabled type encountered [%s]" % format2_run_class) | |
67 else: | |
68 step_run = { | |
69 "class": "Operation", | |
70 "doc": format2_step.get("doc", ""), | |
71 "inputs": {}, # TODO | |
72 "outputs": {}, # TODO | |
73 } | |
74 abstract_step["run"] = step_run | |
75 abstract_step["in"] = _format2_in_to_abstract(format2_step.get("in", [])) | |
76 abstract_step["out"] = _format2_out_to_abstract(format2_step.get("out", [])) | |
77 return abstract_step | |
78 | |
79 | |
80 def _format2_in_to_abstract(in_dict): | |
81 """Convert Format2 'in' dict for step into CWL abstract 'in' dict.""" | |
82 return in_dict | |
83 | |
84 | |
85 def _format2_out_to_abstract(out): | |
86 """Convert Format2 'out' list for step into CWL abstract 'out' list.""" | |
87 cwl_out = [] | |
88 if isinstance(out, dict): | |
89 for out_name, out_def in out.items(): | |
90 # discard PJA info when converting to abstract CWL | |
91 cwl_out.append(out_name) | |
92 else: | |
93 cwl_out = out | |
94 return cwl_out | |
95 | |
96 | |
def _format2_inputs_to_abstract(inputs):
    """Convert Format2 workflow inputs into a dict of abstract CWL inputs.

    Each input may be a dict definition or a bare type. The returned dict
    maps input name to a new definition dict whose ``type`` has been
    translated by ``_format2_type_to_abstract`` and whose Galaxy-specific
    ``position`` key has been removed. The caller's definitions are left
    unmodified.
    """
    abstract_inputs = {}

    for input_name, input_def in walk_id_list_or_dict(inputs):
        if isinstance(input_def, dict):
            # Shallow copy so the pops below do not alter the source workflow.
            abstract_input = dict(input_def)
        else:
            abstract_input = {"type": input_def}

        # An input with no declared type is treated as a plain dataset, so
        # the type conversion never sees a missing key.
        if abstract_input.get("type") is None:
            abstract_input["type"] = "data"

        _format2_type_to_abstract(abstract_input)

        # Strip off Galaxy extensions
        abstract_input.pop("position", None)
        abstract_inputs[input_name] = abstract_input

    return abstract_inputs
118 | |
119 | |
120 def _format2_type_to_abstract(has_type): | |
121 format2_type = has_type.pop("type") | |
122 if format2_type == "data": | |
123 cwl_type = "File" | |
124 elif format2_type == "collection": | |
125 # TODO: handled nested collections, pairs, etc... | |
126 cwl_type = "File[]" | |
127 else: | |
128 cwl_type = format2_type | |
129 optional = has_type.pop("optional", False) | |
130 if optional: | |
131 cwl_type += "?" | |
132 has_type["type"] = cwl_type | |
133 | |
134 | |
def _format2_outputs_to_abstract(outputs):
    """Give every workflow output a ``type``, defaulting to ``File``.

    Output definitions are updated in place and the same ``outputs`` object
    that was passed in is returned.
    """
    for _name, output_def in walk_id_list_or_dict(outputs):
        if "type" in output_def:
            continue
        output_def["type"] = "File"
    return outputs
141 | |
142 | |
def main(argv=None):
    """Run the abstract CWL export from the command line and return 0.

    ``argv`` defaults to ``sys.argv[1:]``. The workflow is read from the
    INPUT path, or from standard input when INPUT is ``-``. The result is
    written to OUTPUT, which defaults to INPUT with ``.abstract.cwl``
    appended (so ``-`` as INPUT with no OUTPUT yields ``-.abstract.cwl``).
    """
    cli_args = sys.argv[1:] if argv is None else argv
    args = _parser().parse_args(cli_args)

    workflow_path = args.input_path
    output_path = args.output_path
    if not output_path:
        output_path = workflow_path + ".abstract.cwl"

    if workflow_path != "-":
        with open(workflow_path, "r") as source:
            workflow_dict = ordered_load(source)
    else:
        workflow_dict = ordered_load(sys.stdin)

    # Convert before opening the destination so a failed conversion does not
    # create or truncate the output file.
    abstract_dict = from_dict(workflow_dict)
    with open(output_path, "w") as sink:
        ordered_dump(abstract_dict, sink)

    return 0
164 | |
165 | |
def _parser():
    """Build the argument parser for the export script.

    The parser takes a required INPUT positional (stored as ``input_path``)
    and an optional OUTPUT positional (stored as ``output_path``, ``None``
    when omitted).
    """
    parser = argparse.ArgumentParser(description=SCRIPT_DESCRIPTION)
    parser.add_argument('input_path', metavar='INPUT', type=str,
                        help='input workflow path (.ga/gxwf.yml)')
    # The help text previously described this argument as an input path.
    parser.add_argument('output_path', metavar='OUTPUT', type=str, nargs="?",
                        help='output workflow path (.cwl)')
    return parser
173 | |
174 | |
# Script entry point: exit the interpreter with main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())


# Names exported by ``from ... import *``.
__all__ = ('main', 'from_dict')