comparison planemo/lib/python3.7/site-packages/gxformat2/abstract.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 """Module for exporting Galaxy workflows to CWL abstract interface."""
2 import argparse
3 import sys
4
5 from gxformat2._scripts import ensure_format2
6 from gxformat2._yaml import ordered_dump, ordered_load
7 from gxformat2.converter import steps_as_list
8 from gxformat2.normalize import ensure_implicit_step_outs, walk_id_list_or_dict
9
# CWL version written as ``cwlVersion`` on exported top-level workflows.
CWL_VERSION = "v1.2.0-dev5"

# Description shown in the command-line help of this script.
SCRIPT_DESCRIPTION = """
This script converts an executable Galaxy workflow (in either format -
Format 2 or native .ga) into an abstract CWL representation.

In order to represent Galaxy tool executions in the Common Workflow Language,
they are serialized as v1.2+ abstract 'Operation' classes.
Because abstract 'Operation' classes are used, the resulting CWL workflow is
not executable - either in Galaxy or by CWL implementations. The resulting CWL
file should be thought of more as a common metadata specification describing
the workflow structure.
"""
23
24
def from_dict(workflow_dict, subworkflow=False):
    """Build the abstract CWL description of a dictified Galaxy workflow.

    :param workflow_dict: workflow as a dictionary; it is passed through
        ``ensure_format2`` before any conversion happens.
    :param subworkflow: when true, the ``cwlVersion`` key is left out of
        the result (used for workflows embedded in a step's ``run``).
    :returns: dictionary with ``class``, optionally ``cwlVersion``, then
        ``inputs``, ``outputs``, ``steps`` and - only if any step asked
        for one - ``requirements``.
    """
    # TODO: pass some sort of flag to ensure_format2 to make sure information
    # about step outputs that may be present in native format is not lost when
    # converting to Format2.
    workflow_dict = ensure_format2(workflow_dict)
    ensure_implicit_step_outs(workflow_dict)

    # Filled in as a side effect of converting the individual steps.
    requirements = {}

    abstract_dict = {'class': 'Workflow'}
    if not subworkflow:
        abstract_dict["cwlVersion"] = CWL_VERSION

    # inputs and outputs already mostly in CWL format...
    format2_inputs = workflow_dict.get("inputs", {})
    abstract_dict["inputs"] = _format2_inputs_to_abstract(format2_inputs)
    format2_outputs = workflow_dict.get("outputs", {})
    abstract_dict["outputs"] = _format2_outputs_to_abstract(format2_outputs)

    # Steps are keyed by their label in the abstract workflow.
    abstract_steps = {}
    for step in steps_as_list(workflow_dict):
        step_label = step["label"]
        abstract_steps[step_label] = _format2_step_to_abstract(step, requirements=requirements)
    abstract_dict["steps"] = abstract_steps

    if requirements:
        abstract_dict['requirements'] = requirements
    return abstract_dict
50
51
def _format2_step_to_abstract(format2_step, requirements):
    """Turn one Format2 step into a CWL 1.2+ abstract workflow step.

    :param format2_step: the Format2 step dictionary.
    :param requirements: dictionary of workflow requirements; gains a
        ``SubworkflowFeatureRequirement`` entry when the step has a ``run``.
    :returns: dictionary with the keys ``run``, ``in`` and ``out``.
    :raises NotImplementedError: if the step's ``run`` has a class other
        than ``GalaxyWorkflow``.
    """
    if "run" in format2_step:
        # probably encountered in subworkflow.
        run_def = format2_step["run"]
        run_class = run_def["class"]
        requirements["SubworkflowFeatureRequirement"] = {}
        if run_class != "GalaxyWorkflow":
            raise NotImplementedError("Unknown runnabled type encountered [%s]" % run_class)
        # preprocess to ensure it has outs - should the original call be recursive?
        ensure_implicit_step_outs(run_def)
        abstract_run = from_dict(run_def, subworkflow=True)
    else:
        # No embedded workflow: describe the step as an opaque operation.
        abstract_run = {
            "class": "Operation",
            "doc": format2_step.get("doc", ""),
            "inputs": {},  # TODO
            "outputs": {},  # TODO
        }

    return {
        "run": abstract_run,
        "in": _format2_in_to_abstract(format2_step.get("in", [])),
        "out": _format2_out_to_abstract(format2_step.get("out", [])),
    }
78
79
80 def _format2_in_to_abstract(in_dict):
81 """Convert Format2 'in' dict for step into CWL abstract 'in' dict."""
82 return in_dict
83
84
85 def _format2_out_to_abstract(out):
86 """Convert Format2 'out' list for step into CWL abstract 'out' list."""
87 cwl_out = []
88 if isinstance(out, dict):
89 for out_name, out_def in out.items():
90 # discard PJA info when converting to abstract CWL
91 cwl_out.append(out_name)
92 else:
93 cwl_out = out
94 return cwl_out
95
96
def _format2_inputs_to_abstract(inputs):
    """Strip Galaxy extensions or namespace them.

    Walks ``inputs`` with ``walk_id_list_or_dict`` and returns a new
    dictionary mapping each input name to its definition with a CWL type.
    A definition that is not a dictionary is taken to be the bare type and
    is wrapped as ``{"type": ...}``. Dictionary definitions are modified in
    place and reused in the result.
    """
    converted = {}

    for name, definition in walk_id_list_or_dict(inputs):
        if not isinstance(definition, dict):
            # Shorthand form: the value itself is the type.
            declared_type = definition
            definition = {"type": declared_type}
        else:
            declared_type = definition.get("type")

        if declared_type == "data":
            definition["type"] = "File"

        _format2_type_to_abstract(definition)

        # Strip off Galaxy extensions
        definition.pop("position", None)
        converted[name] = definition

    return converted
118
119
120 def _format2_type_to_abstract(has_type):
121 format2_type = has_type.pop("type")
122 if format2_type == "data":
123 cwl_type = "File"
124 elif format2_type == "collection":
125 # TODO: handled nested collections, pairs, etc...
126 cwl_type = "File[]"
127 else:
128 cwl_type = format2_type
129 optional = has_type.pop("optional", False)
130 if optional:
131 cwl_type += "?"
132 has_type["type"] = cwl_type
133
134
def _format2_outputs_to_abstract(outputs):
    """Strip Galaxy extensions or namespace them.

    Every output definition yielded by ``walk_id_list_or_dict`` that has no
    ``type`` is given the type ``File``. The definitions are updated in
    place and the ``outputs`` object that was passed in is returned.
    """
    for _, definition in walk_id_list_or_dict(outputs):
        definition.setdefault("type", "File")
    return outputs
141
142
def main(argv=None):
    """Entry point for script to export abstract interface.

    :param argv: command-line arguments without the program name; defaults
        to ``sys.argv[1:]``.
    :returns: ``0`` once the abstract workflow has been written.

    The input path ``-`` reads the workflow from standard input. The output
    path defaults to the input path with ``.abstract.cwl`` appended; an
    explicit output path of ``-`` writes to standard output, mirroring the
    convention already used for the input.
    """
    if argv is None:
        argv = sys.argv[1:]

    args = _parser().parse_args(argv)

    workflow_path = args.input_path
    output_path = args.output_path or (workflow_path + ".abstract.cwl")

    if workflow_path == "-":
        workflow_dict = ordered_load(sys.stdin)
    else:
        with open(workflow_path, "r") as f:
            workflow_dict = ordered_load(f)

    abstract_dict = from_dict(workflow_dict)
    if output_path == "-":
        # Do not create a file literally named "-"; stream the result instead.
        ordered_dump(abstract_dict, sys.stdout)
    else:
        with open(output_path, "w") as f:
            ordered_dump(abstract_dict, f)

    return 0
164
165
def _parser():
    """Build the command-line argument parser for this script.

    :returns: an ``argparse.ArgumentParser`` with a required ``input_path``
        positional argument and an optional ``output_path`` one.
    """
    parser = argparse.ArgumentParser(description=SCRIPT_DESCRIPTION)
    parser.add_argument('input_path', metavar='INPUT', type=str,
                        help='input workflow path (.ga/gxwf.yml)')
    # The help text used to describe this argument as an input path.
    parser.add_argument('output_path', metavar='OUTPUT', type=str, nargs="?",
                        help='output workflow path (.cwl)')
    return parser
173
174
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())


# Names exported by ``from <module> import *``.
__all__ = (
    'main',
    'from_dict',
)