comparison gmql_queries_composer.py @ 0:a80c93182db3 draft default tip

planemo upload for repository https://github.com/lu-brn/gmql-galaxy commit 953ee36ceda5814dc9baa03427bc0eb4ee2e93bd-dirty
author geco-team
date Tue, 26 Jun 2018 09:08:06 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a80c93182db3
1 #!/usr/bin/env python
2 # --------------------------------------------------------------------------------
3 # GMQL Queries Compositor
4 # --------------------------------------------------------------------------------
5 # Luana Brancato, luana.brancato@mail.polimi.it
6 # --------------------------------------------------------------------------------
7
8 import os, sys, argparse, json
9 from itertools import chain
10 from gmql_queries_statements import *
11 from gmql_rest_queries import compile_query, run_query, check_input
12 from gmql_rest_datasets import list_datasets
13 from gmql_queries_constants import *
14
def read_query(query_data):
    """Read a query description from a JSON parameters file and build a query dict.

    :param query_data: path to the JSON file produced by the Galaxy tool form
    :return: dict with keys 'name' and 'statements', plus 'out_format' and
             'importFlag' when the user asked to materialize the final result
             and those options are present
    """

    query = dict()

    with open(query_data, 'r') as f_in:
        qd = json.load(f_in)

    query.update(name=qd['query_name'])

    # Build one statement tuple per operation. A real list (not a lazy map
    # object) is required: we index it and append to it below.
    statements = [read_statement(x['operation']) for x in qd['operations']]

    # If the user asked to materialize the final result, add a MATERIALIZE
    # statement for the last variable defined.
    if qd['materialize']['materialize_result']:
        var = statements[-1][0].variables['output']
        mat_stm = Materialize(qd['materialize']['file_name'], var)
        statements.append((mat_stm,))

        # Also save info about the desired output format (if available)
        out_format = qd['materialize']['choose_op'].get('out_format', None)
        if out_format:
            query.update(out_format=out_format)

        # Check if the user wants to import results into Galaxy already
        importFlag = qd['materialize']['choose_op'].get('import', None)
        if importFlag is not None:
            query.update(importFlag=importFlag)

    # Add the statements to the query, flattening the per-operation tuples
    # (a tuple may also carry an intermediate materialize statement).
    query.update(statements=list(chain.from_iterable(statements)))

    return query
53
54
def read_statement(x):
    """Build the statement object(s) for a single operation description.

    :param x: dict describing one operation (from the query JSON)
    :return: a tuple holding the statement, followed by a Materialize
             statement if the user asked to materialize this intermediate
             result
    :raise ValueError: if the operator keyword is not a known GMQL operation
    """

    op = x['operator']

    # Dispatch table: operator keyword -> builder function. This replaces the
    # original if-chain, where an unknown operator left 'stm' undefined and
    # surfaced later as a confusing NameError.
    builders = {
        'SELECT': create_select,
        'MAP': create_map,
        'ORDER': create_order,
        'JOIN': create_join,
        'PROJECT': create_project,
        'COVER': create_cover,
        'EXTEND': create_extend,
        'GROUP': create_group,
        'MERGE': create_merge,
        'UNION': create_union,
        'DIFFERENCE': create_difference,
    }

    builder = builders.get(op)
    if builder is None:
        raise ValueError('Unknown GMQL operator: %s' % op)
    stm = builder(x)

    # If the user asked to materialize the current statement, pair it with a
    # MATERIALIZE statement; otherwise return only the current statement.
    if x['m_stm']['materialize_stm']:
        mat_stm = Materialize(x['m_stm']['file_name'], stm.variables['output'])
        return (stm, mat_stm)
    else:
        return (stm,)
91
def create_project(x):
    """Build a Project statement from its JSON description.

    Sets the region fields and metadata attributes to keep or exclude, and
    any new region/metadata attribute definitions.
    """

    stm = Project()

    # Set output and input variables
    stm.set_output_var(x['output_var'])
    stm.set_input_var(x['input_var'])

    # Region fields to keep (or to exclude, when the flag is not 'keep')
    reg_att = x['region_att']['allbut']
    if reg_att['allbut_flag'] == 'keep':
        r_fields = reg_att.get('list_keep', None)
        # Only if the list exists and it is not empty
        if r_fields:
            r_fields = [f.get('attribute') for f in r_fields]
            stm.set_regions(AttributesList(r_fields))
    else:
        r_fields = reg_att.get('list_exclude', None)
        if r_fields:
            r_fields = [f.get('attribute') for f in r_fields]
            stm.set_regions(AttributesList(r_fields), type='exclude')

    # Similarly for metadata attributes to keep or exclude
    meta_att = x['meta_att']['allbut']
    if meta_att['allbut_flag'] == 'keep':
        m_atts = meta_att.get('list_keep', None)
        if m_atts:
            m_atts = [a.get('attribute') for a in m_atts]
            stm.set_metadata(AttributesList(m_atts))
    else:
        m_atts = meta_att.get('list_exclude', None)
        if m_atts:
            m_atts = [a.get('attribute') for a in m_atts]
            stm.set_metadata(AttributesList(m_atts), type='exclude')

    # New region fields definitions. The {} default keeps a missing section
    # from raising AttributeError (the original chained .get() calls did).
    pnr = x.get('project_new_regions', {}).get('new_region_att', None)
    if pnr:
        f_defs = [_project_get_new((d.get('new_name'), d.get('gen_function')))
                  for d in pnr]
        stm.set_new_regions(f_defs)

    # New metadata attributes definitions
    pnm = x.get('project_new_meta', {}).get('new_meta_att', None)
    if pnm:
        f_defs = [_project_get_new((d.get('new_name'), d.get('gen_function')))
                  for d in pnm]
        stm.set_new_metadata(f_defs)

    return stm
149
def _project_get_new(nr):
    """Translate a (new_name, gen_function) pair into a ProjectGenerator.

    The gen_type keyword selects how the new attribute is produced; an
    unrecognized keyword yields an empty string, as before.
    """

    new_name, gen = nr
    gen_type = gen.get('gen_type')

    # The gen_type sets below are disjoint, so early returns are equivalent
    # to the original sequence of independent if statements.
    if gen_type in ('aggregate', 'SQRT', 'NULL'):
        return ProjectGenerator(new_name, RegFunction(gen.get('function')), gen.get('arg'))

    if gen_type == 'arithmetic':
        return ProjectGenerator(new_name, RegFunction.MATH, gen.get('expression'))

    if gen_type in ('rename', 'fixed'):
        return ProjectGenerator(new_name, gen_type, gen.get('arg'))

    if gen_type == 'META':
        return ProjectGenerator(new_name, RegFunction.META, (gen.get('arg'), gen.get('att_type')))

    return ''
170
def create_group(x):
    """Build a Group statement from its JSON description.

    Handles the default grouping as well as additional metadata/region
    grouping attributes and new attribute definitions.
    """

    stm = Group()

    # Set output and input variables
    stm.set_output_var(x['output_var'])
    stm.set_input_var(x['input_var'])

    # If group_type is set to default, we're sure there are no additional
    # conditions and we can return already (GROUP works by default rules).
    add_grouping = x['add_grouping']
    if add_grouping['group_type'] == 'default':
        return stm

    # Additional metadata grouping attributes, along with eventual new
    # attribute definitions.
    metadata = add_grouping.get('metadata', None)
    if metadata:
        group_atts = [(a['j_att'], a['metajoin_match'])
                      for a in metadata['group_meta_atts']]
        stm.set_group_meta(GroupbyClause(group_atts))

        # New metadata definitions. List comprehensions replace map/filter:
        # the original called .__len__() on lazy iterators, which fails on
        # Python 3.
        add_flag = metadata.get('meta_agg').get('meta_agg_flag')
        if add_flag:
            nm_data = metadata.get('meta_agg', None)
            if nm_data:
                new_atts = [MetaAttributesGenerator(newAttribute=a['new_name'],
                                                    function=RegFunction(a['function']),
                                                    argRegion=a['argument'])
                            for a in nm_data['new_attributes']]
                if new_atts:
                    stm.set_new_metadata(new_atts)

    # Additional region grouping attributes; note that the list may be empty.
    regions = add_grouping.get('regions', None)
    if regions:
        r_group_atts = [a['attribute']
                        for a in regions['group_regions_atts'] if a['attribute']]
        if r_group_atts:
            stm.set_group_regions(AttributesList(r_group_atts))

        nr_data = regions.get('new_attributes', None)
        if nr_data:
            # Keep only rows where name, function and argument are all given
            r_new_atts = [RegionGenerator(newRegion=a['new_name'],
                                          function=RegFunction(a['function']),
                                          argRegion=a['argument'])
                          for a in nr_data
                          if a['new_name'] and (a['function'] != 'None') and a['argument']]
            if r_new_atts:
                stm.set_new_regions(r_new_atts)

    return stm
230
def create_merge(x):
    """Build a Merge statement from its JSON description."""

    stm = Merge()

    # Set output and input variables
    stm.set_output_var(x['output_var'])
    stm.set_input_var(x['input_var'])

    # Additional grouping options, if any (plain truthiness replaces the
    # unidiomatic .__len__() > 0 check).
    group_atts = x['groupby']['group_meta_atts']
    if group_atts:
        group_atts = [(a['j_att'], a['metajoin_match']) for a in group_atts]
        stm.set_groupy_clause(GroupbyClause(group_atts))

    return stm
247
def create_union(x):
    """Build a Union statement from its JSON description."""

    stm = Union()

    # Output variable plus the two input datasets to unite
    stm.set_output_var(x['output_var'])
    stm.set_first_var(x['input_var_first'])
    stm.set_second_var(x['input_var_second'])

    return stm
257
def create_difference(x):
    """Build a Difference statement from its JSON description."""

    stm = Difference()

    # Set output and input variables
    stm.set_output_var(x['output_var'])
    stm.set_reference_var(x['input_var_reference'])
    stm.set_negative_var(x['input_var_negative'])

    # Check if the exact flag is set
    if x['exact_flag'] is True:
        stm.set_exact()

    # Joinby attributes, if any (truthiness replaces .__len__() > 0)
    joinby_atts = x['joinby']['group_meta_atts']
    if joinby_atts:
        joinby_atts = [(a['j_att'], a['metajoin_match']) for a in joinby_atts]
        stm.set_joinby_clause(JoinbyClause(joinby_atts))

    return stm
279
def create_extend(x):
    """Build an Extend statement from its JSON description."""

    stm = Extend()

    # Set output and input variables
    stm.set_output_var(x['output_var'])
    stm.set_input_var(x['input_var'])

    # New metadata attributes definitions. A list comprehension replaces the
    # lazy map so the statement object receives a real, re-iterable list.
    data = x['new_metadata_attributes']['new_attributes']
    new_atts = [MetaAttributesGenerator(newAttribute=a['new_name'],
                                        function=RegFunction(a['function']),
                                        argRegion=a['argument'])
                for a in data]

    stm.set_new_attributes(new_atts)

    return stm
298
299
def create_join(x):
    """Build a Join statement from its JSON description.

    Sets the genomic (distance) and/or equi (attribute) conditions, the
    output preference, and any joinby clause.
    """

    stm = Join()

    # Set output, anchor and experiment variables
    stm.set_output_var(x['output_var'])
    stm.set_anchor_var(x['input_var_anchor'])
    stm.set_experiment_var(x['input_var_experiment'])

    # Conditions over regions distances and attributes values. 'both' takes
    # both kinds, exactly as the original three separate branches did.
    conds = x['conditions_section']['conditions']
    c_type = conds['c_type']

    if c_type in ('distance', 'both'):
        stm.set_genomic_predicate(_genomic_predicate(conds.get('distance_conditions')))
    if c_type in ('attributes', 'both'):
        stm.set_equi_conditions(_equi_conditions(conds.get('region_attributes')))

    # Set the output preference
    stm.set_output_opt(conds.get('output_opt'))

    # Joinby conditions, keeping only entries with a join attribute set.
    # A list comprehension replaces filter+map: the original .__len__() call
    # fails on the lazy iterators Python 3 returns.
    join_data = [(j['j_att'], j['metajoin_match'])
                 for j in x['joinby']['joinby_clause'] if j['j_att']]
    if join_data:
        stm.set_joinby_clause(JoinbyClause(join_data))

    return stm
340
341
def _genomic_predicate(pred):
    """Build a GenomicPredicate from a list of distal condition descriptions."""

    gp = GenomicPredicate()

    # Each entry is either a distal condition (dc, n) or a stream direction
    # (ds); distinguish by the type_dc_value keyword.
    for entry in pred:
        dc = entry.get('type_dc')
        if dc.get('type_dc_value') == 'dist':
            gp.add_distal_condition(dc.get('dc'), dc.get('n'))
        else:
            gp.add_distal_stream(dc.get('ds'))

    return gp
355
356
def _equi_conditions(pred):
    """Collect the attribute names to be used as JOIN equi-conditions."""

    return AttributesList([p.get('attribute') for p in pred])
363
364
def create_order(x):
    """Build an Order statement from its JSON description.

    Collects metadata and region ordering attributes, and any constraints on
    the number of samples to extract.
    """

    stm = Order()

    # Set output and input variables
    stm.set_output_var(x['output_var'])
    stm.set_input_var(x['input_var_ordering_ds'])

    # Divide metadata ordering attributes from region ones. Real lists are
    # required: on Python 3 a filter object is always truthy, so the
    # 'if meta_att:' checks below would otherwise always fire.
    atts = x['ordering_attributes']['attributes']
    meta_att = [a for a in atts if a['att_type'] == 'metadata']
    region_att = [a for a in atts if a['att_type'] == 'region']

    # Plain loops instead of map-for-side-effects: a lazy map is never
    # consumed on Python 3, so add_attribute would silently never run.
    if meta_att:
        o_att_meta = OrderingAttributes()
        for a in meta_att:
            o_att_meta.add_attribute(a['attribute_name'], a['order_type'])
        stm.set_ordering_attributes(o_att_meta, 'metadata')

    if region_att:
        o_att_region = OrderingAttributes()
        for a in region_att:
            o_att_region.add_attribute(a['attribute_name'], a['order_type'])
        stm.set_ordering_attributes(o_att_region, 'region')

    # Constraints over the number of samples to extract
    top_opts = x['top_options']['to']
    if top_opts:
        topts = [(to['type'], to['opt']['k_type'], to['opt']['k'])
                 for to in top_opts]
        stm.set_top_options(topts)

    return stm
403
404
def create_map(x):
    """Build a Map statement from its JSON description."""

    stm = Map()

    # Set output and input variables
    stm.set_output_var(x['output_var'])
    stm.set_reference_var(x['input_var_reference'])
    stm.set_experiment_var(x['input_var_experiment'])

    # Alternative name for the counting result, if the user gave one
    if x['count_result']:
        stm.set_count_attribute(x['count_result'])

    # Additional region attribute definitions; keep only fully specified
    # rows. A list comprehension replaces filter+map: the original called
    # .__len__() on lazy iterators, which fails on Python 3.
    nr_data = x['new_regions_attributes']['new_regions']
    new_regions = [RegionGenerator(newRegion=r['new_name'],
                                   function=RegFunction(r['function']),
                                   argRegion=r['argument'])
                   for r in nr_data
                   if r['new_name'] and (r['function'] != 'None') and r['argument']]
    if new_regions:
        stm.set_new_regions(new_regions)

    # Joinby conditions, keeping only entries with a join attribute set
    join_data = [(j['j_att'], j['metajoin_match'])
                 for j in x['joinby']['joinby_clause'] if j['j_att']]
    if join_data:
        stm.set_joinby_clause(JoinbyClause(join_data))

    return stm
441
def create_cover(x):
    """Build a Cover statement (any variant) from its JSON description."""

    stm = Cover(x['cover_variant'])

    # Set output and input variables
    stm.set_output_var(x['output_var'])
    stm.set_input_var(x['input_var'])

    # Read minAcc value
    min_data = x['minAcc']
    stm.set_minAcc(_read_acc_values(min_data, min_data['min_type']))

    # Read maxAcc value
    max_data = x['maxAcc']
    stm.set_maxAcc(_read_acc_values(max_data, max_data['max_type']))

    # Additional region attribute definitions; keep only fully specified
    # rows. A list comprehension replaces filter+map: the original called
    # .__len__() on lazy iterators, which fails on Python 3.
    nr_data = x['new_regions_attributes']['new_regions']
    new_regions = [RegionGenerator(newRegion=r['new_name'],
                                   function=RegFunction(r['function']),
                                   argRegion=r['argument'])
                   for r in nr_data
                   if r['new_name'] and (r['function'] != 'None') and r['argument']]
    if new_regions:
        stm.set_new_regions(new_regions)

    # Groupby conditions, keeping only entries with a join attribute set
    group_data = [(g['j_att'], g['metajoin_match'])
                  for g in x['groupby']['groupby_clause'] if g['j_att']]
    if group_data:
        stm.set_groupby_clause(GroupbyClause(group_data))

    return stm
482
483 def _read_acc_values(data, value):
484
485 if value in ['ANY', 'ALL']:
486 return value
487 if value == 'value':
488 return str(data['value'])
489 if value == 'ALL_n':
490 return 'ALL / {n}'.format(n=data['n'])
491 if value == 'ALL_n_k':
492 return '(ALL + {k}) / {n}'.format(n=data['n'],k=data['k'])
493
494
def create_select(x) :
    """Build a Select statement from its JSON description.

    Reads the input dataset or variable, the metadata and region predicates
    (either composed step by step from form blocks or given directly as a
    text line), and an optional semijoin predicate.
    """

    stm = Select()

    # Set output and input variables
    stm.set_output_var(x['output_var'])

    input_data = x['input']

    # The input is either an existing dataset or a previously defined variable
    if x['input']['input_type'] == 'i_ds' :
        input_var = input_data['input_ds']
        stm.set_input_var(input_var)
    if x['input']['input_type'] == 'i_var':
        input_var = input_data['input_var']
        stm.set_input_var(input_var)

    # Check if there's metadata predicates and set them up.
    # They can be given as built step by step or directly as a text line.
    # Check the type and parse the appropriate data.
    mp_data = input_data['metadata_predicates']['conditions']
    if mp_data['ad_flag'] == 'steps' :

        # The literal string 'None' marks an empty first condition
        if mp_data['condition'] != 'None':
            meta_pred = _metadata_predicate(mp_data)

            # If there are further blocks, fold each one into the formula
            # with its logical connector (negated if requested)
            for ma in mp_data['add_meta_blocks']:
                # presumably len > 1 means the formula is already composite
                # and must be parenthesized as a block — TODO confirm against
                # Wff semantics in gmql_queries_statements
                if meta_pred.__len__() > 1 :
                    meta_pred = [meta_pred, Wff.BLOCK]
                mp = _metadata_predicate(ma)

                if ma['block_logCon']['negate']:
                    mp = [mp, Wff.NOT]

                meta_pred = [meta_pred, mp, Wff(ma['block_logCon']['logCon'])]

            stm.set_param(meta_pred, 'metadata')
    else :
        # Raw text line: check_input validates/escapes it before use
        meta_pred = check_input(mp_data['conditions_string'])
        stm.set_param(meta_pred, 'metadata')

    # Similar applies with Region Predicates (if they are present)
    rp_data = input_data['region_predicates']['conditions']
    if rp_data['ad_flag'] == 'steps' :

        if rp_data['condition'] != 'None':
            reg_pred = _region_predicate(rp_data)

            # If there are further blocks
            for ra in rp_data['add_region_blocks']:
                if reg_pred.__len__() > 1:
                    reg_pred = [reg_pred, Wff.BLOCK]
                rp = _region_predicate(ra)

                if ra['block_logCon']['negate']:
                    rp = [rp, Wff.NOT]

                reg_pred = [reg_pred, rp, Wff(ra['block_logCon']['logCon'])]

            stm.set_param(reg_pred, 'region')
    else:
        reg_pred = check_input(rp_data['conditions_string'])
        stm.set_param(reg_pred, 'region')

    # Check if there is a semijoin predicate. If it does, collect the
    # attributes and the external ds to confront with.
    sj_data = input_data['semijoin_predicate']

    if sj_data['sj_attributes'] :
        # NOTE(review): map() here is lazy on Python 3 — fine only if
        # SemiJoinPredicate materializes it; verify.
        sj_attr = map(lambda x: x['sj_att'], sj_data['sj_attributes'])
        sj = SemiJoinPredicate(sj_attr,sj_data['ds_ext'],sj_data['condition'])

        stm.set_param(sj, 'semijoin')

    return stm
574
def _metadata_predicate(mp_data):
    """Build the well-formed formula for one block of metadata predicates.

    Starts from the block's first predicate and folds in each additional one
    with its logical connector, negating where requested.
    """

    pred = MetaPredicate(mp_data['attribute'], mp_data['value'], mp_data['condition'])
    if mp_data['negate']:
        pred = [pred, Wff.NOT]

    # Fold in any further predicates of this block
    for extra in mp_data['pm_additional']:
        nxt = MetaPredicate(extra['attribute'], extra['value'], extra['condition'])
        if extra['negate']:
            nxt = [nxt, Wff.NOT]

        connector = extra['logCon']
        if connector == 'AND':
            pred = [pred, nxt, Wff.AND]
        if connector == 'OR':
            pred = [pred, nxt, Wff.OR]

    return pred
596
def _region_predicate(rp_data):
    """Build the well-formed formula for one block of region predicates.

    Each predicate's value may refer to a metadata attribute ('meta' value
    type) instead of a constant.
    """

    first = RegionPredicate(rp_data['attribute'], rp_data['value'], rp_data['condition'])
    if rp_data['is_meta_value']:
        first.set_value_type('meta')
    else:
        first.set_value_type()

    pred = first
    if rp_data['negate']:
        pred = [pred, Wff.NOT]

    # Fold in any further predicates of this block
    for extra in rp_data['pr_additional']:
        nxt = RegionPredicate(extra['attribute'], extra['value'], extra['condition'])
        if extra['is_meta_value']:
            nxt.set_value_type('meta')
        else:
            nxt.set_value_type()

        if extra['negate']:
            nxt = [nxt, Wff.NOT]

        connector = extra['logCon']
        if connector == 'AND':
            pred = [pred, nxt, Wff.AND]
        if connector == 'OR':
            pred = [pred, nxt, Wff.OR]

    return pred
627
def save(query, output, query_source):
    """Write the query's statements to the output file in GMQL syntax.

    :param query: query dict as returned by read_query
    :param output: path of the query file to write
    :param query_source: optional path of a previous local query whose
        statements are copied first (continuing an existing query)
    """
    # Local import: PyYAML is used only here and is not imported at the top
    # of this file — the original 'yaml.load' raised a NameError unless the
    # name arrived via the star import. TODO: move to the file header.
    import yaml

    # Config file holding the actual GMQL syntax to use
    y_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'gmql_syntax.yaml')

    with open(y_path, 'r') as yamlf:
        # safe_load: the syntax file is plain data; the bare yaml.load call
        # is deprecated and can execute arbitrary constructors
        syntax = yaml.safe_load(yamlf)

    # If I am continuing a local query, first copy the older statements
    if query_source:
        with open(output, 'w') as f_out:
            with open(query_source, 'r') as f_in:
                f_out.writelines(f_in.readlines())

    with open(output, 'a') as f_out:
        for s in query['statements']:
            f_out.write('{stm}\n'.format(stm=s.save(syntax)))
647
648
def compile(user, query_name, query_file, log):
    """Send the query to the GMQL server for compilation only.

    Delegates to gmql_rest_queries.compile_query; output goes to log.
    Note: the name shadows the builtin compile(), kept for caller
    compatibility.
    """
    compile_query(user, query_name, query_file, log)
653
654
def run(user, query_name, query, log, out_format, importFlag, updated_ds_list):
    """Send the query to the GMQL server to be executed.

    Delegates to gmql_rest_queries.run_query, then refreshes the user's
    dataset list into updated_ds_list.
    """
    run_query(user, query_name, query, log, out_format, importFlag)

    # Save updated list of datasets
    list_datasets(user, updated_ds_list)
662
663
def stop_err(msg):
    """Write an error message, newline-terminated, to standard error."""
    sys.stderr.write("%s\n" % msg)
666
def __main__():
    """Entry point: parse arguments, compose and save the query, then
    dispatch the requested command (compile or run)."""

    parser = argparse.ArgumentParser()
    parser.add_argument("-user")
    parser.add_argument("-cmd")
    parser.add_argument("-query_params")
    parser.add_argument("-query_output")
    parser.add_argument("-query_source")
    parser.add_argument("-query_log")
    parser.add_argument("-updated_ds_list")

    args = parser.parse_args()

    # Build the query from the JSON parameters file and write it out in GMQL
    # syntax (appending to any previous local query given as source)
    query = read_query(args.query_params)
    save(query, args.query_output, args.query_source)

    if(args.cmd == 'compile'):
        compile(args.user, query['name'], args.query_output, args.query_log)

    if(args.cmd == 'run'):
        # NOTE(review): read_query sets 'out_format' and 'importFlag' only
        # when present in the JSON, so a 'run' without them raises KeyError —
        # presumably the tool form always supplies them for runs; verify.
        run(args.user, query['name'], args.query_output, args.query_log, query['out_format'], query['importFlag'], args.updated_ds_list)


if __name__ == "__main__":
    __main__()