Mercurial > repos > geco-team > gmql_queries_composer
view gmql_queries_statements.py @ 0:a80c93182db3 draft default tip
planemo upload for repository https://github.com/lu-brn/gmql-galaxy commit 953ee36ceda5814dc9baa03427bc0eb4ee2e93bd-dirty
author | geco-team |
---|---|
date | Tue, 26 Jun 2018 09:08:06 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
# --------------------------------------------------------------------------------
# GMQL Queries Statements Classes
# --------------------------------------------------------------------------------
# Luana Brancato, luana.brancato@mail.polimi.it
# --------------------------------------------------------------------------------
"""Classes that model GMQL statements and render them to text.

Every class exposes a ``save`` method that receives (part of) a syntax
description -- a nested mapping of ``str.format`` templates -- and returns the
textual form of the object.
"""

import yaml

# Provides Operator, Wff, RegFunction, CoordParam, DistalConditions and
# DistalStream. Kept as a star import because other modules may rely on the
# names being re-exported from here.
from gmql_queries_constants import *


class Statement(object):
    """Base class of every GMQL statement.

    Holds the operator, the named variables ('output', 'input1', 'input2')
    and the operator parameters, keyed by parameter type.
    """

    def __init__(self):
        # Placeholder: subclasses replace it with a concrete Operator member.
        self.operator = Operator
        self.variables = dict()
        self.params = dict()

    def save(self, syntax):
        """Render the statement skeleton.

        The returned string still contains a literal ``{parameters}``
        placeholder; subclasses fill it in with a second ``format`` call.
        """
        var_o = self.variables.get('output')
        var_i = [self.variables.get('input1'), self.variables.get('input2', '')]

        stm = syntax['STATEMENT'].format(operator=self.operator.value,
                                         out_var=var_o,
                                         in_vars=" ".join(var_i),
                                         parameters='{parameters}')
        return stm

    def write_query(self, syntax):
        """Parse the YAML syntax description and store it in ``self.syntax``."""
        # safe_load: the syntax file is plain data, and yaml.load() without an
        # explicit Loader is unsafe (and rejected by PyYAML >= 6).
        self.syntax = yaml.safe_load(syntax)

    def set_variable(self, var, var_name):
        """Store ``var`` under the variable slot ``var_name``."""
        self.variables[var_name] = var

    def set_param(self, param, param_type):
        """Store ``param`` under the parameter slot ``param_type``."""
        self.params[param_type] = param


class Materialize(Statement):
    """MATERIALIZE statement: writes the dataset ``input_ds`` into ``filename``."""

    def __init__(self, filename, input_ds):
        super(Materialize, self).__init__()
        self.operator = Operator.MATERIALIZE
        self.set_variable(filename, 'output')
        self.set_variable(input_ds, 'input1')

    def save(self, syntax):
        """Render the statement with the dedicated 'MATERIALIZE' template."""
        stm = syntax['MATERIALIZE'].format(variable=self.variables.get('input1'),
                                           file_name=self.variables.get('output'))
        return stm


class Select(Statement):
    """SELECT statement with optional metadata, region and semijoin conditions."""

    def __init__(self):
        super(Select, self).__init__()
        self.operator = Operator.SELECT

    def save(self, syntax):
        """Render the statement, emitting only the conditions that were set."""
        stm = super(Select, self).save(syntax)
        params_form = syntax['PARAMS']
        select_params = params_form[self.operator.value]
        sep = params_form['type_separator']

        params = []

        # Format conditions over metadata
        predicate = self.params.get('metadata', None)
        if predicate:
            f_predicate = self.save_wff(params_form, predicate)
            params.append(select_params['metadata'].format(predicate=f_predicate))

        # Format conditions over samples fields
        predicate = self.params.get('region', None)
        if predicate:
            f_predicate = self.save_wff(params_form, predicate)
            params.append(select_params['region'].format(predicate=f_predicate))

        # Format semijoin conditions
        predicate = self.params.get('semijoin', None)
        if predicate:
            f_predicate = predicate.save(select_params['semijoin_predicates'], sep)
            params.append(select_params['semijoin'].format(predicate=f_predicate))

        stm = stm.format(parameters=sep.join(params))
        return stm

    @staticmethod
    def save_wff(syntax, pred):
        """Recursively render a well-formed formula.

        ``pred`` is either a list whose last element is a ``Wff`` connective
        (preceded by two operands for AND/OR, one for NOT/BLOCK), a
        ``Predicate`` instance, or any other value, which is returned as is.

        Raises:
            ValueError: if a list does not end with a known ``Wff`` connective.
        """
        w_format = syntax['wff']

        if isinstance(pred, list):
            connective = pred[-1]
            # Membership tests: ``x is Wff.AND or Wff.OR`` would always be true.
            if connective in (Wff.AND, Wff.OR):
                return w_format[connective.value].format(
                    p1=Select.save_wff(syntax, pred[0]),
                    p2=Select.save_wff(syntax, pred[1]))
            if connective in (Wff.NOT, Wff.BLOCK):
                return w_format[connective.value].format(
                    p=Select.save_wff(syntax, pred[0]))
            raise ValueError(
                "Unknown logical connective in formula: {0!r}".format(connective))

        if isinstance(pred, Predicate):
            return pred.save(syntax)
        return pred

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        self.set_variable(var, 'input1')

    def set_metadata_predicates(self, logicalPredicate):
        self.set_param(logicalPredicate, 'metadata')

    def set_region_predicates(self, logicalPredicate):
        self.set_param(logicalPredicate, 'region')

    def set_semijoin_predicates(self, sjClauses):
        self.set_param(sjClauses, 'semijoin')


class Project(Statement):
    """PROJECT statement: attribute lists plus new attribute definitions."""

    def __init__(self):
        super(Project, self).__init__()
        self.operator = Operator.PROJECT

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        self.set_variable(var, 'input1')

    def set_regions(self, regionsAttributes, type='keep'):
        """Store the region attributes list together with its ``type`` key."""
        self.set_param((regionsAttributes, type), 'regions')

    def set_metadata(self, metadataAttributes, type='keep'):
        """Store the metadata attributes list together with its ``type`` key."""
        self.set_param((metadataAttributes, type), 'metadata')

    def set_new_regions(self, regionAttDef):
        self.set_param(regionAttDef, 'newRegions')

    def set_new_metadata(self, metadataAttDef):
        self.set_param(metadataAttDef, 'newMetadata')

    def save(self, syntax):
        """Render the statement, emitting only the sections that were set."""
        stm = super(Project, self).save(syntax)
        params_form = syntax['PARAMS']
        project_format = params_form[self.operator.value]
        param_sep = params_form['param_separator']
        type_sep = params_form['type_separator']

        params = []

        # Format regions attributes list.
        # NOTE(review): the list is joined with type_sep rather than
        # param_sep -- confirm this is intended.
        params_regs = self.params.get('regions', None)
        if params_regs:
            att_list = project_format['att_list'][params_regs[1]].format(
                att_list=params_regs[0].save('', type_sep))
            regionsAtt = project_format['regions'].format(att_list=att_list)
            params.append(regionsAtt)

        # Format metadata attributes list
        params_mets = self.params.get('metadata', None)
        if params_mets:
            att_list = project_format['att_list'][params_mets[1]].format(
                att_list=params_mets[0].save('', type_sep))
            metadataAtt = project_format['metadata'].format(att_list=att_list)
            params.append(metadataAtt)

        # Format new regions attributes definitions
        params_newReg = self.params.get('newRegions', None)
        if params_newReg:
            newRegions = [x.save(params_form) for x in params_newReg]
            params.append(project_format['newRegions'].format(
                newAttributes=param_sep.join(newRegions)))

        # Format new metadata attributes definitions
        params_newMeta = self.params.get('newMetadata', None)
        if params_newMeta:
            newMetadata = [x.save(params_form) for x in params_newMeta]
            params.append(project_format['newMetadata'].format(
                newAttributes=param_sep.join(newMetadata)))

        stm = stm.format(parameters=type_sep.join(params))
        return stm


class Extend(Statement):
    """EXTEND statement: defines new metadata attributes."""

    def __init__(self):
        super(Extend, self).__init__()
        self.operator = Operator.EXTEND

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        self.set_variable(var, 'input1')

    def set_new_attributes(self, newAttributes):
        self.set_param(newAttributes, 'newMetadata')

    def save(self, syntax):
        """Render the statement with its new metadata attribute definitions."""
        stm = super(Extend, self).save(syntax)
        params_form = syntax['PARAMS']
        param_sep = params_form['param_separator']

        # Get new metadata attributes definition and format them.
        # A missing definition list is rendered as an empty parameter string
        # instead of failing while iterating over None.
        params_newMeta = self.params.get('newMetadata') or []
        newMetadata = [x.save(params_form) for x in params_newMeta]

        stm = stm.format(parameters=param_sep.join(newMetadata))
        return stm


class Merge(Statement):
    """MERGE statement with an optional groupby clause."""

    def __init__(self):
        super(Merge, self).__init__()
        self.operator = Operator.MERGE

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        self.set_variable(var, 'input1')

    def set_groupby_clause(self, joinbyClause):
        """Store the groupby clause."""
        self.set_param(joinbyClause, 'groupby')

    # Historical, misspelled name kept so existing callers keep working.
    set_groupy_clause = set_groupby_clause

    def save(self, syntax):
        """Render the statement, emitting the groupby clause when present."""
        stm = super(Merge, self).save(syntax)
        params_form = syntax['PARAMS']
        merge_format = params_form[Operator.MERGE.value]
        param_sep = params_form['param_separator']
        type_sep = params_form['type_separator']

        params = []

        # Format groupby clause (if present)
        gbc = self.params.get('groupby', None)
        if gbc:
            params.append(merge_format['groupby'].format(
                groupbyClause=gbc.save(params_form, param_sep)))

        stm = stm.format(parameters=type_sep.join(params))
        return stm


class Difference(Statement):
    """DIFFERENCE statement with optional exact flag and joinby clause."""

    def __init__(self):
        super(Difference, self).__init__()
        self.operator = Operator.DIFFERENCE
        self.exact_flag = False

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_reference_var(self, var):
        self.set_variable(var, 'input1')

    def set_negative_var(self, var):
        self.set_variable(var, 'input2')

    def set_exact(self):
        self.exact_flag = True

    def set_joinby_clause(self, joinbyClause):
        self.set_param(joinbyClause, 'joinby')

    def save(self, syntax):
        """Render the statement, emitting only the options that were set."""
        stm = super(Difference, self).save(syntax)
        params_form = syntax['PARAMS']
        difference_format = params_form[Operator.DIFFERENCE.value]
        param_sep = params_form['param_separator']
        type_sep = params_form['type_separator']

        params = []

        # Write the exact option only when the flag has been set
        if self.exact_flag:
            params.append(difference_format['exact'].format(flag='true'))

        # Format joinby clause (if present)
        jbc = self.params.get('joinby', None)
        if jbc:
            params.append(difference_format['joinby'].format(
                joinbyClause=jbc.save(params_form, param_sep)))

        stm = stm.format(parameters=type_sep.join(params))
        return stm


class Union(Statement):
    """UNION statement: two input variables and no parameters."""

    def __init__(self):
        super(Union, self).__init__()
        self.operator = Operator.UNION

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_first_var(self, var):
        self.set_variable(var, 'input1')

    def set_second_var(self, var):
        self.set_variable(var, 'input2')

    def save(self, syntax):
        """Render the statement with an empty parameter string."""
        stm = super(Union, self).save(syntax)
        return stm.format(parameters='')


class Group(Statement):
    """GROUP statement: grouping options and new attribute definitions."""

    def __init__(self):
        super(Group, self).__init__()
        self.operator = Operator.GROUP

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        self.set_variable(var, 'input1')

    def set_group_meta(self, groupbyClause):
        self.set_param(groupbyClause, 'meta')

    def set_new_metadata(self, metaAttDef):
        self.set_param(metaAttDef, 'newMetadata')

    def set_group_regions(self, attList):
        self.set_param(attList, 'regions')

    def set_new_regions(self, regionsAttDef):
        self.set_param(regionsAttDef, 'newRegions')

    def save(self, syntax):
        """Render the statement, emitting only the sections that were set."""
        stm = super(Group, self).save(syntax)
        params_form = syntax['PARAMS']
        group_format = params_form[Operator.GROUP.value]
        type_sep = params_form['type_separator']
        param_sep = params_form['param_separator']

        params = []

        # Additional grouping options over metadata (a joinby-style clause)
        jbc = self.params.get('meta', None)
        if jbc:
            params.append(group_format['meta'].format(
                groupMeta=jbc.save(params_form, param_sep)))

        # New metadata definitions (attribute generator objects)
        params_newMeta = self.params.get('newMetadata', None)
        if params_newMeta:
            newMetadata = [x.save(params_form) for x in params_newMeta]
            params.append(group_format['newMetadata'].format(
                newAttributes=param_sep.join(newMetadata)))

        # Additional grouping options over region attributes (an AttributesList)
        attList = self.params.get('regions', None)
        if attList:
            params.append(group_format['regions'].format(
                groupRegions=attList.save(params_form, param_sep)))

        # New region definitions (RegionGenerator objects)
        params_newRegions = self.params.get('newRegions', None)
        if params_newRegions:
            newRegions = [x.save(params_form) for x in params_newRegions]
            params.append(group_format['newRegions'].format(
                newRegions=param_sep.join(newRegions)))

        stm = stm.format(parameters=type_sep.join(params))
        return stm


class Cover(Statement):
    """COVER-family statement; the operator is looked up from ``cover_variant``."""

    def __init__(self, cover_variant):
        super(Cover, self).__init__()
        self.operator = Operator(cover_variant)

    def set_minAcc(self, minAcc):
        self.minAcc = minAcc

    def set_maxAcc(self, maxAcc):
        self.maxAcc = maxAcc

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        self.set_variable(var, 'input1')

    def set_new_regions(self, regionAttributes):
        self.set_param(regionAttributes, 'newRegions')

    def set_groupby_clause(self, groupbyClause):
        self.set_param(groupbyClause, 'groupby')

    def save(self, syntax):
        """Render the statement.

        ``set_minAcc`` and ``set_maxAcc`` must have been called beforehand,
        since both values are always written.
        """
        stm = super(Cover, self).save(syntax)
        params_form = syntax['PARAMS']
        # The parameter templates of Operator.COVER are used for every variant.
        cover_format = params_form[Operator.COVER.value]
        type_sep = params_form['type_separator']
        param_sep = params_form['param_separator']

        params = []

        # minAcc and maxAcc are joined and then added to the list as they are
        params.append(param_sep.join([self.minAcc, self.maxAcc]))

        # Format groupby clause
        jbc = self.params.get('groupby', None)
        if jbc:
            params.append(cover_format['groupby'].format(
                groupbyClause=jbc.save(params_form, param_sep)))

        # Format new region attributes definitions
        param_regs = self.params.get('newRegions', None)
        if param_regs:
            newRegions = [x.save(params_form) for x in param_regs]
            params.append(cover_format['regions'].format(
                newRegions=param_sep.join(newRegions)))

        stm = stm.format(parameters=type_sep.join(params))
        return stm


class Map(Statement):
    """MAP statement: new regions, optional count name and joinby clause."""

    def __init__(self):
        super(Map, self).__init__()
        self.operator = Operator.MAP
        self.count_attribute = ''

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_reference_var(self, var):
        self.set_variable(var, 'input1')

    def set_experiment_var(self, var):
        self.set_variable(var, 'input2')

    def set_count_attribute(self, name):
        self.count_attribute = name

    def set_new_regions(self, regionAttributes):
        self.set_param(regionAttributes, 'newRegions')

    def set_joinby_clause(self, joinbyClause):
        self.set_param(joinbyClause, 'joinby')

    def save(self, syntax):
        """Render the statement, emitting only the sections that were set."""
        stm = super(Map, self).save(syntax)
        params_form = syntax['PARAMS']
        map_format = params_form[self.operator.value]
        type_sep = params_form['type_separator']
        param_sep = params_form['param_separator']

        params = []

        # Format new region attributes definitions
        param_regs = self.params.get('newRegions', None)
        if param_regs:
            newRegions = [x.save(params_form) for x in param_regs]
            params.append(map_format['regions'].format(
                newRegions=param_sep.join(newRegions)))

        # Format user chosen name for the count attribute, if present
        if self.count_attribute:
            params.append(map_format['count'].format(count_name=self.count_attribute))

        # Format joinby clause
        jbc = self.params.get('joinby', None)
        if jbc:
            params.append(map_format['joinby'].format(
                joinbyClause=jbc.save(params_form, param_sep)))

        stm = stm.format(parameters=type_sep.join(params))
        return stm


class Order(Statement):
    """ORDER statement: ordering attributes and top options."""

    def __init__(self):
        super(Order, self).__init__()
        self.operator = Operator.ORDER

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        self.set_variable(var, 'input1')

    def set_ordering_attributes(self, ordAtt, type):
        """Store ordering attributes; ``type`` can be 'metadata' or 'region'."""
        self.set_param(ordAtt, type + 'OrderingAttributes')

    def set_top_options(self, topts):
        """Store the top options.

        Each option is indexed as ``(kind, top_type, k)`` where ``kind`` is
        'metadata' or 'region'.
        """
        self.set_param(topts, 'top')

    def save(self, syntax):
        """Render the statement.

        For 'metadata' first and 'region' second, the ordering attributes
        are written followed by the top options of that same kind.
        """
        stm = super(Order, self).save(syntax)
        params_form = syntax['PARAMS']
        order_form = params_form[self.operator.value]
        type_sep = params_form['type_separator']
        sep = params_form['param_separator']

        params = []
        tops = self.params.get('top', None) or []

        for kind in ('metadata', 'region'):
            # Format the attribute list of this kind
            ord_att = self.params.get(kind + 'OrderingAttributes', None)
            if ord_att:
                params.append(order_form[kind]['orderingAttributes'].format(
                    att_list=ord_att.save(order_form['att_list'], sep)))

            # Top options of this kind. Built as a real list so the emptiness
            # check below is meaningful (a lazy filter object is always truthy).
            kind_tops = [order_form[t[0]]['top'][t[1]].format(k=t[2])
                         for t in tops if t[0] == kind]
            if kind_tops:
                params.append(type_sep.join(kind_tops))

        stm = stm.format(parameters=type_sep.join(params))
        return stm


class Join(Statement):
    """JOIN statement: genomic predicate, equi clause, output option, joinby."""

    def __init__(self):
        super(Join, self).__init__()
        self.operator = Operator.JOIN

    def set_output_var(self, var):
        self.set_variable(var, 'output')

    def set_anchor_var(self, var):
        self.set_variable(var, 'input1')

    def set_experiment_var(self, var):
        self.set_variable(var, 'input2')

    def set_output_opt(self, coord_param):
        self.set_param(CoordParam(coord_param), 'output_opt')

    def set_joinby_clause(self, joinbyClause):
        self.set_param(joinbyClause, 'joinby')

    def set_equi_conditions(self, attributesList):
        self.set_param(attributesList, 'equi_clause')

    def set_genomic_predicate(self, genomicPredicate):
        self.set_param(genomicPredicate, 'genomic_predicate')

    def save(self, syntax):
        """Render the statement, emitting only the sections that were set."""
        stm = super(Join, self).save(syntax)
        params_form = syntax['PARAMS']
        join_format = params_form[self.operator.value]
        type_sep = params_form['type_separator']
        sep = params_form['param_separator']

        params = []

        # Format Genomic Predicate
        gpred = self.params.get('genomic_predicate', None)
        if gpred:
            params.append(join_format['genomic_predicate'].format(
                genomic_predicate=gpred.save(join_format, sep)))

        # Format predicate over attributes
        equi_predicate = self.params.get('equi_clause', None)
        if equi_predicate:
            params.append(join_format['equi_clause'].format(
                att_list=equi_predicate.save(params_form, sep)))

        # Format option over output. The option may never have been set, so
        # only dereference .value when something is stored.
        output_opt = self.params.get('output_opt', None)
        output_cond = output_opt.value if output_opt is not None else None
        if output_cond:
            params.append(join_format['output_opt'].format(coord_param=output_cond))

        # Format Joinby clause
        jbc = self.params.get('joinby', None)
        if jbc:
            params.append(join_format['joinby'].format(
                joinbyClause=jbc.save(params_form, sep)))

        stm = stm.format(parameters=type_sep.join(params))
        return stm


class Predicate(object):
    """Single condition between an attribute and a value."""

    def __init__(self, field1, field2, condition):
        self.p_attribute = field1
        self.p_value = field2
        self.condition = condition
        self.value_type = 'string'

    def save(self, syntax):
        """Render the predicate.

        The value is first formatted with the template registered for
        ``self.value_type``; the template selected by ``self.condition`` then
        combines it with the attribute.
        """
        p_format = syntax['predicate']
        value = p_format['values'][self.value_type].format(p=self.p_value)
        predicate = p_format[self.condition].format(att=self.p_attribute, val=value)
        return predicate


class MetaPredicate(Predicate):
    """Predicate over a metadata attribute."""

    def __init__(self, attribute, value, condition):
        super(MetaPredicate, self).__init__(attribute, value, condition)


class RegionPredicate(Predicate):
    """Predicate over a region attribute."""

    def __init__(self, attribute, value, condition):
        super(RegionPredicate, self).__init__(attribute, value, condition)

    def set_value_type(self, type=None):
        """Set the value type, inferring it when ``type`` is None.

        Possible values type are: coordinate, int, float, string,
        meta_attribute. When inferring, a numeric value is also converted in
        place to ``int`` or ``float``.
        """
        if type is not None:
            # The type is given.
            self.value_type = type
            return

        if self.p_attribute in ['chr', 'left', 'right', 'strand']:
            # The region attribute given is a region coordinate attribute
            self.value_type = 'coordinate'
            return

        try:
            self.p_value = int(self.p_value)
            self.value_type = 'int'
        except ValueError:
            try:
                self.p_value = float(self.p_value)
                self.value_type = 'float'
            except ValueError:
                self.value_type = 'string'


class RegionGenerator(object):
    """Definition of a new region attribute as ``function(argRegion)``."""

    def __init__(self, newRegion, function, argRegion):
        self.newRegion = newRegion
        self.function = function
        self.argument = argRegion

    def save(self, syntax):
        """Render the definition with the 'function' and 'new_region' templates."""
        f = syntax['function'].format(function=self.function.value, arg=self.argument)
        return syntax['new_region'].format(r=self.newRegion, function=f)


class MetaAttributesGenerator(RegionGenerator):
    """Definition of a new metadata attribute; rendered like RegionGenerator."""

    def __init__(self, newAttribute, function, argRegion):
        super(MetaAttributesGenerator, self).__init__(newAttribute, function, argRegion)

    def save(self, syntax):
        return super(MetaAttributesGenerator, self).save(syntax)


class ProjectGenerator(RegionGenerator):
    """Attribute definition used by PROJECT, with special-cased functions."""

    def __init__(self, newRegion, function, arg):
        super(ProjectGenerator, self).__init__(newRegion, function, arg)

    def save(self, syntax):
        """Render the definition.

        ``RegFunction.MATH`` and the plain strings 'rename' / 'fixed' write
        the argument directly; ``RegFunction.META`` joins the argument items
        with the parameter separator; anything else falls back to
        ``RegionGenerator.save``.
        """
        if self.function == RegFunction.MATH:
            return syntax['new_region'].format(r=self.newRegion, function=self.argument)

        if self.function in ['rename', 'fixed']:
            f = self.argument
            if self.function == 'fixed':
                f = "{f}".format(f=f)
            return syntax['new_region'].format(r=self.newRegion, function=f)

        if self.function is RegFunction.META:
            f = syntax['function'].format(function=self.function.value,
                                          arg=syntax['param_separator'].join(self.argument))
            return syntax['new_region'].format(r=self.newRegion, function=f)

        return super(ProjectGenerator, self).save(syntax)


class AttributesList(object):
    """Plain list of attribute names."""

    def __init__(self, attributes):
        self.attributes = attributes

    def save(self, syntax, sep):
        """Return the attributes joined by ``sep``; ``syntax`` is not used."""
        attr = sep.join(self.attributes)
        return attr


class OrderingAttributes(AttributesList):
    """List of ``(attribute, direction)`` pairs."""

    def __init__(self):
        attributes = list()
        super(OrderingAttributes, self).__init__(attributes)

    def add_attribute(self, att, desc):
        self.attributes.append((att, desc))

    def save(self, syntax, sep):
        """Format each pair with ``syntax[direction]`` and join with ``sep``.

        The stored pairs are left untouched, so ``save`` can be called more
        than once.
        """
        formatted = [syntax[desc].format(att=att) for att, desc in self.attributes]
        return sep.join(formatted)


class JoinbyClause(AttributesList):
    """List of ``(attribute name, condition)`` pairs for a joinby clause."""

    def __init__(self, attributes):
        super(JoinbyClause, self).__init__(attributes)

    def save(self, syntax, sep):
        """Format each pair with ``syntax['metajoin_condition']`` and join."""
        attributes = [syntax['metajoin_condition'][x[1]].format(att_name=x[0])
                      for x in self.attributes]
        return sep.join(attributes)


class GroupbyClause(JoinbyClause):
    """Groupby clause; rendered exactly like a JoinbyClause."""

    def __init__(self, attributes):
        super(GroupbyClause, self).__init__(attributes)

    def save(self, syntax, sep):
        return super(GroupbyClause, self).save(syntax, sep)


class SemiJoinPredicate(AttributesList):
    """Semijoin condition: attributes checked against the dataset ``dataset``."""

    def __init__(self, attributes, dataset, condition):
        super(SemiJoinPredicate, self).__init__(attributes)
        self.ds_ext = dataset
        self.condition = condition

    def save(self, syntax, sep):
        """Render with the template selected by ``self.condition``."""
        attributes = super(SemiJoinPredicate, self).save(syntax, sep)
        return syntax[self.condition].format(attributes=attributes, ds_ext=self.ds_ext)


class GenomicPredicate(object):
    """Genomic predicate: distal conditions plus an optional stream direction."""

    def __init__(self):
        self.distal_conditions = []
        self.distal_stream = ''

    def add_distal_condition(self, condition, n):
        self.distal_conditions.append((DistalConditions(condition), n))

    def add_distal_stream(self, direction):
        self.distal_stream = DistalStream(direction)

    def save(self, syntax, sep):
        """Render the conditions, then the stream if set, joined by ``sep``."""
        # A real list is required because the stream is appended afterwards.
        dc = [syntax['distal_condition'].format(dc=x[0].value, n=x[1])
              for x in self.distal_conditions]

        if self.distal_stream:
            dc.append(syntax['distal_stream'].format(ds=self.distal_stream.value))

        return sep.join(dc)