view gmql_queries_statements.py @ 0:a80c93182db3 draft default tip

planemo upload for repository https://github.com/lu-brn/gmql-galaxy commit 953ee36ceda5814dc9baa03427bc0eb4ee2e93bd-dirty
author geco-team
date Tue, 26 Jun 2018 09:08:06 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env python
# --------------------------------------------------------------------------------
# GMQL Queries Statements Classes
# --------------------------------------------------------------------------------
# Luana Brancato, luana.brancato@mail.polimi.it
# --------------------------------------------------------------------------------

import yaml
from gmql_queries_constants import * 

class Statement(object):
    """Base class for a single query statement.

    Stores the operator, a dictionary of named variables ('output',
    'input1', 'input2') and a dictionary of operator parameters.
    Subclasses complete the '{parameters}' placeholder left by save().
    """

    def __init__(self):

        # Placeholder; every subclass replaces it with a concrete operator.
        self.operator = Operator
        self.variables = dict()
        self.params = dict()

    def save(self, syntax):
        """Return the statement skeleton built from syntax['STATEMENT'].

        The template receives the operator value, the output variable and
        the input variables joined by a single space ('input2' defaults to
        an empty string). The parameters field is filled with the literal
        text '{parameters}' so that callers can format it afterwards.
        """

        var_o = self.variables.get('output')
        var_i = [self.variables.get('input1'), self.variables.get('input2', '')]

        stm = syntax['STATEMENT'].format(operator=self.operator.value,
                                         out_var=var_o,
                                         in_vars=" ".join(var_i),
                                         parameters='{parameters}')

        return stm

    def write_query(self, syntax):
        """Parse the YAML text/stream *syntax* and keep it in self.syntax."""
        # safe_load only builds plain Python objects; a bare yaml.load()
        # without a Loader can construct arbitrary objects and is rejected
        # by recent PyYAML releases.
        self.syntax = yaml.safe_load(syntax)

    def set_variable(self, var, var_name):
        """Store *var* under the key *var_name* in the variables dictionary."""
        self.variables[var_name] = var

    def set_param(self, param, param_type):
        """Store *param* under the key *param_type* in the params dictionary."""
        self.params[param_type] = param

class Materialize(Statement):
    """Statement using Operator.MATERIALIZE.

    The target file name is stored as the 'output' variable and the
    dataset to write as the 'input1' variable.
    """

    def __init__(self, filename, input_ds):
        super(Materialize, self).__init__()
        self.operator = Operator.MATERIALIZE
        for value, slot in ((filename, 'output'), (input_ds, 'input1')):
            self.set_variable(value, slot)

    def save(self, syntax):
        """Fill syntax['MATERIALIZE'] with the dataset and the file name."""
        template = syntax['MATERIALIZE']
        dataset = self.variables.get('input1')
        target = self.variables.get('output')
        return template.format(variable=dataset, file_name=target)


class Select(Statement):
    """Statement using Operator.SELECT.

    Optional parameters are a logical formula over metadata ('metadata'),
    a logical formula over region fields ('region') and a semijoin
    predicate ('semijoin').
    """

    def __init__(self):
        super(Select, self).__init__()
        self.operator = Operator.SELECT

    def save(self, syntax):
        """Return the complete statement text.

        Each parameter that is present is rendered with its template from
        syntax['PARAMS'][<operator value>]; the fragments are joined with
        syntax['PARAMS']['type_separator'].
        """
        stm = super(Select, self).save(syntax)
        params_form = syntax['PARAMS']
        select_params = params_form[self.operator.value]
        sep = params_form['type_separator']

        params = []

        # Conditions over metadata and over region fields share the same
        # logical-formula rendering.
        for kind in ('metadata', 'region'):
            predicate = self.params.get(kind, None)
            if predicate:
                f_predicate = self.save_wff(params_form, predicate)
                params.append(select_params[kind].format(predicate=f_predicate))

        # Format semijoin conditions
        predicate = self.params.get('semijoin', None)

        if predicate:
            f_predicate = predicate.save(select_params['semijoin_predicates'], sep)
            params.append(select_params['semijoin'].format(predicate=f_predicate))

        stm = stm.format(parameters=sep.join(params))

        return stm

    @staticmethod
    def save_wff(syntax, pred):
        """Recursively render a logical formula.

        *pred* is either a leaf (a Predicate, rendered through its own
        save(), or any other value, returned unchanged) or a list whose
        last item is the connective: [p1, p2, Wff.AND/Wff.OR] for binary
        connectives and [p, Wff.NOT/Wff.BLOCK] for unary ones.

        Raises ValueError when a list does not end with a known connective.
        """
        w_format = syntax['wff']

        if not isinstance(pred, list):
            if isinstance(pred, Predicate):
                return pred.save(syntax)
            return pred

        connective = pred[-1]

        # Each side of the 'or' needs its own comparison: the former
        # "x is Wff.AND or Wff.OR" was always truthy, which made the unary
        # branch unreachable.
        if connective is Wff.AND or connective is Wff.OR:
            return w_format[connective.value].format(p1=Select.save_wff(syntax, pred[0]),
                                                     p2=Select.save_wff(syntax, pred[1]))
        if connective is Wff.NOT or connective is Wff.BLOCK:
            return w_format[connective.value].format(p=Select.save_wff(syntax, pred[0]))

        raise ValueError("Unknown logical connective: {c!r}".format(c=connective))

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        """Set the (single) input variable."""
        self.set_variable(var, 'input1')

    def set_metadata_predicates(self, logicalPredicate):
        """Set the logical formula over metadata."""
        self.set_param(logicalPredicate, 'metadata')

    def set_region_predicates(self, logicalPredicate):
        """Set the logical formula over region fields."""
        self.set_param(logicalPredicate, 'region')

    def set_semijoin_predicates(self, sjClauses):
        """Set the semijoin predicate (an object exposing save(syntax, sep))."""
        self.set_param(sjClauses, 'semijoin')


class Project(Statement):
    """Statement using Operator.PROJECT.

    Supports attribute lists for regions and metadata (each stored
    together with a mode string, 'keep' by default) and lists of new
    region / metadata attribute definitions.
    """

    def __init__(self):
        super(Project, self).__init__()
        self.operator = Operator.PROJECT

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        """Set the input variable."""
        self.set_variable(var, 'input1')

    def set_regions(self, regionsAttributes, type='keep'):
        """Store the region attribute list with its mode as a pair."""
        self.set_param((regionsAttributes, type), 'regions')

    def set_metadata(self, metadataAttributes, type='keep'):
        """Store the metadata attribute list with its mode as a pair."""
        self.set_param((metadataAttributes, type), 'metadata')

    def set_new_regions(self, regionAttDef):
        """Store the definitions of the new region attributes."""
        self.set_param(regionAttDef, 'newRegions')

    def set_new_metadata(self, metadataAttDef):
        """Store the definitions of the new metadata attributes."""
        self.set_param(metadataAttDef, 'newMetadata')

    def save(self, syntax):
        """Return the complete statement text.

        Fragments are produced in the order regions, metadata, newRegions,
        newMetadata and joined with the 'type_separator'.
        """
        skeleton = super(Project, self).save(syntax)

        params_form = syntax['PARAMS']
        project_format = params_form[self.operator.value]
        param_sep = params_form['param_separator']
        type_sep = params_form['type_separator']

        fragments = []

        # Attribute lists: the stored pair is (attributes, mode); the mode
        # selects the template inside project_format['att_list'].
        for key in ('regions', 'metadata'):
            pair = self.params.get(key, None)
            if pair:
                mode_template = project_format['att_list'][pair[1]]
                listed = mode_template.format(att_list=pair[0].save('', type_sep))
                fragments.append(project_format[key].format(att_list=listed))

        # Definitions of new attributes: every item renders itself.
        for key in ('newRegions', 'newMetadata'):
            definitions = self.params.get(key, None)
            if definitions:
                rendered = [item.save(params_form) for item in definitions]
                fragments.append(project_format[key].format(newAttributes=param_sep.join(rendered)))

        return skeleton.format(parameters=type_sep.join(fragments))

class Extend(Statement):
    """Statement using Operator.EXTEND; its parameters are new metadata definitions."""

    def __init__(self):
        super(Extend, self).__init__()
        self.operator = Operator.EXTEND

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        """Set the input variable."""
        self.set_variable(var, 'input1')

    def set_new_attributes(self, newAttributes):
        """Store the definitions of the new metadata attributes."""
        self.set_param(newAttributes, 'newMetadata')

    def save(self, syntax):
        """Return the complete statement text.

        The 'newMetadata' parameter is iterated unconditionally, so it has
        to be set before calling this method.
        """
        skeleton = super(Extend, self).save(syntax)

        params_form = syntax['PARAMS']
        param_sep = params_form['param_separator']

        # Every definition renders itself against the PARAMS section.
        definitions = self.params.get('newMetadata')
        rendered = [item.save(params_form) for item in definitions]

        return skeleton.format(parameters=param_sep.join(rendered))

class Merge(Statement):
    """Statement using Operator.MERGE, with an optional groupby clause."""

    def __init__(self):
        super(Merge, self).__init__()
        self.operator = Operator.MERGE

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        """Set the input variable."""
        self.set_variable(var, 'input1')

    def set_groupby_clause(self, groupbyClause):
        """Store the groupby clause (an object exposing save(syntax, sep))."""
        self.set_param(groupbyClause, 'groupby')

    def set_groupy_clause(self, joinbyClause):
        """Misspelled historical name of set_groupby_clause, kept for existing callers."""
        self.set_groupby_clause(joinbyClause)

    def save(self, syntax):
        """Return the complete statement text, including the groupby clause if set."""
        stm = super(Merge, self).save(syntax)

        params_form = syntax['PARAMS']
        merge_format = params_form[Operator.MERGE.value]
        param_sep = params_form['param_separator']
        type_sep = params_form['type_separator']

        params = []

        # Format groupby clause (if present)
        gbc = self.params.get('groupby', None)

        if gbc:
            params.append(merge_format['groupby'].format(groupbyClause=gbc.save(params_form, param_sep)))

        stm = stm.format(parameters=type_sep.join(params))

        return stm

class Difference(Statement):
    """Statement using Operator.DIFFERENCE.

    Takes a reference variable ('input1') and a negative variable
    ('input2'); supports an 'exact' flag and an optional joinby clause.
    """

    def __init__(self):
        super(Difference, self).__init__()
        self.operator = Operator.DIFFERENCE
        self.exact_flag = False

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_reference_var(self, var):
        """Set the reference variable (first input)."""
        self.set_variable(var, 'input1')

    def set_negative_var(self, var):
        """Set the negative variable (second input)."""
        self.set_variable(var, 'input2')

    def set_exact(self):
        """Turn the exact flag on."""
        self.exact_flag = True

    def set_joinby_clause(self, joinbyClause):
        """Store the joinby clause (an object exposing save(syntax, sep))."""
        self.set_param(joinbyClause, 'joinby')

    def save(self, syntax):
        """Return the complete statement text.

        The exact option is written only when the flag is on, followed by
        the joinby clause when one was set.
        """
        skeleton = super(Difference, self).save(syntax)

        params_form = syntax['PARAMS']
        difference_format = params_form[Operator.DIFFERENCE.value]
        param_sep = params_form['param_separator']
        type_sep = params_form['type_separator']

        fragments = []

        if self.exact_flag:
            exact_template = difference_format['exact']
            fragments.append(exact_template.format(flag='true'))

        clause = self.params.get('joinby', None)
        if clause:
            joinby_template = difference_format['joinby']
            fragments.append(joinby_template.format(joinbyClause=clause.save(params_form, param_sep)))

        return skeleton.format(parameters=type_sep.join(fragments))


class Union(Statement):
    """Statement using Operator.UNION over two input variables; it has no parameters."""

    def __init__(self):
        super(Union, self).__init__()
        self.operator = Operator.UNION

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_first_var(self, var):
        """Set the first input variable."""
        self.set_variable(var, 'input1')

    def set_second_var(self, var):
        """Set the second input variable."""
        self.set_variable(var, 'input2')

    def save(self, syntax):
        """Return the statement text with an empty parameters section."""
        skeleton = super(Union, self).save(syntax)
        no_parameters = ''
        return skeleton.format(parameters=no_parameters)


class Group(Statement):
    """Statement using Operator.GROUP.

    Optional parameters: a grouping clause over metadata ('meta'), new
    metadata definitions ('newMetadata'), a list of region attributes to
    group by ('regions') and new region definitions ('newRegions').
    """

    def __init__(self):
        super(Group, self).__init__()
        self.operator = Operator.GROUP

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        """Set the input variable."""
        self.set_variable(var, 'input1')

    def set_group_meta(self, groupbyClause):
        """Store the grouping clause over metadata."""
        self.set_param(groupbyClause, 'meta')

    def set_new_metadata(self, metaAttDef):
        """Store the definitions of the new metadata attributes."""
        self.set_param(metaAttDef, 'newMetadata')

    def set_group_regions(self, attList):
        """Store the list of region attributes used for grouping."""
        self.set_param(attList, 'regions')

    def set_new_regions(self, regionsAttDef):
        """Store the definitions of the new region attributes."""
        self.set_param(regionsAttDef, 'newRegions')

    def save(self, syntax):
        """Return the complete statement text.

        Fragments are emitted in the order meta, newMetadata, regions,
        newRegions and joined with the 'type_separator'.
        """
        skeleton = super(Group, self).save(syntax)

        params_form = syntax['PARAMS']
        group_format = params_form[Operator.GROUP.value]
        type_sep = params_form['type_separator']
        param_sep = params_form['param_separator']

        fragments = []

        # Grouping options over metadata (a clause object that renders itself)
        meta_clause = self.params.get('meta', None)
        if meta_clause:
            meta_template = group_format['meta']
            fragments.append(meta_template.format(groupMeta=meta_clause.save(params_form, param_sep)))

        # New metadata definitions (each item renders itself)
        meta_defs = self.params.get('newMetadata', None)
        if meta_defs:
            rendered_meta = [item.save(params_form) for item in meta_defs]
            fragments.append(group_format['newMetadata'].format(newAttributes=param_sep.join(rendered_meta)))

        # Grouping options over region attributes (an attribute list object)
        region_list = self.params.get('regions', None)
        if region_list:
            regions_template = group_format['regions']
            fragments.append(regions_template.format(groupRegions=region_list.save(params_form, param_sep)))

        # New region definitions (each item renders itself)
        region_defs = self.params.get('newRegions', None)
        if region_defs:
            rendered_regions = [item.save(params_form) for item in region_defs]
            fragments.append(group_format['newRegions'].format(newRegions=param_sep.join(rendered_regions)))

        return skeleton.format(parameters=type_sep.join(fragments))


class Cover(Statement):
    """Statement whose operator is Operator(cover_variant).

    minAcc and maxAcc are not initialised by the constructor; save()
    reads both, so set_minAcc() and set_maxAcc() must be called first.
    """

    def __init__(self, cover_variant):
        super(Cover, self).__init__()
        self.operator = Operator(cover_variant)

    def set_minAcc(self, minAcc):
        """Store the minAcc value (joined as text by save())."""
        self.minAcc = minAcc

    def set_maxAcc(self, maxAcc):
        """Store the maxAcc value (joined as text by save())."""
        self.maxAcc = maxAcc

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        """Set the input variable."""
        self.set_variable(var, 'input1')

    def set_new_regions(self, regionAttributes):
        """Store the definitions of the new region attributes."""
        self.set_param(regionAttributes, 'newRegions')

    def set_groupby_clause(self, groupbyClause):
        """Store the groupby clause (an object exposing save(syntax, sep))."""
        self.set_param(groupbyClause, 'groupby')

    def save(self, syntax):
        """Return the complete statement text.

        The templates always come from the Operator.COVER section of
        syntax['PARAMS'], whichever variant this statement uses.
        """
        skeleton = super(Cover, self).save(syntax)

        params_form = syntax['PARAMS']
        cover_format = params_form[Operator.COVER.value]
        type_sep = params_form['type_separator']
        param_sep = params_form['param_separator']

        # The two accumulation bounds come first, joined verbatim.
        fragments = [param_sep.join((self.minAcc, self.maxAcc))]

        # Optional groupby clause
        clause = self.params.get('groupby', None)
        if clause:
            groupby_template = cover_format['groupby']
            fragments.append(groupby_template.format(groupbyClause=clause.save(params_form, param_sep)))

        # Optional new region attribute definitions
        region_defs = self.params.get('newRegions', None)
        if region_defs:
            rendered = [item.save(params_form) for item in region_defs]
            fragments.append(cover_format['regions'].format(newRegions=param_sep.join(rendered)))

        return skeleton.format(parameters=type_sep.join(fragments))


class Map(Statement):
    """Statement using Operator.MAP over a reference and an experiment variable.

    Optional parameters: new region definitions, a custom name for the
    count attribute and a joinby clause.
    """

    def __init__(self):
        super(Map, self).__init__()
        self.operator = Operator.MAP
        self.count_attribute = ''

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_reference_var(self, var):
        """Set the reference variable (first input)."""
        self.set_variable(var, 'input1')

    def set_experiment_var(self, var):
        """Set the experiment variable (second input)."""
        self.set_variable(var, 'input2')

    def set_count_attribute(self, name):
        """Store the name chosen for the count attribute."""
        self.count_attribute = name

    def set_new_regions(self, regionAttributes):
        """Store the definitions of the new region attributes."""
        self.set_param(regionAttributes, 'newRegions')

    def set_joinby_clause(self, joinbyClause):
        """Store the joinby clause (an object exposing save(syntax, sep))."""
        self.set_param(joinbyClause, 'joinby')

    def save(self, syntax):
        """Return the complete statement text.

        Fragments are emitted in the order new regions, count name,
        joinby clause and joined with the 'type_separator'.
        """
        skeleton = super(Map, self).save(syntax)

        params_form = syntax['PARAMS']
        map_format = params_form[self.operator.value]
        type_sep = params_form['type_separator']
        param_sep = params_form['param_separator']

        fragments = []

        # New region attribute definitions
        region_defs = self.params.get('newRegions', None)
        if region_defs:
            rendered = [item.save(params_form) for item in region_defs]
            fragments.append(map_format['regions'].format(newRegions=param_sep.join(rendered)))

        # Name of the count attribute; an empty name means "not chosen"
        if self.count_attribute:
            count_template = map_format['count']
            fragments.append(count_template.format(count_name=self.count_attribute))

        # Joinby clause
        clause = self.params.get('joinby', None)
        if clause:
            joinby_template = map_format['joinby']
            fragments.append(joinby_template.format(joinbyClause=clause.save(params_form, param_sep)))

        return skeleton.format(parameters=type_sep.join(fragments))


class Order(Statement):
    """Statement using Operator.ORDER.

    Ordering attributes can be given for 'metadata' and for 'region';
    'top' options are a sequence of (kind, option, k) triples where kind
    is 'metadata' or 'region'.
    """

    def __init__(self):
        super(Order, self).__init__()
        self.operator = Operator.ORDER

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_input_var(self, var):
        """Set the input variable."""
        self.set_variable(var, 'input1')

    def set_ordering_attributes(self, ordAtt, type):
        """Store the ordering attributes; *type* is 'metadata' or 'region'."""
        self.set_param(ordAtt, type + 'OrderingAttributes')

    def set_top_options(self, topts):
        """Store the top options, a sequence of (kind, option, k) triples."""
        self.set_param(topts, 'top')

    def save(self, syntax):
        """Return the complete statement text.

        For metadata first and regions second, the ordering attributes are
        emitted followed by the top options of the same kind. A fragment
        is added only when there is something to write.
        """
        stm = super(Order, self).save(syntax)

        params_form = syntax['PARAMS']
        order_form = params_form[self.operator.value]
        type_sep = params_form['type_separator']
        sep = params_form['param_separator']

        tops = self.params.get('top', None) or []

        params = []

        for kind in ('metadata', 'region'):
            # Ordering attribute list of this kind
            attributes = self.params.get(kind + 'OrderingAttributes', None)
            if attributes:
                params.append(order_form[kind]['orderingAttributes']
                              .format(att_list=attributes.save(order_form['att_list'], sep)))

            # Top options of this kind. A real list is built because a
            # Python 3 filter object is truthy even when it yields nothing,
            # which would add an empty fragment.
            kind_tops = [order_form[top[0]]['top'][top[1]].format(k=top[2])
                         for top in tops if top[0] == kind]
            if kind_tops:
                params.append(type_sep.join(kind_tops))

        stm = stm.format(parameters=type_sep.join(params))

        return stm

class Join(Statement):
    """Statement using Operator.JOIN over an anchor and an experiment variable.

    Optional parameters: a genomic predicate, an attribute list for the
    equi clause, an output option (stored as a CoordParam) and a joinby
    clause.
    """

    def __init__(self):
        super(Join, self).__init__()
        self.operator = Operator.JOIN

    def set_output_var(self, var):
        """Set the output variable."""
        self.set_variable(var, 'output')

    def set_anchor_var(self, var):
        """Set the anchor variable (first input)."""
        self.set_variable(var, 'input1')

    def set_experiment_var(self, var):
        """Set the experiment variable (second input)."""
        self.set_variable(var, 'input2')

    def set_output_opt(self, coord_param):
        """Store the output option, converted with CoordParam(coord_param)."""
        self.set_param(CoordParam(coord_param), 'output_opt')

    def set_joinby_clause(self, joinbyClause):
        """Store the joinby clause (an object exposing save(syntax, sep))."""
        self.set_param(joinbyClause, 'joinby')

    def set_equi_conditions(self, attributesList):
        """Store the attribute list used by the equi clause."""
        self.set_param(attributesList, 'equi_clause')

    def set_genomic_predicate(self, genomicPredicate):
        """Store the genomic predicate (an object exposing save(syntax, sep))."""
        self.set_param(genomicPredicate, 'genomic_predicate')

    def save(self, syntax):
        """Return the complete statement text.

        Fragments are emitted in the order genomic predicate, equi clause,
        output option, joinby clause; each one is skipped when not set.
        """

        stm = super(Join, self).save(syntax)

        params_form = syntax['PARAMS']
        join_format = params_form[self.operator.value]
        type_sep = params_form['type_separator']
        sep = params_form['param_separator']

        params = []

        # Format Genomic Predicate
        gpred = self.params.get('genomic_predicate', None)
        if gpred:
            params.append(join_format['genomic_predicate'].format(genomic_predicate=gpred.save(join_format, sep)))

        #  Format predicate over attributes
        equi_predicate = self.params.get('equi_clause', None)
        if equi_predicate:
            params.append(join_format['equi_clause'].format(att_list=equi_predicate.save(params_form, sep)))

        # Format option over output. The option may never have been set,
        # so guard before reading '.value' instead of failing on None.
        output_opt = self.params.get('output_opt', None)
        output_cond = output_opt.value if output_opt is not None else None
        if output_cond:
            params.append(join_format['output_opt'].format(coord_param=output_cond))

        # Format Joinby clause
        jbc = self.params.get('joinby', None)
        if jbc:
            params.append(join_format['joinby'].format(joinbyClause=jbc.save(params_form, sep)))

        stm = stm.format(parameters=type_sep.join(params))

        return stm



class Predicate(object):
    """A single condition comparing an attribute with a value.

    value_type starts as 'string' and selects the value template used
    by save().
    """

    def __init__(self, field1, field2, condition):
        self.p_attribute, self.p_value = field1, field2
        self.condition = condition
        self.value_type = 'string'

    def save(self, syntax):
        """Render the predicate with the templates in syntax['predicate'].

        The value is first formatted with
        syntax['predicate']['values'][value_type], then inserted together
        with the attribute into the template selected by the condition.
        """
        templates = syntax['predicate']
        condition_template = templates[self.condition]
        value_template = templates['values'][self.value_type]
        rendered_value = value_template.format(p=self.p_value)
        return condition_template.format(att=self.p_attribute, val=rendered_value)


class MetaPredicate(Predicate):
    """Predicate over a metadata attribute; adds nothing to the base class."""

    def __init__(self, attribute, value, condition):
        super(MetaPredicate, self).__init__(field1=attribute,
                                            field2=value,
                                            condition=condition)


class RegionPredicate(Predicate):
    """Predicate over a region attribute, with a configurable value type."""

    def __init__(self, attribute, value, condition):
        super(RegionPredicate, self).__init__(field1=attribute,
                                              field2=value,
                                              condition=condition)

    def set_value_type(self, type=None):
        """Possible values type are: coordinate, float, string, meta_attribute

        When *type* is given it is stored as is. Otherwise the type is
        inferred: 'coordinate' for the attributes chr/left/right/strand,
        else 'int' or 'float' when the value converts (the converted
        number replaces the stored value), else 'string'.
        """
        # An explicit type always wins.
        if type is not None:
            self.value_type = type
            return

        # Region coordinate attributes have their own value type.
        if self.p_attribute in ['chr', 'left', 'right', 'strand']:
            self.value_type = 'coordinate'
            return

        # Try the numeric conversions from the strictest to the loosest.
        for convert, label in ((int, 'int'), (float, 'float')):
            try:
                self.p_value = convert(self.p_value)
            except ValueError:
                continue
            self.value_type = label
            return

        self.value_type = 'string'

class RegionGenerator(object):
    """Definition of a new attribute computed by applying a function to an argument."""

    def __init__(self, newRegion, function, argRegion):
        self.newRegion, self.function, self.argument = newRegion, function, argRegion

    def save(self, syntax):
        """Render the definition.

        The function call is built from syntax['function'] using the
        function's '.value' and the argument; the result is then placed in
        syntax['new_region'] next to the new attribute name.
        """
        call_template = syntax['function']
        call = call_template.format(function=self.function.value, arg=self.argument)

        definition_template = syntax['new_region']
        return definition_template.format(r=self.newRegion, function=call)

class MetaAttributesGenerator(RegionGenerator):
    """Definition of a new metadata attribute; rendered exactly like the base class."""

    def __init__(self, newAttribute, function, argRegion):
        super(MetaAttributesGenerator, self).__init__(newRegion=newAttribute,
                                                      function=function,
                                                      argRegion=argRegion)

    def save(self, syntax):
        """Delegate the rendering to RegionGenerator.save()."""
        rendered = super(MetaAttributesGenerator, self).save(syntax)
        return rendered

class ProjectGenerator(RegionGenerator):
    """Attribute definition with special handling for some function kinds.

    The function is compared against RegFunction.MATH, the plain strings
    'rename' / 'fixed' and RegFunction.META; anything else falls back to
    the base class rendering.
    """

    def __init__(self, newRegion, function, arg):
        super(ProjectGenerator, self).__init__(newRegion, function, arg)

    def save(self, syntax):
        """Render the definition according to the kind of function."""
        # MATH: the argument is written as is.
        if self.function == RegFunction.MATH:
            return syntax['new_region'].format(r=self.newRegion, function=self.argument)

        # 'rename' / 'fixed': the argument is also written directly;
        # 'fixed' first passes it through a plain "{f}" format.
        if self.function in ('rename', 'fixed'):
            expression = self.argument
            if self.function == 'fixed':
                expression = "{f}".format(f=expression)
            return syntax['new_region'].format(r=self.newRegion, function=expression)

        # Every function other than META is handled by the base class.
        if self.function is not RegFunction.META:
            return super(ProjectGenerator, self).save(syntax)

        # META: the argument is a sequence joined with the parameter separator.
        call_template = syntax['function']
        call = call_template.format(function=self.function.value,
                                    arg=syntax['param_separator'].join(self.argument))
        return syntax['new_region'].format(r=self.newRegion, function=call)



class AttributesList(object):
    """A plain list of attribute names."""

    def __init__(self, attributes):
        self.attributes = attributes

    def save(self, syntax, sep):
        """Return the attributes joined with *sep*; *syntax* is not used here."""
        return sep.join(self.attributes)

class OrderingAttributes(AttributesList):
    """List of (attribute, direction) pairs used for ordering."""

    def __init__(self):
        attributes = list()
        super(OrderingAttributes, self).__init__(attributes)

    def add_attribute(self, att, desc):
        """Append the pair (att, desc); *desc* is the key of the template used by save()."""
        self.attributes.append((att, desc))

    def save(self, syntax, sep):
        """Return every attribute formatted with syntax[desc], joined with *sep*.

        The stored pairs are left untouched, so the method can be called
        repeatedly and more attributes can be added afterwards.
        """
        # Format into a local list: overwriting self.attributes with the
        # formatted strings would break any later call.
        formatted = [syntax[desc].format(att=att) for att, desc in self.attributes]
        return sep.join(formatted)


class JoinbyClause(AttributesList):
    """List of (attribute name, condition key) pairs."""

    def __init__(self, attributes):
        super(JoinbyClause, self).__init__(attributes)

    def save(self, syntax, sep):
        """Format each pair with syntax['metajoin_condition'][key] and join with *sep*."""
        rendered = [syntax['metajoin_condition'][pair[1]].format(att_name=pair[0])
                    for pair in self.attributes]
        return sep.join(rendered)

class GroupbyClause(JoinbyClause):
    """Groupby clause; stored and rendered exactly like a JoinbyClause."""

    def __init__(self, attributes):
        super(GroupbyClause, self).__init__(attributes)

    def save(self, syntax, sep):
        """Delegate the rendering to JoinbyClause.save()."""
        rendered = super(GroupbyClause, self).save(syntax, sep)
        return rendered


class SemiJoinPredicate(AttributesList):
    """Attribute list tied to an external dataset and a condition key."""

    def __init__(self, attributes, dataset, condition):
        super(SemiJoinPredicate, self).__init__(attributes)
        self.ds_ext, self.condition = dataset, condition

    def save(self, syntax, sep):
        """Join the attributes with *sep* and place them in syntax[condition]
        together with the external dataset."""
        joined = super(SemiJoinPredicate, self).save(syntax, sep)
        template = syntax[self.condition]
        return template.format(attributes=joined, ds_ext=self.ds_ext)


class GenomicPredicate(object):
    """Collection of distal conditions plus an optional distal stream."""

    def __init__(self):
        self.distal_conditions = []
        # Empty string means "no stream set"
        self.distal_stream = ''

    def add_distal_condition(self, condition, n):
        """Append the pair (DistalConditions(condition), n)."""
        self.distal_conditions.append((DistalConditions(condition), n))

    def add_distal_stream(self, direction):
        """Store DistalStream(direction) as the stream."""
        self.distal_stream = DistalStream(direction)

    def save(self, syntax, sep):
        """Render the conditions, then the stream if set, joined with *sep*.

        Conditions use syntax['distal_condition'] (fields 'dc' and 'n');
        the stream uses syntax['distal_stream'] (field 'ds').
        """
        # A real list is required because the stream is appended below;
        # on Python 3 map() returns an iterator without append().
        parts = [syntax['distal_condition'].format(dc=condition.value, n=n)
                 for condition, n in self.distal_conditions]
        if self.distal_stream:
            parts.append(syntax['distal_stream'].format(ds=self.distal_stream.value))

        return sep.join(parts)