diff gmql_queries_composer.py @ 0:a80c93182db3 draft default tip

planemo upload for repository https://github.com/lu-brn/gmql-galaxy commit 953ee36ceda5814dc9baa03427bc0eb4ee2e93bd-dirty
author geco-team
date Tue, 26 Jun 2018 09:08:06 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_queries_composer.py	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,691 @@
+#!/usr/bin/env python
+# --------------------------------------------------------------------------------
+# GMQL Queries Compositor
+# --------------------------------------------------------------------------------
+# Luana Brancato, luana.brancato@mail.polimi.it
+# --------------------------------------------------------------------------------
+
+import os, sys, argparse, json
+import yaml
+from itertools import chain
+from gmql_queries_statements import *
+from gmql_rest_queries import compile_query, run_query, check_input
+from gmql_rest_datasets import list_datasets
+from gmql_queries_constants import *
+
+def read_query(query_data):
+
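+    # The params JSON produced by the Galaxy tool form is expected to look roughly
+    # like this (illustrative sketch; only the fields read below are shown):
+    #
+    # {
+    #   "query_name": "my_query",
+    #   "operations": [ {"operation": {"operator": "SELECT", ...}}, ... ],
+    #   "materialize": {"materialize_result": true, "file_name": "result",
+    #                   "choose_op": {"out_format": "gtf", "import": true}}
+    # }
+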
+    # Create new Query object and read JSON file
+    query = dict()
+
+    with open(query_data, 'r') as f_in :
+        qd = json.loads(f_in.read())
+
+    query.update(name=qd['query_name'])
+
+    # A list of statement objects is created from the list of operations and added to the query
+    statements = [read_statement(x['operation']) for x in qd['operations']]
+
+    # Check if the user asked to materialize the final result and, if so, add a MATERIALIZE
+    # statement for the last variable defined.
+
+    if qd['materialize']['materialize_result']:
+        var = statements[-1][0].variables['output']
+        mat_stm = Materialize(qd['materialize']['file_name'], var)
+        statements.append((mat_stm,))
+
+        # Also save info about the desired output format (if available)
+        out_format = qd['materialize']['choose_op'].get('out_format', None)
+        if out_format:
+            query.update(out_format=out_format)
+
+        # Check if the user wants the results imported into Galaxy right away
+        importFlag = qd['materialize']['choose_op'].get('import', None)
+        if importFlag is not None:
+            query.update(importFlag=importFlag)
+
+    # Add the statements list to the query, flattening the tuples (an intermediate
+    # materialize adds a second statement to its tuple)
+
+    query.update(statements=[x for x in chain.from_iterable(statements)])
+
+    return query
+
+
+def read_statement(x):
+
+    op = x['operator']
+
+    if op == 'SELECT':
+        stm = create_select(x)
+    elif op == 'MAP':
+        stm = create_map(x)
+    elif op == 'ORDER':
+        stm = create_order(x)
+    elif op == 'JOIN':
+        stm = create_join(x)
+    elif op == 'PROJECT':
+        stm = create_project(x)
+    elif op == 'COVER':
+        stm = create_cover(x)
+    elif op == 'EXTEND':
+        stm = create_extend(x)
+    elif op == 'GROUP':
+        stm = create_group(x)
+    elif op == 'MERGE':
+        stm = create_merge(x)
+    elif op == 'UNION':
+        stm = create_union(x)
+    elif op == 'DIFFERENCE':
+        stm = create_difference(x)
+    else:
+        raise ValueError('Unknown operator: %s' % op)
+
+
+    # If the user asked to materialize the current statement, add a MATERIALIZE statement; otherwise return
+    # only the current statement
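+    # e.g. a SELECT with its own materialize option yields (Select, Materialize);
+    # without it, just (Select,)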
+
+    if x['m_stm']['materialize_stm'] :
+        mat_stm = Materialize(x['m_stm']['file_name'], stm.variables['output'])
+        return (stm, mat_stm)
+    else:
+        return (stm,)
+
+def create_project(x):
+    stm = Project()
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+    stm.set_input_var(x['input_var'])
+
+    # Check whether there is info about which region fields to keep, and set them up
+
+    reg_att = x['region_att']['allbut']
+    if reg_att['allbut_flag'] == 'keep' :
+
+        r_fields = reg_att.get('list_keep', None)
+        # If the list exists and it is not empty
+        if r_fields:
+            r_fields = map(lambda x: x.get('attribute'), r_fields)
+            stm.set_regions(AttributesList(r_fields))
+    else:
+        r_fields = reg_att.get('list_exclude', None)
+        if r_fields:
+            r_fields = map(lambda x: x.get('attribute'), r_fields)
+            stm.set_regions(AttributesList(r_fields), type='exclude')
+
+    # Similarly for metadata attributes to keep
+
+    meta_att = x['meta_att']['allbut']
+    if meta_att['allbut_flag'] == 'keep' :
+        m_atts = meta_att.get('list_keep', None)
+        if m_atts:
+            m_atts = map(lambda x: x.get('attribute'), m_atts)
+            stm.set_metadata(AttributesList(m_atts))
+    else:
+        m_atts = meta_att.get('list_exclude', None)
+        if m_atts:
+            m_atts = map(lambda x: x.get('attribute'), m_atts)
+            stm.set_metadata(AttributesList(m_atts), type='exclude')
+
+    # Check whether there are new region field definitions and set them up
+
+    pnr = x.get('project_new_regions').get('new_region_att', None)
+    if pnr:
+        pnr = map(lambda x: (x.get('new_name'), x.get('gen_function')), pnr)
+        f_defs = map(_project_get_new, pnr)
+
+        stm.set_new_regions(f_defs)
+
+    # Look for new metadata attribute definitions
+
+    pnm = x.get('project_new_meta').get('new_meta_att', None)
+    if pnm:
+        pnm = map(lambda x: (x.get('new_name'), x.get('gen_function')), pnm)
+        f_defs = map(_project_get_new, pnm)
+
+        stm.set_new_metadata(f_defs)
+
+
+    return stm
+
+def _project_get_new(nr):
+
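+    # nr is a (new_name, gen_function) pair; gen_function['gen_type'] selects how the
+    # ProjectGenerator is built (aggregate/SQRT/NULL, arithmetic, rename/fixed, or META)
+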
+    gen_type = nr[1].get('gen_type')
+    new_name = nr[0]
+
+    fg = ''
+
+    if gen_type in ['aggregate', 'SQRT', 'NULL']:
+        fg = ProjectGenerator(new_name, RegFunction(nr[1].get('function')), nr[1].get('arg'))
+
+    elif gen_type in ['arithmetic']:
+        fg = ProjectGenerator(new_name, RegFunction.MATH, nr[1].get('expression'))
+
+    elif gen_type in ['rename', 'fixed']:
+        fg = ProjectGenerator(new_name, gen_type, nr[1].get('arg'))
+
+    elif gen_type in ['META']:
+        fg = ProjectGenerator(new_name, RegFunction.META, (nr[1].get('arg'), nr[1].get('att_type')))
+
+    return fg
+
+def create_group(x):
+    stm = Group()
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+    stm.set_input_var(x['input_var'])
+
+    # If group_type is 'default', there are no additional conditions and we can return
+    # immediately (GROUP will apply its default grouping)
+
+    add_grouping = x['add_grouping']
+
+    if add_grouping['group_type'] == 'default' :
+        return stm
+
+    # Check if there are additional metadata grouping attributes and set them up, along with
+    # any definitions of new attributes
+
+    metadata = add_grouping.get('metadata', None)
+    if metadata:
+        group_atts = map(lambda x: (x['j_att'], x['metajoin_match']), metadata['group_meta_atts'])
+
+        jc = GroupbyClause(group_atts)
+        stm.set_group_meta(jc)
+
+        # Check if there are new metadata definitions and set them up
+        nm_data = metadata.get('meta_agg', None)
+        if nm_data and nm_data.get('meta_agg_flag'):
+            new_atts = map(lambda x: MetaAttributesGenerator(newAttribute=x['new_name'],
+                                                             function=RegFunction(x['function']),
+                                                             argRegion=x['argument']), nm_data['new_attributes'])
+            if len(new_atts) > 0:
+                stm.set_new_metadata(new_atts)
+
+    # Check if there are additional region grouping attributes and set them up
+    # Note that it may happen that the list is empty
+
+    regions = add_grouping.get('regions', None)
+    if regions:
+        r_group_atts = filter(lambda x: x['attribute'], regions['group_regions_atts'])
+        r_group_atts = map(lambda x: x['attribute'], r_group_atts)
+
+        if len(r_group_atts) > 0:
+            attList = AttributesList(r_group_atts)
+            stm.set_group_regions(attList)
+
+        nr_data = regions.get('new_attributes', None)
+        if nr_data:
+            r_new_atts = filter(lambda x: x['new_name'] and (x['function'] != 'None') and x['argument'], nr_data)
+            r_new_atts = map(lambda x: RegionGenerator(newRegion=x['new_name'],
+                                                       function=RegFunction(x['function']),
+                                                       argRegion=x['argument']), r_new_atts)
+            if len(r_new_atts) > 0:
+                stm.set_new_regions(r_new_atts)
+
+    return stm
+
+def create_merge(x):
+    stm = Merge()
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+    stm.set_input_var(x['input_var'])
+
+    # Check if there are additional grouping options and set them up
+
+    group_atts = x['groupby']['group_meta_atts']
+    if len(group_atts) > 0:
+        group_atts = map(lambda x: (x['j_att'], x['metajoin_match']), group_atts)
+        gc = GroupbyClause(group_atts)
+        stm.set_groupy_clause(gc)
+
+    return stm
+
+def create_union(x):
+    stm = Union()
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+    stm.set_first_var(x['input_var_first'])
+    stm.set_second_var(x['input_var_second'])
+
+    return stm
+
+def create_difference(x):
+    stm = Difference()
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+    stm.set_reference_var(x['input_var_reference'])
+    stm.set_negative_var(x['input_var_negative'])
+
+    # Check if the exact flag is set
+    if x['exact_flag']:
+        stm.set_exact()
+
+    # Check if there are joinby attributes and set them up
+
+    joinby_atts = x['joinby']['group_meta_atts']
+    if len(joinby_atts) > 0:
+        joinby_atts = map(lambda x: (x['j_att'], x['metajoin_match']), joinby_atts)
+        jc = JoinbyClause(joinby_atts)
+        stm.set_joinby_clause(jc)
+
+    return stm
+
+def create_extend(x):
+    stm = Extend()
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+    stm.set_input_var(x['input_var'])
+
+    # Look for new metadata attributes definitions
+
+    data = x['new_metadata_attributes']['new_attributes']
+
+    new_atts = map(lambda x: MetaAttributesGenerator(newAttribute=x['new_name'],
+                                                function=RegFunction(x['function']),
+                                                argRegion=x['argument']), data)
+
+    stm.set_new_attributes(new_atts)
+
+    return stm
+
+
+def create_join(x) :
+    stm = Join()
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+    stm.set_anchor_var(x['input_var_anchor'])
+    stm.set_experiment_var(x['input_var_experiment'])
+
+    # Look for conditions over regions distances and attributes values.
+
+    conds = x['conditions_section']['conditions']
+
+    if conds['c_type'] == 'distance':
+        pred = _genomic_predicate(conds.get('distance_conditions'))
+        stm.set_genomic_predicate(pred)
+    elif conds['c_type'] == 'attributes':
+        pred = _equi_conditions(conds.get('region_attributes'))
+        stm.set_equi_conditions(pred)
+    elif conds['c_type'] == 'both':
+        pred1 = _genomic_predicate(conds.get('distance_conditions'))
+        pred2 = _equi_conditions(conds.get('region_attributes'))
+        stm.set_genomic_predicate(pred1)
+        stm.set_equi_conditions(pred2)
+
+    # Set the output preference
+    stm.set_output_opt(conds.get('output_opt'))
+
+
+    # Check if there are joinby conditions and set them up
+
+    join_data = x['joinby']['joinby_clause']
+    join_data = filter(lambda x: x['j_att'], join_data)
+    join_data = map(lambda x: (x['j_att'], x['metajoin_match']), join_data)
+
+    if len(join_data) > 0:
+        jc = JoinbyClause(join_data)
+        stm.set_joinby_clause(jc)
+
+
+    return stm
+
+
+def _genomic_predicate(pred):
+
+    gp = GenomicPredicate()
+
+    # Loop over the distal predicates and distinguish between distal conditions and stream directions.
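+    # e.g. (assuming GMQL distal syntax) a 'dist' entry with dc='DGE' and n=3000 yields
+    # the distal condition DGE(3000), while a stream entry such as 'UP' fixes the upstream direction.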
+    for x in pred:
+        dc = x.get('type_dc')
+        if dc.get('type_dc_value') == 'dist':
+            gp.add_distal_condition(dc.get('dc'), dc.get('n'))
+        else:
+            gp.add_distal_stream(dc.get('ds'))
+
+    return gp
+
+
+def _equi_conditions(pred):
+
+    atts = map(lambda x: x.get('attribute'), pred)
+    ec = AttributesList(atts)
+
+    return ec
+
+
+def create_order(x):
+    stm = Order()
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+    stm.set_input_var(x['input_var_ordering_ds'])
+
+    # Collect the ordering attributes and set them up according to their type (metadata or region)
+
+    # Divide metadata attributes from region ones
+    atts = x['ordering_attributes']['attributes']
+
+    meta_att = filter(lambda att: att['att_type'] == 'metadata', atts)
+    region_att = filter(lambda att: att['att_type'] == 'region', atts)
+
+    # Collect attributes info from the two lists and add them to the ORDER parameters
+
+    if meta_att:
+        o_att_meta = OrderingAttributes()
+        for att in meta_att:
+            o_att_meta.add_attribute(att['attribute_name'], att['order_type'])
+        stm.set_ordering_attributes(o_att_meta, 'metadata')
+
+    if region_att:
+        o_att_region = OrderingAttributes()
+        for att in region_att:
+            o_att_region.add_attribute(att['attribute_name'], att['order_type'])
+        stm.set_ordering_attributes(o_att_region, 'region')
+
+    # Check if there are constraints over the number of samples to extract and set them up
+
+    top_opts = x['top_options']['to']
+
+    if top_opts:
+        topts = list()
+        for to in top_opts:
+            topts.append((to['type'], to['opt']['k_type'], to['opt']['k']))
+        stm.set_top_options(topts)
+
+    return stm
+
+
+def create_map(x):
+    stm = Map()
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+    stm.set_reference_var(x['input_var_reference'])
+    stm.set_experiment_var(x['input_var_experiment'])
+
+    # Check whether the user provided an alternative name for the count result attribute
+
+    if x['count_result']:
+        stm.set_count_attribute(x['count_result'])
+
+    # Check if there are additional region attribute definitions and set them up
+
+    nr_data = x['new_regions_attributes']['new_regions']
+
+    new_regions = filter(lambda x: x['new_name'] and (x['function'] != 'None') and x['argument'], nr_data)
+    new_regions = map(lambda x: RegionGenerator(newRegion=x['new_name'],
+                                   function=RegFunction(x['function']),
+                                   argRegion=x['argument']), new_regions)
+
+    if len(new_regions) > 0:
+        stm.set_new_regions(new_regions)
+
+    # Check if there are joinby conditions and set them up
+
+    join_data = x['joinby']['joinby_clause']
+    join_data = filter(lambda x: x['j_att'], join_data)
+    join_data = map(lambda x: (x['j_att'], x['metajoin_match']), join_data)
+
+    if len(join_data) > 0:
+        jc = JoinbyClause(join_data)
+        stm.set_joinby_clause(jc)
+
+    return stm
+
+def create_cover(x):
+    stm = Cover(x['cover_variant'])
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+    stm.set_input_var(x['input_var'])
+
+    # Read minAcc value
+    min_data = x['minAcc']
+    minAcc = _read_acc_values(min_data, min_data['min_type'])
+    stm.set_minAcc(minAcc)
+
+    # Read maxAcc value
+    max_data = x['maxAcc']
+    maxAcc = _read_acc_values(max_data, max_data['max_type'])
+    stm.set_maxAcc(maxAcc)
+
+    # Check if there are additional region attribute definitions and set them up
+
+    nr_data = x['new_regions_attributes']['new_regions']
+
+    new_regions = filter(lambda x: x['new_name'] and (x['function'] != 'None') and x['argument'], nr_data)
+    new_regions = map(lambda x: RegionGenerator(newRegion=x['new_name'],
+                                                function=RegFunction(x['function']),
+                                                argRegion=x['argument']), new_regions)
+
+    if len(new_regions) > 0:
+        stm.set_new_regions(new_regions)
+
+    # Check if there are groupby conditions and set them up
+
+    group_data = x['groupby']['groupby_clause']
+    group_data = filter(lambda x: x['j_att'], group_data)
+    group_data = map(lambda x: (x['j_att'], x['metajoin_match']), group_data)
+
+    if len(group_data) > 0:
+        gc = GroupbyClause(group_data)
+        stm.set_groupby_clause(gc)
+
+    return stm
+
+def _read_acc_values(data, value):
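+    # Translates the form selections into a GMQL accumulation value, e.g.
+    # {'min_type': 'ALL_n', 'n': 2} -> 'ALL / 2' and {'min_type': 'value', 'value': 3} -> '3'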
+
+    if value in ['ANY', 'ALL']:
+        return value
+    elif value == 'value':
+        return str(data['value'])
+    elif value == 'ALL_n':
+        return 'ALL / {n}'.format(n=data['n'])
+    elif value == 'ALL_n_k':
+        return '(ALL + {k}) / {n}'.format(n=data['n'], k=data['k'])
+
+
+def create_select(x) :
+
+    stm = Select()
+
+    # Set output and input variables
+    stm.set_output_var(x['output_var'])
+
+    input_data = x['input']
+
+    if input_data['input_type'] == 'i_ds':
+        input_var = input_data['input_ds']
+        stm.set_input_var(input_var)
+    elif input_data['input_type'] == 'i_var':
+        input_var = input_data['input_var']
+        stm.set_input_var(input_var)
+
+    # Check if there are metadata predicates and set them up.
+    # They can be built step by step or given directly as a text line;
+    # check the type and parse the appropriate data.
+
+    mp_data = input_data['metadata_predicates']['conditions']
+    if mp_data['ad_flag'] == 'steps':
+
+        if mp_data['condition'] != 'None':
+            meta_pred = _metadata_predicate(mp_data)
+
+            # If there are further blocks
+            for ma in mp_data['add_meta_blocks']:
+                if len(meta_pred) > 1:
+                    meta_pred = [meta_pred, Wff.BLOCK]
+                mp = _metadata_predicate(ma)
+
+                if ma['block_logCon']['negate']:
+                    mp = [mp, Wff.NOT]
+
+                meta_pred = [meta_pred, mp, Wff(ma['block_logCon']['logCon'])]
+
+            stm.set_param(meta_pred, 'metadata')
+    else :
+        meta_pred = check_input(mp_data['conditions_string'])
+        stm.set_param(meta_pred, 'metadata')
+
+    # The same applies to region predicates (if present)
+    rp_data = input_data['region_predicates']['conditions']
+    if rp_data['ad_flag'] == 'steps':
+
+        if rp_data['condition'] != 'None':
+            reg_pred = _region_predicate(rp_data)
+
+            # If there are further blocks
+            for ra in rp_data['add_region_blocks']:
+                if len(reg_pred) > 1:
+                    reg_pred = [reg_pred, Wff.BLOCK]
+                rp = _region_predicate(ra)
+
+                if ra['block_logCon']['negate']:
+                    rp = [rp, Wff.NOT]
+
+                reg_pred = [reg_pred, rp, Wff(ra['block_logCon']['logCon'])]
+
+            stm.set_param(reg_pred, 'region')
+    else:
+        reg_pred = check_input(rp_data['conditions_string'])
+        stm.set_param(reg_pred, 'region')
+
+    # Check if there is a semijoin predicate. If so, collect the attributes and the external dataset to compare against.
+
+    sj_data = input_data['semijoin_predicate']
+
+    if sj_data['sj_attributes'] :
+        sj_attr = map(lambda x: x['sj_att'], sj_data['sj_attributes'])
+        sj = SemiJoinPredicate(sj_attr, sj_data['ds_ext'], sj_data['condition'])
+
+        stm.set_param(sj, 'semijoin')
+
+    return stm
+
+def _metadata_predicate(mp_data):
+    # Metadata predicates are well-formed logical formulas. Create a new one with the first
+    # predicate, negating it if requested.
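+    # Formulas are represented as nested postfix lists: [operand, operand, operator],
+    # e.g. [p1, p2, Wff.AND] stands for "p1 AND p2" and [p, Wff.NOT] for "NOT p".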
+
+    mp = MetaPredicate(mp_data['attribute'], mp_data['value'], mp_data['condition'])
+    if mp_data['negate']:
+        mp = [mp, Wff.NOT]
+
+    # Check if there are further predicates
+    for pa in mp_data['pm_additional']:
+
+        mp1 = MetaPredicate(pa['attribute'], pa['value'], pa['condition'])
+        if pa['negate']:
+            mp1 = [mp1, Wff.NOT]
+
+        if pa['logCon'] == 'AND':
+            mp = [mp, mp1, Wff.AND]
+        elif pa['logCon'] == 'OR':
+            mp = [mp, mp1, Wff.OR]
+
+    return mp
+
+def _region_predicate(rp_data):
+
+    rp_s = RegionPredicate(rp_data['attribute'], rp_data['value'], rp_data['condition'])
+    if rp_data['is_meta_value']:
+        rp_s.set_value_type('meta')
+    else:
+        rp_s.set_value_type()
+    rp = rp_s
+    if rp_data['negate']:
+        rp = [rp, Wff.NOT]
+
+    # Check if there are further predicates
+    for pa in rp_data['pr_additional']:
+        rp1_s = RegionPredicate(pa['attribute'], pa['value'], pa['condition'])
+        if pa['is_meta_value']:
+            rp1_s.set_value_type('meta')
+        else:
+            rp1_s.set_value_type()
+        rp1 = rp1_s
+
+        if pa['negate']:
+            rp1 = [rp1, Wff.NOT]
+
+        if pa['logCon'] == 'AND':
+            rp = [rp, rp1, Wff.AND]
+        elif pa['logCon'] == 'OR':
+            rp = [rp, rp1, Wff.OR]
+
+    return rp
+
+def save(query, output, query_source):
+
+    # Locate the config file that defines the actual GMQL syntax to use
+    y_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'gmql_syntax.yaml')
+
+    with open(y_path, 'r') as yamlf:
+        syntax = yaml.safe_load(yamlf)
+
+    # If continuing a previously saved local query, first copy its earlier statements
+    if query_source:
+        with open(output, 'w') as f_out:
+            with open(query_source, 'r') as f_in:
+                f_out.writelines(f_in.readlines())
+
+
+    with open(output, 'a') as f_out:
+
+        for s in query['statements']:
+            f_out.write('{stm}\n'.format(stm=s.save(syntax)))
+
+
+def compile(user, query_name, query_file, log):
+    # Call the service in gmql_rest_queries to send the query to the GMQL server to compile.
+
+    compile_query(user, query_name, query_file, log)
+
+
+def run(user, query_name, query, log, out_format, importFlag, updated_ds_list):
+    # Call the service in gmql_rest_queries to send the query to the GMQL server to be executed.
+
+    run_query(user, query_name, query, log, out_format, importFlag)
+
+    # Save the updated list of datasets
+    list_datasets(user, updated_ds_list)
+
+
+def stop_err(msg):
+    sys.stderr.write("%s\n" % msg)
+    sys.exit(1)
+
+def __main__():
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-user")
+    parser.add_argument("-cmd")
+    parser.add_argument("-query_params")
+    parser.add_argument("-query_output")
+    parser.add_argument("-query_source")
+    parser.add_argument("-query_log")
+    parser.add_argument("-updated_ds_list")
+
+    args = parser.parse_args()
+
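+    # Example invocation (hypothetical file names):
+    #   python gmql_queries_composer.py -user u1 -cmd run -query_params params.json \
+    #       -query_output query.gmql -query_log run.log -updated_ds_list datasets.txt
+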
+    query = read_query(args.query_params)
+    save(query, args.query_output, args.query_source)
+
+    if args.cmd == 'compile':
+        compile(args.user, query['name'], args.query_output, args.query_log)
+
+    if args.cmd == 'run':
+        run(args.user, query['name'], args.query_output, args.query_log, query['out_format'],
+            query['importFlag'], args.updated_ds_list)
+
+
+if __name__ == "__main__":
+    __main__()