Commit message:
planemo upload for repository https://github.com/lu-brn/gmql-galaxy commit 953ee36ceda5814dc9baa03427bc0eb4ee2e93bd-dirty
added:
dynamic_utils.py gmql_operators_cover.xml gmql_operators_extend.xml gmql_operators_group.xml gmql_operators_join.xml gmql_operators_map.xml gmql_operators_merge_union_diff.xml gmql_operators_order.xml gmql_operators_project.xml gmql_operators_select.xml gmql_operators_tests.xml gmql_queries_composer.py gmql_queries_composer.xml gmql_queries_constants.py gmql_queries_macros.xml gmql_queries_statements.py gmql_rest.yaml gmql_rest_datasets.py gmql_rest_macros.xml gmql_rest_queries.py gmql_syntax.yaml test-data/Example1_Archive.zip test-data/TG/S_00000.gdm test-data/TG/S_00000.gdm.meta test-data/TG/S_00001.gdm test-data/TG/S_00001.gdm.meta test-data/TG/S_00002.gdm test-data/TG/S_00002.gdm.meta test-data/TG/schema.xml test-data/cover1.gmql_query test-data/cover2.gmql_query test-data/difference1.gmql_query test-data/extend1.gmql_query test-data/group1.gmql_query test-data/guest.gmql_user test-data/guest2.gmql_user test-data/join1.gmql_query test-data/join2.gmql_query test-data/map1.gmql_query test-data/merge1.gmql_query test-data/order1.gmql_query test-data/project1.gmql_query test-data/query.gmql_query test-data/rep.gmql_repository test-data/sample1.bed test-data/sample1.bed.meta test-data/sample2.bed test-data/sample2.bed.meta test-data/sample3.bed test-data/sample3.bed.meta test-data/select1.gmql_query test-data/select2.gmql_query test-data/select3.gmql_query test-data/union1.gmql_query utilities.py
diff -r 000000000000 -r a80c93182db3 dynamic_utils.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dynamic_utils.py	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,281 @@
+#!/usr/bin/env python
+# --------------------------------------------------------------------------------
+# Class for the dynamic options in the GMQL tools
+# --------------------------------------------------------------------------------
+# Luana Brancato, luana.brancato@mail.polimi.it
+# --------------------------------------------------------------------------------
+
+import sys, requests
+
+
+def validate(request_context, error_map, params, inputs):
+    """Generic validate function; it checks if the user is valid."""
+
+    user = params.get('authToken', '')
+
+    if user:
+        try:
+            validate_user(user.file_name)
+        except:
+            error_msg = 'User has expired'
+            error_map['authToken'] = error_msg
+
+
+def validate_upload(request_context, error_map, params, inputs):
+    """Validate function for the uploading tool. It also checks that the chosen ds name does not exist already."""
+
+    validate(request_context, error_map, params, inputs)
+
+    name = params.get('name')
+
+    user = params.get('authToken')
+
+    # This MUST be changed in the future to a parametric solution. Hopefully in the future Galaxy will allow
+    # validation without external scripts
+
+    url = 'http://genomic.elet.polimi.it/gmql-rest/datasets'
+
+    datasets = get(url, user=user.file_name)
+    list_datasets = [x['name'] for x in datasets['datasets']]
+
+    if name in list_datasets:
+        error_msg = 'Dataset already exists. Choose another name.'
+        error_map['name'] = error_msg
+
+
+def validate_variables(request_context, error_map, params, inputs):
+    """Validate function for gmql_compositor. It checks that all query input variables
+    have been previously defined."""
+
+    validate(request_context, error_map, params, inputs)
+
+    output_vars = set([])
+
+    # TODO: Include in the check output variables eventually defined previously in another query
+
+    for op in params.get('operations'):
+        op_curr = op.get('operation')
+        if op_curr.get('input', ''):
+            input_var = op_curr.get('input').get('input_var', '')
+            if input_var:
+                if input_var not in output_vars:
+                    error_msg = '%s has not been defined yet\n' % (input_var)
+                    name = '|'.join(['operations_%d' % (op.get('__index__')), 'operation', 'input', 'input_var'])
+                    error_map[name] = error_msg
+        else:
+            for key in op_curr.keys():
+                if key.startswith('input_var'):
+                    input_var = op_curr.get(key)
+                    if input_var:
+                        if input_var not in output_vars:
+                            error_msg = '%s has not been defined yet\n' % (input_var)
+                            name = '|'.join(['operations_%d' % (op.get('__index__')), 'operation', key])
+                            error_map[name] = error_msg
+
+        # Update output_vars with the result of the current operation
+        output_vars.add(op_curr.get('output_var'))
+
+
+def validate_user(user):
+    """Check if the user is a valid one"""
+
+    if user:
+        with open(user, 'r') as f:
+            valid = f.readline().rstrip('\n').split('\t')[2]
+            if valid == 'False':
+                raise Exception, "User has expired"
+
+
+def get_metadata_attr(user, ds, ds_list):
+    options = []
+
+    try:
+        validate_user(user)
+        if ds_list:
+
+            owner = ''
+
+            with open(ds_list, 'r') as f:
+                for d in f.readlines():
+                    if d.split('\t')[0] == ds:
+                        owner = d.split('\t')[1].rstrip('\n')
+                f.close()
+
+            attr_list = get_metadata(user, ds, str(owner))
+
+            for i, att in enumerate(attr_list['attributes']):
+                options.append((att.get('key', ' '), att.get('key', ' '), i == 0))
+
+            return options
+
+        else:
+            return option
[...]
+    """It takes the tabular file with the information over the user
+    name   authToken   valid_flag
+    It checks if the user is still valid and extracts the authToken for the REST calls"""
+
+    with open(input, 'r') as f_in:
+        user = f_in.readline().rstrip('\n').split('\t')
+
+    if user[2]:
+        token = user[1]
+    else:
+        stop_err("This session is no longer valid")
+
+    return token
+
+
+def get(url, user=None, response_type='json'):
+    """GET Request
+    :param url: url where to fetch the requested resource
+    :param user: for authenticated requests; if not provided, make an unauthenticated request (e.g. for login)
+    :param response_type: type of the fetched response.
+        JSON ( Default )
+        TEXT
+        ZIP
+        FILE
+    """
+
+    # Set request headers
+    headers = dict()
+
+    if user:
+        headers.update({'X-AUTH-TOKEN': read_token(user)})
+
+    if response_type == 'text':
+        headers.update({'Accept': 'text/plain'})
+    elif response_type == 'zip':
+        pass
+    elif response_type == 'file':
+        headers.update({'Accept': 'file'})
+    else:
+        headers.update({'Accept': 'application/json'})
+
+    # Make the request
+    response = requests.get(url, headers=headers)
+
+    # Check returned server status
+    status_code = response.status_code
+
+    # Read result. If server OK, read according to response_type. Raise an error otherwise.
+    if status_code == requests.codes.ok:
+        if response_type == 'json':
+            return response.json()
+        elif response_type == 'text':
+            return response.text
+        else:
+            return response
+    elif status_code == requests.codes.unauthorized:
+        # expire_user(user)
+        stop_err("You are not authorized to do this. \nPlease login first.")
+    elif status_code == requests.codes.not_found:
+        stop_err("Resource not found for this user.")
+    else:
+        stop_err("Error {code}: {reason}\n{message}".format(code=status_code,
+                                                            reason=response.reason,
+                                                            message=response.content))
+
+
+def post(url, payload, user=None, params=None, content_type='json', response_type='json'):
+    """ POST Request
+    :param url: url where to post data
+    :param payload: payload for the post request. Type is specified by content_type.
+    :param user: for authenticated requests; if not provided, make an unauthenticated request (e.g. for registration)
+    :param params: optional query parameters
+    :param content_type
+    :param response_type: Default is json
+    """
+
+    # Set request headers
+    headers = dict()
+
+    if user:
+        headers.update({'X-AUTH-TOKEN': read_token(user)})
+
+    headers.update({'Accept': 'application/json'})
+
+    if content_type == 'text':
+        headers.update({'Content-Type': 'text/plain'})
+        response = requests.post(url, params=params, headers=headers, data=payload)
+    elif content_type == 'multiform':
+        response = requests.post(url, params=params, headers=headers, files=payload)
+    else:
+        headers.update({'Content-Type': 'application/json'})
+        response = requests.post(url, params=params, headers=headers, json=payload)
+
+    # Check returned server status
+    status_code = response.status_code
+
+    if status_code == requests.codes.ok:
+        return response.json()
+    elif status_code == requests.codes.unauthorized:
+        # expire_user(user)
+        stop_err("You are not authorized to do this. \nPlease login first.")
+    else:
+        stop_err("Error {code}: {reason}\n{message}".format(code=status_code,
+                                                            reason=response.reason,
+                                                            message=response.content))
+
+
+def stop_err(msg):
+    sys.stderr.write("%s\n" % msg)
+    sys.exit()
\ No newline at end of file
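For orientation, the REST helpers above can be exercised outside Galaxy as well. A minimal sketch, assuming dynamic_utils.py is importable and using the tab-separated name/authToken/valid_flag layout described in the token-reader docstring (the file path and token value here are invented):

    # sketch: list the user's datasets through the GET helper in dynamic_utils.py
    from dynamic_utils import get

    # A .gmql_user file is tabular: <name>\t<authToken>\t<valid_flag>
    with open('guest.gmql_user', 'w') as f:
        f.write('guest\t0123-example-token\tTrue\n')

    # Same endpoint hard-coded in validate_upload() above
    datasets = get('http://genomic.elet.polimi.it/gmql-rest/datasets', user='guest.gmql_user')
    print([d['name'] for d in datasets['datasets']])
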
diff -r 000000000000 -r a80c93182db3 gmql_operators_cover.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_operators_cover.xml	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,80 @@
+<macros>
+    <import>gmql_queries_macros.xml</import>
+    <xml name="cover">
+        <expand macro="input_var" name="input_var" label="Input Dataset" optional="false" />
+        <expand macro="output_var" />
+        <param name="cover_variant" type="select" label="COVER type"
+               help="Variants modify the coordinates of the returned regions">
+            <option value="COVER">Base COVER</option>
+            <option value="FLAT">FLAT (returns the union of all the regions which contribute to the COVER)</option>
+            <option value="SUMMIT">SUMMIT (returns only those portions of the COVER result with the maximum number of overlapping regions)</option>
+            <option value="HISTOGRAM">HISTOGRAM (returns all regions contributing to the COVER, divided in different (contiguous) parts according to their accumulation index value (one part for each different accumulation value), which is assigned to the additional AccIndex region attribute)</option>
+        </param>
+        <conditional name="minAcc">
+            <param name="min_type" type="select" label="minAcc"
+                   help="The minimum accumulation value, i.e., the minimum number of overlapping
+                         regions to be considered during COVER execution">
+                <expand macro="acc_options" />
+            </param>
+            <expand macro="acc_values" />
+        </conditional>
+        <conditional name="maxAcc">
+            <param name="max_type" type="select" label="maxAcc"
+                   help="The maximum accumulation value, i.e., the maximum number of overlapping
+                         regions to be considered during COVER execution">
+                <expand macro="acc_options" >
+                    <option value="ANY">ANY (Any number of overlapping regions)</option>
+                </expand>
+            </param>
+            <expand macro="acc_values" />
+        </conditional>
+        <section name="new_regions_attributes" title="New Genomic Region Attributes" expanded="false">
+            <repeat name="new_regions" title="Add New Region Attribute to Result Dataset" min="0" default="0"
+                    help="Resulting regions may have new attributes, calculated by means of aggregate expressions
+                          over the attributes of the contributing regions. By default, the region attributes
+                          'JaccardIntersect' (calculated as the ratio between the lengths of the intersection and
+                          of the union of the contributing regions) and 'JaccardResult' (the ratio between the
+                          lengths of the result and of the union of the contributing regions) are already added">
+                <param name="new_name" type="text" label="New Region Attribute Name" optional="true">
+                    <expand macro="text_val"/>
+                </param>
+                <expand macro="aggrFuncs" help="Sum, average, maximum, minimum and standard deviation are only for
+                        numerical attributes" optional="true" label="Aggregate Function to apply"/>
+                <param name="argument" label="Region attribute over which to apply the function" type="text" optional="true">
+                    <expand macro="text_val"/>
+                </param>
+            </repeat>
+        </section>
+        <section name="groupby" title="Group Samples by Metadata" expanded="false"
+                 help="The input samples are partitioned in groups, each with distinct values of the grouping metadata
+                       attributes, and the COVER operation is separately applied (as described above) to each group,
+                       yielding one sample in the result for each group (input samples that do not satisfy the groupby
+                       condition are disregarded).">
+            <repeat name="groupby_clause" title="Attributes for grouping" min="0" default="0" >
+                <expand macro="metajoin_attribute" name="j_att" optional="true" >
+                    <expand macro="metajoin_options" />
+                </expand>
+            </repeat>
+        </section>
+        <expand macro="materialize_stm" />
+    </xml>
+    <xml name="acc_options">
+        <yield/>
+        <option value="value">Integer Value</option>
+        <option value="ALL">ALL (Number of samples in the input dataset)</option>
+        <option value="ALL_n">ALL / n</option>
+        <option value="ALL_n_k">(ALL + k) / n</option>
+    </xml>
+    <xml name="acc_values">
+        <when value="value">
+            <param name="value" type="integer" label="Value" value="1" min="1"/>
+        </when>
+        <when value="ALL_n">
+            <param name="n" type="integer" label="Value of n" value="1" min="1"/>
+        </when>
+        <when value="ALL_n_k">
+            <param name="n" type="integer" label="Value of n" value="1" min="1"/>
+            <param name="k" type="integer" label="Value of k" value="1" min="1"/>
+        </when>
+    </xml>
+</macros>
\ No newline at end of file
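As a rough orientation, a hypothetical COVER statement assembled from these macro selections (statement shape follows the public GMQL documentation; dataset and attribute names are invented):

    # minAcc=2, maxAcc=ANY; one groupby attribute; one aggregate region attribute
    cover_stm = "RES = COVER(2, ANY; groupby: cell; aggregate: minP AS MIN(pValue)) EXP;"
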
diff -r 000000000000 -r a80c93182db3 gmql_operators_extend.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_operators_extend.xml	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,27 @@
+<macros>
+    <import>gmql_queries_macros.xml</import>
+    <xml name="extend">
+        <expand macro="input_var" name="input_var" label="Input Dataset" optional="false" />
+        <expand macro="output_var" />
+        <section name="new_metadata_attributes" title="New Metadata Attributes" expanded="true">
+            <repeat name="new_attributes" title="Add Metadata Attribute to Result Dataset" min="1"
+                    help="Builds new metadata attributes, assigns their values as the result of aggregate functions
+                          calculated on sample region attributes, and adds them to the existing metadata
+                          attribute-value pairs of the sample">
+                <param name="new_name" type="text" label="New Metadata Attribute Name" optional="true">
+                    <expand macro="text_val"/>
+                </param>
+                <expand macro="aggrFuncs" help="Sum, average, maximum, minimum and standard deviation are only for
+                        numerical attributes" optional="false" label="Aggregate Function to apply">
+                    <option value="q1">First Quartile</option>
+                    <option value="q2">Second Quartile</option>
+                    <option value="q3">Third Quartile</option>
+                </expand>
+                <param name="argument" label="Region attribute over which to apply the function" type="text" optional="true">
+                    <expand macro="text_val"/>
+                </param>
+            </repeat>
+        </section>
+        <expand macro="materialize_stm" />
+    </xml>
+</macros>
\ No newline at end of file
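For comparison, a hypothetical EXTEND statement this macro could compose to (GMQL syntax per the public documentation; names invented):

    # New metadata attribute computed over each sample's regions
    extend_stm = "RES = EXTEND(region_count AS COUNT()) EXP;"
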
diff -r 000000000000 -r a80c93182db3 gmql_operators_group.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_operators_group.xml	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,82 @@
+<macros>
+    <import>gmql_queries_macros.xml</import>
+    <xml name="group">
+        <expand macro="input_var" name="input_var" label="Input Dataset" optional="false" />
+        <expand macro="output_var" />
+        <conditional name="add_grouping">
+            <param name="group_type" type="select" label="Additional Grouping Options"
+                   help="If the default option is set, grouping is done only on the region attributes which represent
+                         the four genomic coordinates (chr, start, stop, and strand). Inside a single sample, all
+                         regions that have equal values in these four coordinates are collapsed into a single one,
+                         thus eliminating duplicate regions" >
+                <option value="default">Only default attributes (genomic coordinates)</option>
+                <option value="meta">Group also on metadata attributes</option>
+                <option value="regions">Group also on other region attributes and/or create new region fields with aggregate functions</option>
+                <option value="both">Group also on both metadata and other region attributes</option>
+            </param>
+            <when value="default">
+                <!-- do nothing -->
+            </when>
+            <when value="meta">
+                <expand macro="metadata_options" />
+            </when>
+            <when value="regions">
+                <expand macro="region_options" />
+            </when>
+            <when value="both">
+                <expand macro="metadata_options" />
+                <expand macro="region_options" />
+            </when>
+        </conditional>
+        <expand macro="materialize_stm" />
+    </xml>
+    <xml name="region_options">
+        <section name="regions" title="Additional grouping options over region attributes" expanded="true"
+                 help="In each sample, regions found in the same group (i.e., regions with same coordinates and
+                       grouping attribute values) are combined into a single region; this allows merging regions that
+                       are duplicated inside the same sample (based on the values of their coordinates and of other
+                       possibly specified region attributes)">
+            <repeat name="group_regions_atts" min="0" title="Grouping Region Attribute">
+                <param name="attribute" type="text" label="Region Attribute">
+                    <expand macro="text_val" />
+                    <validator type="expression" message="Coordinate attributes are already used by default">value not in ['chr','left','right','strand']</validator>
+                </param>
+            </repeat>
+            <expand macro="att_generator" title="Aggregate Region Attribute" min="0"/>
+        </section>
+    </xml>
+    <xml name="metadata_options">
+        <section name="metadata" title="Grouping Options over metadata attributes" expanded="true"
+                 help="GROUP performs the grouping of samples of the input dataset based on one specified metadata
+                       attribute. If the metadata attribute is multi-value, the grouping identifies different groups of
+                       samples for each attribute value combination. All metadata in the input samples are conserved,
+                       with the addition of the _group attribute, whose value is the identifier of the group to which
+                       the specific sample is assigned. Samples having missing values for any of the grouping metadata
+                       attributes are assigned all together to one group, identified by the special value _group = 0">
+            <repeat name="group_meta_atts" min="1" title="Grouping Metadata Attribute">
+                <expand macro="metajoin_attribute" name="j_att" optional="true" >
+                    <expand macro="metajoin_options" />
+                </expand>
+            </repeat>
+            <conditional name="meta_agg">
+                <param name="meta_agg_flag" type="boolean" label="Create new aggregate attributes?"
+                       help="It is possible to request the evaluation of aggregate functions on metadata attributes;
+                             these functions consider the metadata contained in all samples of each group" />
+                <when value="true">
+                    <expand macro="att_generator" title="Aggregate Metadata Attribute" min="1" />
+                </when>
+            </conditional>
+        </section>
+    </xml>
+    <xml name="att_generator" token_title="@TITLE@" token_min="@MIN@">
+        <repeat name="new_attributes" title="@TITLE@" min="@MIN@">
+            <param name="new_name" type="text" label="New Attribute Name">
+                <expand macro="text_val"/>
+            </param>
+            <expand macro="aggrFuncs" help="Sum, average, maximum, minimum and standard deviation are only for
+                    numerical attributes" optional="false" label="Aggregate Function to apply" />
+            <param name="argument" label="Attribute over which to apply the function" type="text">
+                <expand macro="text_val"/>
+            </param>
+        </repeat>
+    </xml>
+</macros>
\ No newline at end of file
diff -r 000000000000 -r a80c93182db3 gmql_operators_join.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_operators_join.xml	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,117 @@
+<macros>
+    <import>gmql_queries_macros.xml</import>
+    <xml name="join">
+        <expand macro="input_var" name="input_var_anchor" label="Anchor Dataset" optional="false" />
+        <expand macro="input_var" name="input_var_experiment" label="Experiment Dataset" optional="false" />
+        <expand macro="output_var" />
+        <section name="conditions_section" title="Join Conditions" expanded="true">
+            <conditional name="conditions">
+                <param name="c_type" type="select" label="Type of conditions" display="radio">
+                    <option value="distance">Only on genomic distances</option>
+                    <option value="attributes">Only on region attributes</option>
+                    <option value="both">Both types</option>
+                </param>
+                <when value="distance">
+                    <expand macro="genomic_predicate" />
+                    <expand macro="output_opt"
+                            help="Declare which region is given in output for each input pair of anchor and experiment
+                                  regions satisfying the equi predicate and the genomic predicate. The default output
+                                  option is CAT.">
+                        <option value="CAT" selected="true">CAT: Outputs the concatenation between regions that satisfy predicates</option>
+                        <option value="INT">INT: Outputs the overlapping part (intersection) of regions that satisfy predicates</option>
+                    </expand>
+                </when>
+                <when value="attributes">
+                    <expand macro="equi_predicate" />
+                    <expand macro="output_opt"
+                            help="Declare which region is given in output for each input pair of anchor and experiment
+                                  regions satisfying the equi predicate and the genomic predicate. Some options (INT
+                                  and CAT) are not allowed with conditions only on the attributes."/>
+                </when>
+                <when value="both">
+                    <expand macro="genomic_predicate"/>
+                    <expand macro="equi_predicate"/>
+                    <expand macro="output_opt"
+                            help="Declare which region is given in output for each input pair of anchor and experiment
+                                  regions satisfying the equi predicate and the genomic predicate. The default output
+                                  option is CAT.">
+                        <option value="CAT" selected="true">CAT: Outputs the concatenation between regions that satisfy predicates</option>
+                        <option value="INT">INT: Outputs the overlapping part (intersection) of regions that satisfy predicates</option>
+                    </expand>
+                </when>
+            </conditional>
+        </section>
+        <section name="joinby" title="Condition on Samples Metadata" expanded="false"
+                 help="This is used to select sample pairs satisfying certain conditions on their metadata
+                       (e.g., regarding the same cell line or antibody target); it is expressed as a list of metadata
+                       attributes whose names and values must match between samples in the Anchor Dataset and the
+                       Experiment Dataset in order for such samples to verify the condition and be considered for the join.">
+            <repeat name="joinby_clause" title="Attributes to match" min="0" default="0">
+                <expand macro="metajoin_attribute" name="j_att" optional="true">
+                    <expand macro="metajoin_options"/>
+                </expand>
+            </repeat>
+        </section>
+        <expand macro="materialize_stm" />
+    </xml>
+    <xml name="equi_predicate">
+        <repeat name="region_attributes" title="Add Region Attribute"
+                min="1" default="1"
+                help="Region attributes which must exist in the anchor dataset region for the experiment region
+                      to be considered" >
+            <param name="attribute" type="text" label="Region Attribute">
+                <expand macro="text_val" />
+            </param>
+        </repeat>
+    </xml>
+    <xml name="genomic_predicate">
+        <repeat name="distance_conditions" title="Conditions over genomic distances between samples regions"
+                min="1" max="4" default="1"
+                help="Genometric predicates are fundamental for JOIN commands: they allow the expression of a variety
+                      of distal conditions, all based on the concept of genomic distance. The genomic distance is
+                      defined as the number of base pairs (i.e., nucleotides) between the closest opposite ends of two
+                      regions belonging to the same chromosome and the same DNA strand (or when at least one of the
+                      two regions has unknown strand); it is not defined for regions on different chromosomes or
+                      different DNA strands. Further details in the GMQL online guide or in the above help section.">
+            <conditional name="type_dc">
+                <param label="Type" name="type_dc_value" type="select" display="radio">
+                    <option value="dist">Distance</option>
+                    <option value="stream">Direction</option>
+                </param>
+                <when value="dist">
+                    <param name="dc" label="Condition" type="select"
+                           help="The overall condition must contain at least one and at most four distal conditions.
+                                 There must be at least one less-equal distance, one less distance, or a minimum
+                                 distance clause (which can then be combined with other clauses) in order to be
+                                 well-formed and compile.">
+                        <option value="MD">Minimum distance</option>
+                        <option value="DL">Less distance</option>
+                        <option value="DLE">Less or equal distance</option>
+                        <option value="DG">Greater distance</option>
+                        <option value="DGE">Greater or equal distance</option>
+                    </param>
+                    <param name="n" label="Value" type="integer" value="0"
+                           help="In the GMQL framework, overlapping regions have negative distance while adjacent
+                                 regions have distance equal to 0"/>
+                </when>
+                <when value="stream">
+                    <param name="ds" label="Direction" display="radio" type="select"
+                           help="This requires that the rest of the predicate holds only on the upstream (downstream)
+                                 genome with respect to the anchor region.">
+                        <option value="UP">Upstream</option>
+                        <option value="DOWN">Downstream</option>
+                    </param>
+                </when>
+            </conditional>
+        </repeat>
+    </xml>
+    <xml name="output_opt" token_help="@HELP@">
+        <param name="output_opt" type="select" label="Output options"
+               help="@HELP@" >
+            <option value="LEFT">LEFT: Outputs only anchor samples regions</option>
+            <option value="LEFT_DISTINCT">LEFT_DISTINCT: Outputs only anchor samples regions, eliminating duplicates</option>
+            <option value="RIGHT">RIGHT: Outputs only experiment samples regions</option>
+            <option value="RIGHT_DISTINCT">RIGHT_DISTINCT: Outputs only experiment samples regions, eliminating duplicates</option>
+            <option value="BOTH">BOTH: Outputs anchor samples regions, adding the attributes of the corresponding experiment samples regions</option>
+            <yield />
+        </param>
+    </xml>
+</macros>
\ No newline at end of file
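A hypothetical JOIN statement using the distal conditions above (syntax per the public GMQL documentation; dataset and attribute names invented):

    # Minimum distance plus an upper bound, concatenated output, samples matched on 'cell'
    join_stm = "RES = JOIN(MD(1), DLE(120000); output: CAT; joinby: cell) ANCHOR_DS EXP_DS;"
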
diff -r 000000000000 -r a80c93182db3 gmql_operators_map.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_operators_map.xml	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,37 @@
+<macros>
+    <import>gmql_queries_macros.xml</import>
+    <xml name="map">
+        <expand macro="input_var" name="input_var_reference" label="Reference Dataset" optional="false" />
+        <expand macro="input_var" name="input_var_experiment" label="Experiment Dataset" optional="false" />
+        <expand macro="output_var" />
+        <param name="count_result" type="text" label="Rename default counting attribute name"
+               help="The MAP operation always counts the number of each experiment sample region intersecting a certain
+                     reference region; results are stored by default in an attribute named count_[DSrefName]_[DSexpName]."
+               optional="true">
+            <expand macro="text_val" />
+        </param>
+        <section name="new_regions_attributes" title="New Genomic Region Attributes" expanded="false">
+            <repeat name="new_regions" title="Add New Region Attribute to Result Dataset" min="0" default="0"
+                    help="Create new genomic region attributes by using functions on existing experiment region attributes">
+                <param name="new_name" type="text" label="New Region Attribute Name" optional="true">
+                    <expand macro="text_val"/>
+                </param>
+                <expand macro="aggrFuncs" help="Sum, average, maximum, minimum and standard deviation are only for
+                        numerical attributes" optional="true" label="Aggregate Function to apply"/>
+                <param name="argument" label="Region attribute over which to apply the function" type="text" optional="true">
+                    <expand macro="text_val"/>
+                </param>
+            </repeat>
+        </section>
+        <section name="joinby" title="Condition on Samples Metadata" expanded="false"
+                 help="A list of metadata attribute names (or their suffixes) that must be present with equal values in
+                       the metadata of the reference and experiment samples.">
+            <repeat name="joinby_clause" title="Attributes to match" min="0" default="0" >
+                <expand macro="metajoin_attribute" name="j_att" optional="true" >
+                    <expand macro="metajoin_options" />
+                </expand>
+            </repeat>
+        </section>
+        <expand macro="materialize_stm" />
+    </xml>
+</macros>
\ No newline at end of file
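A hypothetical MAP statement matching these options (syntax per the public GMQL documentation; names invented):

    # Per reference region: default intersection count plus an average experiment score
    map_stm = "RES = MAP(avg_score AS AVG(score); joinby: cell_tissue) REF_DS EXP_DS;"
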
diff -r 000000000000 -r a80c93182db3 gmql_operators_merge_union_diff.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_operators_merge_union_diff.xml	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,50 @@
+<macros>
+    <import>gmql_queries_macros.xml</import>
+    <xml name="merge">
+        <expand macro="input_var" name="input_var" label="Input Dataset" optional="false" />
+        <expand macro="output_var" />
+        <section name="groupby" title="Group Samples" expanded="true"
+                 help="Grouping metadata attributes can be specified: the samples are then partitioned in groups,
+                       each with a distinct value of the grouping metadata attributes, and the MERGE operation is
+                       applied to each group separately, yielding one sample in the result dataset for each group.
+                       Samples without the grouping metadata attributes are disregarded">
+            <repeat name="group_meta_atts" min="0" title="Metadata Attribute">
+                <expand macro="metajoin_attribute" name="j_att" optional="true" >
+                    <expand macro="metajoin_options" />
+                </expand>
+            </repeat>
+        </section>
+        <expand macro="materialize_stm" />
+    </xml>
+    <xml name="union">
+        <expand macro="input_var" name="input_var_first" label="First Dataset to be unified" optional="false" />
+        <expand macro="input_var" name="input_var_second" label="Second Dataset to be unified" optional="false" />
+        <expand macro="output_var" />
+        <expand macro="materialize_stm" />
+    </xml>
+    <xml name="difference">
+        <param name="input_var_reference" label="Reference Dataset" type="text"
+               help="The dataset which is copied in the output and from which regions of the negative dataset are
+                     “subtracted”. Must be a dataset variable already defined as output of a previous operation.">
+            <expand macro="text_val" />
+        </param>
+        <param name="input_var_negative" label="Dataset to subtract from Reference" type="text"
+               help="If any reference region is found to have an intersection with a region in this dataset, it is
+                     removed from the output dataset. Must be a dataset variable already defined as output of a
+                     previous operation.">
+            <expand macro="text_val" />
+        </param>
+        <param name="exact_flag" label="Exact matching?" type="boolean"
+               help="If selected, regions are removed from the result only if they exactly match regions in the
+                     dataset to subtract." />
+        <section name="joinby" title="Joinby Conditions" expanded="true"
+                 help="If attributes are specified, only samples that have the same value for each attribute are
+                       considered when performing the DIFFERENCE.">
+            <repeat name="group_meta_atts" min="0" title="Metadata Attribute">
+                <expand macro="metajoin_attribute" name="j_att" optional="true" >
+                    <expand macro="metajoin_options" />
+                </expand>
+            </repeat>
+        </section>
+        <expand macro="output_var" />
+        <expand macro="materialize_stm" />
+    </xml>
+</macros>
\ No newline at end of file
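Hypothetical statements for the three operators defined above (syntax per the public GMQL documentation; names invented):

    merge_stm = "RES = MERGE(groupby: antibody_target) EXP;"
    union_stm = "RES = UNION() D1 D2;"
    diff_stm  = "RES = DIFFERENCE(exact: true; joinby: cell) REF_DS NEG_DS;"
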
diff -r 000000000000 -r a80c93182db3 gmql_operators_order.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_operators_order.xml	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,57 @@
+<macros>
+    <import>gmql_queries_macros.xml</import>
+    <xml name="order">
+        <expand macro="input_var" name="input_var_ordering_ds" label="Input Dataset to Order" />
+        <expand macro="output_var" />
+        <section name="ordering_attributes" title="Ordering Attributes" expanded="true"
+                 help="Samples or sample regions will be ordered according to the values of the given attribute(s).
+                       A new ordering metadata and/or region attribute is added with the sample or region ordering
+                       value, respectively." >
+            <repeat name="attributes" title="Add new ordering attribute" min="1" default="1"
+                    help="At least one ordering metadata or region attribute must be specified.">
+                <param name="att_type" type="select" display="radio" label="Metadata or region attribute?" >
+                    <option value="metadata">Metadata</option>
+                    <option value="region">Region</option>
+                </param>
+                <param name="attribute_name" type="text" label="Attribute Name" >
+                    <expand macro="text_val" />
+                </param>
+                <param name="order_type" label="Ascending or Descending order?" type="select" display="radio" >
+                    <option value="asc">Ascending</option>
+                    <option value="desc">Descending</option>
+                </param>
+            </repeat>
+        </section>
+        <section name="top_options" title="Samples/Regions to keep" expanded="true"
+                 help="It is possible to specify the number (or percentage) of samples (or regions) to be extracted
+                       from the ordered dataset (or from each sample), starting from the top (with respect to the
+                       ascending/descending ordering)." >
+            <repeat name="to" title="Constraint on number of samples (sample regions)"
+                    min="0" default="0" >
+                <param name="type" type="select" label="Constraint on samples or sample regions?">
+                    <option value="metadata">Samples</option>
+                    <option value="region">Sample Regions</option>
+                </param>
+                <conditional name="opt">
+                    <param name="k_type" type="select" label="How to choose samples/sample regions" display="radio" >
+                        <option value="n">Top k elements</option>
+                        <option value="perc">Top k % elements</option>
+                        <option value="group">Top k elements of each group (implicitly consider the ordering defined by
+                            first grouping identical values of the first n − 1 ordering attributes, and then sorting by
+                            the remaining attributes)</option>
+                    </param>
+                    <when value="n">
+                        <param name="k" type="integer" label="Value of k" value="1" min="1"/>
+                    </when>
+                    <when value="group">
+                        <param name="k" type="integer" label="Value of k" value="1" min="1"/>
+                    </when>
+                    <when value="perc">
+                        <param name="k" type="integer" label="Value of k" value="1" min="1" max="100"/>
+                    </when>
+                </conditional>
+            </repeat>
+        </section>
+        <expand macro="materialize_stm" />
+    </xml>
+</macros>
\ No newline at end of file
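A hypothetical ORDER statement assembled from these selections (the `meta_top` option follows the public GMQL documentation; names invented):

    # Order samples by a metadata attribute, descending, and keep the top 2
    order_stm = "RES = ORDER(region_count DESC; meta_top: 2) EXP;"
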
diff -r 000000000000 -r a80c93182db3 gmql_operators_project.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_operators_project.xml	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,128 @@
+<macros>
+    <import>gmql_queries_macros.xml</import>
+    <xml name="project">
+        <expand macro="input_var" name="input_var" label="Input Dataset" />
+        <expand macro="output_var" />
+        <expand macro="sec_attributes" sec="region_att" title="Region attributes to keep" attname="Region Attribute"/>
+        <expand macro="sec_attributes" sec="meta_att" title="Metadata attributes to keep" attname="Metadata Attribute"/>
+        <section name="project_new_regions" title="New Genomic Region Attributes" expanded="false"
+                 help="Generate new genomic region attributes as functions of existing region or metadata attributes
+                       or constants. It is also possible to rename an existing field." >
+            <repeat name="new_region_att" title="Define Region Attribute" min="0">
+                <param name="new_name" type="text" label="New attribute name">
+                    <expand macro="text_val"/>
+                </param>
+                <conditional name="gen_function">
+                    <expand macro="gen_type_functions" label="Type of function to apply to generate the new region attribute">
+                        <option value="NULL">Define a new numeric region attribute with "null" value</option>
+                        <option value="META">Value of a metadata attribute</option>
+                    </expand>
+                    <expand macro="gen_arguments">
+                        <when value="NULL">
+                            <param name="att_type" type="select" label="Type of new attribute">
+                                <option value="INTEGER">INTEGER</option>
+                                <option value="DOUBLE">DOUBLE</option>
+                            </param>
+                        </when>
+                        <when value="META">
+                            <expand macro="f_argument" label="Name of the metadata attribute to use as source"/>
+                            <param name="att_type" type="select" label="Type of new attribute">
+                                <option value="INTEGER">INTEGER</option>
+                                <option value="DOUBLE">DOUBLE</option>
+                                <option value="STRING">STRING (for any case when the attribute has non-numerical values)</option>
+                            </param>
+                        </when>
+                    </expand>
+                </conditional>
+            </repeat>
+        </section>
+        <section name="project_new_meta" title="New Metadata Attributes" expanded="false"
+                 help="Generate new metadata attributes as functions of existing metadata attributes or constants.
+                       It is also possible to rename an existing field." >
+            <repeat name="new_meta_att" title="Define Metadata Attribute" min="0">
+                <param name="new_name" type="text" label="New attribute name" >
+                    <expand macro="text_val" />
+                </param>
+                <conditional name="gen_function">
+                    <expand macro="gen_type_functions" label="Type of function to apply to generate the new metadata attribute">
+                    </expand>
+                    <expand macro="gen_arguments" >
+                    </expand>
+                </conditional>
+            </repeat>
+        </section>
+        <expand macro="materialize_stm" />
+    </xml>
+    <xml name="gen_type_functions" token_label="@LABEL@">
+        <param name="gen_type" type="select" label="@LABEL@">
+            <option value="aggregate">Aggregate function</option>
+            <option value="arithmetic">Arithmetic expression</option>
+            <option value="SQRT">Square root</option>
+            <option value="rename">Rename a field</option>
+            <option value="fixed">Create and set all instances to a value</option>
+            <yield />
+        </param>
+    </xml>
+    <xml name="gen_arguments">
+        <when value="aggregate">
+            <expand macro="aggrFuncs" optional="true"
+                    help="Sum, average, maximum, minimum and standard deviation are only for numerical attributes"
+                    label="Function to apply to generate the new region attribute"/>
+            <expand macro="f_argument" label="Region attribute over which to apply the function"/>
+        </when>
+        <when value="arithmetic">
+            <param name="expression" label="Expression" type="text"
+                   help="All basic mathematical operations (+, -, *, /), including usage of parentheses">
+                <validator type="regex" message="It includes invalid characters">[\w\.\(\)\-\+\*\/\s]+</validator>
+            </param>
+        </when>
+        <when value="SQRT">
+            <expand macro="f_argument" label="Region attribute over which to apply the function"/>
+        </when>
+        <when value="rename">
+            <expand macro="f_argument" label="New name"/>
+        </when>
+        <when value="fixed">
+            <expand macro="f_argument" label="Defined value"/>
+        </when>
+        <yield/>
+    </xml>
+    <xml name="f_argument" token_label="@LABEL@">
+        <param name="arg" type="text" label="@LABEL@">
+            <expand macro="text_val"/>
+        </param>
+    </xml>
+    <xml name="sec_attributes" token_sec="@SEC@" token_title="@TITLE@" token_attname="@ATTNAME@">
+        <section name="@SEC@" title="@TITLE@"
+                 help="The PROJECT operator creates, from an existing dataset, a new dataset with all the samples
+                       (with their regions and region values) in the input one, but keeping for each sample in the
+                       input dataset only those metadata and/or region attributes expressed in the following sections.
+                       Note that if no options are given, the default behavior is to remove all the region attributes
+                       which are not coordinates (i.e., only chr, start, stop, and strand are kept)">
+            <conditional name="allbut">
+                <param name="allbut_flag" type="select" label="How to list attributes">
+                    <option value="keep">List only the ones to keep</option>
+                    <option value="exclude">Keep all attributes and list what to exclude</option>
+                </param>
+                <when value="keep">
+                    <expand macro="attribute_field" name="list_keep" min="1" attname="@ATTNAME@" />
+                </when>
+                <when value="exclude">
+                    <expand macro="attribute_field" name="list_exclude" min="0" attname="@ATTNAME@" />
+                </when>
+            </conditional>
+        </section>
+    </xml>
+    <xml name="attribute_field" token_name="@NAME@" token_min="@MIN@" token_attname="@ATTNAME@">
+        <repeat name="@NAME@" min="@MIN@" title="@ATTNAME@"
+                help="The wildcard '?' can be used in place of the dataset or attribute name (at most one per attribute)">
+            <param name="attribute" type="text" label="Attribute Name" optional="true">
+                <validator type="regex" message="Format not valid">(\?\.)?[\w]+|[\w]+(\.\?)|(\?\.)[\w]+(\.\?)$</validator>
+                <option value="attribute">attribute</option>
+                <option value="?.attribute">?.attribute</option>
+                <option value="dataset.?">dataset.?</option>
+                <option value="dataset.?.attribute">dataset.?.attribute</option>
+            </param>
+        </repeat>
+    </xml>
+</macros>
\ No newline at end of file
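A hypothetical PROJECT statement along the lines of these options (the `region_update` keyword is my assumption based on the public GMQL documentation; names invented):

    # Keep two region attributes and derive a new one arithmetically
    project_stm = "RES = PROJECT(score, pValue; region_update: length AS right - left) EXP;"
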
diff -r 000000000000 -r a80c93182db3 gmql_operators_select.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_operators_select.xml	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,167 @@
+<macros>
+    <import>gmql_queries_macros.xml</import>
+    <xml name="select">
+        <conditional name="input">
+            <param name="input_type" type="select" display="radio" label="Dataset over which SELECT samples"
+                   help="It can be a dataset in the user space or the result of a MATERIALIZE operation still to be executed.">
+                <option value="i_ds">User Dataset</option>
+                <option value="i_var">Materializing result</option>
+            </param>
+            <when value="i_ds">
+                <param name="gmql_datasets" type="data" format="gmql_repository" label="Working Datasets" />
+                <param name="input_ds" type="select" label="Select Dataset" refresh_on_change="True">
+                    <options from_dataset="gmql_datasets">
+                        <column name="value" index="0"/>
+                    </options>
+                </param>
+                <expand macro="predicates">
+                    <expand macro="advanced_mode">
+                        <expand macro="meta_block" optional="true" vartype="select"/>
+                        <repeat name="add_meta_blocks" title="Additional Conditions Block" default="0" min="0"
+                                help="Compose different blocks to obtain more complex conditions">
+                            <expand macro="meta_block" vartype="select">
+                                <expand macro="block_logCon"/>
+                            </expand>
+                        </repeat>
+                    </expand>
+                </expand>
+            </when>
+            <when value="i_var">
+                <expand macro="input_var" name="input_var" label="Variable name" optional="false"/>
+                <expand macro="predicates">
+                    <expand macro="advanced_mode">
+                        <expand macro="meta_block" optional="true" vartype="text"/>
+                        <repeat name="add_meta_blocks" title="Additional Conditions Block" default="0" min="0"
+                                help="Compose different blocks to obtain more complex conditions">
+                            <expand macro="meta_block" vartype="text">
+                                <expand macro="block_logCon"/>
+                            </expand>
+                        </repeat>
+                    </expand>
+                </expand>
+            </when>
+        </conditional>
+        <expand macro="output_var" />
+        <expand macro="materialize_stm" />
+    </xml>
+    <xml name="predicates">
+        <section name="metadata_predicates" title="Conditions on metadata" expanded="false"
+                 help="Selection based on the existence and values of certain metadata attributes in each sample.">
+            <yield/>
+        </section>
+        <section name="region_predicates" title="Conditions on region fields" expanded="false"
+                 help="Selection based on the characteristics of the genomic regions of each sample.">
+            <expand macro="advanced_mode">
+                <expand macro="region_block" optional="true"/>
+                <repeat name="add_region_blocks" title="Additional Conditions Block" default="0" min="0"
+                        help="Compose different blocks to obtain more complex conditions">
+                    <expand macro="region_block">
+                        <expand macro="block_logCon"/>
+                    </expand>
+                </repeat>
+            </expand>
+        </section>
+        <section name="semijoin_predicate" title="Conditions in relation to an external dataset" expanded="false"
+                 help="Selection based on the existence of certain metadata attributes
+                       and the matching of their values with those associated with at least one sample in an
+                       external dataset D_EXT">
+            <expand macro="semijoin"/>
+        </section>
+    </xml>
[...]
+            min="0" default="0">
+            <expand macro="metadata_predicate" vartype="@VARTYPE@">
+                <expand macro="logCon"/>
+            </expand>
+        </repeat>
+    </xml>
+    <xml name="metadata_predicate" token_optional="optional" token_vartype="vartype">
+        <yield />
+        <param name="negate" type="boolean" label="Negative Condition" optional="@OPTIONAL@"/>
+        <param name="attribute" type="@VARTYPE@" label="Metadata Attribute Name" optional="@OPTIONAL@"
+               refresh_on_change="True"
+               dynamic_options="get_metadata_attr(user=authToken.file_name,ds=input_ds, ds_list=gmql_datasets.file_name)" />
+        <expand macro="condition" optional="@OPTIONAL@" />
+        <param name="value" type="select" label="Value to compare the metadata attribute with" optional="@OPTIONAL@"
+               dynamic_options="get_metadata_values(user=authToken.file_name, ds=input_ds, ds_list=gmql_datasets.file_name, att=attribute)" />
+    </xml>
+    <xml name="region_block" token_optional="optional">
+        <yield />
+        <expand macro="region_predicate" optional="@OPTIONAL@"/>
+        <repeat name="pr_additional" title="Additional Condition"
+                min="0" default="0">
+            <expand macro="region_predicate">
+                <expand macro="logCon"/>
+            </expand>
+        </repeat>
+    </xml>
+    <xml name="region_predicate" token_optional="optional">
+        <yield />
+        <param name="negate" type="boolean" label="Negative Condition" optional="@OPTIONAL@" />
+        <param name="attribute" type="text" label="Region Attribute Name" optional="@OPTIONAL@" >
+            <expand macro="region_attribute" />
+        </param>
+        <expand macro="condition" optional="@OPTIONAL@" />
+        <param name="value" type="text" label="Value to compare the region attribute with" optional="@OPTIONAL@" >
+            <expand macro="region_value"/>
+        </param>
+        <param name="is_meta_value" type="boolean" label="Is the comparison value a metadata attribute?" optional="@OPTIONAL@"
+               help="The use of metadata attributes in predicates on region attributes is enabled." />
+    </xml>
+    <xml name="semijoin">
+        <repeat name="sj_attributes" title="Metadata Attributes to match in DS_EXT" min="0" default="0">
+            <expand macro="metajoin_attribute" name="sj_att" />
+        </repeat>
+        <param name="condition" type="select" label="Condition">
+            <option value="IN">IN</option>
+            <option value="NOT_IN">NOT IN</option>
+        </param>
+        <expand macro="input_var" name="ds_ext" label="DS_EXT" optional="true" />
+    </xml>
+    <xml name="region_attribute">
+        <expand macro="text_val" />
+        <option value="chr">chr</option>
+        <option value="left">left</option>
+        <option value="right">right</option>
+        <option value="strand">strand</option>
+        <option value="region_field">region_field</option>
+    </xml>
+    <xml name="region_value">
+        <validator type="regex" message="Valid characters are letters, digits, '.', +, -, and the wildcard symbol '*'">[+\-\*]|([\d]+\.)?[\d]+|[\w]+$</validator>
+        <option value="*">any value</option>
+        <option value="string">string</option>
+        <option value="1">1</option>
+        <option value="0.0">0.0</option>
+        <option value="+">+</option>
+        <option value="-">-</option>
+    </xml>
+    <xml name="condition" token_optional="optional">
+        <param name="condition" type="select" label="Condition" optional="@OPTIONAL@">
+            <option value="eq"><![CDATA[ == ]]></option>
+            <option value="gt"><![CDATA[ > ]]></option>
+            <option value="lt"><![CDATA[ < ]]></option>
+            <option value="get"><![CDATA[ >= ]]></option>
+            <option value="let"><![CDATA[ <= ]]></option>
+        </param>
+    </xml>
+</macros>
\ No newline at end of file
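A hypothetical SELECT statement combining the three predicate kinds this macro gathers (the exact combined syntax is my assumption from the public GMQL documentation; names invented):

    # Metadata predicate, region predicate, and a semijoin against an external dataset
    select_stm = ("OUT = SELECT(cell == 'K562'; region: score > 0.5; "
                  "semijoin: antibody IN DS_EXT) IN_DS;")
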
diff -r 000000000000 -r a80c93182db3 gmql_operators_tests.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_operators_tests.xml	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,796 @@
+<macros>
+    <xml name="query_intro" token_name="@NAME@">
+        <param name="authToken" value="guest2.gmql_user"/>
+        <param name="query_name" value="@NAME@"/>
+        <conditional name="query_create">
+            <param name="create" value="yes"/>
+        </conditional>
+    </xml>
+    <xml name="query_end" token_mat="@MAT@">
+        <conditional name="materialize">
+            <param name="materialize_result" value="@MAT@"/>
+            <yield />
+        </conditional>
+    </xml>
+    <xml name="simple_select" token_ds="@DS@">
+        <repeat name="operations">
+            <conditional name="operation">
+                <param name="operator" value="SELECT"/>
+                <conditional name="input">
+                    <param name="input_type" value="i_ds"/>
+                    <param name="gmql_datasets" value="rep.gmql_repository"/>
+                    <param name="input_ds" value="@DS@"/>
+                    <section name="region_predicates">
+                        <conditional name="conditions">
+                            <param name="ad_flags" value="strings"/>
+                            <param name="conditions_string" value=""/>
+                        </conditional>
+                    </section>
+                    <section name="metadata_predicates">
+                        <conditional name="conditions">
+                            <param name="ad_flags" value="strings"/>
+                            <param name="conditions_string" value=""/>
+                        </conditional>
+                    </section>
+                    <section name="semijoin_predicate">
+                        <param name="condition" value="IN"/>
+                        <param name="ds_ext" value=""/>
+                    </section>
+                </conditional>
+                <yield />
+            </conditional>
+        </repeat>
+    </xml>
+    <xml name="composer_tests">
+        <tests>
+            <test>
+                <!-- TEST 1: Select (Simple Metadata Condition) -->
+                <expand macro="query_intro" name="test_select1"/>
+                <repeat name="operations">
+                    <conditional name="operation" >
+                        <param name="operator" value="SELECT"/>
+                        <conditional name="input">
+                            <param name="input_type" value="i_ds"/>
+                            <param name="gmql_datasets" value="rep.gmql_repository"/>
+                            <param name="input_ds" value="Example_Dataset_1"/>
+                            <section name="metadata_predicates">
+                                <conditional name="conditions">
+                                    <param name="ad_flags" value="steps"/>
+                                    <param name="negate" value="false"/>
+                                    <param name="attribute" value="grant"/>
+                                    <param name="condition" value="eq"/>
+                                    <param name="value" value="Stam"/>
+                                </conditional>
+                            </section>
+                            <section name="region_predicates">
+                                <conditional name="conditions">
+                                    <param name="ad_flags" value="strings"/>
+                                    <param name="conditions_string" value=""/>
+                                </conditional>
+                            </section>
+                            <section name="semijoin_predicate">
+                                <param name="condition" value="IN"/>
+                                <param name="ds_ext" value=""/>
+                            </section>
+                        </conditional>
+                    </conditional>
+                </repeat>
+                <param name="output_var" value="OUT"/>
+                <conditional name="m_stm"
[...]
+            <param name="authToken" value="guest2.gmql_user"/>
+            <param name="query_name" value="testexec"/>
+            <conditional name="query_create">
+                <param name="create" value="yes"/>
+            </conditional>
+            <repeat name="operations">
+                <conditional name="operation">
+                    <param name="operator" value="SELECT"/>
+                    <conditional name="input">
+                        <param name="input_type" value="i_ds"/>
+                        <param name="gmql_datasets" value="rep.gmql_repository"/>
+                        <param name="input_ds" value="Example_Dataset_1"/>
+                        <section name="metadata_predicates">
+                            <conditional name="conditions">
+                                <param name="ad_flags" value="steps"/>
+                                <param name="negate" value="false"/>
+                                <param name="attribute" value="grant"/>
+                                <param name="condition" value="eq"/>
+                                <param name="value" value="Stam"/>
+                            </conditional>
+                        </section>
+                        <section name="region_predicates">
+                            <conditional name="conditions">
+                                <param name="ad_flags" value="strings"/>
+                                <param name="conditions_string" value=""/>
+                            </conditional>
+                        </section>
+                        <section name="semijoin_predicate">
+                            <param name="condition" value="IN"/>
+                            <param name="ds_ext" value=""/>
+                        </section>
+                    </conditional>
+                </conditional>
+            </repeat>
+            <param name="output_var" value="VAR"/>
+            <conditional name="materialize">
+                <param name="file_name" value="TG"/>
+                <param name="materialize_result" value="true"/>
+                <conditional name="choose_op">
+                    <param name="op" value="run"/>
+                    <param name="out_format" value="gdm"/>
+                    <param name="import" value="true"/>
+                </conditional>
+            </conditional>
+            <output name="query" file="query.gmql_query"/>
+            <output name="log" ftype="txt">
+                <assert_contents>
+                    <has_text text="SUCCESS"/>
+                </assert_contents>
+            </output>
+            <output name="updated_list" ftype="gmql_repository">
+                <assert_contents>
+                    <has_text_matching expression="testexec_\d+_\d+_TG"/>
+                </assert_contents>
+            </output>
+            <collection name="query_results_s" type="list">
+                <metadata name="name" value="testexec results"/>
+                <discovered_dataset designation="S00000" file="TG/S_00000.gdm"/>
+                <discovered_dataset designation="S00001" file="TG/S_00001.gdm"/>
+                <discovered_dataset designation="S00002" file="TG/S_00002.gdm"/>
+            </collection>
+            <collection name="query_results_m" type="list">
+                <metadata name="name" value="testexec results metadata"/>
+                <discovered_dataset designation="S00000" file="TG/S_00000.gdm.meta"/>
+                <discovered_dataset designation="S00001" file="TG/S_00001.gdm.meta"/>
+                <discovered_dataset designation="S00002" file="TG/S_00002.gdm.meta"/>
+            </collection>
+        </test>
+    </tests>
+    </xml>
+</macros>
\ No newline at end of file
diff -r 000000000000 -r a80c93182db3 gmql_queries_composer.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_queries_composer.py	Tue Jun 26 09:08:06 2018 -0400
b"@@ -0,0 +1,691 @@\n+#!/usr/bin/env python\n+# --------------------------------------------------------------------------------\n+# GMQL Queries Compositor\n+# --------------------------------------------------------------------------------\n+# Luana Brancato, luana.brancato@mail.polimi.it\n+# --------------------------------------------------------------------------------\n+\n+import os, sys, argparse, json\n+from itertools import chain\n+from gmql_queries_statements import *\n+from gmql_rest_queries import compile_query, run_query, check_input\n+from gmql_rest_datasets import list_datasets\n+from gmql_queries_constants import *\n+\n+def read_query(query_data):\n+\n+ # Create new Query object and read JSON file\n+ query = dict ()\n+\n+ with open(query_data, 'r') as f_in :\n+ qd = json.loads(f_in.read())\n+\n+ query.update(name=qd['query_name'])\n+\n+ # A list of statements objects is created from the list of operations and added to the query\n+ statements = map(lambda x: read_statement(x['operation']), qd['operations'])\n+\n+ # Check if the user asked for materialize the final result and in case add a materialize statement\n+ # for the last variable defined.\n+\n+ if qd['materialize']['materialize_result'] :\n+ var = statements[-1][0].variables['output']\n+ mat_stm = Materialize(qd['materialize']['file_name'],var)\n+ statements.append((mat_stm,))\n+\n+ # Also save info about the desired output format (if available)\n+ out_format = qd['materialize']['choose_op'].get('out_format',None)\n+ if out_format:\n+ query.update(out_format=out_format)\n+\n+ #Check if the user wants to import results into Galaxy already\n+ importFlag = qd['materialize']['choose_op'].get('import', None)\n+ if importFlag is not None:\n+ query.update(importFlag=importFlag)\n+\n+ # Add statements list to query, flattening list elements if needed (in case there's some intermediate\n+ # materialize)\n+\n+ query.update(statements=[x for x in chain.from_iterable(statements)])\n+\n+\n+ return query\n+\n+\n+def read_statement(x):\n+\n+ op = x['operator']\n+\n+ if op == 'SELECT' :\n+ stm = create_select(x)\n+ if op == 'MAP' :\n+ stm = create_map(x)\n+ if op == 'ORDER' :\n+ stm = create_order(x)\n+ if op == 'JOIN' :\n+ stm = create_join(x)\n+ if op == 'PROJECT':\n+ stm = create_project(x)\n+ if op == 'COVER' :\n+ stm = create_cover(x)\n+ if op == 'EXTEND' :\n+ stm = create_extend(x)\n+ if op == 'GROUP' :\n+ stm = create_group(x)\n+ if op == 'MERGE' :\n+ stm = create_merge(x)\n+ if op == 'UNION' :\n+ stm = create_union(x)\n+ if op == 'DIFFERENCE' :\n+ stm = create_difference(x)\n+\n+\n+ # If the user asked to materialize the current statement, add a MATERIALIZE statement; otherwise return\n+ # only the current statement\n+\n+ if x['m_stm']['materialize_stm'] :\n+ mat_stm = Materialize(x['m_stm']['file_name'],stm.variables['output'])\n+ return (stm, mat_stm)\n+ else:\n+ return (stm,)\n+\n+def create_project(x):\n+ stm = Project()\n+\n+ # Set output and input variables\n+ stm.set_output_var(x['output_var'])\n+ stm.set_input_var(x['input_var'])\n+\n+ # Check if there are info about region fields to keep and set them up\n+\n+ reg_att = x['region_att']['allbut']\n+ if reg_att['allbut_flag'] == 'keep' :\n+\n+ r_fields = reg_att.get('list_keep', None)\n+ # If the list exists and it is not empty\n+ if r_fields:\n+ r_fields = map(lambda x: x.get('attribute'), r_fields)\n+ stm.set_regions(AttributesList(r_fields))\n+ else:\n+ r_fields = reg_att.get('list_exclude', None)\n+ if r_fields:\n+ r_fields = map(lambda x: x.get('attribute'), 
r_fields)\n+ stm.set_regions(AttributesList(r_fields), type='exclude')\n+\n+ # Similarly for metadata attributes to keep\n+\n+ meta_att = x['meta_att']['allbut']\n+ if meta_att['allb"..b' the external ds to confront with.\n+\n+ sj_data = input_data[\'semijoin_predicate\']\n+\n+ if sj_data[\'sj_attributes\'] :\n+ sj_attr = map(lambda x: x[\'sj_att\'], sj_data[\'sj_attributes\'])\n+ sj = SemiJoinPredicate(sj_attr,sj_data[\'ds_ext\'],sj_data[\'condition\'])\n+\n+ stm.set_param(sj, \'semijoin\')\n+\n+ return stm\n+\n+def _metadata_predicate(mp_data):\n+ # Metadata predicates are well formed logical formulas. Create a new one and add the first\n+ # predicate. Negate it if it\'s the case.\n+\n+ mp = MetaPredicate(mp_data[\'attribute\'], mp_data[\'value\'], mp_data[\'condition\'])\n+ if mp_data[\'negate\']:\n+ mp = [mp, Wff.NOT]\n+\n+ # Check if there are further predicates\n+ for pa in mp_data[\'pm_additional\']:\n+\n+ mp1 = MetaPredicate(pa[\'attribute\'], pa[\'value\'], pa[\'condition\'])\n+ if pa[\'negate\']:\n+ mp1 = [mp1, Wff.NOT]\n+\n+ if pa[\'logCon\'] == \'AND\':\n+ mp = [mp, mp1, Wff.AND]\n+ if pa[\'logCon\'] == \'OR\':\n+ mp = [mp, mp1, Wff.OR]\n+\n+ return mp\n+\n+def _region_predicate(rp_data):\n+\n+ rp_s = RegionPredicate(rp_data[\'attribute\'], rp_data[\'value\'], rp_data[\'condition\'])\n+ if rp_data[\'is_meta_value\']:\n+ rp_s.set_value_type(\'meta\')\n+ else:\n+ rp_s.set_value_type()\n+ rp = rp_s\n+ if rp_data[\'negate\']:\n+ rp = [rp, Wff.NOT]\n+\n+ # Check if there are further predicates\n+ for pa in rp_data[\'pr_additional\']:\n+ rp1_s = RegionPredicate(pa[\'attribute\'], pa[\'value\'], pa[\'condition\'])\n+ if pa[\'is_meta_value\']:\n+ rp1_s.set_value_type(\'meta\')\n+ else:\n+ rp1_s.set_value_type()\n+ #rp1 = WellFormedFormula(rp1_s)\n+ rp1 = rp1_s\n+\n+ if pa[\'negate\']:\n+ rp1 = [rp1, Wff.NOT]\n+\n+ if pa[\'logCon\'] == \'AND\':\n+ rp = [rp, rp1, Wff.AND]\n+ if pa[\'logCon\'] == \'OR\':\n+ rp = [rp, rp1, Wff.OR]\n+\n+ return rp\n+\n+def save(query, output, query_source):\n+\n+ # Set the config files where to look for the actual syntax to use\n+ y_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), \'gmql_syntax.yaml\')\n+\n+ with open(y_path, \'r\') as yamlf:\n+ syntax = yaml.load(yamlf)\n+\n+ # If I am continuing a local query, first copy the older statements\n+ if query_source:\n+ with open(output, \'w\') as f_out:\n+ with open(query_source, \'r\') as f_in:\n+ f_out.writelines(f_in.readlines())\n+\n+\n+ with open(output, \'a\') as f_out:\n+\n+ for s in query[\'statements\'] :\n+ f_out.write(\'{stm}\\n\'.format(stm=s.save(syntax)))\n+\n+\n+def compile(user, query_name, query_file, log):\n+ # Call the service in gmql_rest_queries to send the query to the GMQL server to compile.\n+\n+ compile_query(user, query_name, query_file, log)\n+\n+\n+def run(user, query_name, query, log, out_format, importFlag, updated_ds_list):\n+ # Call the service in gmql_rest_queries to send the query to the GMQL server to be executed.\n+\n+ run_query(user, query_name, query, log, out_format, importFlag)\n+\n+ #Save updated list of datasets\n+ list_datasets(user, updated_ds_list)\n+\n+\n+def stop_err(msg):\n+ sys.stderr.write("%s\\n" % msg)\n+\n+def __main__():\n+\n+ parser = argparse.ArgumentParser()\n+ parser.add_argument("-user")\n+ parser.add_argument("-cmd")\n+ parser.add_argument("-query_params")\n+ parser.add_argument("-query_output")\n+ parser.add_argument("-query_source")\n+ parser.add_argument("-query_log")\n+ parser.add_argument("-updated_ds_list")\n+\n+ args = 
parser.parse_args()\n+\n+ query = read_query(args.query_params)\n+ save(query, args.query_output, args.query_source)\n+\n+ if(args.cmd == \'compile\'):\n+ compile(args.user, query[\'name\'], args.query_output, args.query_log)\n+\n+ if(args.cmd == \'run\'):\n+ run(args.user, query[\'name\'], args.query_output, args.query_log, query[\'out_format\'], query[\'importFlag\'], args.updated_ds_list)\n+\n+\n+if __name__ == "__main__":\n+ __main__()\n' |
b |
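For reference, a minimal sketch of the params.json structure that read_query() above consumes. The key layout mirrors the fields the function and read_statement() access (query_name, operations, materialize, choose_op); the operator payload and all values are illustrative only, and a real EXTEND block carries more fields than shown here:

    import json

    # Illustrative params.json for gmql_queries_composer.py; key names follow
    # read_query()/read_statement(), values are made up for the example.
    params = {
        "query_name": "example_query",
        "operations": [
            {"operation": {
                "operator": "EXTEND",                  # dispatched by read_statement()
                "output_var": "OUT2",                  # assumed field names
                "input_var": "OUT",
                "m_stm": {"materialize_stm": False},   # no intermediate MATERIALIZE
            }},
        ],
        "materialize": {
            "materialize_result": True,                # append a final MATERIALIZE
            "file_name": "result_ds",
            "choose_op": {"out_format": "gtf", "import": True},
        },
    }

    with open("params.json", "w") as f:
        json.dump(params, f, indent=2)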
diff -r 000000000000 -r a80c93182db3 gmql_queries_composer.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gmql_queries_composer.xml Tue Jun 26 09:08:06 2018 -0400 |
[ |
b'@@ -0,0 +1,291 @@\n+<tool id="gmql_queries_composer" name="GMQL Query Composer" version="0.1.1">\n+ <description>Create, Compile and Run GMQL queries step by step.</description>\n+ <macros>\n+ <import>gmql_rest_macros.xml</import>\n+ <import>gmql_queries_macros.xml</import>\n+ <import>gmql_operators_select.xml</import>\n+ <import>gmql_operators_order.xml</import>\n+ <import>gmql_operators_join.xml</import>\n+ <import>gmql_operators_map.xml</import>\n+ <import>gmql_operators_project.xml</import>\n+ <import>gmql_operators_cover.xml</import>\n+ <import>gmql_operators_extend.xml</import>\n+ <import>gmql_operators_group.xml</import>\n+ <import>gmql_operators_merge_union_diff.xml</import>\n+ <import>gmql_operators_tests.xml</import>\n+ </macros>\n+ <command><![CDATA[\n+ #if $materialize.materialize_result == \'true\' :\n+ #if $materialize.choose_op.op == \'run\' :\n+ mkdir -p dataset && cd dataset &&\n+ python $__tool_directory__/gmql_queries_composer.py\n+ -user=$authToken\n+ -cmd=\'run\'\n+ -query_params=$query_params\n+ -query_output=$query\n+ #if $query_create.create == \'no\' :\n+ -query_source=$query_file\n+ #end if\n+ -query_log=$log\n+\t -updated_ds_list=$updated_list\n+\t #else :\n+\t python $__tool_directory__/gmql_queries_composer.py\n+ -user=$authToken\n+ -cmd=\'compile\'\n+ -query_params=$query_params\n+ -query_output=$query\n+ #if $query_create.create == \'no\' :\n+ -query_source=$query_file\n+ #end if\n+ -query_log=$log\n+ #end if\n+ #else:\n+ python $__tool_directory__/gmql_queries_composer.py\n+ -user=$authToken\n+ -cmd=\'save\'\n+ -query_params=$query_params\n+ -query_output=$query\n+ #if $query_create.create == \'no\' :\n+ -query_source=$query_file\n+ #end if\n+ #end if\n+ ]]></command>\n+ <code file="dynamic_utils.py" >\n+ <hook validate_input="validate_variables" />\n+ </code>\n+ <configfiles>\n+ <inputs name="query_params" filename="params.json"/>\n+ </configfiles>\n+ <inputs>\n+ <param format="gmql_user" name="authToken" type="data" label="Select user" />\n+ <param name="query_name" type="text" label="Query Name" >\n+\t\t <validator type="regex" message="Only alphanumeric characters and underscore allowed. It must begin with\n+\t\t letter or underscore.">[a-zA-Z_]([\\w]+)?$</validator>\n+\t </param>\n+ <conditional name="query_create" >\n+ <param name="create" label="Create new query or append to a saved one: " type="select" display="radio" >\n+ <option value="yes">New query</option>\n+ <option value="no">Continue</option>\n+ </param>\n+ <when value="no">\n+ <param name="query_file" label="Select local query" type="data" format="gmql_query" />\n+ </when>\n+ </conditional>\n+ <repeat name="operations" title="GMQL Operations" help="Add a new operation to the execution flow."\n+ min="1" default="0">\n+ <conditional name="operation">\n+ <param name="operator" type="select" label="Operation" >\n+ <option value="SELECT">SELECT</option>\n+ <option value="PROJECT">PROJECT</option>\n+ <option value="EXTEND">EXTEND</option>\n+ <option value="ORDER">ORDER</option>\n+ <option value="GROUP">GROUP</option>\n+ <option value="MERGE">MERGE</option>\n+ <option value="UNION">UNION</option>\n+ <option value="DIFFERENCE">DIFFERENCE</option>\n+ <option value="JOIN">JOIN</option>\n+ <option value="MAP">MAP</option>\n+ <option value="COVER">COVER</option>\n+ </param>\n+ <when value="SELECT">\n+ <expand macro="select" />\n+ '..b'wing structure:\n+\n+<variable_output> = <operator>(<parameters>)<variables_input>;\n+\n+where each variable stands for a GDM dataset. 
Operators apply to one\n+or two input variables and construct the result variable. Parameters of\n+several operators include predicates, which are made of boolean expressions\n+of simple predicates.\n+\n+- **Select** : defines a new dataset from an existing dataset by keeping a subset of samples and/or regions from the input dataset that satisfy the given predicates.\n+\n+- **Project** : creates a new dataset keeping for each sample in the input dataset only those metadata and/or region attributes expressed in the operator parameter list. This allows removing existing attributes or creating new ones.\n+\n+- **Extend** : for each sample in an input dataset, it builds new metadata attributes, assigns their values as the result of functions calculated on sample region attributes, and adds them to the existing metadata attribute-value pairs of the sample.\n+\n+- **Order** : is used to order either samples, sample regions, or both, according to a set of metadata and/or region attributes, and/or region coordinates.\n+\n+- **Group** : performs the grouping of samples of the input dataset based on one specified metadata attribute. For each obtained group, it is possible to request the evaluation of aggregate functions on metadata attributes over the metadata contained in all samples of the group.\n+\n+- **Merge** : builds a new dataset consisting of a single sample having all the regions of all the input samples, with the same attributes and the union of all the metadata attribute-values of the input samples.\n+\n+- **Union** : analogously to the UNION operation in relational algebra, it integrates samples from different datasets into a single dataset. The union of the two schemas is performed by taking only the schema of the first dataset and removing the region attributes of the second dataset which are not present in the first one.\n+\n+- **Difference** : produces one sample in the result for each sample of the first operand by keeping its metadata and only those regions (with their attributes and values) which do not intersect with any region in the second operand.\n+\n+- **Map** : is a binary operation over two datasets, called reference and experiment dataset. MAP computes, for each sample in the experiment dataset, aggregates over the values of the experiment regions that intersect with each reference region; we say that experiment regions are mapped to the reference regions. For each reference sample, the MAP operation produces a matrix-like structure (genomic space), where rows represent each experiment sample, columns are reference regions, and each matrix row is a vector consisting of the aggregates computed during MAP execution.\n+\n+- **Join** : acts in two phases: first, new samples are built from pairs of samples, one from the first dataset (anchor) and one from the second (experiment), where region attributes exist in both input datasets and their values coincide (just as in the relational JOIN). After that, a genometric predicate, dealing with distal properties of regions, selects the regions to include in these new samples. The number of generated output samples is the Cartesian product of the number of samples in the anchor and in the experiment dataset (if no joinby clause is specified). 
Predicates over metadata allow selecting sample pairs with appropriate biological conditions; genometric join predicates allow expressing distal conditions on sample regions.\n+\n+- **Cover** : takes as input a dataset and returns another dataset with a single sample (if no groupby option is specified) by "collapsing" the input samples and their regions according to the parameters minAcc and maxAcc.\n+\n+- **Materialize** : saves the content of a dataset in a file and registers the saved dataset in the system to make it usable in other GMQL queries.\n+\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n\\ No newline at end of file\n' |
b |
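The statement grammar described in the tool help above maps one-to-one onto the test queries added later in this commit; for instance, test-data/extend1.gmql_query instantiates it as

    OUT2 = EXTEND (avg_score AS AVG(score), max_p AS MAX(pvalue)) OUT ;

with OUT2 as the output variable, EXTEND as the operator, the aggregate definitions as parameters, and OUT as the single input variable.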
diff -r 000000000000 -r a80c93182db3 gmql_queries_constants.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gmql_queries_constants.py Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,68 @@ +#!/usr/bin/env python +# -------------------------------------------------------------------------------- +# GMQL Queries Enum Classes +# -------------------------------------------------------------------------------- +# Luana Brancato, luana.brancato@mail.polimi.it +# -------------------------------------------------------------------------------- + +from enum import Enum + + +class Operator(Enum): + MATERIALIZE = 'MATERIALIZE' + SELECT = 'SELECT' + PROJECT = 'PROJECT' + MAP = 'MAP' + ORDER = 'ORDER' + JOIN = 'JOIN' + COVER = 'COVER' + FLAT = 'FLAT' + SUMMIT = 'SUMMIT' + HISTOGRAM = 'HISTOGRAM' + EXTEND = 'EXTEND' + GROUP = 'GROUP' + MERGE = 'MERGE' + UNION = 'UNION' + DIFFERENCE = 'DIFFERENCE' + +class Wff(Enum): + AND = 'AND' + OR = 'OR' + NOT = 'NOT' + BLOCK = 'BLOCK' + +class RegFunction(Enum): + COUNT = 'COUNT' + COUNTSAMP = 'COUNTSAMP' + BAG = 'BAG' + BAGD = 'BAGD' + SUM = 'SUM' + AVG = 'AVG' + MIN = 'MIN' + MAX = 'MAX' + MEDIAN = 'MEDIAN' + STD = 'STD' + SQRT = 'SQRT' + NULL = 'NULL' + META = 'META' + MATH = '' + +class DistalConditions(Enum) : + DL = 'DL' + DLE = 'DLE' + MD = 'MD' + DGE = 'DGE' + +class DistalStream(Enum): + UPSTREAM = 'UP' + DOWNSTREAM = 'DOWN' + +class CoordParam(Enum): + LEFT = 'LEFT' + LEFT_DISTINCT = 'LEFT_DISTINCT' + RIGHT = 'RIGHT' + RIGHT_DISTINCT = 'RIGHT_DISTINCT' + INT = 'INT' + CAT = 'CAT' + BOTH = 'BOTH' + |
b |
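These Enum members double as postfix markers in the predicate lists that gmql_queries_composer.py builds (see _metadata_predicate and _region_predicate above). A minimal sketch of the convention, assuming gmql_queries_constants is importable and using plain strings as stand-ins for Predicate objects:

    from gmql_queries_constants import Wff

    # Formulas are nested lists in postfix (reverse Polish) form: operands
    # first, connective last. (P1 AND (NOT P2)) becomes:
    p1 = "cell == 'AG04450'"      # stand-in for a MetaPredicate
    p2 = "treatment == 'None'"
    formula = [p1, [p2, Wff.NOT], Wff.AND]

    # A renderer inspects the last element to pick the connective and recurses
    # into the operands, which is what Select.save_wff does with the templates
    # from gmql_syntax.yaml.
    print(formula)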
diff -r 000000000000 -r a80c93182db3 gmql_queries_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gmql_queries_macros.xml Tue Jun 26 09:08:06 2018 -0400 |
[ |
@@ -0,0 +1,76 @@ +<macros> + <xml name="input_ds" token_label="@LABEL@"> + <param name="input_ds" type="select" label="@LABEL@"> + <options from_dataset="gmql_datasets"> + <column name="value" index="0"/> + </options> + </param> + </xml> + <xml name="output_var"> + <param name="output_var" type="text" label="Output Variable Name" > + <expand macro="text_val" /> + </param> + </xml> + <xml name="input_var" token_name="@NAME@" token_label="@LABEL@" token_optional="@OPTIONAL@"> + <param name="@NAME@" label="@LABEL@" optional="@OPTIONAL@" type="text" + help="Must be a dataset variable already defined as output of a previous operation."> + <expand macro="text_val" /> + </param> + </xml> + <xml name="materialize_stm" > + <conditional name="m_stm"> + <param name="materialize_stm" type="boolean" label="Materialize the result?" help="This may be helpful to check intermediate datasets" /> + <when value="true"> + <param name="file_name" type="text" label="Name of the file into which the dataset DS will be saved" + help="The actual GMQL implementation materializes DS into a file with a name in the form [queryname]_[timestamp]_filename"> + <validator type="regex" message="Only alphanumeric characters and underscore allowed.">[\w]+$</validator> + </param> + </when> + </conditional> + </xml> + <xml name="block_logCon" > + <section name="block_logCon" title="Logical concatenation between blocks" expanded="true" + help="The following logical connector applies to the WHOLE following block."> + <expand macro="logCon" /> + <param name="negate" type="boolean" label="Negative Condition" /> + </section> + </xml> + <xml name="logCon"> + <param name="logCon" type="select" label="Logical Operator"> + <option value="AND">AND</option> + <option value="OR">OR</option> + </param> + </xml> + <xml name="metajoin_attribute" token_name="@NAME@" token_optional="@OPTIONAL@"> + <param name="@NAME@" type="text" label="Attribute Name" optional="@OPTIONAL@"> + <validator type="regex" message="Invalid attribute name.">[\w]+(\.[\w]+)*$</validator> + <option value="attribute_name">attribute_name</option> + <option value="DS.attribute_name">DS.attribute_name</option> + <option value="Prefix.DS.attribute_name">Prefix.DS.attribute_name</option> + </param> + <yield /> + </xml> + <xml name="metajoin_options"> + <param type="select" label="Matching Options" name="metajoin_match"> + <option value="SIMPLE">Match all attributes that are equal to the specified name OR end with it as a dot-separated suffix</option> + <option value="EXACT">Match all attributes that are equal to the specified name (without any prefixes)</option> + <option value="FULL">Match two attributes if they end with the specified name AND their full names are equal</option> + </param> + </xml> + <xml name="aggrFuncs" token_help="@HELP@" token_optional="@OPTIONAL@" token_label="@LABEL@"> + <param name="function" type="select" label="@LABEL@" optional="@OPTIONAL@" + help="@HELP@"> + <option value="SUM">Sum Values</option> + <option value="AVG">Average Value</option> + <option value="MIN">Minimum Value</option> + <option value="MAX">Maximum Value</option> + <option value="STD">Standard Deviation</option> + <option value="BAG">Create comma-separated string of attribute values</option> + <option value="BAGD">Create comma-separated list of distinct attribute values</option> + <yield /> + </param> + </xml> + <xml name="text_val"> + <validator type="regex" message="Only alphanumeric characters and underscore allowed.">[\w]+$</validator> + </xml> +</macros> \ No newline at end of file
b |
diff -r 000000000000 -r a80c93182db3 gmql_queries_statements.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gmql_queries_statements.py Tue Jun 26 09:08:06 2018 -0400 |
[ |
b'@@ -0,0 +1,797 @@\n+#!/usr/bin/env python\n+# --------------------------------------------------------------------------------\n+# GMQL Queries Statements Classes\n+# --------------------------------------------------------------------------------\n+# Luana Brancato, luana.brancato@mail.polimi.it\n+# --------------------------------------------------------------------------------\n+\n+import yaml\n+from gmql_queries_constants import * \n+\n+class Statement(object):\n+\n+ def __init__(self):\n+\n+ self.operator = Operator\n+ self.variables = dict ()\n+ self.params = dict ()\n+\n+ def save(self, syntax):\n+\n+ var_o = self.variables.get(\'output\')\n+ var_i = [self.variables.get(\'input1\'),self.variables.get(\'input2\',\'\')]\n+\n+ stm = syntax[\'STATEMENT\'].format(operator=self.operator.value,\n+ out_var=var_o,\n+ in_vars=" ".join(var_i),\n+ parameters=\'{parameters}\')\n+\n+ return stm\n+\n+ def write_query(self, syntax):\n+ self.syntax = yaml.load(syntax)\n+\n+ def set_variable(self, var, var_name):\n+ self.variables[var_name] = var\n+\n+ def set_param(self, param, param_type):\n+ self.params[param_type] = param\n+\n+class Materialize(Statement):\n+\n+ def __init__(self, filename, input_ds):\n+ super(Materialize, self).__init__()\n+ self.operator = Operator.MATERIALIZE\n+ self.set_variable(filename, \'output\')\n+ self.set_variable(input_ds, \'input1\')\n+\n+ def save(self, syntax):\n+\n+ stm = syntax[\'MATERIALIZE\'].format(variable=self.variables.get(\'input1\'),\n+ file_name=self.variables.get(\'output\'))\n+\n+ return stm\n+\n+\n+class Select(Statement):\n+\n+ def __init__(self):\n+ super(Select, self).__init__()\n+ self.operator = Operator.SELECT\n+\n+ def save(self, syntax):\n+ stm = super(Select, self).save(syntax)\n+ params_form = syntax[\'PARAMS\']\n+ select_params = params_form[self.operator.value]\n+ sep = params_form[\'type_separator\']\n+\n+ params = []\n+\n+ # Format conditions over metadata\n+ predicate = self.params.get(\'metadata\', None)\n+\n+ if predicate:\n+ f_predicate = self.save_wff(params_form, predicate)\n+ params.append(select_params[\'metadata\'].format(predicate=f_predicate))\n+\n+ # Format conditions over samples fields\n+ predicate = self.params.get(\'region\', None)\n+\n+ if predicate:\n+ f_predicate = self.save_wff(params_form, predicate)\n+ params.append(select_params[\'region\'].format(predicate=f_predicate))\n+\n+ # Format semijoin conditions\n+ predicate = self.params.get(\'semijoin\', None)\n+\n+ if predicate:\n+ f_predicate = predicate.save(select_params[\'semijoin_predicates\'], sep)\n+ params.append(select_params[\'semijoin\'].format(predicate=f_predicate))\n+\n+ stm = stm.format(parameters=sep.join(params))\n+\n+ return stm\n+\n+ @staticmethod\n+ def save_wff(syntax, pred):\n+ w_format = syntax[\'wff\']\n+\n+ if isinstance(pred, list):\n+ if pred[-1] is Wff.AND or Wff.OR:\n+ return w_format[pred[-1].value].format(p1=Select.save_wff(syntax, pred[0]), p2=Select.save_wff(syntax, pred[1]))\n+ if pred[-1] is Wff.NOT or Wff.BLOCK:\n+ return w_format[pred[-1].value].format(p=Select.save_wff(syntax, pred[0]))\n+ else :\n+ if isinstance(pred, Predicate):\n+ return pred.save(syntax)\n+ else:\n+ return pred\n+\n+ def set_output_var(self, var):\n+ self.set_variable(var, \'output\')\n+\n+ def set_input_var(self, var):\n+ self.set_variable(var, \'input1\')\n+\n+ def set_metadata_predicates(self, logicalPredicate):\n+ self.set_param(logicalPredicate, \'metadata\')\n+\n+ def set_region_predicates(self, logicalPredicat'..b'ewRegion = newRegion\n+ self.function 
= function\n+ self.argument = argRegion\n+\n+ def save(self, syntax):\n+\n+ f = syntax[\'function\'].format(function=self.function.value,\n+ arg=self.argument)\n+\n+ return syntax[\'new_region\'].format(r=self.newRegion,\n+ function=f)\n+\n+class MetaAttributesGenerator(RegionGenerator):\n+ def __init__(self, newAttribute, function, argRegion):\n+ super(MetaAttributesGenerator, self).__init__(newAttribute, function, argRegion)\n+\n+ def save(self, syntax):\n+ return super(MetaAttributesGenerator, self).save(syntax)\n+\n+class ProjectGenerator(RegionGenerator):\n+ def __init__(self, newRegion, function, arg):\n+ super(ProjectGenerator, self).__init__(newRegion, function, arg)\n+\n+ def save(self, syntax):\n+ if self.function == RegFunction.MATH:\n+ f = self.argument\n+ return syntax[\'new_region\'].format(r=self.newRegion, function=f)\n+ if self.function in [\'rename\',\'fixed\'] :\n+ f = self.argument\n+ if self.function == \'fixed\':\n+ f = "{f}".format(f=f)\n+ return syntax[\'new_region\'].format(r=self.newRegion, function=f)\n+ if self.function is RegFunction.META :\n+ f = syntax[\'function\'].format(function=self.function.value,\n+ arg=syntax[\'param_separator\'].join(self.argument))\n+ return syntax[\'new_region\'].format(r=self.newRegion,\n+ function=f)\n+ else:\n+ return super(ProjectGenerator, self).save(syntax)\n+\n+\n+\n+class AttributesList(object):\n+\n+ def __init__(self, attributes):\n+ self.attributes = attributes\n+\n+ def save(self, syntax, sep):\n+ attr = sep.join(self.attributes)\n+ return attr\n+\n+class OrderingAttributes(AttributesList):\n+\n+ def __init__(self):\n+ attributes = list()\n+ super(OrderingAttributes, self).__init__(attributes)\n+\n+ def add_attribute(self, att, desc):\n+ self.attributes.append((att,desc))\n+\n+ def save(self, syntax, sep):\n+ self.attributes = map(lambda x: syntax[x[1]].format(att=x[0]), self.attributes)\n+ return super(OrderingAttributes, self).save(syntax,sep)\n+\n+\n+class JoinbyClause(AttributesList):\n+\n+ def __init__(self, attributes):\n+ super(JoinbyClause, self).__init__(attributes)\n+\n+ def save(self, syntax, sep):\n+ attributes = map(lambda x: syntax[\'metajoin_condition\'][x[1]].format(att_name=x[0]), self.attributes)\n+ return sep.join(attributes)\n+\n+class GroupbyClause(JoinbyClause):\n+ def __init__(self, attributes):\n+ super(GroupbyClause, self).__init__(attributes)\n+\n+ def save(self, syntax, sep):\n+ return super(GroupbyClause, self).save(syntax, sep)\n+\n+\n+class SemiJoinPredicate(AttributesList):\n+\n+ def __init__(self, attributes, dataset, condition):\n+ super(SemiJoinPredicate, self).__init__(attributes)\n+ self.ds_ext = dataset\n+ self.condition = condition\n+\n+ def save(self, syntax, sep):\n+ attributes = super(SemiJoinPredicate, self).save(syntax, sep)\n+ return syntax[self.condition].format(attributes=attributes, ds_ext=self.ds_ext)\n+\n+\n+class GenomicPredicate(object):\n+\n+ def __init__(self):\n+ self.distal_conditions = []\n+ self.distal_stream = \'\'\n+\n+ def add_distal_condition(self, condition, n):\n+ self.distal_conditions.append((DistalConditions(condition), n))\n+\n+ def add_distal_stream(self, direction):\n+ self.distal_stream = DistalStream(direction)\n+\n+ def save(self, syntax, sep):\n+ dc = map(lambda x: syntax[\'distal_condition\'].format(dc=x[0].value, n=x[1]), self.distal_conditions)\n+ if self.distal_stream:\n+ dc.append(syntax[\'distal_stream\'].format(ds=self.distal_stream.value))\n+\n+ return sep.join(dc)\n\\ No newline at end of file\n' |
b |
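One detail worth flagging in Select.save_wff above: the test `pred[-1] is Wff.AND or Wff.OR` parses as `(pred[-1] is Wff.AND) or Wff.OR`, which is always truthy because enum members are truthy, so the binary branch is taken for every list. A sketch of the presumably intended dispatch, with `render` standing in for the leaf call to Predicate.save:

    from gmql_queries_constants import Wff

    def save_wff_fixed(syntax, pred, render):
        # Postfix-formula renderer; 'render' formats a leaf predicate.
        w_format = syntax['wff']
        if isinstance(pred, list):
            op = pred[-1]
            if op in (Wff.AND, Wff.OR):       # compare the member itself
                return w_format[op.value].format(
                    p1=save_wff_fixed(syntax, pred[0], render),
                    p2=save_wff_fixed(syntax, pred[1], render))
            if op in (Wff.NOT, Wff.BLOCK):
                return w_format[op.value].format(
                    p=save_wff_fixed(syntax, pred[0], render))
        return render(pred)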
diff -r 000000000000 -r a80c93182db3 gmql_rest.yaml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gmql_rest.yaml Tue Jun 26 09:08:06 2018 -0400 |
[ |
@@ -0,0 +1,43 @@ +--- +GMQL_URL : + http://genomic.elet.polimi.it/gmql-rest #server to use +access: + prefix: '' + operations: + user: [user] + guest: [guest] + register: [register] + login: [login] + logout: [logout] +metadata: + prefix: 'metadata' + operations: + list: ['{datasetName}', filter] +repository: + prefix: datasets + operations: + list_datasets : [] + list_samples : ['{datasetName}'] + delete_dataset : ['{datasetName}'] + rename_dataset : ['{datasetName}', rename, '{newDatasetName}'] + download_zip : ['{datasetName}', zip] + download_sample : ['{datasetName}','{sample}',region] + download_meta : ['{datasetName}','{sample}',metadata] + upload_url : ['{datasetName}',uploadSampleUrls] + upload_data : ['{datasetName}',uploadSample] + schema : ['{datasetName}',schema] + params: + upload_url: schemaName + upload_data: schemaName +query_exec: + prefix: queries + operations: + compile: [compile] + run: [run,'{name}','{output}'] +query_monitor : + prefix: jobs + operations: + jobs: [] + log: ['{jobid}',log] + stop: ['{jobid}',stop] + status: ['{jobid}',trace] |
b |
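compose_url() itself lives in utilities.py and is not part of this diff; a hypothetical sketch of how a lookup such as compose_url('query_monitor', 'status') could resolve against this YAML (PyYAML assumed; the real helper may differ):

    import yaml

    def compose_url(module, call, config='gmql_rest.yaml'):
        # Hypothetical reimplementation for illustration only.
        with open(config) as f:
            cfg = yaml.safe_load(f)
        section = cfg[module]
        parts = [cfg['GMQL_URL'], section['prefix']] + section['operations'][call]
        return '/'.join(str(p) for p in parts if p)

    # compose_url('query_monitor', 'status')
    # -> 'http://genomic.elet.polimi.it/gmql-rest/jobs/{jobid}/trace'
    # The {jobid} placeholder is then filled with str.format(), as in
    # gmql_rest_queries.read_status().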
diff -r 000000000000 -r a80c93182db3 gmql_rest_datasets.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gmql_rest_datasets.py Tue Jun 26 09:08:06 2018 -0400 |
[ |
b'@@ -0,0 +1,411 @@\n+# Galaxy plugin to REST access to the GMQL services\n+# (Datasets)\n+# ----------------------------------------------------------------------------\n+# Luana Brancato, luana.brancato@mail.polimi.it\n+# ----------------------------------------------------------------------------\n+\n+import argparse\n+\n+import tempfile\n+import json\n+from utilities import *\n+\n+module = \'repository\'\n+\n+\n+def list_datasets(user, output, saveResult=True):\n+ """Retrieve the list of available datasets"""\n+\n+ call = \'list_datasets\'\n+ url = compose_url(module,call)\n+\n+ datasets = get(url, user=user)\n+ list_datasets = datasets[\'datasets\']\n+\n+ if saveResult:\n+ with open(output,\'w\') as f:\n+ for ds in list_datasets:\n+ f.write("{name}\\t{owner}\\n".format(name=ds[\'name\'],owner=ds[\'owner\']))\n+ f.close()\n+ else:\n+ return list_datasets\n+\n+\n+def list_samples(user, output, ds):\n+ """List the samples of a given dataset"""\n+\n+ call = \'list_samples\'\n+ url = compose_url(module,call)\n+\n+ # Specify for which dataset.\n+ # If it\'s a public dataset, the \'public.\' prefix must be added to the dataset name\n+\n+ # Check if the ds is public or not\n+ owner = \'\'\n+ for d in list_datasets(user, \'\', False):\n+ if d[\'name\'] == ds :\n+ owner = d[\'owner\']\n+\n+ if (owner==\'public\'):\n+ url = url.format(datasetName=\'public.\'+ ds)\n+ else :\n+ url = url.format(datasetName=ds)\n+\n+ samples = get(url, user=user)\n+ list_s = samples[\'samples\']\n+\n+ with open(output, \'w\') as f_out:\n+ for s in list_s:\n+ f_out.write("{id}\\t{name}\\t{ext}\\n".format(id=s[\'id\'], name=s[\'name\'],ext=s[\'path\'].rsplit(\'.\',1)[1]))\n+\n+\n+def rename_dataset(user, output, ds, new):\n+ """Rename a dataset from the user\'s private space"""\n+\n+ call = \'rename_dataset\'\n+ url = compose_url(module,call)\n+ url = url.format(datasetName=ds, newDatasetName=new)\n+\n+ outcome = get(url, user=user)\n+\n+ # Return the updated list of user\'s datasets\n+ list_datasets(user, output)\n+\n+ # Write on stdout the operation outcome\n+ sys.stdout.write("Rename: {result}".format(result=outcome[\'result\']))\n+\n+\n+def delete_dataset(user, output, ds):\n+ """Delete a dataset from the user\'s private space"""\n+\n+ call = \'delete_dataset\'\n+ url = compose_url(module,call)\n+ url = url.format(datasetName=ds)\n+\n+ outcome = delete(url, user=user)\n+\n+ #Return the updated list of user\'s datasets\n+ list_datasets(user, output)\n+\n+ #Write on stdout the operation outcome\n+ sys.stdout.write("Delete: {result}".format(result=outcome[\'result\']))\n+\n+\n+def upload_samples_url(user, output, dataset, schema, samples, updatedDsList):\n+ """Upload a dataset given the urls of the samples and their schema"""\n+\n+ #Compose the url for the REST call\n+ call = \'upload_url\'\n+ url = compose_url(module,call)\n+ url = url.format(datasetName=dataset)\n+\n+ content = dict()\n+\n+ # Put back escaped \'&\'\n+ samples = samples.replace(\'__amp__\', \'&\')\n+ schema = schema.replace(\'__amp__\', \'&\')\n+\n+ # If schema type is given, add the option to the url. Otherwise, it check if the provided schema is a valid url.\n+\n+ params = dict ()\n+\n+ if schema in [\'bed\',\'bedGraph\',\'NarrowPeak\',\'BroadPeak\',\'vcf\'] :\n+ params = add_url_param(params, module, call, schema)\n+ else:\n+ #check_schema = validators.url(schema)\n+ #if isinstance(check_schema, validators.utils.ValidationFailure): stop_err("Schema URL not valid")\n+ content.update(schema_file=schema)\n+\n+\n+ # Samples are listed one per line. 
It lists them looking for the new line marker (\'__cn__\')\n+ samples_list = samples.split(\'__cn__\')\n+\n+ # The regexp in input can allow a final empty string. The following removes it if present.\n+ if not samples_list[-1]:\n+ samples_list.remove("")\n+\n+ # # For each sample url, check if it is valid. If at least ones is not, upload fails\n+ # # and'..b'_samples(user, temp.name, ds)\n+\n+ # Retrieve names and extensions of the samples\n+ with open(temp.name, "r") as t:\n+ samples = map(lambda x: helper_samples(x), t)\n+ t.close()\n+\n+ os.makedirs(\'samples\')\n+ os.makedirs(\'metadata\')\n+\n+ # Create a new dict containing names and actual path to files\n+\n+ for s in samples:\n+\n+ # Get the sample\n+ get_sample(user, "samples/{name}.{ext}".format(name=s[\'name\'].replace(\'_\',\'\'), ext=s[\'ext\']), ds, s[\'name\'])\n+\n+ # Get its metadata\n+ get_sample_meta(user,"metadata/{name}.meta".format(name=s[\'name\'].replace(\'_\',\'\')),ds,s[\'name\'])\n+\n+def helper_samples(s):\n+ """From a list of samples retrieve name and extension"""\n+ split = s.split(\'\\t\')\n+ sample = dict()\n+ sample.update(name=split[1])\n+ sample.update(ext=split[2].rstrip(\'\\n\'))\n+\n+ return sample\n+\n+\n+def get_schema(user, ds, file) :\n+ """Get the schema field of the input dataset and save it in file"""\n+\n+ call = "schema"\n+\n+ url = compose_url(module, call)\n+\n+ # Check if the ds is public or not\n+ owner = \'\'\n+ for d in list_datasets(user, \'\', False):\n+ if d[\'name\'] == ds :\n+ owner = d[\'owner\']\n+\n+ if (owner==\'public\'):\n+ url = url.format(datasetName=\'public.\'+ ds)\n+ else :\n+ url = url.format(datasetName=ds)\n+\n+ schema = get(url, user=user)\n+\n+\n+ with open(file,\'w\') as f_out:\n+ for f in schema[\'fields\'] :\n+ f_out.write(\'{field}\\t{type}\\n\'.format(field=f[\'name\'],type=f[\'type\']))\n+\n+\n+\n+def set_columns_names(user, ds_name, samples_file, schema_file):\n+\n+ get_schema(user,ds_name, schema_file)\n+\n+ cwd = os.getcwd().rsplit(\'/\',1)[0]\n+ file = \'/\'.join([cwd, \'galaxy.json\'])\n+\n+ with open(schema_file, \'r\') as f_in:\n+ columns = [x.split(\'\\t\') for x in f_in]\n+ column_names = [x[0] for x in columns]\n+ column_types = [x[1].rstrip(\'\\n\') for x in columns]\n+\n+ metadata = dict()\n+ metadata.update(column_names=column_names,\n+ column_types=column_types)\n+\n+\n+ with open(file, \'w\') as f_out:\n+ with open(samples_file, \'r\') as f_in:\n+ samples_list = map(lambda x: x, f_in)\n+ samples_list.pop()\n+ for s in samples_list:\n+ config = dict()\n+ config.update(type=\'new_primary_dataset\',\n+ filename=s,\n+ metadata=metadata)\n+ f_out.write(json.dumps(config) + \'\\n\')\n+\n+\n+\n+def stop_err(msg):\n+ sys.stderr.write("%s\\n" % msg)\n+ sys.exit()\n+\n+\n+def __main__():\n+\n+ parser = argparse.ArgumentParser()\n+ parser.add_argument("output")\n+ parser.add_argument("-opt_out1")\n+ parser.add_argument("-user")\n+ parser.add_argument("-cmd")\n+ parser.add_argument("-samples")\n+ parser.add_argument("-dataset")\n+ parser.add_argument("-new_name")\n+ parser.add_argument("-schema")\n+ parser.add_argument("-add_output")\n+\n+ args = parser.parse_args()\n+\n+ if args.cmd == \'list\':\n+ list_datasets(args.user, args.output)\n+ if args.cmd == \'samples\':\n+ list_samples(args.user, args.output, args.dataset)\n+ if args.cmd == \'rename\' :\n+ rename_dataset(args.user, args.output, args.dataset, args.new_name)\n+ if args.cmd == \'delete\':\n+ delete_dataset(args.user, args.output, args.dataset)\n+ if args.cmd == \'upload_url\':\n+ 
upload_samples_url(args.user, args.output, args.dataset, args.schema, args.samples, args.add_output)\n+ if args.cmd == \'upload\' :\n+ upload_samples(args.user, args.output, args.dataset, args.schema, args.samples, args.add_output)\n+ if args.cmd == \'import\':\n+ import_samples(args.user, args.dataset)\n+ if args.cmd == \'download\' :\n+ download_samples(args.user,args.output,args.dataset)\n+ if args.cmd == \'schema\' :\n+ set_columns_names(args.user, args.dataset, args.samples, args.output)\n+\n+\n+if __name__ == "__main__":\n+ __main__()\n' |
b |
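set_columns_names() above writes one JSON object per sample into galaxy.json so that Galaxy can attach column names and types to the imported files. A minimal sketch of one such line, with an illustrative file path and the field list taken from the TG test schema below:

    import json

    metadata = {
        "column_names": ["chr", "left", "right", "strand", "source", "feature",
                         "score", "frame", "name", "signal", "pvalue", "qvalue", "peak"],
        "column_types": ["STRING", "LONG", "LONG", "CHAR", "STRING", "STRING",
                         "DOUBLE", "STRING", "STRING", "DOUBLE", "DOUBLE", "DOUBLE", "DOUBLE"],
    }
    line = {"type": "new_primary_dataset",
            "filename": "samples/S00000.gdm",   # illustrative path
            "metadata": metadata}
    print(json.dumps(line))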
diff -r 000000000000 -r a80c93182db3 gmql_rest_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gmql_rest_macros.xml Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,28 @@ +<macros> + <xml name="citations"> + <citations> + <citation type="bibtex"> + @article{Masseroli2015, + author = {{Marco Masseroli, Pietro Pinoli, Francesco Venco, Abdulrahman Kaitoua, Vahid Jalili, Fernando Palluzzi, Heiko Muller, Stefano Ceri}}, + doi = {10.1093/bioinformatics/btv048}, + isbn = {13674811 (Electronic)}, + issn = {14602059}, + journal = {Bioinformatics}, + number = {12}, + pages = {1881-1888}, + pmid = {25649616}, + title = {{GenoMetric Query Language: A novel approach to large-scale genomic data management}}, + volume = {31}, + year = {2015} + } + </citation> + <citation type="bibtex"> + @misc{BioinformaticsGroup, + author = {{Bioinformatics Group}, Politecnico di Milano}, + title = {{GMQL Documentation}}, + url = {http://www.bioinformatics.deib.polimi.it/genomic{\_}computing/GMQLsystem/documentation.html}, + } + </citation> + </citations> + </xml> +</macros> \ No newline at end of file
b |
diff -r 000000000000 -r a80c93182db3 gmql_rest_queries.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gmql_rest_queries.py Tue Jun 26 09:08:06 2018 -0400 |
[ |
@@ -0,0 +1,247 @@ +# Galaxy plugin to REST access to the GMQL services +# (Queries) +# ---------------------------------------------------------------------------- +# Luana Brancato, luana.brancato@mail.polimi.it +# ---------------------------------------------------------------------------- + +import argparse +from time import sleep + +from gmql_rest_datasets import list_datasets, import_samples +from utilities import * + +module_execution = 'query_exec' +module_monitor = 'query_monitor' + + +def check_input(query): + + # Clean the input from Galaxy escape characters. + + query = query.replace('__dq__', '"') + query = query.replace('__sq__', "'") + query = query.replace('__gt__', ">") + query = query.replace('__lt__', "<") + query = query.replace('__cn__', '\n') + + + return query + + +def compile_query(user, filename, query, log_file): + """Compile the given query""" + + call = 'compile' + + #Read query from file + with open(query, 'r') as f_in: + query_text = f_in.read() + + #Check the input + query_cl = check_input(query_text) + + + # Then ask it to be compiled + url = compose_url(module_execution, call) + + outcome = post(url, query_cl, user=user, content_type='text') + + status = outcome['status'] + message = outcome['message'] + target_ds = outcome['id'] + + if status == 'COMPILE_SUCCESS': + with open(log_file, 'w') as f: + f.write("{status}\n{dataset}".format(status=status, dataset=target_ds)) + f.close() + if status == 'COMPILE_FAILED': + with open(log_file, 'w') as f: + f.write("{status}\n{message}".format(status=status, message=message)) + f.close() + stop_err("Compilation failed.\nSee log for details.") + + +def run_query(user, filename, query, log_file, rs_format, importResult=True): + """Run the given query. It returns an execution log and the resulting dataset.""" + + + call = 'run' + + # Read query from file + with open(query, 'r') as f_in: + query_text = f_in.read() + + # Check the input + query_cl = check_input(query_text) + + # Then ask it to be executed + + status = "NEW" + + url = compose_url(module_execution, call) + url = url.format(name=filename,output=rs_format) + + outcome = post(url, query_cl, user=user, content_type='text') + + jobid = outcome['id'] + + while status != "SUCCESS" and status != "EXEC_FAILED" and status != "DS_CREATION_FAILED": + log = read_status(user, jobid) + status = log['status'] + sleep(5) + + message = log['message'] + time = log['executionTime'] + + if status == "EXEC_FAILED" or status == "DS_CREATION_FAILED": + with open(log_file, 'w') as f: + f.write("{status}\n{message}\n{execTime}".format(status=status, message=message, execTime=time)) + f.close() + stop_err("Execution failed.\nSee log for details") + + if status == "SUCCESS": + ext_log = read_complete_log(user, jobid) + job_list = ext_log['log'] + jobs = "" + for j in job_list: + jobs = "{j_list}{j}\n".format(j_list=jobs, j=j) + + with open(log_file, 'w') as f: + f.write("{status}\n" + "{message}\n" + "{execTime}\n" + "\n" + "{jobs}\n".format(status=status, message=message, execTime=time, jobs=jobs)) + f.close() + + importResult = bool(importResult) + + if importResult: + # For now, it gets only the final result (it's easier to deal later with simple collections + # than a nested ones) + + ds = log['datasets'][-1] + ds_name = ds.get('name') + import_samples(user, ds_name) + + +def read_status(user, jobid): + """Given the job id, it retrieves the status of the current operation + (as a JSON file)""" + + call = 'status' + + url = compose_url(module_monitor, call) + url = 
url.format(jobid=jobid) + + status = get(url, user=user, response_type='json') + + return status + + + +def read_complete_log(user, jobid): + """Given the jobid, it retrieves the complete log of the latest operation + (as a JSON file)""" + + call = 'log' + + url = compose_url(module_monitor, call) + url = url.format(jobid=jobid) + + log = get(url, user=user, response_type='json') + + return log + + +def show_jobs(user, output): + """Retrieve the list of the user's jobs""" + + call = 'jobs' + + url = compose_url(module_monitor, call) + + jobs = get(url, user=user, response_type='json') + + jobs_list = jobs['jobs'] + jobs_out = list() + + # For each job in the list retrieve the relative status info + for j in jobs_list: + job = dict() + j_id = j['id'] + job.update(id=j_id) + trace = read_status(user, j_id) + + status = trace['status'] + if status == 'SUCCESS' : + job.update(message=trace['message'], + status=status, + ds=trace['datasets'][0]['name'], + time=trace['executionTime']) + else : + job.update(message=trace['message'], + status=status, + ds=trace['datasets'][0]['name']) + + jobs_out.append(job) + + with open(output, 'w') as f: + for j in jobs_out: + f.write("{jobid}\t" + "{status}\t" + "{message}\t" + "{ds}\t" + "{time}\n".format(jobid=j.get('id'), status=j.get('status'), message=j.get('message'), + ds=j.get('ds'),time=j.get('time'))) + f.close() + +def stop_query(user,jobid,output) : + """Stop the execution of the given job""" + + call = 'stop' + + url = compose_url(module_monitor, call) + url = url.format(jobid=jobid) + + outcome = get(url, user=user, response_type='json') + + with open(output,'w') as f_out : + json.dump(outcome, f_out) + + + +def stop_err(msg): + sys.stderr.write("%s\n" % msg) + + +def __main__(): + parser = argparse.ArgumentParser() + parser.add_argument("-user") + parser.add_argument("-cmd") + parser.add_argument("-name") + parser.add_argument("-query") + parser.add_argument("-queryNew") + parser.add_argument("-queryLocal") + parser.add_argument("-log") + parser.add_argument("-job") + parser.add_argument("-format") + parser.add_argument("-importFlag") + parser.add_argument("-add_output") + + + args = parser.parse_args() + + if args.cmd == 'compile': + compile_query(args.user, args.name, args.query, args.log) + if args.cmd == 'execute': + run_query(args.user, args.name, args.query, args.log, args.format, args.importFlag) + list_datasets(args.user,args.add_output) + if args.cmd == 'jobs': + show_jobs(args.user, args.log) + if args.cmd == 'stop' : + stop_query(args.user, args.job, args.log) + + +if __name__ == "__main__": + __main__() |
b |
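run_query() above polls read_status() every five seconds until the job leaves its running states; a condensed sketch of that loop follows. One caveat carried over from the diff: `importResult = bool(importResult)` is True for any non-empty string, including 'false', so a flag arriving from the command line needs an explicit comparison:

    from time import sleep

    TERMINAL = {"SUCCESS", "EXEC_FAILED", "DS_CREATION_FAILED"}

    def wait_for_job(user, jobid, read_status, interval=5):
        # Poll the GMQL job trace until it reaches a terminal state (sketch).
        while True:
            log = read_status(user, jobid)
            if log["status"] in TERMINAL:
                return log
            sleep(interval)

    # String flags must be compared, not cast:
    # import_result = (args.importFlag == 'true')   # bool('false') is True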
diff -r 000000000000 -r a80c93182db3 gmql_syntax.yaml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gmql_syntax.yaml Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,89 @@ +--- +GMQL-VERSION: 'V2.1' +STATEMENT: + '{out_var} = {operator} ({parameters}) {in_vars};' +MATERIALIZE: + 'MATERIALIZE {variable} INTO {file_name};' +PARAMS: + type_separator: '; ' + param_separator: ', ' + wff: + NOT: 'NOT ({p})' + AND: '{p1} AND {p2}' + OR: '{p1} OR {p2}' + BLOCK: '({p})' + predicate: + eq: '{att} == {val}' + lt: '{att} < {val}' + gt: '{att} > {val}' + let: '{att} <= {val}' + get: '{att} >= {val}' + values: + string: "'{p}'" + float: "{p}" + int: "{p}" + coordinate: "{p}" + meta: "META({p})" + metajoin_condition: + SIMPLE: '{att_name}' + FULL: 'FULL({att_name})' + EXACT: 'EXACT({att_name})' + new_region: '{r} AS {function}' + function: '{function}({arg})' + SELECT: + metadata: '{predicate}' + region: 'region: {predicate}' + semijoin: 'semijoin: {predicate}' + semijoin_predicates: + IN: '{attributes} IN {ds_ext}' + NOT_IN: '{attributes} NOT IN {ds_ext}' + PROJECT: + regions: '{att_list}' + metadata: 'metadata: {att_list}' + att_list: + keep: '{att_list}' + exclude: 'ALLBUT {att_list}' + newRegions: 'region_update: {newAttributes}' + newMetadata: 'metadata_update: {newAttributes}' + MAP: + regions: '{newRegions}' + count: 'count_name: {count_name}' + joinby: 'joinby: {joinbyClause}' + COVER: + groupby: 'groupby: {groupbyClause}' + regions: 'aggregate: {newRegions}' + EXTEND: '' + GROUP: + meta: '{groupMeta}' + newMetadata: 'meta_aggregates: {newAttributes}' + regions: 'region_keys: {groupRegions}' + newRegions: 'region_aggregates: {newRegions}' + MERGE: + groupby: 'groupby: {groupbyClause}' + UNION: '' + DIFFERENCE: + exact: 'exact: {flag}' + joinby: 'joinby: {joinbyClause}' + ORDER: + att_list: + asc: '{att}' + desc: '{att} DESC' + metadata: + orderingAttributes: '{att_list}' + top: + n: 'meta_top: {k}' + perc: 'meta_topp: {k}' + group: 'meta_topg: {k}' + region: + orderingAttributes: 'region_order: {att_list}' + top: + n: 'region_top: {k}' + perc: 'region_topp: {k}' + group: 'region_topg: {k}' + JOIN: + genomic_predicate: '{genomic_predicate}' + equi_clause: 'on_attributes: {att_list}' + output_opt: 'output: {coord_param}' + joinby: 'joinby: {joinbyClause}' + distal_condition: '{dc}({n})' + distal_stream: '{ds}' \ No newline at end of file |
b |
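These templates are exactly what the Statement.save() implementations consume; a small sketch of how the STATEMENT and SELECT entries combine into a concrete query line, assuming the file is readable as gmql_syntax.yaml:

    import yaml

    with open('gmql_syntax.yaml') as f:
        syntax = yaml.safe_load(f)

    params = syntax['PARAMS']
    pred = params['predicate']['gt'].format(att='score', val='5.0')
    region = params['SELECT']['region'].format(predicate=pred)
    stm = syntax['STATEMENT'].format(operator='SELECT', out_var='OUT',
                                     in_vars='IN', parameters=region)
    print(stm)  # OUT = SELECT (region: score > 5.0) IN;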
diff -r 000000000000 -r a80c93182db3 test-data/Example1_Archive.zip |
b |
Binary file test-data/Example1_Archive.zip has changed |
b |
diff -r 000000000000 -r a80c93182db3 test-data/TG/S_00000.gdm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TG/S_00000.gdm Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,3 @@ +chr1 99 150 * GMQL Region 4 . . 10 7.0638 -1 -1 +chr1 119 180 * GMQL Region 3 . . 24 19.7648 -1 -1 +chr1 219 240 * GMQL Region 8 . . 13 11.2001 -1 -1 |
b |
diff -r 000000000000 -r a80c93182db3 test-data/TG/S_00000.gdm.meta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TG/S_00000.gdm.meta Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,71 @@ +ID 1460 +avg_score 6 +cell AG04450 +cell_description fetal lung fibroblast +cell_karyotype normal +cell_lineage endoderm +cell_orderUrl http://ccr.coriell.org/Sections/Search/Sample_Detail.aspx?Ref=AG04450 +cell_organism human +cell_protocol Stam:AG04450_Stam_protocol.pdf +cell_sex Female +cell_tag AG04450 +cell_termId BTO:0000763 +cell_termUrl http://www.ebi.ac.uk/ontology-lookup/browse.do?ontName=BTO&termId=BTO%3A0000763 +cell_tier 3 +cell_tissue lung +cell_type Cell Line +cell_vendorId AG04450 +cell_vendorName Coriell +composite wgEncodeUwDnase +dataType DnaseSeq +dataType_dataGroup Open Chromatin +dataType_description DNaseI HS Sequencing +dataType_label DNase-seq +dataType_tag DNASESEQ +dataType_type dataType +dataVersion ENCODE June 2010 Freeze +dateResubmitted 2010-06-17 +dateSubmitted 2010-01-07 +dateUnrestricted 2010-10-07 +dccAccession wgEncodeEH000506 +geoSampleAccession GSM736563 +grant Stam +grant_description Stamatoyannopoulous +grant_grantInst University of Washington +grant_label Stamatoyannopoulous +grant_projectName UW +grant_tag STAM +grant_type grant +lab UW +labExpId DS12255 +labVersion lmax-v1.0 +lab_description Stamatoyannopoulous - University of Washington +lab_grantPi Stam +lab_labInst University of Washington +lab_labPi Stam +lab_labPiFull John Stamatoyannopoulous +lab_label Stamatoyannopoulous - UW +lab_organism human +lab_tag UW +lab_type lab +md5sum 1250e67a89764a7e877bca3bbee70f62 +origAssembly hg18 +patient_age 75 +project wgEncode +replicate 2 +sex Female +size 1.8M +subId 1549 +tableName wgEncodeUwDnaseAg04450PkRep2 +treatment None +treatment_description No special treatment or protocol applies +treatment_label No treatment or prot +treatment_tag NONE +treatment_type control +type narrowPeak +url http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeUwDnase/wgEncodeUwDnaseAg04450PkRep2.narrowPeak.gz +view Peaks +view_description Regions of enriched signal in experiment +view_label Peaks +view_tag PKS +view_type view |
b |
diff -r 000000000000 -r a80c93182db3 test-data/TG/S_00001.gdm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TG/S_00001.gdm Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,2 @@ +chr1 149 180 * GMQL Region 6 . . 35 30.2764 -1 -1 +chr1 229 235 * GMQL Region 9 . . 17 11.1675 -1 -1 |
b |
diff -r 000000000000 -r a80c93182db3 test-data/TG/S_00001.gdm.meta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TG/S_00001.gdm.meta Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,93 @@ +ID 1891 +antibody CTCF +antibody_antibodyDescription Rabbit polyclonal. Antibody Target: CTCF +antibody_lab Myers, Hardison, Snyder +antibody_label CTCF (07-729) +antibody_lots 1350637 DAM1472197 +antibody_orderUrl http://www.millipore.com/catalogue/item/07-729 +antibody_tag CTCF +antibody_target CTCF +antibody_targetClass TFSS +antibody_targetDescription CTCF zinc finger transcription factor. A sequence specific DNA binding protein that functions as an insulator, blocking enhancer activity. It has also been suggested to block the spreading of chromatin structure in certain instances. +antibody_targetId GeneCard:CTCF +antibody_targetUrl http://www.genecards.org/cgi-bin/carddisp.pl?gene=CTCF +antibody_type Antibody +antibody_validation Human_-_CTCF_(07-729)(Western_blot,Motif_Enrichment):human_CTCF_validation_Myers.pdf Human_-_CTCF_(07-729)(Western_blot):human_CTCF_07-729_validation_Snyder.pdf Mouse_-_CTCF(Western_blot):mouse_CTCF_validation_Hardison.pdf +antibody_vendorId 07-729 +antibody_vendorName Millipore +avg_score 7.5 +cell AG04450 +cell_description fetal lung fibroblast +cell_karyotype normal +cell_lineage endoderm +cell_orderUrl http://ccr.coriell.org/Sections/Search/Sample_Detail.aspx?Ref=AG04450 +cell_organism human +cell_protocol Stam:AG04450_Stam_protocol.pdf +cell_sex Male +cell_tag AG04450 +cell_termId BTO:0000763 +cell_termUrl http://www.ebi.ac.uk/ontology-lookup/browse.do?ontName=BTO&termId=BTO%3A0000763 +cell_tier 2 +cell_tissue lung +cell_type Cell Line +cell_vendorId AG04450 +cell_vendorName Coriell +composite wgEncodeUwTfbs +control std +controlId wgEncodeEH000930 +control_description Standard input signal for most experiments. +control_label Standard Control +control_tag STD +control_type control +dataType ChipSeq +dataType_dataGroup TFBS & Histones +dataType_description Chromatin IP Sequencing +dataType_label ChIP-seq +dataType_tag CHIPSEQ +dataType_type dataType +dataVersion ENCODE Jan 2011 Freeze +dateSubmitted 2010-10-23 +dateUnrestricted 2011-07-23 +dccAccession wgEncodeEH000976 +geoSampleAccession GSM749769 +grant Stam +grant_description Stamatoyannopoulous +grant_grantInst University of Washington +grant_label Stamatoyannopoulous +grant_projectName UW +grant_tag STAM +grant_type grant +lab UW +labExpId DS16029 +labVersion lmax-v1.0 +lab_description Stamatoyannopoulous - University of Washington +lab_grantPi Stam +lab_labInst University of Washington +lab_labPi Stam +lab_labPiFull John Stamatoyannopoulous +lab_label Stamatoyannopoulous - UW +lab_organism human +lab_tag UW +lab_type lab +md5sum 3c6b5bf3eefc28b6bab4ef37916011a1 +origAssembly hg19 +patient_age 63 +project wgEncode +replicate 1 +setType exp +sex Male +size 776K +subId 2663 +tableName wgEncodeUwTfbsAg04450CtcfStdPkRep1 +treatment None +treatment_description No special treatment or protocol applies +treatment_label No treatment or prot +treatment_tag NONE +treatment_type control +type narrowPeak +url http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeUwTfbs/wgEncodeUwTfbsAg04450CtcfStdPkRep1.narrowPeak.gz +view Peaks +view_description Regions of enriched signal in experiment +view_label Peaks +view_tag PKS +view_type view |
b |
diff -r 000000000000 -r a80c93182db3 test-data/TG/S_00002.gdm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TG/S_00002.gdm Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,11 @@ +chr2 99 150 * GMQL Region 10 . . 9 5.27936 -1 -1 +chr2 199 240 * GMQL Region 9 . . 22 19.6922 -1 -1 +chr2 399 440 * GMQL Region 0.9 . . 15 15.1452 -1 -1 +chr2 539 570 * GMQL Region 0.9 . . 15 15.1452 -1 -1 +chr4 49 100 + GMQL Region 0.9 . . 15 15.1452 -1 -1 +chr4 149 179 - GMQL Region 0.9 . . 15 15.1452 -1 -1 +chr4 199 249 - GMQL Region 0.9 . . 15 15.1452 -1 -1 +chr5 99 200 * GMQL Region 4 . . 15 17.1452 -1 -1 +chr5 299 340 + GMQL Region 4 . . 15 15.1452 -1 -1 +chr5 319 340 - GMQL Region 3 . . 15 17.1452 -1 -1 +chr5 379 420 - GMQL Region 3 . . 15 17.1452 -1 -1 |
b |
diff -r 000000000000 -r a80c93182db3 test-data/TG/S_00002.gdm.meta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TG/S_00002.gdm.meta Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,71 @@ +ID 1459 +Info example text +avg_score 5 +cell AG04450 +cell_description fetal lung fibroblast +cell_karyotype normal +cell_lineage endoderm +cell_orderUrl http://ccr.coriell.org/Sections/Search/Sample_Detail.aspx?Ref=AG04450 +cell_organism human +cell_protocol Stam:AG04450_Stam_protocol.pdf +cell_sex Male +cell_tag AG04450 +cell_termId BTO:0000763 +cell_termUrl http://www.ebi.ac.uk/ontology-lookup/browse.do?ontName=BTO&termId=BTO%3A0000763 +cell_tier 2 +cell_tissue lung +cell_type Cell Line +cell_vendorId AG04450 +cell_vendorName Coriell +composite wgEncodeUwDnase +dataType DnaseSeq +dataType_dataGroup Open Chromatin +dataType_description DNaseI HS Sequencing +dataType_label DNase-seq +dataType_tag DNASESEQ +dataType_type dataType +dataVersion ENCODE June 2010 Freeze +dateResubmitted 2010-06-17 +dateSubmitted 2010-01-07 +dateUnrestricted 2010-10-07 +dccAccession wgEncodeEH000506 +geoSampleAccession GSM736514 +grant Stam +grant_description Stamatoyannopoulous +grant_grantInst University of Washington +grant_label Stamatoyannopoulous +grant_projectName UW +grant_tag STAM +grant_type grant +lab UW +labExpId DS12270 +labVersion lmax-v1.0 +lab_description Stamatoyannopoulous - University of Washington +lab_grantPi Stam +lab_labInst University of Washington +lab_labPi Stam +lab_labPiFull John Stamatoyannopoulous +lab_label Stamatoyannopoulous - UW +lab_organism human +lab_tag UW +lab_type lab +md5sum 8f7c4f145d130385f9b5e732961b3c42 +origAssembly hg18 +project wgEncode +replicate 1 +sex Male +size 1.7M +subId 1550 +tableName wgEncodeUwDnaseAg04450PkRep1 +treatment None +treatment_description No special treatment or protocol applies +treatment_label No treatment or prot +treatment_tag NONE +treatment_type control +type narrowPeak +url http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeUwDnase/wgEncodeUwDnaseAg04450PkRep1.narrowPeak.gz +view Peaks +view_description Regions of enriched signal in experiment +view_label Peaks +view_tag PKS +view_type view |
b |
diff -r 000000000000 -r a80c93182db3 test-data/TG/schema.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TG/schema.xml Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,18 @@ +<?xml version='1.0' encoding='UTF-8'?> +<gmqlSchemaCollection name="queryname_20180417_000927_TG" xmlns="http://genomic.elet.polimi.it/entities"> + <gmqlSchema type="Peak" coordinate_system="default"> + <field type="STRING">chr</field> + <field type="LONG">left</field> + <field type="LONG">right</field> + <field type="CHAR">strand</field> + <field type="STRING">source</field> + <field type="STRING">feature</field> + <field type="DOUBLE">score</field> + <field type="STRING">frame</field> + <field type="STRING">name</field> + <field type="DOUBLE">signal</field> + <field type="DOUBLE">pvalue</field> + <field type="DOUBLE">qvalue</field> + <field type="DOUBLE">peak</field> + </gmqlSchema> +</gmqlSchemaCollection> \ No newline at end of file |
b |
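The schema above names the thirteen tab-separated columns of the .gdm samples in this test set; a minimal sketch of reading one sample against it:

    # Field names as listed in test-data/TG/schema.xml.
    FIELDS = ["chr", "left", "right", "strand", "source", "feature", "score",
              "frame", "name", "signal", "pvalue", "qvalue", "peak"]

    def read_gdm(path):
        # Yield one dict per region line of a .gdm sample (values stay strings).
        with open(path) as f:
            for line in f:
                yield dict(zip(FIELDS, line.rstrip("\n").split("\t")))

    # e.g. next(read_gdm("test-data/TG/S_00000.gdm"))["score"] -> '4'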
diff -r 000000000000 -r a80c93182db3 test-data/cover1.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cover1.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,2 @@ +IN = SELECT () Example_Dataset_1 ; +OUT = COVER (1, ANY; groupby: cell, antibody_target; aggregate: min_pvalue AS MIN(pvalue)) IN ; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/cover2.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cover2.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +IN = SELECT () Example_Dataset_1 ; +OUT1 = FLAT (2, 4; groupby: cell) IN ; +OUT2 = SUMMIT (2, 4; groupby: cell) IN ; +OUT3 = HISTOGRAM (ALL / 2, (ALL + 1) / 2; groupby: antibody_target) IN ; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/difference1.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/difference1.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,3 @@ +IN = SELECT () Example_Dataset_1 ; +IN2 = SELECT () Example_Dataset_2 ; +OUT = DIFFERENCE (exact: true; joinby: cell) IN IN2; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/extend1.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/extend1.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,2 @@ +OUT = SELECT () Example_Dataset_1 ; +OUT2 = EXTEND (avg_score AS AVG(score), max_p AS MAX(pvalue)) OUT ; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/group1.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/group1.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,2 @@ +OUT = SELECT () Example_Dataset_1 ; +OUT2 = GROUP (cell_tissue; meta_aggregates: min_tier AS MIN(cell_tier); region_keys: score; region_aggregates: min_signal AS MIN(signal)) OUT ; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/guest.gmql_user --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/guest.gmql_user Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,1 @@ +Guest 833ed58f-d1d3-4963-8bf3-46e0562beac2 True |
b |
diff -r 000000000000 -r a80c93182db3 test-data/guest2.gmql_user --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/guest2.gmql_user Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,1 @@ +Guest 3134ead6-4661-4994-a7c6-2fc2ae0a56da True |
b |
diff -r 000000000000 -r a80c93182db3 test-data/join1.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join1.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,3 @@ +IN = SELECT () Example_Dataset_1 ; +IN2 = SELECT () Example_Dataset_2 ; +OUT = JOIN (MD(1), UP; output: RIGHT; joinby: cell) IN IN2; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/join2.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join2.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,3 @@ +IN = SELECT () Example_Dataset_1 ; +IN2 = SELECT () Example_Dataset_2 ; +OUT = JOIN (DL(0); on_attributes: score, chr; output: INT; joinby: cell, provider) IN IN2; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/map1.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/map1.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,3 @@ +IN = SELECT () Example_Dataset_1 ; +IN2 = SELECT () Example_Dataset_2 ; +OUT = MAP (avg_score AS AVG(score), min_score AS MIN(score), chr_list AS BAG(chr); count_name: mapped_n; joinby: cell_tissue) IN IN2; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/merge1.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/merge1.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,2 @@ +OUT = SELECT () Example_Dataset_1 ; +OUT2 = MERGE (groupby: EXACT(antibody_target)) OUT ; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/order1.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/order1.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,2 @@ +OUT = SELECT () Example_Dataset_1 ; +OUT2 = ORDER (ID; meta_top: 5; region_order: score DESC, start) OUT ; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/project1.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/project1.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,2 @@ +OUT = SELECT () Example_Dataset_1 ; +OUT2 = PROJECT (ALLBUT name; frame; metadata: cell; region_update: lengh AS stop - start) OUT ; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/query.gmql_query --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/query.gmql_query Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,2 @@ +VAR = SELECT (grant == 'Stam') Example_Dataset_1 ; +MATERIALIZE VAR INTO TG; |
b |
diff -r 000000000000 -r a80c93182db3 test-data/rep.gmql_repository --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rep.gmql_repository Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,42 @@ +Example1 guest_new880 +ChIA_PET public +Example_Dataset_1 public +Example_Dataset_2 public +GRCh38_ANNOTATION_GENCODE public +GRCh38_ANNOTATION_REFSEQ public +GRCh38_ENCODE_BROAD_AUG_2017 public +GRCh38_ENCODE_BROAD_NOV_2017 public +GRCh38_ENCODE_NARROW_AUG_2017 public +GRCh38_ENCODE_NARROW_NOV_2017 public +GRCh38_TCGA_copy_number public +GRCh38_TCGA_copy_number_masked public +GRCh38_TCGA_gene_expression public +GRCh38_TCGA_methylation public +GRCh38_TCGA_miRNA_expression public +GRCh38_TCGA_miRNA_isoform_expression public +GRCh38_TCGA_somatic_mutation_masked public +HG19_ANNOTATION_GENCODE public +HG19_ANNOTATION_REFSEQ public +HG19_BED_ANNOTATION public +HG19_ENCODE_BROAD_AUG_2017 public +HG19_ENCODE_BROAD_NOV_2016 public +HG19_ENCODE_BROAD_NOV_2017 public +HG19_ENCODE_NARROW_AUG_2017 public +HG19_ENCODE_NARROW_NOV_2016 public +HG19_ENCODE_NARROW_NOV_2017 public +HG19_ROADMAP_EPIGENOMICS_BED public +HG19_ROADMAP_EPIGENOMICS_BROADPEAK public +HG19_TCGA_cnv public +HG19_TCGA_dnamethylation public +HG19_TCGA_dnaseq public +HG19_TCGA_mirnaseq_isoform public +HG19_TCGA_mirnaseq_mirna public +HG19_TCGA_rnaseq_exon public +HG19_TCGA_rnaseq_gene public +HG19_TCGA_rnaseq_spljxn public +HG19_TCGA_rnaseqv2_exon public +HG19_TCGA_rnaseqv2_gene public +HG19_TCGA_rnaseqv2_isoform public +HG19_TCGA_rnaseqv2_spljxn public +TADs_Aiden public +TADs_Dixon public |
b |
diff -r 000000000000 -r a80c93182db3 test-data/sample1.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample1.bed Tue Jun 26 09:08:06 2018 -0400 |
b |
@@ -0,0 +1,12475 @@
+chr1	10464	10465	CHR1_P0001_R1	1000	+
+chr1	11873	11874	CHR1_P0001_R2	1000	+
+chr1	13420	13421	CHR1_P0001_R3	1000	+
+chr1	15945	15946	CHR1_M0001_R10	1000	-
+chr1	17480	17481	CHR1_M0001_R2	1000	-
[... 12,475 tab-separated BED records of TSS annotations in total; the middle of the file is truncated in the source listing ...]
+chr1	249141584	249141585	CHR1_P1913_R3	1000	+
+chr1	249142171	249142172	CHR1_P1913_R5	1000	+
+chr1	249200437	249200438	CHR1_P1914_R1	1000	+
+chr1	249208722	249208723	CHR1_P1915_R1	1000	+
diff -r 000000000000 -r a80c93182db3 test-data/sample1.bed.meta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample1.bed.meta	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,3 @@
+ann_type	TSS
+provider	UCSC
+assembly	hg19
diff -r 000000000000 -r a80c93182db3 test-data/sample2.bed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample2.bed	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,111 @@
+chr1	3190581	3191428	element_705	900
+chr1	8130439	8131887	element_1833	900
+chr1	10593123	10594209	element_677	200
+chr1	10732070	10733118	element_289	900
+chr1	10757664	10758631	element_361	200
+chr1	10781239	10781744	element_389	900
+chr1	10851570	10852173	element_408	900
+chr1	10925202	10925728	element_417	200
+chr1	10965129	10966144	element_241	900
+chr1	27049620	27050905	element_569	900
+chr1	32510030	32510913	element_645	900
+chr1	33722494	33723960	element_917	200
+chr1	33828789	33830947	element_1388	900
+chr1	38494689	38495688	element_1105	200
+chr1	38560194	38561606	element_238	900
+chr1	38627529	38629265	element_1124	200
+chr1	38656780	38657626	element_1179	200
+chr1	38662809	38664040	element_790	200
+chr1	38735942	38737376	element_1008	200
+chr1	38791998	38793333	element_1098	200
+chr1	38801888	38802630	element_270	200
+chr1	38819171	38820577	element_1045	200
+chr1	39192609	39194134	element_1031	900
+chr1	39248757	39250129	element_1139	900
+chr1	39291500	39292441	element_850	200
+chr1	41710596	41712937	element_1674	200
+chr1	44500383	44503337	element_1857	900
+chr1	44715420	44716129	element_277	900
+chr1	44989824	44991149	element_280	900
+chr1	49112757	49113363	element_295	200
+chr1	51006090	51007314	element_247	200
+chr1	51034546	51036289	element_194	900
+chr1	51098841	51099913	element_197	200
+chr1	51165195	51166786	element_200	900
+chr1	54925046	54928826	element_1487	900
+chr1	59522325	59524092	element_1485	900
+chr1	60102624	60105491	element_1609	200
+chr1	61086857	61087871	element_683	200
+chr1	61917795	61920190	element_1309	900
+chr1	62045460	62048159	element_1484	900
+chr1	62053433	62055908	element_1450	900
+chr1	63369349	63370894	element_311	900
+chr1	63443534	63444884	element_168	900
+chr1	63464282	63465717	element_169	900
+chr1	63665219	63666122	element_764	900
+chr1	82557436	82558140	element_436	200
+chr1	82663553	82664196	element_437	200
+chr1	82664608	82665419	element_438	200
+chr1	83220709	83221363	element_439	200
+chr1	83252219	83253218	element_440	200
+chr1	83310680	83311769	element_441	200
+chr1	83345366	83346271	element_442	900
+chr1	83360512	83361298	element_443	200
+chr1	83411289	83412040	element_444	200
+chr1	83878319	83879217	element_445	200
+chr1	87795192	87796737	element_809	900
+chr1	87803415	87805212	element_1134	200
+chr1	87821621	87823082	element_174	900
+chr1	87821793	87822910	element_322	900
+chr1	88028658	88029378	element_323	200
+chr1	88065041	88066530	element_1217	900
+chr1	88108084	88109396	element_1002	900
+chr1	88183654	88184961	element_1058	200
+chr1	88402821	88404888	element_1055	200
+chr1	88535719	88538390	element_1198	900
+chr1	88577535	88578821	element_1200	200
+chr1	88595049	88596320	element_1135	200
+chr1	88615687	88616927	element_878	900
+chr1	88646698	88648145	element_1216	200
+chr1	88686076	88687740	element_1107	900
+chr1	88841735	88843091	element_1068	200
+chr1	88875731	88877192	element_988	200
+chr1	88926796	88928508	element_327	900
+chr1	91305562	91307215	element_612	900
+chr1	92271540	92273987	element_1499	200
+chr1	97610491	97611741	element_671	900
+chr1	113540056	113542020	element_1672	900
+chr1	119028026	119029955	element_1428	900
+chr1	119452558	119453594	element_499	200
+chr1	163359231	163360494	element_1156	200
+chr1	163441941	163442842	element_762	900
+chr1	163507965	163509139	element_1185	900
+chr1	163939700	163940637	element_201	200
+chr1	164023244	164024214	element_202	200
+chr1	164197827	164199172	element_1230	200
+chr1	164620038	164621164	element_1235	900
+chr1	164637575	164639037	element_203	200
+chr1	164668592	164669823	element_1144	900
+chr1	164672787	164674206	element_970	200
+chr1	164700259	164701522	element_1136	200
+chr1	167296954	167299046	element_1331	900
+chr1	169910396	169913079	element_1442	900
+chr1	181121049	181123654	element_1862	900
+chr1	198263562	198265742	element_1322	900
+chr1	198339402	198341607	element_1443	200
+chr1	204423958	204424935	element_1368	900
+chr1	209064233	209067059	element_1694	200
+chr1	209989050	209989824	element_932	900
+chr1	210433698	210437258	element_1617	200
+chr1	213498112	213501134	element_1324	900
+chr1	213597964	213599524	element_204	900
+chr1	215888921	215890374	element_366	200
+chr1	216772416	216773458	element_175	200
+chr1	217766122	217767351	element_734	200
+chr1	218207432	218208498	element_1257	900
+chr1	218222277	218224086	element_1255	200
+chr1	221906778	221908480	element_1663	200
+chr1	232753930	232757436	element_1714	200
+chr1	243876467	243877893	element_545	900
+chr1	243895796	243896468	element_214	200
+chr1	244217325	244218426	element_476	200
diff -r 000000000000 -r a80c93182db3 test-data/sample2.bed.meta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample2.bed.meta	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,4 @@
+ann_type	enhancer
+provider	UCSC
+name	VistaEnhancers
+assembly	hg19
diff -r 000000000000 -r a80c93182db3 test-data/sample3.bed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample3.bed	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,1096 @@
+chr1	324287	324345	NR_028327	0	+
+chr1	567704	567793	NR_106781	0	-
+chr1	764382	764484	NR_047522	0	+
+chr1	901876	901994	NM_032129	0	+
[... 1,096 tab-separated RefSeq exon records in total (CRLF line endings); the middle of the file is truncated in the source listing; the final line holds only empty tab-separated fields ...]
+chr1	247701977	247702093	NM_001281834	0	+
+chr1	247712346	247712522	NM_145278	0	+
+chr1	249120033	249120154	NM_030645	0	-
diff -r 000000000000 -r a80c93182db3 test-data/sample3.bed.meta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample3.bed.meta	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,4 @@
+ann_type	exons
+provider	RefSeq
+name	RefSeqGeneExons
+assembly	hg19
diff -r 000000000000 -r a80c93182db3 test-data/select1.gmql_query
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/select1.gmql_query	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,1 @@
+OUT = SELECT (grant == 'Stam') Example_Dataset_1 ;
diff -r 000000000000 -r a80c93182db3 test-data/select2.gmql_query
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/select2.gmql_query	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,1 @@
+OUT = SELECT (patient_age == '64' AND cell == '8988T'; region: score > 3) Example_Dataset_1 ;
diff -r 000000000000 -r a80c93182db3 test-data/select3.gmql_query
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/select3.gmql_query	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,2 @@
+OUT = SELECT (patient_age == '64' AND cell == '8988T'; region: score > 3) Example_Dataset_1 ;
+OUT2 = SELECT (semijoin: cell IN OUT) Example_Dataset_2 ;
diff -r 000000000000 -r a80c93182db3 test-data/union1.gmql_query
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/union1.gmql_query	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,3 @@
+IN = SELECT () Example_Dataset_1 ;
+IN2 = SELECT () Example_Dataset_2 ;
+OUT = UNION () IN IN2;
diff -r 000000000000 -r a80c93182db3 utilities.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities.py	Tue Jun 26 09:08:06 2018 -0400
@@ -0,0 +1,226 @@
+# Helper functions to perform REST calls on the GMQL server.
+# ----------------------------------------------------------------------------
+# Luana Brancato, luana.brancato@mail.polimi.it
+# ----------------------------------------------------------------------------
+
+import sys
+import os
+import yaml
+import requests
+
+
+def load_parts(module, call):
+    """Given the module and the single operation, return the fragments of the url to call"""
+
+    y_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'gmql_rest.yaml')
+
+    with open(y_path, 'r') as yamlf:
+        # safe_load avoids executing arbitrary tags from the config file
+        cfg = yaml.safe_load(yamlf)
+
+    parts = []
+
+    gmql = cfg['GMQL_URL']
+    prefix = cfg[module]['prefix']
+    op = cfg[module]['operations'][call]
+
+    parts.append(gmql)
+    if prefix:
+        parts.append(prefix)
+
+    for p in op:
+        parts.append(p)
+
+    return parts
+
+
+def compose_url(module, call):
+    """Given the fragments of a url, return the composite one"""
+
+    parts = load_parts(module, call)
+    url = '/'.join(parts)
+
+    return url
+
+
+def add_url_param(params, module, op, value):
+    """Add to the params dict a new key:value pair, where the key is the one configured for the given module and operation"""
+
+    y_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'gmql_rest.yaml')
+
+    with open(y_path, 'r') as yamlf:
+        cfg = yaml.safe_load(yamlf)
+
+    key = cfg[module]['params'][op]
+
+    params.update({key: value})
+
+    return params
+
+
+def read_token(input):
+    """Read the tabular file with the user information
+       name <tab> authToken <tab> valid_flag
+    check that the session is still valid, and extract the authToken for the REST calls"""
+
+    with open(input, 'r') as f_in:
+        user = f_in.readline().rstrip('\n').split('\t')
+
+    # The validity flag is stored as the string 'True' or 'False'; it must be
+    # compared explicitly, since any non-empty string (including 'False') is truthy.
+    if user[2] != 'False':
+        token = user[1]
+    else:
+        stop_err("This session is no longer valid")
+
+    return token
+
+
+def expire_user(input):
+    """Set the validity flag of a user token to false"""
+
+    with open(input, 'r') as f:
+        user = f.readline().rstrip('\n').split('\t')
+
+    user[2] = False
+
+    with open(input, 'w') as f:
+        f.write('{fullName}\t{token}\t{valid}\n'.format(fullName=user[0], token=user[1],
+                                                        valid=user[2]))
+
+
+def get(url, user=None, response_type='json'):
+    """GET request
+    :param url: url where to fetch the requested resource
+    :param user: for authenticated requests; if not provided, make an unauthenticated request (e.g. for login)
+    :param response_type: type of the fetched response:
+        JSON (default), TEXT, ZIP, FILE
+    """
+
+    # Set request headers
+    headers = dict()
+
+    if user:
+        headers.update({'X-AUTH-TOKEN': read_token(user)})
+
+    if response_type == 'text':
+        headers.update({'Accept': 'text/plain'})
+    elif response_type == 'zip':
+        pass
+    elif response_type == 'file':
+        headers.update({'Accept': 'file'})
+    else:
+        headers.update({'Accept': 'application/json'})
+
+    # Make the request
+    response = requests.get(url, headers=headers)
+
+    # Check the returned server status
+    status_code = response.status_code
+
+    # If the server is OK, read the result according to response_type; raise an error otherwise
+    if status_code == requests.codes.ok:
+        if response_type == 'json':
+            return response.json()
+        elif response_type == 'text':
+            return response.text
+        else:
+            return response
+    elif status_code == requests.codes.unauthorized:
+        expire_user(user)
+        stop_err("You are not authorized to do this.\nPlease login first.")
+    elif status_code == requests.codes.not_found:
+        stop_err("Resource not found for this user.")
+    else:
+        stop_err("Error {code}: {reason}\n{message}".format(code=status_code,
+                                                            reason=response.reason,
+                                                            message=response.content))
+
+
+def post(url, payload, user=None, params=None, content_type='json', response_type='json'):
+    """POST request
+    :param url: url where to post data
+    :param payload: payload for the post request; its type is specified by content_type
+    :param user: for authenticated requests; if not provided, make an unauthenticated request (e.g. for registration)
+    :param params: optional query parameters
+    :param content_type: 'json' (default), 'text' or 'multiform'
+    :param response_type: default is json
+    """
+
+    # Set request headers
+    headers = dict()
+
+    if user:
+        headers.update({'X-AUTH-TOKEN': read_token(user)})
+
+    headers.update({'Accept': 'application/json'})
+
+    if content_type == 'text':
+        headers.update({'Content-Type': 'text/plain'})
+        response = requests.post(url, params=params, headers=headers, data=payload)
+    elif content_type == 'multiform':
+        # requests sets the multipart Content-Type (with boundary) on its own
+        response = requests.post(url, params=params, headers=headers, files=payload)
+    else:
+        headers.update({'Content-Type': 'application/json'})
+        response = requests.post(url, params=params, headers=headers, json=payload)
+
+    # Check the returned server status
+    status_code = response.status_code
+
+    if status_code == requests.codes.ok:
+        return response.json()
+    elif status_code == requests.codes.unauthorized:
+        content = response.content
+        if "The username or password you entered don't match" in content:
+            stop_err("The username or password you entered don't match")
+        else:
+            expire_user(user)
+            stop_err("You are not authorized to do this.\nPlease login first.")
+    else:
+        stop_err("Error {code}: {reason}\n{message}".format(code=status_code,
+                                                            reason=response.reason,
+                                                            message=response.content))
+
+
+def delete(url, user=None, response_type='json'):
+    """DELETE request
+    :param url: url of the resource to delete
+    :param user: for authenticated requests; if not provided, make an unauthenticated request
+    :param response_type: default is json
+    """
+
+    # Set request headers
+    headers = dict()
+
+    if user:
+        headers.update({'X-AUTH-TOKEN': read_token(user)})
+
+    headers.update({'Accept': 'application/json'})
+
+    # Make the request
+    response = requests.delete(url, headers=headers)
+
+    # Check the returned server status
+    status_code = response.status_code
+
+    # If the server is OK, read the result; raise an error otherwise
+    if status_code == requests.codes.ok:
+        return response.json()
+    elif status_code == requests.codes.unauthorized:
+        expire_user(user)
+        stop_err("You are not authorized to do this.\nPlease login first.")
+    elif status_code == requests.codes.not_found:
+        stop_err("Resource not found for this user.")
+    else:
+        stop_err("Error {code}: {reason}".format(code=status_code,
+                                                 reason=response.reason))
+
+
+def stop_err(msg):
+    sys.stderr.write("%s\n" % msg)
+    sys.exit()
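The helpers above are meant to be composed: a tool script resolves its endpoint from gmql_rest.yaml with compose_url() and passes the Galaxy-side token file to get(), post() or delete(). A minimal usage sketch follows; the module key 'datasets' and operation key 'list_datasets' are illustrative assumptions (the real keys live in gmql_rest.yaml, which is not reproduced here), as is the exact shape of the JSON response, assumed to carry a 'datasets' list of objects with a 'name' field.

# Usage sketch, not part of the commit: list the datasets visible to a user.
# Assumptions: gmql_rest.yaml maps module 'datasets' / operation 'list_datasets'
# to the server's datasets endpoint, and the response looks like
# {"datasets": [{"name": ...}, ...]}.
from utilities import compose_url, get

url = compose_url('datasets', 'list_datasets')
# token file format: name <tab> authToken <tab> valid_flag (see read_token above)
result = get(url, user='test-data/guest.gmql_user')   # response_type defaults to 'json'
for ds in result['datasets']:
    print(ds['name'])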