Repository 'gmql_download'
hg clone https://toolshed.g2.bx.psu.edu/repos/geco-team/gmql_download

Changeset 0:35d52820e7c7 (2018-06-26)
Commit message:
planemo upload for repository https://github.com/lu-brn/gmql-galaxy commit 953ee36ceda5814dc9baa03427bc0eb4ee2e93bd-dirty
added:
dynamic_utils.py
gmql_rest.yaml
gmql_rest_datasets.py
gmql_rest_datasets_download.xml
gmql_rest_macros.xml
test-data/Example1_Archive.zip
test-data/TG/S_00000.gdm
test-data/TG/S_00000.gdm.meta
test-data/TG/S_00001.gdm
test-data/TG/S_00001.gdm.meta
test-data/TG/S_00002.gdm
test-data/TG/S_00002.gdm.meta
test-data/TG/schema.xml
test-data/cover1.gmql_query
test-data/cover2.gmql_query
test-data/difference1.gmql_query
test-data/extend1.gmql_query
test-data/group1.gmql_query
test-data/guest.gmql_user
test-data/guest2.gmql_user
test-data/join1.gmql_query
test-data/join2.gmql_query
test-data/map1.gmql_query
test-data/merge1.gmql_query
test-data/order1.gmql_query
test-data/project1.gmql_query
test-data/query.gmql_query
test-data/rep.gmql_repository
test-data/sample1.bed
test-data/sample1.bed.meta
test-data/sample2.bed
test-data/sample2.bed.meta
test-data/sample3.bed
test-data/sample3.bed.meta
test-data/select1.gmql_query
test-data/select2.gmql_query
test-data/select3.gmql_query
test-data/union1.gmql_query
utilities.py
b
diff -r 000000000000 -r 35d52820e7c7 dynamic_utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dynamic_utils.py Tue Jun 26 09:01:39 2018 -0400
[
b'@@ -0,0 +1,281 @@\n+#!/usr/bin/env python\n+# --------------------------------------------------------------------------------\n+# Class for the dynamic options in the GMQL tools\n+# --------------------------------------------------------------------------------\n+# Luana Brancato, luana.brancato@mail.polimi.it\n+# --------------------------------------------------------------------------------\n+\n+import sys, requests\n+\n+\n+def validate(request_context, error_map, params, inputs):\n+    """Generic validate function, it checks if the user is valid."""\n+\n+    user = params.get(\'authToken\', \'\')\n+\n+    if user:\n+        try:\n+            validate_user(user.file_name)\n+        except:\n+            error_msg = \'User has expired\'\n+            error_map[\'authToken\'] = error_msg\n+\n+\n+def validate_upload(request_context, error_map, params, inputs):\n+    """Validate function for uploading tool. It also checks the chosen ds name does not exists already."""\n+\n+    validate(request_context, error_map, params, inputs)\n+\n+    name = params.get(\'name\')\n+\n+    user = params.get(\'authToken\')\n+\n+    #This MUST be changed in the future to a parametric solution. Hopefully in the future Galaxy will allow\n+    #validation without external scripts\n+\n+    url = \'http://genomic.elet.polimi.it/gmql-rest/datasets\'\n+\n+    datasets = get(url, user=user.file_name)\n+    list_datasets = [x[\'name\'] for x in datasets[\'datasets\']]\n+\n+    if name in list_datasets:\n+        error_msg = \'Dataset already exists. Choose another name.\'\n+        error_map[\'name\'] = error_msg\n+\n+\n+def validate_variables(request_context, error_map, params, inputs):\n+    """Validate function for gmql_compositor. It checks that all queries input variables\n+    have been previously defined. 
"""\n+\n+    validate(request_context, error_map, params, inputs)\n+\n+    output_vars = set([])\n+\n+    # TODO: Include in the check output variables eventually defined previously in another query\n+\n+    for op in params.get(\'operations\'):\n+        op_curr = op.get(\'operation\')\n+        if op_curr.get(\'input\', \'\'):\n+            input_var = op_curr.get(\'input\').get(\'input_var\', \'\')\n+            if input_var:\n+                if input_var not in output_vars:\n+                    error_msg = \'%s has not been defined yet\\n\' % (input_var)\n+                    name = \'|\'.join([\'operations_%d\' % (op.get(\'__index__\')), \'operation\', \'input\', \'input_var\'])\n+                    error_map[name] = error_msg\n+        else:\n+            for key in op_curr.keys():\n+                if key.startswith(\'input_var\'):\n+                    input_var = op_curr.get(key)\n+                    if input_var:\n+                        if input_var not in output_vars:\n+                            error_msg = \'%s has not been defined yet\\n\' % (input_var)\n+                            name = \'|\'.join([\'operations_%d\' % (op.get(\'__index__\')), \'operation\', key])\n+                            error_map[name] = error_msg\n+\n+        # Update output_vars with the result of current operation\n+        output_vars.add(op_curr.get(\'output_var\'))\n+\n+\n+def validate_user(user):\n+    """Check if the user is a valid one"""\n+\n+    if user:\n+        with open(user, \'r\') as f:\n+            valid = f.readline().rstrip(\'\\n\').split(\'\\t\')[2]\n+            if valid == \'False\':\n+                raise Exception, "User has expired"\n+\n+\n+def get_metadata_attr(user, ds, ds_list):\n+    options = []\n+\n+    try:\n+        validate_user(user)\n+        if ds_list:\n+\n+            owner = \'\'\n+\n+            with open(ds_list, \'r\') as f:\n+                for d in f.readlines():\n+                    if d.split(\'\\t\')[0] == ds:\n+     
                   owner = d.split(\'\\t\')[1].rstrip(\'\\n\')\n+            f.close()\n+\n+            attr_list = get_metadata(user, ds, str(owner))\n+\n+            for i, att in enumerate(attr_list[\'attributes\']):\n+                options.append((att.get(\'key\', \' \'), att.get(\'key\', \' \'), i == 0))\n+\n+            return options\n+\n+        else:\n+            return option'..b'    """It takes the tabular file with the information over the user\n+     name   authToken   valid_flag\n+     It checks if the user is still valid and extract the authToken for the REST calls"""\n+\n+    with open(input, \'r\') as f_in:\n+        user = f_in.readline().rstrip(\'\\n\').split(\'\\t\')\n+\n+    if user[2]:\n+        token = user[1]\n+    else:\n+        stop_err("This session is no longer valid")\n+\n+    return token\n+\n+\n+def get(url, user=None, response_type=\'json\'):\n+    """GET Request\n+    :param url: url where to fetch the requested resource\n+    :param user: for authenticated requests; if not provided make an unauthenticated request (es. for login)\n+    :param response_type: type of the fetched response.\n+        JSON ( Default )\n+        TEXT\n+        ZIP\n+        FILE\n+    """\n+\n+    # Set request headers\n+    headers = dict()\n+\n+    if user:\n+        headers.update({\'X-AUTH-TOKEN\': read_token(user)})\n+\n+    if response_type == \'text\':\n+        headers.update({\'Accept\': \'text/plain\'})\n+    elif response_type == \'zip\':\n+        pass\n+    elif response_type == \'file\':\n+        headers.update({\'Accept\': \'file\'})\n+    else:\n+        headers.update({\'Accept\': \'application/json\'})\n+\n+    # Make the request\n+    response = requests.get(url, headers=headers)\n+\n+    # Check returned server status\n+    status_code = response.status_code\n+\n+    # Read result. If Server OK, read according to response_type. 
Raise an error otherwise.\n+    if status_code == requests.codes.ok:\n+        if response_type == \'json\':\n+            return response.json()\n+        elif response_type == \'text\':\n+            return response.text\n+        else:\n+            return response\n+    elif status_code == requests.codes.unauthorized:\n+        #expire_user(user)\n+        stop_err("You are not authorized to do this. \\nPlease login first.")\n+    elif status_code == requests.codes.not_found:\n+        stop_err("Resource not found for this user.")\n+    else:\n+        stop_err("Error {code}: {reason}\\n{message}".format(code=status_code,\n+                                                            reason=response.reason,\n+                                                            message=response.content))\n+\n+def post(url, payload, user=None, params=None, content_type=\'json\', response_type=\'json\') :\n+    """ POST Request\n+    :param url: url where to post data\n+    :param payload: payload for the post request. Type is specified by content_type.\n+    :param user:  for authenticated requests; if not provided make an unauthenticated request (es. 
for registration)\n+    :param params: optional query parameters\n+    :param content_type\n+    :param response_type: Default is json\n+    """\n+\n+\n+    # Set request headers\n+    headers = dict()\n+\n+    if user:\n+        headers.update({\'X-AUTH-TOKEN\': read_token(user)})\n+\n+    headers.update({\'Accept\': \'application/json\'})\n+\n+    if content_type == \'text\' :\n+        headers.update({\'Content-Type\' : \'text/plain\'})\n+        response = requests.post(url, params=params, headers=headers, data=payload)\n+    elif content_type == \'multiform\' :\n+        response = requests.post(url, params=params, headers=headers, files=payload)\n+    else :\n+        headers.update({\'Content-Type\': \'application/json\'})\n+        response = requests.post(url, params=params, headers=headers, json=payload)\n+\n+    # Check returned server status\n+    status_code = response.status_code\n+\n+\n+    if status_code == requests.codes.ok :\n+       return response.json()\n+    elif status_code == requests.codes.unauthorized :\n+       #expire_user(user)\n+       stop_err("You are not authorized to do this. \\nPlease login first.")\n+    else :\n+        stop_err("Error {code}: {reason}\\n{message}".format(code=status_code,\n+                                                 reason=response.reason,\n+                                                 message=response.content))\n+\n+\n+def stop_err(msg):\n+    sys.stderr.write("%s\\n" % msg)\n+    sys.exit()\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 35d52820e7c7 gmql_rest.yaml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_rest.yaml Tue Jun 26 09:01:39 2018 -0400
[
@@ -0,0 +1,43 @@
+---
+GMQL_URL :
+    http://genomic.elet.polimi.it/gmql-rest #server to use
+access:
+    prefix: ''
+    operations:
+        user: [user]
+        guest: [guest]
+        register: [register]
+        login: [login]
+        logout: [logout]
+metadata:
+    prefix: 'metadata'
+    operations:
+        list: ['{datasetName}', filter]
+repository: 
+    prefix: datasets
+    operations:
+        list_datasets : []
+        list_samples : ['{datasetName}']
+        delete_dataset : ['{datasetName}']
+        rename_dataset : ['{datasetName}', rename, '{newDatasetName}']
+        download_zip : ['{datasetName}', zip]
+        download_sample : ['{datasetName}','{sample}',region]
+        download_meta : ['{datasetName}','{sample}',metadata]
+        upload_url : ['{datasetName}',uploadSampleUrls]
+        upload_data : ['{datasetName}',uploadSample]
+        schema : ['{datasetName}',schema]
+    params:
+        upload_url: schemaName
+        upload_data: schemaName
+query_exec: 
+    prefix: queries
+    operations:
+        compile: [compile]
+        run: [run,'{name}','{output}']
+query_monitor : 
+    prefix: jobs
+    operations:
+        jobs: []
+        log: ['{jobid}',log]
+        stop: ['{jobid}',stop]
+        status: ['{jobid}',trace]
b
diff -r 000000000000 -r 35d52820e7c7 gmql_rest_datasets.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_rest_datasets.py Tue Jun 26 09:01:39 2018 -0400
[
b'@@ -0,0 +1,411 @@\n+# Galaxy plugin to REST access to the GMQL services\n+# (Datasets)\n+# ----------------------------------------------------------------------------\n+# Luana Brancato, luana.brancato@mail.polimi.it\n+# ----------------------------------------------------------------------------\n+\n+import argparse\n+\n+import tempfile\n+import json\n+from utilities import *\n+\n+module = \'repository\'\n+\n+\n+def list_datasets(user, output, saveResult=True):\n+    """Retrieve the list of available datasets"""\n+\n+    call = \'list_datasets\'\n+    url = compose_url(module,call)\n+\n+    datasets = get(url, user=user)\n+    list_datasets = datasets[\'datasets\']\n+\n+    if saveResult:\n+        with open(output,\'w\') as f:\n+            for ds in list_datasets:\n+                f.write("{name}\\t{owner}\\n".format(name=ds[\'name\'],owner=ds[\'owner\']))\n+        f.close()\n+    else:\n+        return list_datasets\n+\n+\n+def list_samples(user, output, ds):\n+    """List the samples of a given dataset"""\n+\n+    call = \'list_samples\'\n+    url = compose_url(module,call)\n+\n+    # Specify for which dataset.\n+    # If it\'s a public dataset, the \'public.\' prefix must be added to the dataset name\n+\n+    # Check if the ds is public or not\n+    owner = \'\'\n+    for d in list_datasets(user, \'\', False):\n+        if d[\'name\'] == ds :\n+            owner = d[\'owner\']\n+\n+    if (owner==\'public\'):\n+        url = url.format(datasetName=\'public.\'+ ds)\n+    else :\n+        url = url.format(datasetName=ds)\n+\n+    samples = get(url, user=user)\n+    list_s = samples[\'samples\']\n+\n+    with open(output, \'w\') as f_out:\n+        for s in list_s:\n+            f_out.write("{id}\\t{name}\\t{ext}\\n".format(id=s[\'id\'], name=s[\'name\'],ext=s[\'path\'].rsplit(\'.\',1)[1]))\n+\n+\n+def rename_dataset(user, output, ds, new):\n+    """Rename a dataset from the user\'s private space"""\n+\n+    call = \'rename_dataset\'\n+    url = 
compose_url(module,call)\n+    url = url.format(datasetName=ds, newDatasetName=new)\n+\n+    outcome = get(url, user=user)\n+\n+    # Return the updated list of user\'s datasets\n+    list_datasets(user, output)\n+\n+    # Write on stdout the operation outcome\n+    sys.stdout.write("Rename: {result}".format(result=outcome[\'result\']))\n+\n+\n+def delete_dataset(user, output, ds):\n+    """Delete a dataset from the user\'s private space"""\n+\n+    call = \'delete_dataset\'\n+    url = compose_url(module,call)\n+    url = url.format(datasetName=ds)\n+\n+    outcome = delete(url, user=user)\n+\n+    #Return the updated list of user\'s datasets\n+    list_datasets(user, output)\n+\n+    #Write on stdout the operation outcome\n+    sys.stdout.write("Delete: {result}".format(result=outcome[\'result\']))\n+\n+\n+def upload_samples_url(user, output, dataset, schema, samples, updatedDsList):\n+    """Upload a dataset given the urls of the samples and their schema"""\n+\n+    #Compose the url for the REST call\n+    call = \'upload_url\'\n+    url = compose_url(module,call)\n+    url = url.format(datasetName=dataset)\n+\n+    content = dict()\n+\n+    # Put back escaped \'&\'\n+    samples = samples.replace(\'__amp__\', \'&\')\n+    schema = schema.replace(\'__amp__\', \'&\')\n+\n+    # If schema type is given, add the option to the url. Otherwise, it check if the provided schema is a valid url.\n+\n+    params = dict ()\n+\n+    if schema in [\'bed\',\'bedGraph\',\'NarrowPeak\',\'BroadPeak\',\'vcf\'] :\n+        params = add_url_param(params, module, call, schema)\n+    else:\n+        #check_schema = validators.url(schema)\n+        #if isinstance(check_schema, validators.utils.ValidationFailure): stop_err("Schema URL not valid")\n+        content.update(schema_file=schema)\n+\n+\n+    # Samples are listed one per line. 
It lists them looking for the new line marker (\'__cn__\')\n+    samples_list = samples.split(\'__cn__\')\n+\n+    # The regexp in input can allow a final empty string. The following removes it if present.\n+    if not samples_list[-1]:\n+        samples_list.remove("")\n+\n+    # # For each sample url, check if it is valid. If at least ones is not, upload fails\n+    # # and'..b'_samples(user, temp.name, ds)\n+\n+    # Retrieve names and extensions of the samples\n+    with open(temp.name, "r") as t:\n+        samples = map(lambda x: helper_samples(x), t)\n+    t.close()\n+\n+    os.makedirs(\'samples\')\n+    os.makedirs(\'metadata\')\n+\n+    # Create a new dict containing names and actual path to files\n+\n+    for s in samples:\n+\n+        # Get the sample\n+        get_sample(user, "samples/{name}.{ext}".format(name=s[\'name\'].replace(\'_\',\'\'), ext=s[\'ext\']), ds, s[\'name\'])\n+\n+        # Get its metadata\n+        get_sample_meta(user,"metadata/{name}.meta".format(name=s[\'name\'].replace(\'_\',\'\')),ds,s[\'name\'])\n+\n+def helper_samples(s):\n+    """From a list of samples retrieve name and extension"""\n+    split = s.split(\'\\t\')\n+    sample = dict()\n+    sample.update(name=split[1])\n+    sample.update(ext=split[2].rstrip(\'\\n\'))\n+\n+    return sample\n+\n+\n+def get_schema(user, ds, file) :\n+    """Get the schema field of the input dataset and save it in file"""\n+\n+    call = "schema"\n+\n+    url = compose_url(module, call)\n+\n+    # Check if the ds is public or not\n+    owner = \'\'\n+    for d in list_datasets(user, \'\', False):\n+        if d[\'name\'] == ds :\n+            owner = d[\'owner\']\n+\n+    if (owner==\'public\'):\n+        url = url.format(datasetName=\'public.\'+ ds)\n+    else :\n+        url = url.format(datasetName=ds)\n+\n+    schema = get(url, user=user)\n+\n+\n+    with open(file,\'w\') as f_out:\n+        for f in schema[\'fields\'] :\n+            
f_out.write(\'{field}\\t{type}\\n\'.format(field=f[\'name\'],type=f[\'type\']))\n+\n+\n+\n+def set_columns_names(user, ds_name, samples_file, schema_file):\n+\n+    get_schema(user,ds_name, schema_file)\n+\n+    cwd = os.getcwd().rsplit(\'/\',1)[0]\n+    file = \'/\'.join([cwd, \'galaxy.json\'])\n+\n+    with open(schema_file, \'r\') as f_in:\n+        columns = [x.split(\'\\t\') for x in f_in]\n+        column_names = [x[0] for x in columns]\n+        column_types = [x[1].rstrip(\'\\n\') for x in columns]\n+\n+    metadata = dict()\n+    metadata.update(column_names=column_names,\n+                    column_types=column_types)\n+\n+\n+    with open(file, \'w\') as f_out:\n+        with open(samples_file, \'r\') as f_in:\n+            samples_list = map(lambda x: x, f_in)\n+            samples_list.pop()\n+            for s in samples_list:\n+                config = dict()\n+                config.update(type=\'new_primary_dataset\',\n+                                  filename=s,\n+                                  metadata=metadata)\n+                f_out.write(json.dumps(config) + \'\\n\')\n+\n+\n+\n+def stop_err(msg):\n+    sys.stderr.write("%s\\n" % msg)\n+    sys.exit()\n+\n+\n+def __main__():\n+\n+    parser = argparse.ArgumentParser()\n+    parser.add_argument("output")\n+    parser.add_argument("-opt_out1")\n+    parser.add_argument("-user")\n+    parser.add_argument("-cmd")\n+    parser.add_argument("-samples")\n+    parser.add_argument("-dataset")\n+    parser.add_argument("-new_name")\n+    parser.add_argument("-schema")\n+    parser.add_argument("-add_output")\n+\n+    args = parser.parse_args()\n+\n+    if args.cmd == \'list\':\n+        list_datasets(args.user, args.output)\n+    if args.cmd == \'samples\':\n+        list_samples(args.user, args.output, args.dataset)\n+    if args.cmd == \'rename\' :\n+        rename_dataset(args.user, args.output, args.dataset, args.new_name)\n+    if args.cmd == \'delete\':\n+        delete_dataset(args.user, 
args.output, args.dataset)\n+    if args.cmd == \'upload_url\':\n+        upload_samples_url(args.user, args.output, args.dataset, args.schema, args.samples, args.add_output)\n+    if args.cmd == \'upload\' :\n+        upload_samples(args.user, args.output, args.dataset, args.schema, args.samples, args.add_output)\n+    if args.cmd == \'import\':\n+        import_samples(args.user, args.dataset)\n+    if args.cmd == \'download\' :\n+        download_samples(args.user,args.output,args.dataset)\n+    if args.cmd == \'schema\' :\n+        set_columns_names(args.user, args.dataset, args.samples, args.output)\n+\n+\n+if __name__ == "__main__":\n+    __main__()\n'
b
diff -r 000000000000 -r 35d52820e7c7 gmql_rest_datasets_download.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_rest_datasets_download.xml Tue Jun 26 09:01:39 2018 -0400
[
@@ -0,0 +1,90 @@
+<tool id="gmql_download" name="GMQL Import Dataset" version="0.1.1">
+  <macros>
+    <import>gmql_rest_macros.xml</import>
+  </macros>
+    <command><![CDATA[
+        #if $operation == 'import' :
+         python $__tool_directory__/gmql_rest_datasets.py '' -user=$authToken -cmd=import -dataset=${dataset}
+        #else :
+         python $__tool_directory__/gmql_rest_datasets.py $ds_archive -user=$authToken -cmd=download -dataset=${dataset}
+        #end if
+   ]]></command>
+    <code file="dynamic_utils.py">
+        <hook validate_input="validate" />
+    </code>
+    <inputs>
+        <param format="gmql_user" name="authToken" type="data" label="Select user" />
+        <param name="gmql_datasets" type="data" format="gmql_repository" label="Working Datasets"/>
+        <param name="dataset" type="select" label="Select Dataset">
+            <options from_dataset="gmql_datasets">
+                <column name="value" index="0"/>
+                <filter column="1" type="static_value" value="public" keep="false"/>
+            </options>
+        </param>
+        <param name="operation" type="select" display="radio" multiple="false" label="Select the desired action">
+            <option value="import">Import data as a collection</option>
+            <option value="zip">Download zip archive</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="zip" name="ds_archive" label="${dataset} Compressed Archive">
+            <filter>operation == 'zip'</filter>
+        </data>
+        <collection name="query_results_m" type="list" label="${dataset} metadata">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[\w]+)\.(?P&lt;ext&gt;[^\._]+)?"
+                               directory="metadata"/>
+            <filter>operation == 'import'</filter>
+        </collection>
+        <collection name="query_results_s" type="list" label="${dataset}">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[\w]+)\.(?P&lt;ext&gt;[^\._]+)?"
+                               directory="samples"/>
+            <filter>operation == 'import'</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="authToken" value="guest.gmql_user" />
+            <param name="gmql_datasets" value="rep.gmql_repository" />
+            <param name="dataset" value="Example1" />
+            <param name="operation" value="zip" />
+            <output name="ds_archive" file="Example1_Archive.zip" compare="sim_size" delta="0"/>
+        </test>
+        <test>
+            <param name="authToken" value="guest.gmql_user" />
+            <param name="gmql_datasets" value="rep.gmql_repository" />
+            <param name="dataset" value="Example1" />
+            <param name="operation" value="import" />
+            <collection name="query_results_s" type="list">
+               <metadata name="name" value="Example1" />
+               <discovered_dataset designation="sample1" file="sample1.bed"/>
+               <discovered_dataset designation="sample2" file="sample2.bed"/>
+               <discovered_dataset designation="sample3" file="sample3.bed"/>
+            </collection>
+            <collection name="query_results_m" type="list">
+               <metadata name="name" value="Example1 metadata" />
+               <discovered_dataset designation="sample1" file="sample1.bed.meta"/>
+               <discovered_dataset designation="sample2" file="sample2.bed.meta"/>
+               <discovered_dataset designation="sample3" file="sample3.bed.meta"/>
+            </collection>
+        </test>
+    </tests>
+    <help>
+Imports the selected dataset into the current Galaxy history.
+
+----
+
+**What it does**
+
+- **Import as Collection**: the returned data consists of two collections, one for the samples and one for their metadata
+- **Download ZIP Archive**: returns the dataset as a zip archive
+
+
+The user provides a list of datasets (a gmql_repository file) and selects from it the dataset to import.
+
+.. class:: warningmark
+
+A gmql_user authentication token is required for every action.
+
+  </help>
+  <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r 35d52820e7c7 gmql_rest_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gmql_rest_macros.xml Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,28 @@
+<macros>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @article{Masseroli2015,
+                author = {Marco Masseroli and Pietro Pinoli and Francesco Venco and Abdulrahman Kaitoua and Vahid Jalili and Fernando Palluzzi and Heiko Muller and Stefano Ceri},
+                doi = {10.1093/bioinformatics/btv048},
+                isbn = {13674811 (Electronic)},
+                issn = {14602059},
+                journal = {Bioinformatics},
+                number = {12},
+                pages = {1881-1888},
+                pmid = {25649616},
+                title = {{GenoMetric Query Language: A novel approach to large-scale genomic data management}},
+                volume = {31},
+                year = {2015}
+                }
+            </citation>
+            <citation type="bibtex">
+                @misc{BionformaticsGroup,
+                author = {{Bioinformatics Group, Politecnico di Milano}},
+                title = {{GMQL Documentation}},
+                url = {http://www.bioinformatics.deib.polimi.it/genomic{\_}computing/GMQLsystem/documentation.html},
+                }
+            </citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r 35d52820e7c7 test-data/Example1_Archive.zip
b
Binary file test-data/Example1_Archive.zip has changed
b
diff -r 000000000000 -r 35d52820e7c7 test-data/TG/S_00000.gdm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TG/S_00000.gdm Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,3 @@
+chr1 99 150 * GMQL Region 4 . . 10 7.0638 -1 -1
+chr1 119 180 * GMQL Region 3 . . 24 19.7648 -1 -1
+chr1 219 240 * GMQL Region 8 . . 13 11.2001 -1 -1
b
diff -r 000000000000 -r 35d52820e7c7 test-data/TG/S_00000.gdm.meta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TG/S_00000.gdm.meta Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,71 @@
+ID 1460
+avg_score 6
+cell AG04450
+cell_description fetal lung fibroblast
+cell_karyotype normal
+cell_lineage endoderm
+cell_orderUrl http://ccr.coriell.org/Sections/Search/Sample_Detail.aspx?Ref=AG04450
+cell_organism human
+cell_protocol Stam:AG04450_Stam_protocol.pdf
+cell_sex Female
+cell_tag AG04450
+cell_termId BTO:0000763
+cell_termUrl http://www.ebi.ac.uk/ontology-lookup/browse.do?ontName=BTO&termId=BTO%3A0000763
+cell_tier 3
+cell_tissue lung
+cell_type Cell Line
+cell_vendorId AG04450
+cell_vendorName Coriell
+composite wgEncodeUwDnase
+dataType DnaseSeq
+dataType_dataGroup Open Chromatin
+dataType_description DNaseI HS Sequencing
+dataType_label DNase-seq
+dataType_tag DNASESEQ
+dataType_type dataType
+dataVersion ENCODE June 2010 Freeze
+dateResubmitted 2010-06-17
+dateSubmitted 2010-01-07
+dateUnrestricted 2010-10-07
+dccAccession wgEncodeEH000506
+geoSampleAccession GSM736563
+grant Stam
+grant_description Stamatoyannopoulous
+grant_grantInst University of Washington
+grant_label Stamatoyannopoulous
+grant_projectName UW
+grant_tag STAM
+grant_type grant
+lab UW
+labExpId DS12255
+labVersion lmax-v1.0
+lab_description Stamatoyannopoulous - University of Washington
+lab_grantPi Stam
+lab_labInst University of Washington
+lab_labPi Stam
+lab_labPiFull John Stamatoyannopoulous
+lab_label Stamatoyannopoulous - UW
+lab_organism human
+lab_tag UW
+lab_type lab
+md5sum 1250e67a89764a7e877bca3bbee70f62
+origAssembly hg18
+patient_age 75
+project wgEncode
+replicate 2
+sex Female
+size 1.8M
+subId 1549
+tableName wgEncodeUwDnaseAg04450PkRep2
+treatment None
+treatment_description No special treatment or protocol applies
+treatment_label No treatment or prot
+treatment_tag NONE
+treatment_type control
+type narrowPeak
+url http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeUwDnase/wgEncodeUwDnaseAg04450PkRep2.narrowPeak.gz
+view Peaks
+view_description Regions of enriched signal in experiment
+view_label Peaks
+view_tag PKS
+view_type view
b
diff -r 000000000000 -r 35d52820e7c7 test-data/TG/S_00001.gdm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TG/S_00001.gdm Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,2 @@
+chr1 149 180 * GMQL Region 6 . . 35 30.2764 -1 -1
+chr1 229 235 * GMQL Region 9 . . 17 11.1675 -1 -1
b
diff -r 000000000000 -r 35d52820e7c7 test-data/TG/S_00001.gdm.meta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TG/S_00001.gdm.meta Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,93 @@
+ID 1891
+antibody CTCF
+antibody_antibodyDescription Rabbit polyclonal. Antibody Target: CTCF
+antibody_lab Myers, Hardison, Snyder
+antibody_label CTCF (07-729)
+antibody_lots 1350637 DAM1472197
+antibody_orderUrl http://www.millipore.com/catalogue/item/07-729
+antibody_tag CTCF
+antibody_target CTCF
+antibody_targetClass TFSS
+antibody_targetDescription CTCF zinc finger transcription factor. A sequence specific DNA binding protein that functions as an insulator, blocking enhancer activity. It has also been suggested to block the spreading of chromatin structure in certain instances.
+antibody_targetId GeneCard:CTCF
+antibody_targetUrl http://www.genecards.org/cgi-bin/carddisp.pl?gene=CTCF
+antibody_type Antibody
+antibody_validation Human_-_CTCF_(07-729)(Western_blot,Motif_Enrichment):human_CTCF_validation_Myers.pdf Human_-_CTCF_(07-729)(Western_blot):human_CTCF_07-729_validation_Snyder.pdf Mouse_-_CTCF(Western_blot):mouse_CTCF_validation_Hardison.pdf
+antibody_vendorId 07-729
+antibody_vendorName Millipore
+avg_score 7.5
+cell AG04450
+cell_description fetal lung fibroblast
+cell_karyotype normal
+cell_lineage endoderm
+cell_orderUrl http://ccr.coriell.org/Sections/Search/Sample_Detail.aspx?Ref=AG04450
+cell_organism human
+cell_protocol Stam:AG04450_Stam_protocol.pdf
+cell_sex Male
+cell_tag AG04450
+cell_termId BTO:0000763
+cell_termUrl http://www.ebi.ac.uk/ontology-lookup/browse.do?ontName=BTO&termId=BTO%3A0000763
+cell_tier 2
+cell_tissue lung
+cell_type Cell Line
+cell_vendorId AG04450
+cell_vendorName Coriell
+composite wgEncodeUwTfbs
+control std
+controlId wgEncodeEH000930
+control_description Standard input signal for most experiments.
+control_label Standard Control
+control_tag STD
+control_type control
+dataType ChipSeq
+dataType_dataGroup TFBS & Histones
+dataType_description Chromatin IP Sequencing
+dataType_label ChIP-seq
+dataType_tag CHIPSEQ
+dataType_type dataType
+dataVersion ENCODE Jan 2011 Freeze
+dateSubmitted 2010-10-23
+dateUnrestricted 2011-07-23
+dccAccession wgEncodeEH000976
+geoSampleAccession GSM749769
+grant Stam
+grant_description Stamatoyannopoulous
+grant_grantInst University of Washington
+grant_label Stamatoyannopoulous
+grant_projectName UW
+grant_tag STAM
+grant_type grant
+lab UW
+labExpId DS16029
+labVersion lmax-v1.0
+lab_description Stamatoyannopoulous - University of Washington
+lab_grantPi Stam
+lab_labInst University of Washington
+lab_labPi Stam
+lab_labPiFull John Stamatoyannopoulous
+lab_label Stamatoyannopoulous - UW
+lab_organism human
+lab_tag UW
+lab_type lab
+md5sum 3c6b5bf3eefc28b6bab4ef37916011a1
+origAssembly hg19
+patient_age 63
+project wgEncode
+replicate 1
+setType exp
+sex Male
+size 776K
+subId 2663
+tableName wgEncodeUwTfbsAg04450CtcfStdPkRep1
+treatment None
+treatment_description No special treatment or protocol applies
+treatment_label No treatment or prot
+treatment_tag NONE
+treatment_type control
+type narrowPeak
+url http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeUwTfbs/wgEncodeUwTfbsAg04450CtcfStdPkRep1.narrowPeak.gz
+view Peaks
+view_description Regions of enriched signal in experiment
+view_label Peaks
+view_tag PKS
+view_type view
b
diff -r 000000000000 -r 35d52820e7c7 test-data/TG/S_00002.gdm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TG/S_00002.gdm Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,11 @@
+chr2 99 150 * GMQL Region 10 . . 9 5.27936 -1 -1
+chr2 199 240 * GMQL Region 9 . . 22 19.6922 -1 -1
+chr2 399 440 * GMQL Region 0.9 . . 15 15.1452 -1 -1
+chr2 539 570 * GMQL Region 0.9 . . 15 15.1452 -1 -1
+chr4 49 100 + GMQL Region 0.9 . . 15 15.1452 -1 -1
+chr4 149 179 - GMQL Region 0.9 . . 15 15.1452 -1 -1
+chr4 199 249 - GMQL Region 0.9 . . 15 15.1452 -1 -1
+chr5 99 200 * GMQL Region 4 . . 15 17.1452 -1 -1
+chr5 299 340 + GMQL Region 4 . . 15 15.1452 -1 -1
+chr5 319 340 - GMQL Region 3 . . 15 17.1452 -1 -1
+chr5 379 420 - GMQL Region 3 . . 15 17.1452 -1 -1
b
diff -r 000000000000 -r 35d52820e7c7 test-data/TG/S_00002.gdm.meta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TG/S_00002.gdm.meta Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,71 @@
+ID 1459
+Info example text
+avg_score 5
+cell AG04450
+cell_description fetal lung fibroblast
+cell_karyotype normal
+cell_lineage endoderm
+cell_orderUrl http://ccr.coriell.org/Sections/Search/Sample_Detail.aspx?Ref=AG04450
+cell_organism human
+cell_protocol Stam:AG04450_Stam_protocol.pdf
+cell_sex Male
+cell_tag AG04450
+cell_termId BTO:0000763
+cell_termUrl http://www.ebi.ac.uk/ontology-lookup/browse.do?ontName=BTO&termId=BTO%3A0000763
+cell_tier 2
+cell_tissue lung
+cell_type Cell Line
+cell_vendorId AG04450
+cell_vendorName Coriell
+composite wgEncodeUwDnase
+dataType DnaseSeq
+dataType_dataGroup Open Chromatin
+dataType_description DNaseI HS Sequencing
+dataType_label DNase-seq
+dataType_tag DNASESEQ
+dataType_type dataType
+dataVersion ENCODE June 2010 Freeze
+dateResubmitted 2010-06-17
+dateSubmitted 2010-01-07
+dateUnrestricted 2010-10-07
+dccAccession wgEncodeEH000506
+geoSampleAccession GSM736514
+grant Stam
+grant_description Stamatoyannopoulous
+grant_grantInst University of Washington
+grant_label Stamatoyannopoulous
+grant_projectName UW
+grant_tag STAM
+grant_type grant
+lab UW
+labExpId DS12270
+labVersion lmax-v1.0
+lab_description Stamatoyannopoulous - University of Washington
+lab_grantPi Stam
+lab_labInst University of Washington
+lab_labPi Stam
+lab_labPiFull John Stamatoyannopoulous
+lab_label Stamatoyannopoulous - UW
+lab_organism human
+lab_tag UW
+lab_type lab
+md5sum 8f7c4f145d130385f9b5e732961b3c42
+origAssembly hg18
+project wgEncode
+replicate 1
+sex Male
+size 1.7M
+subId 1550
+tableName wgEncodeUwDnaseAg04450PkRep1
+treatment None
+treatment_description No special treatment or protocol applies
+treatment_label No treatment or prot
+treatment_tag NONE
+treatment_type control
+type narrowPeak
+url http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeUwDnase/wgEncodeUwDnaseAg04450PkRep1.narrowPeak.gz
+view Peaks
+view_description Regions of enriched signal in experiment
+view_label Peaks
+view_tag PKS
+view_type view
b
diff -r 000000000000 -r 35d52820e7c7 test-data/TG/schema.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TG/schema.xml Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,18 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<gmqlSchemaCollection name="queryname_20180417_000927_TG" xmlns="http://genomic.elet.polimi.it/entities">
+ <gmqlSchema type="Peak" coordinate_system="default">
+ <field type="STRING">chr</field>
+ <field type="LONG">left</field>
+ <field type="LONG">right</field>
+ <field type="CHAR">strand</field>
+ <field type="STRING">source</field>
+ <field type="STRING">feature</field>
+ <field type="DOUBLE">score</field>
+ <field type="STRING">frame</field>
+ <field type="STRING">name</field>
+ <field type="DOUBLE">signal</field>
+ <field type="DOUBLE">pvalue</field>
+ <field type="DOUBLE">qvalue</field>
+ <field type="DOUBLE">peak</field>
+ </gmqlSchema>
+</gmqlSchemaCollection>
\ No newline at end of file
b
diff -r 000000000000 -r 35d52820e7c7 test-data/cover1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cover1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,2 @@
+IN = SELECT () Example_Dataset_1 ;
+OUT = COVER (1, ANY; groupby: cell, antibody_target; aggregate: min_pvalue AS MIN(pvalue)) IN ;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/cover2.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cover2.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,4 @@
+IN = SELECT () Example_Dataset_1 ;
+OUT1 = FLAT (2, 4; groupby: cell) IN ;
+OUT2 = SUMMIT (2, 4; groupby: cell) IN ;
+OUT3 = HISTOGRAM (ALL / 2, (ALL + 1) / 2; groupby: antibody_target) IN ;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/difference1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/difference1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,3 @@
+IN = SELECT () Example_Dataset_1 ;
+IN2 = SELECT () Example_Dataset_2 ;
+OUT = DIFFERENCE (exact: true; joinby: cell) IN IN2;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/extend1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extend1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,2 @@
+OUT = SELECT () Example_Dataset_1 ;
+OUT2 = EXTEND (avg_score AS AVG(score), max_p AS MAX(pvalue)) OUT ;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/group1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/group1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,2 @@
+OUT = SELECT () Example_Dataset_1 ;
+OUT2 = GROUP (cell_tissue; meta_aggregates: min_tier AS MIN(cell_tier); region_keys: score; region_aggregates: min_signal AS MIN(signal)) OUT ;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/guest.gmql_user
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/guest.gmql_user Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,1 @@
+Guest 833ed58f-d1d3-4963-8bf3-46e0562beac2 True
b
diff -r 000000000000 -r 35d52820e7c7 test-data/guest2.gmql_user
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/guest2.gmql_user Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,1 @@
+Guest 3134ead6-4661-4994-a7c6-2fc2ae0a56da True
b
diff -r 000000000000 -r 35d52820e7c7 test-data/join1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/join1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,3 @@
+IN = SELECT () Example_Dataset_1 ;
+IN2 = SELECT () Example_Dataset_2 ;
+OUT = JOIN (MD(1), UP; output: RIGHT; joinby: cell) IN IN2;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/join2.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/join2.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,3 @@
+IN = SELECT () Example_Dataset_1 ;
+IN2 = SELECT () Example_Dataset_2 ;
+OUT = JOIN (DL(0); on_attributes: score, chr; output: INT; joinby: cell, provider) IN IN2;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/map1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/map1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,3 @@
+IN = SELECT () Example_Dataset_1 ;
+IN2 = SELECT () Example_Dataset_2 ;
+OUT = MAP (avg_score AS AVG(score), min_score AS MIN(score), chr_list AS BAG(chr); count_name: mapped_n; joinby: cell_tissue) IN IN2;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/merge1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,2 @@
+OUT = SELECT () Example_Dataset_1 ;
+OUT2 = MERGE (groupby: EXACT(antibody_target)) OUT ;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/order1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/order1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,2 @@
+OUT = SELECT () Example_Dataset_1 ;
+OUT2 = ORDER (ID; meta_top: 5; region_order: score DESC, start) OUT ;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/project1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/project1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,2 @@
+OUT = SELECT () Example_Dataset_1 ;
+OUT2 = PROJECT (ALLBUT name; frame; metadata: cell; region_update: lengh AS stop - start) OUT ;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/query.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,2 @@
+VAR = SELECT (grant == 'Stam') Example_Dataset_1 ;
+MATERIALIZE VAR INTO TG;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/rep.gmql_repository
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rep.gmql_repository Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,42 @@
+Example1 guest_new880
+ChIA_PET public
+Example_Dataset_1 public
+Example_Dataset_2 public
+GRCh38_ANNOTATION_GENCODE public
+GRCh38_ANNOTATION_REFSEQ public
+GRCh38_ENCODE_BROAD_AUG_2017 public
+GRCh38_ENCODE_BROAD_NOV_2017 public
+GRCh38_ENCODE_NARROW_AUG_2017 public
+GRCh38_ENCODE_NARROW_NOV_2017 public
+GRCh38_TCGA_copy_number public
+GRCh38_TCGA_copy_number_masked public
+GRCh38_TCGA_gene_expression public
+GRCh38_TCGA_methylation public
+GRCh38_TCGA_miRNA_expression public
+GRCh38_TCGA_miRNA_isoform_expression public
+GRCh38_TCGA_somatic_mutation_masked public
+HG19_ANNOTATION_GENCODE public
+HG19_ANNOTATION_REFSEQ public
+HG19_BED_ANNOTATION public
+HG19_ENCODE_BROAD_AUG_2017 public
+HG19_ENCODE_BROAD_NOV_2016 public
+HG19_ENCODE_BROAD_NOV_2017 public
+HG19_ENCODE_NARROW_AUG_2017 public
+HG19_ENCODE_NARROW_NOV_2016 public
+HG19_ENCODE_NARROW_NOV_2017 public
+HG19_ROADMAP_EPIGENOMICS_BED public
+HG19_ROADMAP_EPIGENOMICS_BROADPEAK public
+HG19_TCGA_cnv public
+HG19_TCGA_dnamethylation public
+HG19_TCGA_dnaseq public
+HG19_TCGA_mirnaseq_isoform public
+HG19_TCGA_mirnaseq_mirna public
+HG19_TCGA_rnaseq_exon public
+HG19_TCGA_rnaseq_gene public
+HG19_TCGA_rnaseq_spljxn public
+HG19_TCGA_rnaseqv2_exon public
+HG19_TCGA_rnaseqv2_gene public
+HG19_TCGA_rnaseqv2_isoform public
+HG19_TCGA_rnaseqv2_spljxn public
+TADs_Aiden public
+TADs_Dixon public
b
diff -r 000000000000 -r 35d52820e7c7 test-data/sample1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample1.bed Tue Jun 26 09:01:39 2018 -0400
b
b'@@ -0,0 +1,12475 @@\n+chr1\t10464\t10465\tCHR1_P0001_R1\t1000\t+\n+chr1\t11873\t11874\tCHR1_P0001_R2\t1000\t+\n+chr1\t13420\t13421\tCHR1_P0001_R3\t1000\t+\n+chr1\t15945\t15946\tCHR1_M0001_R10\t1000\t-\n+chr1\t17480\t17481\tCHR1_M0001_R2\t1000\t-\n+chr1\t18003\t18004\tCHR1_M0001_R9\t1000\t-\n+chr1\t18733\t18734\tCHR1_M0001_R8\t1000\t-\n+chr1\t19172\t19173\tCHR1_M0001_R6\t1000\t-\n+chr1\t19735\t19736\tCHR1_M0001_R5\t1000\t-\n+chr1\t20510\t20511\tCHR1_M0001_R7\t1000\t-\n+chr1\t24892\t24893\tCHR1_M0001_R4\t1000\t-\n+chr1\t26794\t26795\tCHR1_P0002_R1\t1000\t+\n+chr1\t29348\t29349\tCHR1_M0001_R1\t1000\t-\n+chr1\t29554\t29555\tCHR1_P0003_R1\t1000\t+\n+chr1\t29889\t29890\tCHR1_M0001_R3\t1000\t-\n+chr1\t36073\t36074\tCHR1_M0002_R1\t1000\t-\n+chr1\t65561\t65562\tCHR1_P0004_R1\t1000\t+\n+chr1\t69085\t69086\tCHR1_P0004_R2\t1000\t+\n+chr1\t105534\t105535\tCHR1_M0004_R1\t1000\t-\n+chr1\t137188\t137189\tCHR1_M0005_R2\t1000\t-\n+chr1\t139459\t139460\tCHR1_M0005_R1\t1000\t-\n+chr1\t251849\t251850\tCHR1_M0006_R1\t1000\t-\n+chr1\t261364\t261365\tCHR1_P0005_R1\t1000\t+\n+chr1\t324713\t324714\tCHR1_P0006_R1\t1000\t+\n+chr1\t348281\t348282\tCHR1_M0007_R1\t1000\t-\n+chr1\t367658\t367659\tCHR1_P0007_R1\t1000\t+\n+chr1\t420205\t420206\tCHR1_P0008_R1\t1000\t+\n+chr1\t455711\t455712\tCHR1_M0008_R1\t1000\t-\n+chr1\t462271\t462272\tCHR1_P0009_R1\t1000\t+\n+chr1\t528069\t528070\tCHR1_P0010_R1\t1000\t+\n+chr1\t564682\t564683\tCHR1_M0009_R1\t1000\t-\n+chr1\t565034\t565035\tCHR1_P0011_R1\t1000\t+\n+chr1\t565946\t565947\tCHR1_M0009_P1557\t1000\t-\n+chr1\t566776\t566777\tCHR1_P0012_R1\t1000\t+\n+chr1\t567249\t567250\tCHR1_M0009_P2860\t1000\t-\n+chr1\t568037\t568038\tCHR1_M0009_P3648\t1000\t-\n+chr1\t568209\t568210\tCHR1_P0013_R1\t1000\t+\n+chr1\t569580\t569581\tCHR1_P0014_R1\t1000\t+\n+chr1\t569810\t569811\tCHR1_M0009_P5421\t1000\t-\n+chr1\t622034\t622035\tCHR1_M0010_R1\t1000\t-\n+chr1\t664909\t664910\tCHR1_M0011_R1\t1000\t-\n+chr1\t688889\t688890\tCHR1_M0012_R1\t1000\t-\n+chr1\t714020\t714021\tCHR
1_M0013_R1\t1000\t-\n+chr1\t714287\t714288\tCHR1_P0015_P38463\t1000\t+\n+chr1\t731090\t731091\tCHR1_M0014_R1\t1000\t-\n+chr1\t752750\t752751\tCHR1_P0015_R1\t1000\t+\n+chr1\t762883\t762884\tCHR1_M0015_R1\t1000\t-\n+chr1\t763067\t763068\tCHR1_P0016_R1\t1000\t+\n+chr1\t766984\t766985\tCHR1_P0017_R1\t1000\t+\n+chr1\t791390\t791391\tCHR1_P0018_R2\t1000\t+\n+chr1\t791897\t791898\tCHR1_P0018_R3\t1000\t+\n+chr1\t792894\t792895\tCHR1_P0018_R1\t1000\t+\n+chr1\t812283\t812284\tCHR1_M0016_R1\t1000\t-\n+chr1\t845442\t845443\tCHR1_P0019_P1372\t1000\t+\n+chr1\t846814\t846815\tCHR1_P0019_R1\t1000\t+\n+chr1\t847832\t847833\tCHR1_P0019_R2\t1000\t+\n+chr1\t854050\t854051\tCHR1_M0017_R2\t1000\t-\n+chr1\t856396\t856397\tCHR1_M0017_R1\t1000\t-\n+chr1\t859335\t859336\tCHR1_P0020_P1785\t1000\t+\n+chr1\t861115\t861116\tCHR1_P0020_R1\t1000\t+\n+chr1\t871145\t871146\tCHR1_P0020_R3\t1000\t+\n+chr1\t873159\t873160\tCHR1_M0018_R4\t1000\t-\n+chr1\t874671\t874672\tCHR1_P0020_R2\t1000\t+\n+chr1\t879598\t879599\tCHR1_P0020_R4\t1000\t+\n+chr1\t880512\t880513\tCHR1_M0018_R7\t1000\t-\n+chr1\t882440\t882441\tCHR1_M0018_R3\t1000\t-\n+chr1\t883985\t883986\tCHR1_M0018_R6\t1000\t-\n+chr1\t889465\t889466\tCHR1_M0018_R2\t1000\t-\n+chr1\t891576\t891577\tCHR1_M0018_R5\t1000\t-\n+chr1\t894644\t894645\tCHR1_M0018_R1\t1000\t-\n+chr1\t896005\t896006\tCHR1_P0021_R1\t1000\t+\n+chr1\t896830\t896831\tCHR1_P0021_R2\t1000\t+\n+chr1\t898720\t898721\tCHR1_P0021_R3\t1000\t+\n+chr1\t901876\t901877\tCHR1_P0022_R1\t1000\t+\n+chr1\t912021\t912022\tCHR1_M0019_R2\t1000\t-\n+chr1\t917473\t917474\tCHR1_M0019_R1\t1000\t-\n+chr1\t934406\t934407\tCHR1_M0020_R4\t1000\t-\n+chr1\t934910\t934911\tCHR1_M0020_R2\t1000\t-\n+chr1\t935458\t935459\tCHR1_M0020_R1\t1000\t-\n+chr1\t935872\t935873\tCHR1_P0023_P13000\t1000\t+\n+chr1\t936091\t936092\tCHR1_M0020_R3\t1000\t-\n+chr1\t936808\t936809\tCHR1_P0023_P12064\t1000\t+\n+chr1\t941884\t941885\tCHR1_M0020_P6414\t1000\t-\n+chr1\t948877\t948878\tCHR1_P0023_R1\t1000\t+\n+chr1\t949362\t949363\tCHR1_P00
23_R2\t1000\t+\n+chr1\t955494\t955495\tCHR1_P0024_R1\t1000\t+\n+chr1\t956289\t956290\tCHR1_P0024_R13\t1000\t+\n+chr1\t956712\t956713\tCHR1_P0024_R5\t1000\t+\n+chr1\t957632\t957633\tCHR1_P0024_R4\t1000\t+\n+chr1\t957789\t957790\tCHR1_M0020_P22319\t1000\t-\n+chr1\t968862\t968863\tCHR1_M0020_P33392\t1000\t-\n+chr1\t969327\t969328\tCHR1_M0020_P33857\t1000\t-\n+chr1\t969751\t969752\tCHR1_P0024_R3\t1000\t+\n+chr1\t976294\t976295\tCHR1_P0024_R2\t1000\t+\n+chr1\t977020\t977021\tCHR1_P0024_R14\t1000\t+\n+chr1\t977552\t977553\tCHR1_P0024_R9\t1000\t+\n+chr1\t978755\t978756\tCHR1_P0024_'..b'7492547\t247492548\tCHR1_M1791_R4\t1000\t-\n+chr1\t247494616\t247494617\tCHR1_P1881_R2\t1000\t+\n+chr1\t247495113\t247495114\tCHR1_M1791_R1\t1000\t-\n+chr1\t247495205\t247495206\tCHR1_P1881_R1\t1000\t+\n+chr1\t247579479\t247579480\tCHR1_P1882_R2\t1000\t+\n+chr1\t247581358\t247581359\tCHR1_P1882_R1\t1000\t+\n+chr1\t247581945\t247581946\tCHR1_P1882_R3\t1000\t+\n+chr1\t247587920\t247587921\tCHR1_P1882_R5\t1000\t+\n+chr1\t247588635\t247588636\tCHR1_P1882_R4\t1000\t+\n+chr1\t247615284\t247615285\tCHR1_M1792_R1\t1000\t-\n+chr1\t247654429\t247654430\tCHR1_P1883_R1\t1000\t+\n+chr1\t247670419\t247670420\tCHR1_P1884_R1\t1000\t+\n+chr1\t247681164\t247681165\tCHR1_P1885_R2\t1000\t+\n+chr1\t247681679\t247681680\tCHR1_P1885_R1\t1000\t+\n+chr1\t247690174\t247690175\tCHR1_M1793_R3\t1000\t-\n+chr1\t247694106\t247694107\tCHR1_M1793_R2\t1000\t-\n+chr1\t247697141\t247697142\tCHR1_M1793_R1\t1000\t-\n+chr1\t247712442\t247712443\tCHR1_P1884_R2\t1000\t+\n+chr1\t247751661\t247751662\tCHR1_P1886_R1\t1000\t+\n+chr1\t247768887\t247768888\tCHR1_P1887_R1\t1000\t+\n+chr1\t247803044\t247803045\tCHR1_P1888_R1\t1000\t+\n+chr1\t247836343\t247836344\tCHR1_M1794_R1\t1000\t-\n+chr1\t247876057\t247876058\tCHR1_M1795_R1\t1000\t-\n+chr1\t247901916\t247901917\tCHR1_P1889_R1\t1000\t+\n+chr1\t247921334\t247921335\tCHR1_M1796_R2\t1000\t-\n+chr1\t247921906\t247921907\tCHR1_M1796_R1\t1000\t-\n+chr1\t247937985\t247937986\tCHR1_P1890_R1\t1
000\t+\n+chr1\t247979031\t247979032\tCHR1_M1797_R1\t1000\t-\n+chr1\t248005198\t248005199\tCHR1_M1798_R1\t1000\t-\n+chr1\t248020538\t248020539\tCHR1_P1891.1_R1\t1000\t+\n+chr1\t248023917\t248023918\tCHR1_P1891.1_R2\t1000\t+\n+chr1\t248031276\t248031277\tCHR1_P1891.2_R1\t1000\t+\n+chr1\t248058888\t248058889\tCHR1_P1891.2_R2\t1000\t+\n+chr1\t248084319\t248084320\tCHR1_P1892_R1\t1000\t+\n+chr1\t248100494\t248100495\tCHR1_P1893_R1\t1000\t+\n+chr1\t248112159\t248112160\tCHR1_P1894_R1\t1000\t+\n+chr1\t248128633\t248128634\tCHR1_P1895_R1\t1000\t+\n+chr1\t248153940\t248153941\tCHR1_P1896_R1\t1000\t+\n+chr1\t248201569\t248201570\tCHR1_P1897_R1\t1000\t+\n+chr1\t248201940\t248201941\tCHR1_P1897_R2\t1000\t+\n+chr1\t248223983\t248223984\tCHR1_P1898_R1\t1000\t+\n+chr1\t248262665\t248262666\tCHR1_P1893_R2\t1000\t+\n+chr1\t248285607\t248285608\tCHR1_P1899_R1\t1000\t+\n+chr1\t248308449\t248308450\tCHR1_P1900_R1\t1000\t+\n+chr1\t248343287\t248343288\tCHR1_P1901_R1\t1000\t+\n+chr1\t248366369\t248366370\tCHR1_P1902_R1\t1000\t+\n+chr1\t248402230\t248402231\tCHR1_P1903_R1\t1000\t+\n+chr1\t248402604\t248402605\tCHR1_P1903_R2\t1000\t+\n+chr1\t248437116\t248437117\tCHR1_M1799_R1\t1000\t-\n+chr1\t248458880\t248458881\tCHR1_M1800_R1\t1000\t-\n+chr1\t248487870\t248487871\tCHR1_M1801_R1\t1000\t-\n+chr1\t248512076\t248512077\tCHR1_P1904_R1\t1000\t+\n+chr1\t248524882\t248524883\tCHR1_P1905_R1\t1000\t+\n+chr1\t248550909\t248550910\tCHR1_P1906_R1\t1000\t+\n+chr1\t248569295\t248569296\tCHR1_P1907_R1\t1000\t+\n+chr1\t248616098\t248616099\tCHR1_P1908_R1\t1000\t+\n+chr1\t248636651\t248636652\tCHR1_P1909_R1\t1000\t+\n+chr1\t248651889\t248651890\tCHR1_P1910_R1\t1000\t+\n+chr1\t248684947\t248684948\tCHR1_P1911_R1\t1000\t+\n+chr1\t248722774\t248722775\tCHR1_M1803_R1\t1000\t-\n+chr1\t248738058\t248738059\tCHR1_M1804_R1\t1000\t-\n+chr1\t248757069\t248757070\tCHR1_M1805_R1\t1000\t-\n+chr1\t248790429\t248790430\tCHR1_M1806_R1\t1000\t-\n+chr1\t248802559\t248802560\tCHR1_M1807_R1\t1000\t-\n+chr1\t248814185\t24881
4186\tCHR1_M1808_R1\t1000\t-\n+chr1\t248845605\t248845606\tCHR1_M1809_R1\t1000\t-\n+chr1\t248885507\t248885508\tCHR1_M1810_R1\t1000\t-\n+chr1\t249077603\t249077604\tCHR1_P1912_R1\t1000\t+\n+chr1\t249105099\t249105100\tCHR1_M1811_R8\t1000\t-\n+chr1\t249106484\t249106485\tCHR1_M1811_R7\t1000\t-\n+chr1\t249108372\t249108373\tCHR1_M1811_R6\t1000\t-\n+chr1\t249110861\t249110862\tCHR1_M1811_R3\t1000\t-\n+chr1\t249119240\t249119241\tCHR1_M1811_R4\t1000\t-\n+chr1\t249119841\t249119842\tCHR1_M1811_R5\t1000\t-\n+chr1\t249120110\t249120111\tCHR1_M1811_R1\t1000\t-\n+chr1\t249120851\t249120852\tCHR1_M1811_R2\t1000\t-\n+chr1\t249132511\t249132512\tCHR1_P1913_R1\t1000\t+\n+chr1\t249132910\t249132911\tCHR1_M1811_P12012\t1000\t-\n+chr1\t249133269\t249133270\tCHR1_P1913_R6\t1000\t+\n+chr1\t249139842\t249139843\tCHR1_P1913_R4\t1000\t+\n+chr1\t249140249\t249140250\tCHR1_P1913_R2\t1000\t+\n+chr1\t249141584\t249141585\tCHR1_P1913_R3\t1000\t+\n+chr1\t249142171\t249142172\tCHR1_P1913_R5\t1000\t+\n+chr1\t249200437\t249200438\tCHR1_P1914_R1\t1000\t+\n+chr1\t249208722\t249208723\tCHR1_P1915_R1\t1000\t+\n'
b
diff -r 000000000000 -r 35d52820e7c7 test-data/sample1.bed.meta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample1.bed.meta Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,3 @@
+ann_type TSS
+provider UCSC
+assembly hg19
b
diff -r 000000000000 -r 35d52820e7c7 test-data/sample2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample2.bed Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,111 @@
+chr1 3190581 3191428 element_705 900
+chr1 8130439 8131887 element_1833 900
+chr1 10593123 10594209 element_677 200
+chr1 10732070 10733118 element_289 900
+chr1 10757664 10758631 element_361 200
+chr1 10781239 10781744 element_389 900
+chr1 10851570 10852173 element_408 900
+chr1 10925202 10925728 element_417 200
+chr1 10965129 10966144 element_241 900
+chr1 27049620 27050905 element_569 900
+chr1 32510030 32510913 element_645 900
+chr1 33722494 33723960 element_917 200
+chr1 33828789 33830947 element_1388 900
+chr1 38494689 38495688 element_1105 200
+chr1 38560194 38561606 element_238 900
+chr1 38627529 38629265 element_1124 200
+chr1 38656780 38657626 element_1179 200
+chr1 38662809 38664040 element_790 200
+chr1 38735942 38737376 element_1008 200
+chr1 38791998 38793333 element_1098 200
+chr1 38801888 38802630 element_270 200
+chr1 38819171 38820577 element_1045 200
+chr1 39192609 39194134 element_1031 900
+chr1 39248757 39250129 element_1139 900
+chr1 39291500 39292441 element_850 200
+chr1 41710596 41712937 element_1674 200
+chr1 44500383 44503337 element_1857 900
+chr1 44715420 44716129 element_277 900
+chr1 44989824 44991149 element_280 900
+chr1 49112757 49113363 element_295 200
+chr1 51006090 51007314 element_247 200
+chr1 51034546 51036289 element_194 900
+chr1 51098841 51099913 element_197 200
+chr1 51165195 51166786 element_200 900
+chr1 54925046 54928826 element_1487 900
+chr1 59522325 59524092 element_1485 900
+chr1 60102624 60105491 element_1609 200
+chr1 61086857 61087871 element_683 200
+chr1 61917795 61920190 element_1309 900
+chr1 62045460 62048159 element_1484 900
+chr1 62053433 62055908 element_1450 900
+chr1 63369349 63370894 element_311 900
+chr1 63443534 63444884 element_168 900
+chr1 63464282 63465717 element_169 900
+chr1 63665219 63666122 element_764 900
+chr1 82557436 82558140 element_436 200
+chr1 82663553 82664196 element_437 200
+chr1 82664608 82665419 element_438 200
+chr1 83220709 83221363 element_439 200
+chr1 83252219 83253218 element_440 200
+chr1 83310680 83311769 element_441 200
+chr1 83345366 83346271 element_442 900
+chr1 83360512 83361298 element_443 200
+chr1 83411289 83412040 element_444 200
+chr1 83878319 83879217 element_445 200
+chr1 87795192 87796737 element_809 900
+chr1 87803415 87805212 element_1134 200
+chr1 87821621 87823082 element_174 900
+chr1 87821793 87822910 element_322 900
+chr1 88028658 88029378 element_323 200
+chr1 88065041 88066530 element_1217 900
+chr1 88108084 88109396 element_1002 900
+chr1 88183654 88184961 element_1058 200
+chr1 88402821 88404888 element_1055 200
+chr1 88535719 88538390 element_1198 900
+chr1 88577535 88578821 element_1200 200
+chr1 88595049 88596320 element_1135 200
+chr1 88615687 88616927 element_878 900
+chr1 88646698 88648145 element_1216 200
+chr1 88686076 88687740 element_1107 900
+chr1 88841735 88843091 element_1068 200
+chr1 88875731 88877192 element_988 200
+chr1 88926796 88928508 element_327 900
+chr1 91305562 91307215 element_612 900
+chr1 92271540 92273987 element_1499 200
+chr1 97610491 97611741 element_671 900
+chr1 113540056 113542020 element_1672 900
+chr1 119028026 119029955 element_1428 900
+chr1 119452558 119453594 element_499 200
+chr1 163359231 163360494 element_1156 200
+chr1 163441941 163442842 element_762 900
+chr1 163507965 163509139 element_1185 900
+chr1 163939700 163940637 element_201 200
+chr1 164023244 164024214 element_202 200
+chr1 164197827 164199172 element_1230 200
+chr1 164620038 164621164 element_1235 900
+chr1 164637575 164639037 element_203 200
+chr1 164668592 164669823 element_1144 900
+chr1 164672787 164674206 element_970 200
+chr1 164700259 164701522 element_1136 200
+chr1 167296954 167299046 element_1331 900
+chr1 169910396 169913079 element_1442 900
+chr1 181121049 181123654 element_1862 900
+chr1 198263562 198265742 element_1322 900
+chr1 198339402 198341607 element_1443 200
+chr1 204423958 204424935 element_1368 900
+chr1 209064233 209067059 element_1694 200
+chr1 209989050 209989824 element_932 900
+chr1 210433698 210437258 element_1617 200
+chr1 213498112 213501134 element_1324 900
+chr1 213597964 213599524 element_204 900
+chr1 215888921 215890374 element_366 200
+chr1 216772416 216773458 element_175 200
+chr1 217766122 217767351 element_734 200
+chr1 218207432 218208498 element_1257 900
+chr1 218222277 218224086 element_1255 200
+chr1 221906778 221908480 element_1663 200
+chr1 232753930 232757436 element_1714 200
+chr1 243876467 243877893 element_545 900
+chr1 243895796 243896468 element_214 200
+chr1 244217325 244218426 element_476 200
b
diff -r 000000000000 -r 35d52820e7c7 test-data/sample2.bed.meta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample2.bed.meta Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,4 @@
+ann_type enhancer
+provider UCSC
+name VistaEnhancers
+assembly hg19
b
diff -r 000000000000 -r 35d52820e7c7 test-data/sample3.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample3.bed Tue Jun 26 09:01:39 2018 -0400
b
b'@@ -0,0 +1,1096 @@\n+chr1\t324287\t324345\tNR_028327\t0\t+\r\n+chr1\t567704\t567793\tNR_106781\t0\t-\r\n+chr1\t764382\t764484\tNR_047522\t0\t+\r\n+chr1\t901876\t901994\tNM_032129\t0\t+\r\n+chr1\t1021257\t1021392\tNM_017891\t0\t-\r\n+chr1\t1139778\t1139866\tNM_148901\t0\t-\r\n+chr1\t1163847\t1164326\tNM_016547\t0\t-\r\n+chr1\t1178441\t1178532\tNM_001014980\t0\t-\r\n+chr1\t1230097\t1230196\tNM_030649\t0\t-\r\n+chr1\t1231489\t1231550\tNR_106784\t0\t-\r\n+chr1\t1248414\t1248504\tNM_017871\t0\t-\r\n+chr1\t1394540\t1394611\tNM_001039211\t0\t+\r\n+chr1\t1447522\t1447853\tNM_001170535\t0\t+\r\n+chr1\t1461840\t1461911\tNM_001170536\t0\t+\r\n+chr1\t1480242\t1480382\tNM_014188\t0\t-\r\n+chr1\t1551887\t1551994\tNM_001170686\t0\t+\r\n+chr1\t1563652\t1563779\tNM_001170688\t0\t+\r\n+chr1\t1573123\t1573245\tNM_033490\t0\t-\r\n+chr1\t1576408\t1576474\tNM_033490\t0\t-\r\n+chr1\t1637080\t1637171\tNM_033529\t0\t-\r\n+chr1\t1647784\t1647917\tNM_033489\t0\t-\r\n+chr1\t1670674\t1671143\tNM_182838\t0\t-\r\n+chr1\t1670674\t1671143\tNM_001199787\t0\t-\r\n+chr1\t1682670\t1684499\tNM_023018\t0\t-\r\n+chr1\t1688619\t1688749\tNM_001198993\t0\t-\r\n+chr1\t2066700\t2066786\tNM_002744\t0\t+\r\n+chr1\t2066700\t2066786\tNM_001033581\t0\t+\r\n+chr1\t2522080\t2522528\tNM_033467\t0\t-\r\n+chr1\t3322058\t3322212\tNM_199454\t0\t+\r\n+chr1\t3643678\t3643788\tNM_001204187\t0\t+\r\n+chr1\t3656796\t3656951\tNR_033712\t0\t-\r\n+chr1\t4475612\t4475647\tNR_027088\t0\t+\r\n+chr1\t5923949\t5924093\tNM_001291594\t0\t-\r\n+chr1\t5993206\t5993389\tNM_001291594\t0\t-\r\n+chr1\t6008129\t6008311\tNM_001291593\t0\t-\r\n+chr1\t6052303\t6052533\tNR_111987\t0\t-\r\n+chr1\t6156695\t6156816\tNM_001199863\t0\t+\r\n+chr1\t6156695\t6156816\tNM_003636\t0\t+\r\n+chr1\t6185159\t6185293\tNM_015557\t0\t-\r\n+chr1\t6188558\t6188669\tNM_015557\t0\t-\r\n+chr1\t6270929\t6270998\tNM_207396\t0\t+\r\n+chr1\t6523131\t6523187\tNM_003790\t0\t-\r\n+chr1\t6529101\t6529301\tNM_001265592\t0\t-\r\n+chr1\t6529394\t6529510\tNM_001042663\t0\t-\r\n+c
hr1\t6529603\t6529736\tNM_001042665\t0\t-\r\n+chr1\t6531049\t6531160\tNM_198681\t0\t-\r\n+chr1\t6537588\t6537718\tNM_001265593\t0\t-\r\n+chr1\t6630968\t6631275\tNM_177540\t0\t+\r\n+chr1\t6638712\t6639817\tNM_177540\t0\t+\r\n+chr1\t6692855\t6693642\tNM_001195752\t0\t+\r\n+chr1\t6885151\t6885270\tNM_001242701\t0\t+\r\n+chr1\t7848104\t7848306\tNM_001289863\t0\t+\r\n+chr1\t8418255\t8418976\tNM_012102\t0\t-\r\n+chr1\t8424805\t8424898\tNM_001042681\t0\t-\r\n+chr1\t9034405\t9034503\tNM_001270500\t0\t+\r\n+chr1\t9085034\t9085133\tNM_207420\t0\t-\r\n+chr1\t9294862\t9295125\tNM_004285\t0\t+\r\n+chr1\t9630315\t9630416\tNM_032315\t0\t+\r\n+chr1\t9770481\t9770654\tNM_005026\t0\t+\r\n+chr1\t9780799\t9780967\tNM_005026\t0\t+\r\n+chr1\t9789078\t9790763\tNM_014944\t0\t-\r\n+chr1\t9794029\t9794200\tNM_014944\t0\t-\r\n+chr1\t9797555\t9797612\tNM_014944\t0\t-\r\n+chr1\t9932026\t9932146\tNM_020248\t0\t-\r\n+chr1\t10057254\t10057389\tNM_052960\t0\t+\r\n+chr1\t10190557\t10190674\tNM_006048\t0\t+\r\n+chr1\t10336378\t10336457\tNM_015074\t0\t+\r\n+chr1\t10356954\t10357135\tNM_015074\t0\t+\r\n+chr1\t10399826\t10399917\tNM_015074\t0\t+\r\n+chr1\t10402107\t10402226\tNM_015074\t0\t+\r\n+chr1\t10713433\t10714275\tNM_001079843\t0\t-\r\n+chr1\t10719758\t10720593\tNM_001079843\t0\t-\r\n+chr1\t11107259\t11107296\tNM_006610\t0\t-\r\n+chr1\t11116660\t11116814\tNM_003132\t0\t-\r\n+chr1\t11148179\t11148255\tNM_001001998\t0\t-\r\n+chr1\t11189794\t11189895\tNM_004958\t0\t-\r\n+chr1\t11272852\t11272965\tNM_004958\t0\t-\r\n+chr1\t11316048\t11316249\tNM_004958\t0\t-\r\n+chr1\t11721186\t11723384\tNM_183412\t0\t+\r\n+chr1\t11736102\t11736197\tNM_006341\t0\t-\r\n+chr1\t11796141\t11796292\tNM_001040195\t0\t+\r\n+chr1\t11879544\t11879611\tNM_001286\t0\t+\r\n+chr1\t11905603\t11905823\tNR_037806\t0\t+\r\n+chr1\t12170097\t12170261\tNM_001281430\t0\t+\r\n+chr1\t12251830\t12251980\tNM_001066\t0\t+\r\n+chr1\t12304302\t12304493\tNM_015378\t0\t+\r\n+chr1\t12326937\t12327068\tNM_018156\t0\t+\r\n+chr1\t12328762\t12328933\tN
M_015378\t0\t+\r\n+chr1\t12374171\t12374384\tNM_018156\t0\t+\r\n+chr1\t12382592\t12382803\tNM_015378\t0\t+\r\n+chr1\t12851545\t12851623\tNM_023013\t0\t+\r\n+chr1\t13196244\t13196556\tNR_111945\t0\t+\r\n+chr1\t13388457\t13389033\tNM_001012276\t0\t-\r\n+chr1\t13673433\t13673511\tNM_001024661\t0\t-\r\n+chr1\t13942399\t13944452\tNM_198389\t0\t+\r\n+chr1\t13942399\t13944452\tNM_001006624\t0\t+\r\n+chr1\t14026734\t14026795\tNM_001135610\t0\t+\r\n+chr1\t14059273\t14059377\tNM_015866\t0\t+\r\n+chr1\t14059273\t14059377\tNM_001135610\t0\t+\r\n+chr1\t14068499\t14068652\tNM_012231\t0\t+\r\n+chr1\t15382586\t15382776\tNM_001018001\t0\t+\r\n+chr1\t15671911\t15672019\tNM_052929\t0\t+\r\n+chr1\t15689137\t15689222\tNM_052929\t0\t'..b'r1\t222902961\t222903043\tNM_144695\t0\t+\r\n+chr1\t223814690\t223814859\tNM_001143962\t0\t-\r\n+chr1\t223905463\t223905533\tNM_001748\t0\t+\r\n+chr1\t223946971\t223947183\tNM_001146068\t0\t+\r\n+chr1\t223949282\t223949319\tNM_001748\t0\t+\r\n+chr1\t223962535\t223963720\tNM_001748\t0\t+\r\n+chr1\t224477185\t224477435\tNM_206840\t0\t-\r\n+chr1\t224619178\t224619283\tNM_025160\t0\t-\r\n+chr1\t224922264\t224922408\tNM_152495\t0\t+\r\n+chr1\t225147854\t225148004\tNM_001145154\t0\t+\r\n+chr1\t225270250\t225270441\tNM_001373\t0\t+\r\n+chr1\t225393672\t225393881\tNM_001373\t0\t+\r\n+chr1\t225609778\t225609979\tNM_194442\t0\t-\r\n+chr1\t225616407\t225616557\tNM_194442\t0\t-\r\n+chr1\t226043578\t226043641\tNM_014698\t0\t-\r\n+chr1\t226066919\t226067110\tNM_014698\t0\t-\r\n+chr1\t226843780\t226844975\tNR_103784\t0\t-\r\n+chr1\t227182523\t227182681\tNM_003607\t0\t-\r\n+chr1\t227204627\t227204751\tNM_014826\t0\t-\r\n+chr1\t227257477\t227257554\tNM_003607\t0\t-\r\n+chr1\t228290599\t228290750\tNM_024319\t0\t-\r\n+chr1\t228327784\t228327860\tNM_001242839\t0\t+\r\n+chr1\t228351786\t228352193\tNR_103540\t0\t-\r\n+chr1\t228473778\t228474051\tNM_001098623\t0\t+\r\n+chr1\t228504409\t228504685\tNM_001098623\t0\t+\r\n+chr1\t228527645\t228527802\tNM_052843\t0\t+\r\n+chr1\t2285
57642\t228557795\tNM_001098623\t0\t+\r\n+chr1\t228566318\t228566575\tNM_001271223\t0\t+\r\n+chr1\t228601497\t228601593\tNM_016102\t0\t-\r\n+chr1\t228699883\t228699989\tNM_001287262\t0\t-\r\n+chr1\t229593907\t229594043\tNM_018230\t0\t-\r\n+chr1\t230379051\t230379173\tNM_004481\t0\t+\r\n+chr1\t230798886\t230798967\tNM_007357\t0\t+\r\n+chr1\t230891082\t230891152\tNM_016452\t0\t+\r\n+chr1\t231057166\t231057250\tNM_024525\t0\t-\r\n+chr1\t231061262\t231061393\tNM_001122835\t0\t-\r\n+chr1\t231067043\t231067218\tNM_001122835\t0\t-\r\n+chr1\t231069524\t231069607\tNM_024525\t0\t-\r\n+chr1\t231902885\t231903015\tNM_001164556\t0\t+\r\n+chr1\t231906580\t231906816\tNM_001012957\t0\t+\r\n+chr1\t231906580\t231907408\tNM_001164549\t0\t+\r\n+chr1\t232144530\t232144795\tNM_001164537\t0\t+\r\n+chr1\t232162180\t232162298\tNM_001164537\t0\t+\r\n+chr1\t232172437\t232177019\tNM_001164537\t0\t+\r\n+chr1\t232551239\t232551371\tNM_020808\t0\t-\r\n+chr1\t232940637\t232946092\tNM_019090\t0\t+\r\n+chr1\t233388386\t233388534\tNM_014801\t0\t-\r\n+chr1\t234509975\t234510135\tNM_001206641\t0\t+\r\n+chr1\t234546190\t234546277\tNM_005646\t0\t-\r\n+chr1\t235277082\t235277225\tNM_014765\t0\t-\r\n+chr1\t235318195\t235318427\tNM_001161533\t0\t-\r\n+chr1\t235330209\t235331967\tNM_001206794\t0\t-\r\n+chr1\t235335932\t235336079\tNM_031371\t0\t-\r\n+chr1\t235377083\t235377341\tNM_016374\t0\t-\r\n+chr1\t235383616\t235383860\tNM_001206794\t0\t-\r\n+chr1\t235386480\t235386575\tNM_016374\t0\t-\r\n+chr1\t235403627\t235403766\tNM_001206794\t0\t-\r\n+chr1\t235599858\t235599923\tNM_001079515\t0\t+\r\n+chr1\t236016299\t236016360\tNR_031718\t0\t-\r\n+chr1\t236388369\t236388439\tNM_019891\t0\t-\r\n+chr1\t236719399\t236719558\tNM_018072\t0\t-\r\n+chr1\t236767721\t236767841\tNM_018072\t0\t-\r\n+chr1\t236988637\t236988699\tNM_000254\t0\t+\r\n+chr1\t236990128\t236990196\tNM_001291940\t0\t+\r\n+chr1\t237001713\t237001899\tNM_001291939\t0\t+\r\n+chr1\t237057656\t237057857\tNM_000254\t0\t+\r\n+chr1\t237586391\t237586548\tNM_00
1035\t0\t+\r\n+chr1\t237755038\t237755153\tNM_001035\t0\t+\r\n+chr1\t237850754\t237850804\tNM_001035\t0\t+\r\n+chr1\t237862264\t237862325\tNM_001035\t0\t+\r\n+chr1\t240975217\t240975343\tNM_001282773\t0\t-\r\n+chr1\t241033354\t241033419\tNM_002924\t0\t-\r\n+chr1\t241146378\t241146429\tNM_001282775\t0\t-\r\n+chr1\t241676902\t241677013\tNM_000143\t0\t-\r\n+chr1\t242020646\t242020784\tNM_003686\t0\t+\r\n+chr1\t242024707\t242024804\tNM_006027\t0\t+\r\n+chr1\t242030131\t242030357\tNM_003686\t0\t+\r\n+chr1\t243332929\t243333056\tNM_001042404\t0\t-\r\n+chr1\t243716030\t243716245\tNM_005465\t0\t-\r\n+chr1\t243776972\t243777041\tNM_005465\t0\t-\r\n+chr1\t243776972\t243777041\tNM_001206729\t0\t-\r\n+chr1\t244541864\t244541941\tNM_001276348\t0\t+\r\n+chr1\t245025762\t245025836\tNM_031844\t0\t-\r\n+chr1\t245912641\t245912966\tNM_022743\t0\t-\r\n+chr1\t246805050\t246805072\tNM_152609\t0\t+\r\n+chr1\t247020992\t247021116\tNM_015446\t0\t-\r\n+chr1\t247108848\t247109129\tNR_037894\t0\t-\r\n+chr1\t247108848\t247109129\tNR_037892\t0\t-\r\n+chr1\t247473000\t247473108\tNM_032752\t0\t-\r\n+chr1\t247586531\t247586651\tNM_001243133\t0\t+\r\n+chr1\t247670359\t247670535\tNM_001281838\t0\t+\r\n+chr1\t247693433\t247695842\tNM_198074\t0\t-\r\n+chr1\t247701977\t247702093\tNM_001281834\t0\t+\r\n+chr1\t247712346\t247712522\tNM_145278\t0\t+\r\n+chr1\t249120033\t249120154\tNM_030645\t0\t-\r\n+\t\t\t\t\t\r\n'
b
diff -r 000000000000 -r 35d52820e7c7 test-data/sample3.bed.meta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample3.bed.meta Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,4 @@
+ann_type exons
+provider RefSeq
+name RefSeqGeneExons
+assembly hg19
b
diff -r 000000000000 -r 35d52820e7c7 test-data/select1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/select1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,1 @@
+OUT = SELECT (grant == 'Stam') Example_Dataset_1 ;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/select2.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/select2.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,1 @@
+OUT = SELECT (patient_age == '64' AND cell == '8988T'; region: score > 3) Example_Dataset_1 ;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/select3.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/select3.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,2 @@
+OUT = SELECT (patient_age == '64' AND cell == '8988T'; region: score > 3) Example_Dataset_1 ;
+OUT2 = SELECT (semijoin: cell IN OUT) Example_Dataset_2 ;
b
diff -r 000000000000 -r 35d52820e7c7 test-data/union1.gmql_query
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/union1.gmql_query Tue Jun 26 09:01:39 2018 -0400
b
@@ -0,0 +1,3 @@
+IN = SELECT () Example_Dataset_1 ;
+IN2 = SELECT () Example_Dataset_2 ;
+OUT = UNION () IN IN2;
b
diff -r 000000000000 -r 35d52820e7c7 utilities.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities.py Tue Jun 26 09:01:39 2018 -0400
[
@@ -0,0 +1,226 @@
+# Helper functions to perform REST calls on the GMQL server.
+# ----------------------------------------------------------------------------
+# Luana Brancato, luana.brancato@mail.polimi.it
+# ----------------------------------------------------------------------------
+
+import sys
+import os
+import yaml
+import requests
+
+
+def load_parts(module, call) :
+    """Given the module and the single operation, returns the fragments for the url to call"""
+
+    y_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),'gmql_rest.yaml')
+
+    with open(y_path,'r') as yamlf :
+        cfg = yaml.load(yamlf)
+
+    parts = list ()
+
+    gmql = cfg['GMQL_URL']
+    prefix = cfg[module]['prefix']
+    op = cfg[module]['operations'][call]
+
+    parts.append(gmql)
+    if prefix :
+        parts.append(prefix)
+
+    for p in op :
+        parts.append(p)
+
+    return parts
+
+def compose_url(module, call) :
+    """Given the fragments of a url, return the composite one"""
+
+    parts = load_parts(module,call)
+    url = '/'.join(parts)
+
+    return url
+
+def add_url_param(params, module, op, value,) :
+    """Given the params dict, add a new pair of key:value with the given value and the key set for given module and operation"""
+
+    y_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'gmql_rest.yaml')
+
+    with open(y_path, 'r') as yamlf:
+        cfg = yaml.load(yamlf)
+    yamlf.close()
+
+    key = cfg[module]['params'][op]
+
+    params.update({key : value})
+
+    return params
+
+
+def read_token(input):
+    """It takes the tabular file with the information over the user
+     name   authToken   valid_flag
+     It checks if the user is still valid and extract the authToken for the REST calls"""
+
+    with open(input,'r') as f_in :
+        user = f_in.readline().rstrip('\n').split('\t')
+
+    if user[2] :
+        token = user[1]
+    else :
+        stop_err("This session is no longer valid")
+
+
+    return token
+
+def expire_user(input):
+    """Set the validity flag of a user token to false"""
+
+    with open(input,'r') as f:
+        user = f.readline().rstrip('\n').split('\t')
+
+    user[2] = False
+
+    with open(input,'w') as f :
+         f.write('{fullName}\t{token}\t{valid}\n'.format(fullName=user[0], token=user[1],
+                                                            valid=user[2]))
+
+
+def get(url, user=None, response_type='json') :
+    """GET Request
+    :param url: url where to fetch the requested resource
+    :param user: for authenticated requests; if not provided make an unauthenticated request (es. for login)
+    :param response_type: type of the fetched response.
+        JSON ( Default )
+        TEXT
+        ZIP
+        FILE
+    """
+
+    #Set request headers
+    headers = dict ()
+
+    if user :
+        headers.update({'X-AUTH-TOKEN' : read_token(user)})
+
+    if response_type == 'text' :
+        headers.update({'Accept' : 'text/plain'})
+    elif response_type == 'zip' :
+        pass
+    elif response_type == 'file' :
+        headers.update({'Accept' : 'file'})
+    else :
+        headers.update({'Accept' : 'application/json'})
+
+
+    #Make the request
+    response = requests.get(url, headers=headers)
+
+    #Check returned server status
+    status_code = response.status_code
+
+    #Read result. If Server OK, read according to response_type. Raise an error otherwise.
+    if status_code == requests.codes.ok :
+        if response_type == 'json' :
+            return response.json()
+        elif response_type == 'text' :
+            return response.text
+        else :
+            return response
+    elif status_code == requests.codes.unauthorized :
+        expire_user(user)
+        stop_err("You are not authorized to do this. \nPlease login first.")
+    elif status_code == requests.codes.not_found :
+        stop_err("Resource not found for this user.")
+    else :
+        stop_err("Error {code}: {reason}\n{message}".format(code=status_code,
+                                                            reason=response.reason,
+                                                            message=response.content))
+
+
+def post(url, payload, user=None, params=None, content_type='json', response_type='json') :
+    """ POST Request
+    :param url: url where to post data
+    :param payload: payload for the post request. Type is specified by content_type.
+    :param user:  for authenticated requests; if not provided make an unauthenticated request (es. for registration)
+    :param params: optional query parameters
+    :param content_type
+    :param response_type: Default is json
+    """
+
+
+    # Set request headers
+    headers = dict()
+
+    if user:
+        headers.update({'X-AUTH-TOKEN': read_token(user)})
+
+    headers.update({'Accept': 'application/json'})
+
+    if content_type == 'text' :
+        headers.update({'Content-Type' : 'text/plain'})
+        response = requests.post(url, params=params, headers=headers, data=payload)
+    elif content_type == 'multiform' :
+        response = requests.post(url, params=params, headers=headers, files=payload)
+    else :
+        headers.update({'Content-Type': 'application/json'})
+        response = requests.post(url, params=params, headers=headers, json=payload)
+
+    # Check returned server status
+    status_code = response.status_code
+
+
+    if status_code == requests.codes.ok :
+       return response.json()
+    elif status_code == requests.codes.unauthorized :
+       content = response.content
+       if content.__contains__("The username or password you entered don't match") :
+           stop_err("The username or password you entered don't match")
+       else:
+           expire_user(user)
+           stop_err("You are not authorized to do this. \nPlease login first.")
+    else :
+        stop_err("Error {code}: {reason}\n{message}".format(code=status_code,
+                                                 reason=response.reason,
+                                                 message=response.content))
+
+
+
+def delete(url, user=None, response_type='json') :
+    """DELETE request
+    :param url: url where to post data
+    :param user:  for authenticated requests; if not provided make an unauthenticated request (es. for registration)
+    :param response_type: Default is json
+    """
+
+    # Set request headers
+    headers = dict()
+
+    if user:
+        headers.update({'X-AUTH-TOKEN': read_token(user)})
+
+    headers.update({'Accept': 'application/json'})
+
+    #Make the request
+    response = requests.delete(url, headers=headers)
+
+    #Check returned server status
+    status_code = response.status_code
+
+
+    #If Server OK, read result. Raise an error otherwise.
+    if status_code == requests.codes.ok :
+            return response.json()
+    elif status_code == requests.codes.unauthorized :
+        expire_user(user)
+        stop_err("You are not authorized to do this. \nPlease login first.")
+    elif status_code == requests.codes.not_found :
+        stop_err("Resource not found for this user.")
+    else :
+        stop_err("Error {code}: {reason}".format(code=status_code,
+                                                 reason=response.reason))
+
+
+
+def stop_err(msg):
+    sys.stderr.write("%s\n" % msg)
+    sys.exit()