Repository 'query_tabular'
hg clone https://toolshed.g2.bx.psu.edu/repos/jjohnson/query_tabular

Changeset 0:926c62f7fa09 (2016-01-21)
Next changeset 1:c7a1a686e42b (2016-02-12)
Commit message:
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 9ae87502ea7c3da33ecc453872c4eb2f41ecea4a-dirty
added:
query_tabular.py
query_tabular.xml
test-data/IEDB.tsv
test-data/customers.tsv
test-data/netMHC_summary.tsv
test-data/query_results.tsv
test-data/regex_results.tsv
test-data/sales.tsv
test-data/sales_results.tsv
b
diff -r 000000000000 -r 926c62f7fa09 query_tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/query_tabular.py Thu Jan 21 08:23:45 2016 -0500
[
b'@@ -0,0 +1,247 @@\n+#!/usr/bin/env python\n+"""\n+"""\n+import sys\n+import re\n+import os.path\n+import json\n+import sqlite3 as sqlite\n+import optparse\n+from optparse import OptionParser\n+\n+"""\n+TODO:\n+- could read column names from comment lines, but issues with legal names\n+- could add some transformations on tabular columns,\n+  e.g. a regex to format date/time strings\n+    c2 : re.sub(\'pat\', \'sub\', c2)\n+    c3 :\n+- column_defs dict of columns to create from tabular input\n+    column_defs : { \'name1\' : \'expr\', \'name2\' : \'expr\'}\n+- allow multiple queries and outputs\n+- add a --json input for table definitions (or yaml)\n+JSON config:\n+{ tables : [\n+    { file_path : \'/home/galaxy/dataset_101.dat\',\n+            table_name : \'t1\',\n+            column_names : [\'c1\', \'c2\', \'c3\'],\n+            comment_lines : 1\n+    },\n+    { file_path : \'/home/galaxy/dataset_102.dat\',\n+            table_name : \'t2\',\n+            column_names : [\'c1\', \'c2\', \'c3\']\n+    },\n+    { file_path : \'/home/galaxy/dataset_103.dat\',\n+            table_name : \'test\',\n+            column_names : [\'c1\', \'c2\', \'c3\']\n+    }\n+    ]\n+}\n+"""\n+\n+tables_query = \\\n+    "SELECT name, sql FROM sqlite_master WHERE type=\'table\' ORDER BY name"\n+\n+\n+def getValueType(val):\n+    if val or 0. == val:\n+        try:\n+            int(val)\n+            return \'INTEGER\'\n+        except:\n+            try:\n+                float(val)\n+                return \'REAL\'\n+            except:\n+                return \'TEXT\'\n+    return None\n+\n+\n+def get_column_def(file_path, table_name, skip=0, comment_char=\'#\',\n+                   column_names=None, max_lines=100):\n+    col_pref = [\'TEXT\', \'REAL\', \'INTEGER\', None]\n+    col_types = []\n+    data_lines = 0\n+    try:\n+        with open(file_path, "r") as fh:\n+            for linenum, line in enumerate(fh):\n+                if linenum < skip:\n+                    continue\n+                if line.startswith(comment_char):\n+                    continue\n+                data_lines += 1\n+                try:\n+                    fields = line.split(\'\\t\')\n+                    while len(col_types) < len(fields):\n+                        col_types.append(None)\n+                    for i, val in enumerate(fields):\n+                        colType = getValueType(val)\n+                        if col_pref.index(colType) < col_pref.index(col_types[i]):\n+                            col_types[i] = colType\n+                except Exception, e:\n+                    print >> sys.stderr, \'Failed at line: %d err: %s\' % (linenum, e)\n+    except Exception, e:\n+        print >> sys.stderr, \'Failed: %s\' % (e)\n+    for i, col_type in enumerate(col_types):\n+        if not col_type:\n+            col_types[i] = \'TEXT\'\n+    col_names = [\'c%d\' % i for i in range(1, len(col_types) + 1)]\n+    if column_names:\n+        for i, cname in enumerate([cn.strip() for cn in column_names.split(\',\')]):\n+            if cname and i < len(col_names):\n+                col_names[i] = cname\n+    col_def = []\n+    for i, col_name in enumerate(col_names):\n+        col_def.append(\'%s %s\' % (col_names[i], col_types[i]))\n+    return col_names, col_types, col_def\n+\n+\n+def create_table(conn, file_path, table_name, skip=0, comment_char=\'#\', column_names=None):\n+    col_names, col_types, col_def = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names)\n+    col_func = [float if t == \'REAL\' else int if t == \'INTEGER\' else str for t in col_types]\n+    table_def = \'CREATE TABLE %s (\\n    %s\\n);\' % (table_name, \', \\n    \'.join(col_def))\n+    # print >> sys.stdout, table_def\n+    insert_stmt = \'INSERT INTO %s(%s) VALUES(%s)\' % (table_name, \',\'.join(col_names), \',\'.join(["?" for x in col_names]))\n+    # print >> sys.stdout, insert_stmt\n+    data_lines = 0\n+    try:\n+        c = conn.cursor()\n+        c.execute(table_def)\n+        with open(file_path, "r") as fh:\n+            for linenum, line in enumerate(fh):\n+                if linenum < skip or line.startswith(comme'..b'=\'query\', default=None, help=\'SQL query\')\n+    parser.add_option(\'-Q\', \'--query_file\', dest=\'query_file\', default=None, help=\'SQL query file\')\n+    parser.add_option(\'-n\', \'--no_header\', dest=\'no_header\', action=\'store_true\', default=False, help=\'Include a column headers line\')\n+    parser.add_option(\'-o\', \'--output\', dest=\'output\', default=None, help=\'Output file for query results\')\n+    (options, args) = parser.parse_args()\n+\n+    # open sqlite connection\n+    conn = get_connection(options.sqlitedb)\n+    # determine output destination\n+    if options.output is not None:\n+        try:\n+            outputPath = os.path.abspath(options.output)\n+            outputFile = open(outputPath, \'w\')\n+        except Exception, e:\n+            print >> sys.stderr, "failed: %s" % e\n+            exit(3)\n+    else:\n+        outputFile = sys.stdout\n+\n+    # get table defs\n+    if options.tables:\n+        for ti, table in enumerate(options.tables):\n+            table_name = \'t%d\' % (ti + 1)\n+            column_names = None\n+            fields = table.split(\'=\')\n+            path = fields[0]\n+            if len(fields) > 1:\n+                names = fields[1].split(\':\')\n+                table_name = names[0] if names[0] else table_name\n+                if len(names) > 1:\n+                    column_names = names[1]\n+            # print >> sys.stdout, \'%s %s\' % (table_name, path)\n+            create_table(conn, path, table_name, column_names=column_names)\n+    if options.jsonfile:\n+        try:\n+            fh = open(options.jsonfile)\n+            tdef = json.load(fh)\n+            if \'tables\' in tdef:\n+                for ti, table in enumerate(tdef[\'tables\']):\n+                    path = table[\'file_path\']\n+                    table_name = table[\'table_name\'] if \'table_name\' in table else \'t%d\' % (ti + 1)\n+                    column_names = table[\'column_names\'] if \'column_names\' in table else None\n+                    comment_lines = table[\'comment_lines\'] if \'comment_lines\' in table else 0\n+                    create_table(conn, path, table_name, column_names=column_names, skip=comment_lines)\n+        except Exception, exc:\n+            print >> sys.stderr, "Error: %s" % exc\n+    conn.close()\n+\n+    query = None\n+    if (options.query_file is not None):\n+        with open(options.query_file, \'r\') as fh:\n+            query = \'\'\n+            for line in fh:\n+                query += line\n+    elif (options.query is not None):\n+        query = options.query\n+\n+    if (query is None):\n+        try:\n+            conn = get_connection(options.sqlitedb)\n+            c = conn.cursor()\n+            rslt = c.execute(tables_query).fetchall()\n+            for table, sql in rslt:\n+                print >> sys.stderr, "Table %s:" % table\n+                try:\n+                    col_query = \'SELECT * FROM %s LIMIT 0\' % table\n+                    cur = conn.cursor().execute(col_query)\n+                    cols = [col[0] for col in cur.description]\n+                    print >> sys.stderr, " Columns: %s" % cols\n+                except Exception, exc:\n+                    print >> sys.stderr, "Error: %s" % exc\n+        except Exception, exc:\n+            print >> sys.stderr, "Error: %s" % exc\n+        exit(0)\n+    # if not sqlite.is_read_only_query(query):\n+    #    print >> sys.stderr, "Error: Must be a read only query"\n+    #    exit(2)\n+    try:\n+        conn = get_connection(options.sqlitedb, addfunctions=True)\n+        cur = conn.cursor()\n+        results = cur.execute(query)\n+        if not options.no_header:\n+            outputFile.write("#%s\\n" % \'\\t\'.join([str(col[0]) for col in cur.description]))\n+            # yield [col[0] for col in cur.description]\n+        for i, row in enumerate(results):\n+            # yield [val for val in row]\n+            outputFile.write("%s\\n" % \'\\t\'.join([str(val) for val in row]))\n+    except Exception, exc:\n+        print >> sys.stderr, "Error: %s" % exc\n+        exit(1)\n+\n+if __name__ == "__main__":\n+    __main__()\n'
b
diff -r 000000000000 -r 926c62f7fa09 query_tabular.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/query_tabular.xml Thu Jan 21 08:23:45 2016 -0500
[
b'@@ -0,0 +1,303 @@\n+<tool id="query_tabular" name="Query Tabular" version="0.1.0">\n+    <description>using sqlite sql</description>\n+\n+    <requirements>\n+    </requirements>\n+    <stdio>\n+        <exit_code range="1:" />\n+    </stdio>\n+    <command interpreter="python"><![CDATA[\n+        query_tabular.py \n+        #if $save_db\n+        -s $sqlitedb\n+        #else\n+        -s $workdb\n+        #end if\n+        -j $table_json\n+        #*\n+        ##          #for $i,$tbl in enumerate($tables):\n+        ##            #if $tbl.table_name\n+        ##            #set $tname = $tbl.table_name\n+        ##            #else\n+        ##            #set $tname = \'t\' + str($i + 1) \n+        ##            #end if\n+        ##            #if $tbl.col_names:\n+        ##            #set $col_names = \':\' + str($tbl.col_names)\n+        ##            #else \n+        ##            #set $col_names = \'\'\n+        ##            #end if\n+        ##            -t ${tbl.table}=${tname}${$col_names}\n+        ##          #end for\n+        *#\n+        #if $sqlquery:\n+          -Q "$query_file" \n+          $no_header\n+          -o $output\n+        #end if\n+    ]]></command>\n+    <configfiles>\n+        <configfile name="query_file">\n+$sqlquery\n+        </configfile>\n+        <configfile name="table_json">\n+#import json\n+#set $jtbldef = dict()\n+#set $jtbls = []\n+#set $jtbldef[\'tables\'] = $jtbls\n+#for $i,$tbl in enumerate($tables):\n+  #set $jtbl = dict()\n+  #set $jtbl[\'file_path\'] = str($tbl.table)\n+  #if $tbl.table_name\n+  #set $tname = str($tbl.table_name)\n+  #else\n+  #set $tname = \'t\' + str($i + 1) \n+  #end if\n+  #set $jtbl[\'table_name\'] = $tname\n+  #if $tbl.col_names:\n+  #set $col_names = str($tbl.col_names)\n+  #else \n+  #set $col_names = \'\'\n+  #end if\n+  #set $jtbl[\'column_names\'] = $col_names\n+  #if str($tbl.skip_lines) != \'\':\n+    #set $jtbl[\'comment_lines\'] = $tbl.skip_lines\n+  #elif $tbl.table.metadata.comment_lines > 0:\n+    #set $jtbl[\'comment_lines\'] = int($tbl.table.metadata.comment_lines)\n+  #end if\n+  #set $jtbls += [$jtbl]\n+#end for\n+#echo $json.dumps($jtbldef)\n+        </configfile>\n+    </configfiles>\n+    <inputs>\n+        <param name="workdb" type="hidden" value="workdb.sqlite" label=""/>\n+        <repeat name="tables" title="Add tables" min="1">\n+            <param name="table" type="data" format="tabular" label="Dataset"/>\n+            <param name="table_name" type="text" value="" optional="true" label="Table name">\n+                <help>By default, tables will be named: t1,t2,...,tn</help>\n+                <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\\w*$</validator>\n+            </param>\n+            <!--\n+            <param name="sel_cols" label="Include columns" type="data_column" multiple="true" data_ref="table" />\n+            -->\n+            <param name="col_names" type="text" value="" optional="true" label="Column names">\n+                <help>By default, table columns will be named: c1,c2,c3,...,cn</help>\n+                <validator type="regex" message="A List of separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores">^([A-Za-z]\\w*)?(,([A-Za-z]\\w*)?)*$</validator>\n+            </param>\n+            <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the datatype comment lines metadata" />\n+        </repeat>\n+        <param name="sqlquery" type="text" area="true" size="10x80" value="" optional="true" label="SQL Query">\n+                <help>By default, tables will be named: t1,t2,...,tn</help>\n+                <sanitizer sanitize="False"/>\n+                <validator type="regex" message="">^(?i)\\s*select\\s+.*\\s+from\\s+.*$</validator>\n+        </param>\n+        <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers"/>\n+\n+        <param name="save_db"'..b'\n+    =========== ========== ========== ===================== ========== ============\n+    #CustomerID FirstName  LastName   Email                 DOB        Phone\n+    =========== ========== ========== ===================== ========== ============\n+    1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222\n+    2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545\n+    3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232\n+    4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888\n+    =========== ========== ========== ===================== ========== ============\n+  \n+   Dataset *sales*\n+  \n+    Table name: "sales"\n+  \n+    Column names: "CustomerID,Date,SaleAmount"\n+  \n+    =============  ============  ============\n+      #CustomerID    Date          SaleAmount\n+    =============  ============  ============\n+               2    2004-05-06         100.22\n+               1    2004-05-07          99.95\n+               3    2004-05-07         122.95\n+               3    2004-05-13         100.00\n+               4    2004-05-22         555.55\n+    =============  ============  ============\n+  \n+  The query\n+  \n+  ::\n+  \n+    SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales" \n+    FROM customers join sales on customers.CustomerID = sales.CustomerID \n+    GROUP BY customers.CustomerID ORDER BY TotalSales DESC;\n+  \n+  Produces this tabular output:\n+  \n+    ========== ======== ==========\n+    #FirstName LastName TotalSales\n+    ========== ======== ==========\n+    James      Smith    555.55\n+    Paula      Brown    222.95\n+    Steven     Goldfish 100.22\n+    John       Smith    99.95\n+    ========== ======== ==========\n+  \n+  \n+  If the optional Table name and Column names inputs are not used, the query would be:\n+  \n+  ::\n+  \n+    SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales" \n+    FROM t1 join t2 on t1.c1 = t2.c1 \n+    GROUP BY t1.c1 ORDER BY TotalSales DESC;\n+  \n+  You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5: \n+  \n+    Column names: ,FirstName,LastName,,BirthDate\n+  \n+    Results in the following data base table\n+  \n+    =========== ========== ========== ===================== ========== ============\n+    #c1         FirstName  LastName   c4                    BirthDate  c6\n+    =========== ========== ========== ===================== ========== ============\n+    1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222\n+    2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545\n+    3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232\n+    4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888\n+    =========== ========== ========== ===================== ========== ============\n+\n+  Regular_expression_ functions are included for: \n+\n+  ::\n+\n+    matching:      re_match(\'pattern\',column) \n+\n+    SELECT t1.FirstName, t1.LastName\n+    FROM t1\n+    WHERE re_match(\'^.*\\.(net|org)$\',c4)\n+\n+  Results:\n+\n+    =========== ==========\n+    #FirstName  LastName\n+    =========== ==========\n+    Steven      Goldfish\n+    Paula       Brown\n+    =========== ==========\n+\n+\n+  ::\n+\n+    searching:     re_search(\'pattern\',column)\n+    substituting:  re_sub(\'pattern\',\'replacement,column)\n+\n+    SELECT t1.FirstName, t1.LastName, re_sub(\'^\\d{2}(\\d{2})-(\\d\\d)-(\\d\\d)\',\'\\3/\\2/\\1\',BirthDate) as "DOB"\n+    FROM t1\n+    WHERE re_search(\'[hp]er\',c4)\n+\n+  Results:\n+\n+    =========== ========== ==========\n+    #FirstName  LastName   DOB\n+    =========== ========== ==========\n+    Steven      Goldfish   04/04/74\n+    Paula       Brown      24/05/78\n+    James       Smith      20/10/80\n+    =========== ========== ==========\n+\n+.. _Regular_expression: https://docs.python.org/release/2.7/library/re.html\n+.. _SQLite: http://www.sqlite.org/index.html\n+\n+    ]]></help>\n+</tool>\n'
b
diff -r 000000000000 -r 926c62f7fa09 test-data/IEDB.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IEDB.tsv Thu Jan 21 08:23:45 2016 -0500
b
@@ -0,0 +1,17 @@
+#ID allele seq_num start end length peptide method percentile_rank ann_ic50 ann_rank smm_ic50 smm_rank comblib_sidney2008_score comblib_sidney2008_rank netmhcpan_ic50 netmhcpan_rank
+PPAP2C HLA-A*02:01 1 3 11 9 GMYCMVFLV Consensus (ann/smm/comblib_sidney2008) 0.2 4 0.2 3.77 0.2 7.1e-06 0.5 - -
+PPAP2C HLA-A*23:01 1 1 9 9 SFGMYCMVF Consensus (ann/smm) 0.5 67 0.5 137.54 0.5 - - - -
+PPAP2C HLA-A*23:01 1 4 12 9 MYCMVFLVK Consensus (ann/smm) 0.65 146 0.7 160.11 0.6 - - - -
+PPAP2C HLA-A*02:01 1 2 10 9 FGMYCMVFL Consensus (ann/smm/comblib_sidney2008) 2.3 222 3.1 150.01 2.3 2.14e-05 1.3 - -
+PPAP2C HLA-A*23:01 1 3 11 9 GMYCMVFLV Consensus (ann/smm) 4.95 3256 4 2706.64 5.9 - - - -
+PPAP2C HLA-A*23:01 1 2 10 9 FGMYCMVFL Consensus (ann/smm) 6.55 4423 4.9 4144.10 8.2 - - - -
+PPAP2C HLA-A*02:01 1 1 9 9 SFGMYCMVF Consensus (ann/smm/comblib_sidney2008) 45 24390 45 44989.38 39 0.01 91 - -
+PPAP2C HLA-A*02:01 1 4 12 9 MYCMVFLVK Consensus (ann/smm/comblib_sidney2008) 54 23399 41 157801.09 54 0.01 86 - -
+ADAMTSL1 HLA-A*02:01 1 1 9 9 SLDMCISGL Consensus (ann/smm/comblib_sidney2008) 1 26 1 51.65 0.9 3.02e-05 1.7 - -
+ADAMTSL1 HLA-A*23:01 1 4 12 9 MCISGLCQL Consensus (ann/smm) 6.65 5781 5.9 3626.02 7.4 - - - -
+ADAMTSL1 HLA-A*02:01 1 4 12 9 MCISGLCQL Consensus (ann/smm/comblib_sidney2008) 14 1823 6.5 2612.82 14 0.00056 24 - -
+ADAMTSL1 HLA-A*23:01 1 1 9 9 SLDMCISGL Consensus (ann/smm) 30.5 27179 34 24684.82 27 - - - -
+ADAMTSL1 HLA-A*02:01 1 2 10 9 LDMCISGLC Consensus (ann/smm/comblib_sidney2008) 42 23677 42 53716.78 41 0.01 71 - -
+ADAMTSL1 HLA-A*23:01 1 3 11 9 DMCISGLCQ Consensus (ann/smm) 64.5 34451 73 118148.99 56 - - - -
+ADAMTSL1 HLA-A*23:01 1 2 10 9 LDMCISGLC Consensus (ann/smm) 76.0 33222 62 665932.18 90 - - - -
+ADAMTSL1 HLA-A*02:01 1 3 11 9 DMCISGLCQ Consensus (ann/smm/comblib_sidney2008) 97 31630 98 639896.89 71 0.03 97 - -
b
diff -r 000000000000 -r 926c62f7fa09 test-data/customers.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/customers.tsv Thu Jan 21 08:23:45 2016 -0500
b
@@ -0,0 +1,5 @@
+#CustomerID FirstName LastName Email DOB Phone
+1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222
+2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545
+3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232
+4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888
b
diff -r 000000000000 -r 926c62f7fa09 test-data/netMHC_summary.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/netMHC_summary.tsv Thu Jan 21 08:23:45 2016 -0500
b
@@ -0,0 +1,9 @@
+#pos peptide logscore affinity(nM) Bind Level Protein Name Allele
+2 GMYCMVFLV 0.858 4 SB PPAP2C HLA-A02:01
+1 FGMYCMVFL 0.501 222 WB PPAP2C HLA-A02:01
+3 MYCMVFLVK 0.070 23399 PPAP2C HLA-A02:01
+0 SFGMYCMVF 0.066 24390 PPAP2C HLA-A02:01
+0 SLDMCISGL 0.698 26 SB ADAMTSL1 HLA-A02:01
+3 MCISGLCQL 0.306 1823 ADAMTSL1 HLA-A02:01
+1 LDMCISGLC 0.069 23677 ADAMTSL1 HLA-A02:01
+2 DMCISGLCQ 0.042 31630 ADAMTSL1 HLA-A02:01
b
diff -r 000000000000 -r 926c62f7fa09 test-data/query_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query_results.tsv Thu Jan 21 08:23:45 2016 -0500
b
@@ -0,0 +1,17 @@
+#ID peptide start end percentile_rank logscore affinity Bind_Level
+PPAP2C GMYCMVFLV 3 11 0.2 0.858 4 SB
+PPAP2C GMYCMVFLV 3 11 4.95 0.858 4 SB
+ADAMTSL1 SLDMCISGL 1 9 1.0 0.698 26 SB
+ADAMTSL1 SLDMCISGL 1 9 30.5 0.698 26 SB
+PPAP2C FGMYCMVFL 2 10 2.3 0.501 222 WB
+PPAP2C FGMYCMVFL 2 10 6.55 0.501 222 WB
+ADAMTSL1 MCISGLCQL 4 12 6.65 0.306 1823 None
+ADAMTSL1 MCISGLCQL 4 12 14.0 0.306 1823 None
+PPAP2C MYCMVFLVK 4 12 0.65 0.07 23399 None
+PPAP2C MYCMVFLVK 4 12 54.0 0.07 23399 None
+ADAMTSL1 LDMCISGLC 2 10 42.0 0.069 23677 None
+ADAMTSL1 LDMCISGLC 2 10 76.0 0.069 23677 None
+PPAP2C SFGMYCMVF 1 9 0.5 0.066 24390 None
+PPAP2C SFGMYCMVF 1 9 45.0 0.066 24390 None
+ADAMTSL1 DMCISGLCQ 3 11 64.5 0.042 31630 None
+ADAMTSL1 DMCISGLCQ 3 11 97.0 0.042 31630 None
b
diff -r 000000000000 -r 926c62f7fa09 test-data/regex_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regex_results.tsv Thu Jan 21 08:23:45 2016 -0500
b
@@ -0,0 +1,4 @@
+#FirstName LastName DOB
+Steven Goldfish 04/04/74
+Paula Brown 24/05/78
+James Smith 20/10/80
b
diff -r 000000000000 -r 926c62f7fa09 test-data/sales.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sales.tsv Thu Jan 21 08:23:45 2016 -0500
b
@@ -0,0 +1,6 @@
+#CustomerID Date SaleAmount
+2 2004-05-06 100.22
+1 2004-05-07 99.95
+3 2004-05-07 122.95
+3 2004-05-13 100.00
+4 2004-05-22 555.55
b
diff -r 000000000000 -r 926c62f7fa09 test-data/sales_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sales_results.tsv Thu Jan 21 08:23:45 2016 -0500
b
@@ -0,0 +1,5 @@
+#FirstName LastName TotalSales
+James Smith 555.55
+Paula Brown 222.95
+Steven Goldfish 100.22
+John Smith 99.95