Galaxy |

Changeset 0:926c62f7fa09 (2016-01-21)

Next changeset 1:c7a1a686e42b (2016-02-12)

Commit message:
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 9ae87502ea7c3da33ecc453872c4eb2f41ecea4a-dirty

added:
query_tabular.py
query_tabular.xml
test-data/IEDB.tsv
test-data/customers.tsv
test-data/netMHC_summary.tsv
test-data/query_results.tsv
test-data/regex_results.tsv
test-data/sales.tsv
test-data/sales_results.tsv

diff -r 000000000000 -r 926c62f7fa09 query_tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/query_tabular.py Thu Jan 21 08:23:45 2016 -0500

[

b'@@ -0,0 +1,247 @@\n+#!/usr/bin/env python\n+"""\n+"""\n+import sys\n+import re\n+import os.path\n+import json\n+import sqlite3 as sqlite\n+import optparse\n+from optparse import OptionParser\n+\n+"""\n+TODO:\n+- could read column names from comment lines, but issues with legal names\n+- could add some transformations on tabular columns,\n+ e.g. a regex to format date/time strings\n+ c2 : re.sub(\'pat\', \'sub\', c2)\n+ c3 :\n+- column_defs dict of columns to create from tabular input\n+ column_defs : { \'name1\' : \'expr\', \'name2\' : \'expr\'}\n+- allow multiple queries and outputs\n+- add a --json input for table definitions (or yaml)\n+JSON config:\n+{ tables : [\n+ { file_path : \'/home/galaxy/dataset_101.dat\',\n+ table_name : \'t1\',\n+ column_names : [\'c1\', \'c2\', \'c3\'],\n+ comment_lines : 1\n+ },\n+ { file_path : \'/home/galaxy/dataset_102.dat\',\n+ table_name : \'t2\',\n+ column_names : [\'c1\', \'c2\', \'c3\']\n+ },\n+ { file_path : \'/home/galaxy/dataset_103.dat\',\n+ table_name : \'test\',\n+ column_names : [\'c1\', \'c2\', \'c3\']\n+ }\n+ ]\n+}\n+"""\n+\n+tables_query = \\\n+ "SELECT name, sql FROM sqlite_master WHERE type=\'table\' ORDER BY name"\n+\n+\n+def getValueType(val):\n+ if val or 0. == val:\n+ try:\n+ int(val)\n+ return \'INTEGER\'\n+ except:\n+ try:\n+ float(val)\n+ return \'REAL\'\n+ except:\n+ return \'TEXT\'\n+ return None\n+\n+\n+def get_column_def(file_path, table_name, skip=0, comment_char=\'#\',\n+ column_names=None, max_lines=100):\n+ col_pref = [\'TEXT\', \'REAL\', \'INTEGER\', None]\n+ col_types = []\n+ data_lines = 0\n+ try:\n+ with open(file_path, "r") as fh:\n+ for linenum, line in enumerate(fh):\n+ if linenum < skip:\n+ continue\n+ if line.startswith(comment_char):\n+ continue\n+ data_lines += 1\n+ try:\n+ fields = line.split(\'\\t\')\n+ while len(col_types) < len(fields):\n+ col_types.append(None)\n+ for i, val in enumerate(fields):\n+ colType = getValueType(val)\n+ if col_pref.index(colType) < col_pref.index(col_types[i]):\n+ col_types[i] = colType\n+ except Exception, e:\n+ print >> sys.stderr, \'Failed at line: %d err: %s\' % (linenum, e)\n+ except Exception, e:\n+ print >> sys.stderr, \'Failed: %s\' % (e)\n+ for i, col_type in enumerate(col_types):\n+ if not col_type:\n+ col_types[i] = \'TEXT\'\n+ col_names = [\'c%d\' % i for i in range(1, len(col_types) + 1)]\n+ if column_names:\n+ for i, cname in enumerate([cn.strip() for cn in column_names.split(\',\')]):\n+ if cname and i < len(col_names):\n+ col_names[i] = cname\n+ col_def = []\n+ for i, col_name in enumerate(col_names):\n+ col_def.append(\'%s %s\' % (col_names[i], col_types[i]))\n+ return col_names, col_types, col_def\n+\n+\n+def create_table(conn, file_path, table_name, skip=0, comment_char=\'#\', column_names=None):\n+ col_names, col_types, col_def = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names)\n+ col_func = [float if t == \'REAL\' else int if t == \'INTEGER\' else str for t in col_types]\n+ table_def = \'CREATE TABLE %s (\\n %s\\n);\' % (table_name, \', \\n \'.join(col_def))\n+ # print >> sys.stdout, table_def\n+ insert_stmt = \'INSERT INTO %s(%s) VALUES(%s)\' % (table_name, \',\'.join(col_names), \',\'.join(["?" for x in col_names]))\n+ # print >> sys.stdout, insert_stmt\n+ data_lines = 0\n+ try:\n+ c = conn.cursor()\n+ c.execute(table_def)\n+ with open(file_path, "r") as fh:\n+ for linenum, line in enumerate(fh):\n+ if linenum < skip or line.startswith(comme'..b'=\'query\', default=None, help=\'SQL query\')\n+ parser.add_option(\'-Q\', \'--query_file\', dest=\'query_file\', default=None, help=\'SQL query file\')\n+ parser.add_option(\'-n\', \'--no_header\', dest=\'no_header\', action=\'store_true\', default=False, help=\'Include a column headers line\')\n+ parser.add_option(\'-o\', \'--output\', dest=\'output\', default=None, help=\'Output file for query results\')\n+ (options, args) = parser.parse_args()\n+\n+ # open sqlite connection\n+ conn = get_connection(options.sqlitedb)\n+ # determine output destination\n+ if options.output is not None:\n+ try:\n+ outputPath = os.path.abspath(options.output)\n+ outputFile = open(outputPath, \'w\')\n+ except Exception, e:\n+ print >> sys.stderr, "failed: %s" % e\n+ exit(3)\n+ else:\n+ outputFile = sys.stdout\n+\n+ # get table defs\n+ if options.tables:\n+ for ti, table in enumerate(options.tables):\n+ table_name = \'t%d\' % (ti + 1)\n+ column_names = None\n+ fields = table.split(\'=\')\n+ path = fields[0]\n+ if len(fields) > 1:\n+ names = fields[1].split(\':\')\n+ table_name = names[0] if names[0] else table_name\n+ if len(names) > 1:\n+ column_names = names[1]\n+ # print >> sys.stdout, \'%s %s\' % (table_name, path)\n+ create_table(conn, path, table_name, column_names=column_names)\n+ if options.jsonfile:\n+ try:\n+ fh = open(options.jsonfile)\n+ tdef = json.load(fh)\n+ if \'tables\' in tdef:\n+ for ti, table in enumerate(tdef[\'tables\']):\n+ path = table[\'file_path\']\n+ table_name = table[\'table_name\'] if \'table_name\' in table else \'t%d\' % (ti + 1)\n+ column_names = table[\'column_names\'] if \'column_names\' in table else None\n+ comment_lines = table[\'comment_lines\'] if \'comment_lines\' in table else 0\n+ create_table(conn, path, table_name, column_names=column_names, skip=comment_lines)\n+ except Exception, exc:\n+ print >> sys.stderr, "Error: %s" % exc\n+ conn.close()\n+\n+ query = None\n+ if (options.query_file is not None):\n+ with open(options.query_file, \'r\') as fh:\n+ query = \'\'\n+ for line in fh:\n+ query += line\n+ elif (options.query is not None):\n+ query = options.query\n+\n+ if (query is None):\n+ try:\n+ conn = get_connection(options.sqlitedb)\n+ c = conn.cursor()\n+ rslt = c.execute(tables_query).fetchall()\n+ for table, sql in rslt:\n+ print >> sys.stderr, "Table %s:" % table\n+ try:\n+ col_query = \'SELECT * FROM %s LIMIT 0\' % table\n+ cur = conn.cursor().execute(col_query)\n+ cols = [col[0] for col in cur.description]\n+ print >> sys.stderr, " Columns: %s" % cols\n+ except Exception, exc:\n+ print >> sys.stderr, "Error: %s" % exc\n+ except Exception, exc:\n+ print >> sys.stderr, "Error: %s" % exc\n+ exit(0)\n+ # if not sqlite.is_read_only_query(query):\n+ # print >> sys.stderr, "Error: Must be a read only query"\n+ # exit(2)\n+ try:\n+ conn = get_connection(options.sqlitedb, addfunctions=True)\n+ cur = conn.cursor()\n+ results = cur.execute(query)\n+ if not options.no_header:\n+ outputFile.write("#%s\\n" % \'\\t\'.join([str(col[0]) for col in cur.description]))\n+ # yield [col[0] for col in cur.description]\n+ for i, row in enumerate(results):\n+ # yield [val for val in row]\n+ outputFile.write("%s\\n" % \'\\t\'.join([str(val) for val in row]))\n+ except Exception, exc:\n+ print >> sys.stderr, "Error: %s" % exc\n+ exit(1)\n+\n+if __name__ == "__main__":\n+ __main__()\n'

diff -r 000000000000 -r 926c62f7fa09 query_tabular.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/query_tabular.xml Thu Jan 21 08:23:45 2016 -0500

[

b'@@ -0,0 +1,303 @@\n+<tool id="query_tabular" name="Query Tabular" version="0.1.0">\n+ <description>using sqlite sql</description>\n+\n+ <requirements>\n+ </requirements>\n+ <stdio>\n+ <exit_code range="1:" />\n+ </stdio>\n+ <command interpreter="python"><![CDATA[\n+ query_tabular.py \n+ #if $save_db\n+ -s $sqlitedb\n+ #else\n+ -s $workdb\n+ #end if\n+ -j $table_json\n+ #*\n+ ## #for $i,$tbl in enumerate($tables):\n+ ## #if $tbl.table_name\n+ ## #set $tname = $tbl.table_name\n+ ## #else\n+ ## #set $tname = \'t\' + str($i + 1) \n+ ## #end if\n+ ## #if $tbl.col_names:\n+ ## #set $col_names = \':\' + str($tbl.col_names)\n+ ## #else \n+ ## #set $col_names = \'\'\n+ ## #end if\n+ ## -t ${tbl.table}=${tname}${$col_names}\n+ ## #end for\n+ *#\n+ #if $sqlquery:\n+ -Q "$query_file" \n+ $no_header\n+ -o $output\n+ #end if\n+ ]]></command>\n+ <configfiles>\n+ <configfile name="query_file">\n+$sqlquery\n+ </configfile>\n+ <configfile name="table_json">\n+#import json\n+#set $jtbldef = dict()\n+#set $jtbls = []\n+#set $jtbldef[\'tables\'] = $jtbls\n+#for $i,$tbl in enumerate($tables):\n+ #set $jtbl = dict()\n+ #set $jtbl[\'file_path\'] = str($tbl.table)\n+ #if $tbl.table_name\n+ #set $tname = str($tbl.table_name)\n+ #else\n+ #set $tname = \'t\' + str($i + 1) \n+ #end if\n+ #set $jtbl[\'table_name\'] = $tname\n+ #if $tbl.col_names:\n+ #set $col_names = str($tbl.col_names)\n+ #else \n+ #set $col_names = \'\'\n+ #end if\n+ #set $jtbl[\'column_names\'] = $col_names\n+ #if str($tbl.skip_lines) != \'\':\n+ #set $jtbl[\'comment_lines\'] = $tbl.skip_lines\n+ #elif $tbl.table.metadata.comment_lines > 0:\n+ #set $jtbl[\'comment_lines\'] = int($tbl.table.metadata.comment_lines)\n+ #end if\n+ #set $jtbls += [$jtbl]\n+#end for\n+#echo $json.dumps($jtbldef)\n+ </configfile>\n+ </configfiles>\n+ <inputs>\n+ <param name="workdb" type="hidden" value="workdb.sqlite" label=""/>\n+ <repeat name="tables" title="Add tables" min="1">\n+ <param name="table" type="data" format="tabular" label="Dataset"/>\n+ <param name="table_name" type="text" value="" optional="true" label="Table name">\n+ <help>By default, tables will be named: t1,t2,...,tn</help>\n+ <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\\w*$</validator>\n+ </param>\n+ \n+ <param name="col_names" type="text" value="" optional="true" label="Column names">\n+ <help>By default, table columns will be named: c1,c2,c3,...,cn</help>\n+ <validator type="regex" message="A List of separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores">^([A-Za-z]\\w*)?(,([A-Za-z]\\w*)?)*$</validator>\n+ </param>\n+ <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the datatype comment lines metadata" />\n+ </repeat>\n+ <param name="sqlquery" type="text" area="true" size="10x80" value="" optional="true" label="SQL Query">\n+ <help>By default, tables will be named: t1,t2,...,tn</help>\n+ <sanitizer sanitize="False"/>\n+ <validator type="regex" message="">^(?i)\\s*select\\s+.*\\s+from\\s+.*$</validator>\n+ </param>\n+ <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers"/>\n+\n+ <param name="save_db"'..b'\n+ =========== ========== ========== ===================== ========== ============\n+ #CustomerID FirstName LastName Email DOB Phone\n+ =========== ========== ========== ===================== ========== ============\n+ 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222\n+ 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545\n+ 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232\n+ 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888\n+ =========== ========== ========== ===================== ========== ============\n+ \n+ Dataset *sales*\n+ \n+ Table name: "sales"\n+ \n+ Column names: "CustomerID,Date,SaleAmount"\n+ \n+ ============= ============ ============\n+ #CustomerID Date SaleAmount\n+ ============= ============ ============\n+ 2 2004-05-06 100.22\n+ 1 2004-05-07 99.95\n+ 3 2004-05-07 122.95\n+ 3 2004-05-13 100.00\n+ 4 2004-05-22 555.55\n+ ============= ============ ============\n+ \n+ The query\n+ \n+ ::\n+ \n+ SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales" \n+ FROM customers join sales on customers.CustomerID = sales.CustomerID \n+ GROUP BY customers.CustomerID ORDER BY TotalSales DESC;\n+ \n+ Produces this tabular output:\n+ \n+ ========== ======== ==========\n+ #FirstName LastName TotalSales\n+ ========== ======== ==========\n+ James Smith 555.55\n+ Paula Brown 222.95\n+ Steven Goldfish 100.22\n+ John Smith 99.95\n+ ========== ======== ==========\n+ \n+ \n+ If the optional Table name and Column names inputs are not used, the query would be:\n+ \n+ ::\n+ \n+ SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales" \n+ FROM t1 join t2 on t1.c1 = t2.c1 \n+ GROUP BY t1.c1 ORDER BY TotalSales DESC;\n+ \n+ You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5: \n+ \n+ Column names: ,FirstName,LastName,,BirthDate\n+ \n+ Results in the following data base table\n+ \n+ =========== ========== ========== ===================== ========== ============\n+ #c1 FirstName LastName c4 BirthDate c6\n+ =========== ========== ========== ===================== ========== ============\n+ 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222\n+ 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545\n+ 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232\n+ 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888\n+ =========== ========== ========== ===================== ========== ============\n+\n+ Regular_expression_ functions are included for: \n+\n+ ::\n+\n+ matching: re_match(\'pattern\',column) \n+\n+ SELECT t1.FirstName, t1.LastName\n+ FROM t1\n+ WHERE re_match(\'^.*\\.(net|org)$\',c4)\n+\n+ Results:\n+\n+ =========== ==========\n+ #FirstName LastName\n+ =========== ==========\n+ Steven Goldfish\n+ Paula Brown\n+ =========== ==========\n+\n+\n+ ::\n+\n+ searching: re_search(\'pattern\',column)\n+ substituting: re_sub(\'pattern\',\'replacement,column)\n+\n+ SELECT t1.FirstName, t1.LastName, re_sub(\'^\\d{2}(\\d{2})-(\\d\\d)-(\\d\\d)\',\'\\3/\\2/\\1\',BirthDate) as "DOB"\n+ FROM t1\n+ WHERE re_search(\'[hp]er\',c4)\n+\n+ Results:\n+\n+ =========== ========== ==========\n+ #FirstName LastName DOB\n+ =========== ========== ==========\n+ Steven Goldfish 04/04/74\n+ Paula Brown 24/05/78\n+ James Smith 20/10/80\n+ =========== ========== ==========\n+\n+.. _Regular_expression: https://docs.python.org/release/2.7/library/re.html\n+.. _SQLite: http://www.sqlite.org/index.html\n+\n+ ]]></help>\n+</tool>\n'

diff -r 000000000000 -r 926c62f7fa09 test-data/IEDB.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IEDB.tsv Thu Jan 21 08:23:45 2016 -0500

@@ -0,0 +1,17 @@
+#ID allele seq_num start end length peptide method percentile_rank ann_ic50 ann_rank smm_ic50 smm_rank comblib_sidney2008_score comblib_sidney2008_rank netmhcpan_ic50 netmhcpan_rank
+PPAP2C HLA-A*02:01 1 3 11 9 GMYCMVFLV Consensus (ann/smm/comblib_sidney2008) 0.2 4 0.2 3.77 0.2 7.1e-06 0.5 - -
+PPAP2C HLA-A*23:01 1 1 9 9 SFGMYCMVF Consensus (ann/smm) 0.5 67 0.5 137.54 0.5 - - - -
+PPAP2C HLA-A*23:01 1 4 12 9 MYCMVFLVK Consensus (ann/smm) 0.65 146 0.7 160.11 0.6 - - - -
+PPAP2C HLA-A*02:01 1 2 10 9 FGMYCMVFL Consensus (ann/smm/comblib_sidney2008) 2.3 222 3.1 150.01 2.3 2.14e-05 1.3 - -
+PPAP2C HLA-A*23:01 1 3 11 9 GMYCMVFLV Consensus (ann/smm) 4.95 3256 4 2706.64 5.9 - - - -
+PPAP2C HLA-A*23:01 1 2 10 9 FGMYCMVFL Consensus (ann/smm) 6.55 4423 4.9 4144.10 8.2 - - - -
+PPAP2C HLA-A*02:01 1 1 9 9 SFGMYCMVF Consensus (ann/smm/comblib_sidney2008) 45 24390 45 44989.38 39 0.01 91 - -
+PPAP2C HLA-A*02:01 1 4 12 9 MYCMVFLVK Consensus (ann/smm/comblib_sidney2008) 54 23399 41 157801.09 54 0.01 86 - -
+ADAMTSL1 HLA-A*02:01 1 1 9 9 SLDMCISGL Consensus (ann/smm/comblib_sidney2008) 1 26 1 51.65 0.9 3.02e-05 1.7 - -
+ADAMTSL1 HLA-A*23:01 1 4 12 9 MCISGLCQL Consensus (ann/smm) 6.65 5781 5.9 3626.02 7.4 - - - -
+ADAMTSL1 HLA-A*02:01 1 4 12 9 MCISGLCQL Consensus (ann/smm/comblib_sidney2008) 14 1823 6.5 2612.82 14 0.00056 24 - -
+ADAMTSL1 HLA-A*23:01 1 1 9 9 SLDMCISGL Consensus (ann/smm) 30.5 27179 34 24684.82 27 - - - -
+ADAMTSL1 HLA-A*02:01 1 2 10 9 LDMCISGLC Consensus (ann/smm/comblib_sidney2008) 42 23677 42 53716.78 41 0.01 71 - -
+ADAMTSL1 HLA-A*23:01 1 3 11 9 DMCISGLCQ Consensus (ann/smm) 64.5 34451 73 118148.99 56 - - - -
+ADAMTSL1 HLA-A*23:01 1 2 10 9 LDMCISGLC Consensus (ann/smm) 76.0 33222 62 665932.18 90 - - - -
+ADAMTSL1 HLA-A*02:01 1 3 11 9 DMCISGLCQ Consensus (ann/smm/comblib_sidney2008) 97 31630 98 639896.89 71 0.03 97 - -

diff -r 000000000000 -r 926c62f7fa09 test-data/customers.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/customers.tsv Thu Jan 21 08:23:45 2016 -0500

@@ -0,0 +1,5 @@
+#CustomerID FirstName LastName Email DOB Phone
+1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222
+2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545
+3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232
+4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888

diff -r 000000000000 -r 926c62f7fa09 test-data/netMHC_summary.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/netMHC_summary.tsv Thu Jan 21 08:23:45 2016 -0500

@@ -0,0 +1,9 @@
+#pos peptide logscore affinity(nM) Bind Level Protein Name Allele
+2 GMYCMVFLV 0.858 4 SB PPAP2C HLA-A02:01
+1 FGMYCMVFL 0.501 222 WB PPAP2C HLA-A02:01
+3 MYCMVFLVK 0.070 23399 PPAP2C HLA-A02:01
+0 SFGMYCMVF 0.066 24390 PPAP2C HLA-A02:01
+0 SLDMCISGL 0.698 26 SB ADAMTSL1 HLA-A02:01
+3 MCISGLCQL 0.306 1823 ADAMTSL1 HLA-A02:01
+1 LDMCISGLC 0.069 23677 ADAMTSL1 HLA-A02:01
+2 DMCISGLCQ 0.042 31630 ADAMTSL1 HLA-A02:01

diff -r 000000000000 -r 926c62f7fa09 test-data/query_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query_results.tsv Thu Jan 21 08:23:45 2016 -0500

@@ -0,0 +1,17 @@
+#ID peptide start end percentile_rank logscore affinity Bind_Level
+PPAP2C GMYCMVFLV 3 11 0.2 0.858 4 SB
+PPAP2C GMYCMVFLV 3 11 4.95 0.858 4 SB
+ADAMTSL1 SLDMCISGL 1 9 1.0 0.698 26 SB
+ADAMTSL1 SLDMCISGL 1 9 30.5 0.698 26 SB
+PPAP2C FGMYCMVFL 2 10 2.3 0.501 222 WB
+PPAP2C FGMYCMVFL 2 10 6.55 0.501 222 WB
+ADAMTSL1 MCISGLCQL 4 12 6.65 0.306 1823 None
+ADAMTSL1 MCISGLCQL 4 12 14.0 0.306 1823 None
+PPAP2C MYCMVFLVK 4 12 0.65 0.07 23399 None
+PPAP2C MYCMVFLVK 4 12 54.0 0.07 23399 None
+ADAMTSL1 LDMCISGLC 2 10 42.0 0.069 23677 None
+ADAMTSL1 LDMCISGLC 2 10 76.0 0.069 23677 None
+PPAP2C SFGMYCMVF 1 9 0.5 0.066 24390 None
+PPAP2C SFGMYCMVF 1 9 45.0 0.066 24390 None
+ADAMTSL1 DMCISGLCQ 3 11 64.5 0.042 31630 None
+ADAMTSL1 DMCISGLCQ 3 11 97.0 0.042 31630 None

diff -r 000000000000 -r 926c62f7fa09 test-data/regex_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regex_results.tsv Thu Jan 21 08:23:45 2016 -0500

@@ -0,0 +1,4 @@
+#FirstName LastName DOB
+Steven Goldfish 04/04/74
+Paula Brown 24/05/78
+James Smith 20/10/80

diff -r 000000000000 -r 926c62f7fa09 test-data/sales.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sales.tsv Thu Jan 21 08:23:45 2016 -0500

@@ -0,0 +1,6 @@
+#CustomerID Date SaleAmount
+2 2004-05-06 100.22
+1 2004-05-07 99.95
+3 2004-05-07 122.95
+3 2004-05-13 100.00
+4 2004-05-22 555.55

diff -r 000000000000 -r 926c62f7fa09 test-data/sales_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sales_results.tsv Thu Jan 21 08:23:45 2016 -0500

@@ -0,0 +1,5 @@
+#FirstName LastName TotalSales
+James Smith 555.55
+Paula Brown 222.95
+Steven Goldfish 100.22
+John Smith 99.95