Repository 'query_tabular'
hg clone https://toolshed.g2.bx.psu.edu/repos/jjohnson/query_tabular

Changeset 20:ab27c4bd14b9 (2017-07-14)
Previous changeset 19:9d9ab2c69014 (2017-07-05) Next changeset 21:357fe86f245d (2017-07-14)
Commit message:
Uploaded
modified:
query_tabular.py
query_tabular.xml
added:
filter_tabular.py
filter_tabular.xml
filters.py
load_db.py
macros.xml
query_db.py
sqlite_to_tabular.py
sqlite_to_tabular.xml
test-data/._IEDB.tsv
test-data/._filtered_people_results.tsv
test-data/._filtered_pets_results.tsv
test-data/._netMHC_summary.tsv
test-data/._pet_normalized_query_results.tsv
test-data/._query_results.tsv
test-data/._regex_results.tsv
test-data/._sales_results.tsv
test-data/filtered_people_results.tsv
test-data/filtered_pets_results.tsv
test-data/testdb.sqlite
diff -r 9d9ab2c69014 -r ab27c4bd14b9 filter_tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_tabular.py Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import json
+import optparse
+import os.path
+import sys
+
+from filters import filter_file
+
+
+def __main__():
+    # Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('-i', '--input', dest='input', default=None,
+                      help='Input file for filtering')
+    parser.add_option('-j', '--jsonfile', dest='jsonfile', default=None,
+                      help='JSON array of filter specifications')
+    parser.add_option('-o', '--output', dest='output', default=None,
+                      help='Output file for query results')
+    parser.add_option('-v', '--verbose', dest='verbose', default=False,
+                      action='store_true',
+                      help='verbose')
+    (options, args) = parser.parse_args()
+
+    if options.input is not None:
+        try:
+            inputPath = os.path.abspath(options.input)
+            inputFile = open(inputPath, 'r')
+        except Exception as e:
+            print("failed: %s" % e, file=sys.stderr)
+            exit(3)
+    else:
+        inputFile = sys.stdin
+
+    if options.output is not None:
+        try:
+            outputPath = os.path.abspath(options.output)
+            outputFile = open(outputPath, 'w')
+        except Exception as e:
+            print("failed: %s" % e, file=sys.stderr)
+            exit(3)
+    else:
+        outputFile = sys.stdout
+
+    filters = None
+    if options.jsonfile:
+        try:
+            fh = open(options.jsonfile)
+            filters = json.load(fh)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+
+    if options.verbose and filters:
+        for f in filters:
+            print('%s  %s' % (f['filter'],
+                  ', '.join(
+                  ['%s: %s' % (k, f[k])
+                   for k in set(f.keys()) - set(['filter'])])),
+                  file=sys.stdout)
+
+    try:
+        filter_file(inputFile, outputFile, filters=filters)
+    except Exception as exc:
+        print("Error: %s" % exc, file=sys.stderr)
+        exit(1)
+
+
+if __name__ == "__main__":
+    __main__()
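Note: filter_tabular.py simply feeds the JSON filter specifications to filter_file() from the new filters.py. A minimal sketch of calling that function directly (Python 2, since TabularReader tests isinstance(input_file, file); the path and filter values are illustrative only)::

    from __future__ import print_function

    import sys

    from filters import filter_file

    # illustrative filter chain; 'pets.tsv' is a placeholder path
    filters = [
        {'filter': 'skip', 'count': 1},                  # drop one leading line
        {'filter': 'regex', 'pattern': r'^\s*$',
         'action': 'exclude_match'},                     # drop blank lines
        {'filter': 'append_line_num'},                   # add a line-number column
    ]

    # comment_char='#' (the default) also drops '#' comment lines
    filter_file('pets.tsv', sys.stdout, filters=filters)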
diff -r 9d9ab2c69014 -r ab27c4bd14b9 filter_tabular.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_tabular.xml Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,102 @@
+<tool id="filter_tabular" name="Filter Tabular" version="5.0.0">
+    <description></description>
+
+    <macros>
+         <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command><![CDATA[
+        python $__tool_directory__/filter_tabular.py 
+        -i '$input'
+        -j '$filter_json'
+        -o '$output'
+    ]]></command>
+    <configfiles>
+        <configfile name="filter_json">
+#import json
+@LINEFILTERS@
+#if $input_filters:
+#echo $json.dumps($input_filters)
+#end if
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="Tabular Dataset to filter"/>
+        <expand macro="macro_line_filters" />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" ftype="tabular" value="pets.tsv"/>
+            <repeat name="linefilters">
+                <param name="filter_type" value="comment"/>
+                <param name="comment_char" value="35"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="append_line_num"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="select_columns"/>
+                <param name="columns" value="7,2,3,4,1"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="replace"/>
+                <param name="column" value="c4"/>
+                <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/>
+                <param name="regex_replace" value="19\3-\2-\1"/>
+            </repeat>
+            <output name="output" file="filtered_people_results.tsv"/>
+        </test>
+        <test>
+            <param name="input" ftype="tabular" value="pets.tsv"/>
+            <repeat name="linefilters">
+                <param name="filter_type" value="comment"/>
+                <param name="comment_char" value="35"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="append_line_num"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="select_columns"/>
+                <param name="columns" value="c7,c5,c6"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="normalize"/>
+                <param name="columns" value="c2,c3"/>
+                <param name="separator" value=","/>
+            </repeat>
+            <output name="output" file="filtered_pets_results.tsv"/>
+        </test>
+
+    </tests>
+    <help><![CDATA[
+==============
+Filter Tabular
+==============
+
+  Filter a tabular dataset by applying line filters as it is being read.
+  Multiple filters may be chained; each filter operates on the output of the previous one.
+
+**Inputs**
+
+  A tabular dataset.
+
+
+**Outputs**
+
+  A filtered tabular dataset.
+
+
+@LINEFILTERS_HELP@
+
+@LINEFILTERS_HELP_EXAMPLE@
+
+    ]]></help>
+</tool>
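Note: for the first test case above, the filter_json configfile should expand (via the @LINEFILTERS@ macro added in macros.xml below) to roughly the following JSON array, which is what filter_tabular.py receives through -j. Shown as a Python literal for readability; this is an approximation, not the literal configfile output::

    # approximate content of $filter_json for the first test case
    input_filters = [
        {"filter": "regex", "pattern": "^(#).*$",
         "action": "exclude_match"},                      # comment char 35 == '#'
        {"filter": "append_line_num"},
        {"filter": "select_columns", "columns": [7, 2, 3, 4, 1]},
        {"filter": "replace", "column": 4,
         "pattern": "(\\d+)/(\\d+)/(\\d+)",
         "replace": "19\\3-\\2-\\1"},                     # dd/mm/yy -> 19yy-mm-dd
    ]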
diff -r 9d9ab2c69014 -r ab27c4bd14b9 filters.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filters.py Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import re
+import sys
+
+
+class LineFilter(object):
+    def __init__(self, source, filter_dict):
+        self.source = source
+        self.filter_dict = filter_dict
+        self.func = lambda i, l: l.rstrip('\r\n') if l else None
+        self.src_lines = []
+        self.src_line_cnt = 0
+        if not filter_dict:
+            return
+        if filter_dict['filter'] == 'regex':
+            rgx = re.compile(filter_dict['pattern'])
+            if filter_dict['action'] == 'exclude_match':
+                self.func = lambda i, l: l if not rgx.match(l) else None
+            elif filter_dict['action'] == 'include_match':
+                self.func = lambda i, l: l if rgx.match(l) else None
+            elif filter_dict['action'] == 'exclude_find':
+                self.func = lambda i, l: l if not rgx.search(l) else None
+            elif filter_dict['action'] == 'include_find':
+                self.func = lambda i, l: l if rgx.search(l) else None
+        elif filter_dict['filter'] == 'select_columns':
+            cols = [int(c) - 1 for c in filter_dict['columns']]
+            self.func = lambda i, l: self.select_columns(l, cols)
+        elif filter_dict['filter'] == 'replace':
+            p = filter_dict['pattern']
+            r = filter_dict['replace']
+            c = int(filter_dict['column']) - 1
+            self.func = lambda i, l: '\t'.join(
+                [x if j != c else re.sub(p, r, x) for j, x in enumerate(l.split('\t'))])
+        elif filter_dict['filter'] == 'prepend_line_num':
+            self.func = lambda i, l: '%d\t%s' % (i, l)
+        elif filter_dict['filter'] == 'append_line_num':
+            self.func = lambda i, l: '%s\t%d' % (l.rstrip('\r\n'), i)
+        elif filter_dict['filter'] == 'prepend_text':
+            s = filter_dict['column_text']
+            self.func = lambda i, l: '%s\t%s' % (s, l)
+        elif filter_dict['filter'] == 'append_text':
+            s = filter_dict['column_text']
+            self.func = lambda i, l: '%s\t%s' % (l.rstrip('\r\n'), s)
+        elif filter_dict['filter'] == 'skip':
+            cnt = filter_dict['count']
+            self.func = lambda i, l: l if i > cnt else None
+        elif filter_dict['filter'] == 'normalize':
+            cols = [int(c) - 1 for c in filter_dict['columns']]
+            sep = filter_dict['separator']
+            self.func = lambda i, l: self.normalize(l, cols, sep)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.next()
+
+    def next(self):
+        if not self.src_lines:
+            self.get_lines()
+        if self.src_lines:
+            return self.src_lines.pop(0)
+        raise StopIteration
+
+    def select_columns(self, line, cols):
+        fields = line.split('\t')
+        return '\t'.join([fields[x] for x in cols])
+
+    def normalize(self, line, split_cols, sep):
+        lines = []
+        fields = line.rstrip('\r\n').split('\t')
+        split_fields = dict()
+        cnt = 0
+        for c in split_cols:
+            if c < len(fields):
+                split_fields[c] = fields[c].split(sep)
+                cnt = max(cnt, len(split_fields[c]))
+        if cnt == 0:
+            lines.append('\t'.join(fields))
+        else:
+            for n in range(0, cnt):
+                flds = [x if c not in split_cols else split_fields[c][n]
+                        if n < len(split_fields[c])
+                        else '' for (c, x) in enumerate(fields)]
+                lines.append('\t'.join(flds))
+        return lines
+
+    def get_lines(self):
+        for i, next_line in enumerate(self.source):
+            self.src_line_cnt += 1
+            line = self.func(self.src_line_cnt, next_line)
+            if line:
+                if isinstance(line, list):
+                    self.src_lines.extend(line)
+                else:
+                    self.src_lines.append(line)
+                return
+
+
+class TabularReader:
+    """
+    Tabular file iterator. Returns a list
+    """
+    def __init__(self, input_file, skip=0, comment_char=None, col_idx=None,
+                 filters=None):
+        self.skip = skip
+        self.comment_char = comment_char
+        self.col_idx = col_idx
+        self.filters = filters
+        self.tsv_file = \
+            input_file if isinstance(input_file, file) else open(input_file)
+        if skip and skip > 0:
+            for i in range(skip):
+                if not self.tsv_file.readline():
+                    break
+        source = LineFilter(self.tsv_file, None)
+        if comment_char:
+            source = LineFilter(source,
+                                {"filter": "regex", "pattern": comment_char,
+                                 "action": "exclude_match"})
+        if filters:
+            for f in filters:
+                source = LineFilter(source, f)
+        self.source = source
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.next()
+
+    def next(self):
+        ''' Iteration '''
+        for i, line in enumerate(self.source):
+            fields = line.rstrip('\r\n').split('\t')
+            if self.col_idx:
+                fields = [fields[i] for i in self.col_idx]
+            return fields
+        raise StopIteration
+
+
+def filter_file(input_file, output, skip=0, comment_char='#', filters=None):
+    data_lines = 0
+    try:
+        tr = TabularReader(input_file, skip=skip, comment_char=comment_char,
+                           filters=filters)
+        for linenum, fields in enumerate(tr):
+            data_lines += 1
+            try:
+                output.write('%s\n' % '\t'.join(fields))
+            except Exception as e:
+                print('Failed at line: %d err: %s' % (linenum, e),
+                      file=sys.stderr)
+    except Exception as e:
+        print('Failed: %s' % (e), file=sys.stderr)
+        exit(1)
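Note: both tools share these helpers; TabularReader iterates a tabular source and yields each surviving line as a list of fields after the skip, comment, and user-supplied filters have been applied. A minimal sketch of using it directly (Python 2 as above; 'input.tsv' is a placeholder path)::

    from filters import TabularReader

    # skip two leading lines, drop '#' comment lines, and keep only the
    # first and third fields of each remaining line
    tr = TabularReader('input.tsv', skip=2, comment_char='#', col_idx=[0, 2])
    for fields in tr:
        print('\t'.join(fields))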
diff -r 9d9ab2c69014 -r ab27c4bd14b9 load_db.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/load_db.py Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import sys
+
+from filters import TabularReader
+
+
+def getValueType(val):
+    if val or 0. == val:
+        try:
+            int(val)
+            return 'INTEGER'
+        except:
+            try:
+                float(val)
+                return 'REAL'
+            except:
+                return 'TEXT'
+    return None
+
+
+def get_column_def(file_path, table_name, skip=0, comment_char='#',
+                   column_names=None, max_lines=100, load_named_columns=False,
+                   filters=None):
+    col_pref = ['TEXT', 'REAL', 'INTEGER', None]
+    col_types = []
+    col_idx = None
+    try:
+        tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
+                           col_idx=None, filters=filters)
+        for linenum, fields in enumerate(tr):
+            if linenum > max_lines:
+                break
+            try:
+                while len(col_types) < len(fields):
+                    col_types.append(None)
+                for i, val in enumerate(fields):
+                    colType = getValueType(val)
+                    if col_pref.index(colType) < col_pref.index(col_types[i]):
+                        col_types[i] = colType
+            except Exception as e:
+                print('Failed at line: %d err: %s' % (linenum, e),
+                      file=sys.stderr)
+    except Exception as e:
+        print('Failed: %s' % (e), file=sys.stderr)
+    for i, col_type in enumerate(col_types):
+        if not col_type:
+            col_types[i] = 'TEXT'
+    if column_names:
+        col_names = []
+        if load_named_columns:
+            col_idx = []
+            for i, cname in enumerate(
+                    [cn.strip() for cn in column_names.split(',')]):
+                if cname != '':
+                    col_idx.append(i)
+                    col_names.append(cname)
+            col_types = [col_types[i] for i in col_idx]
+        else:
+            col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
+            for i, cname in enumerate(
+                    [cn.strip() for cn in column_names.split(',')]):
+                if cname and i < len(col_names):
+                    col_names[i] = cname
+    else:
+        col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
+    col_def = []
+    for i, col_name in enumerate(col_names):
+        col_def.append('%s %s' % (col_names[i], col_types[i]))
+    return col_names, col_types, col_def, col_idx
+
+
+def create_table(conn, file_path, table_name, skip=0, comment_char='#',
+                 pkey_autoincr=None, column_names=None,
+                 load_named_columns=False, filters=None,
+                 unique_indexes=[], indexes=[]):
+    col_names, col_types, col_def, col_idx = \
+        get_column_def(file_path, table_name, skip=skip,
+                       comment_char=comment_char, column_names=column_names,
+                       load_named_columns=load_named_columns, filters=filters)
+    col_func = [float if t == 'REAL' else int
+                if t == 'INTEGER' else str for t in col_types]
+    table_def = 'CREATE TABLE %s (\n    %s%s\n);' % (
+                table_name,
+                '%s INTEGER PRIMARY KEY AUTOINCREMENT,' %
+                pkey_autoincr if pkey_autoincr else '',
+                ', \n    '.join(col_def))
+    # print >> sys.stdout, table_def
+    insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (
+                  table_name, ','.join(col_names),
+                  ','.join(["?" for x in col_names]))
+    # print >> sys.stdout, insert_stmt
+    data_lines = 0
+    try:
+        c = conn.cursor()
+        c.execute(table_def)
+        conn.commit()
+        c.close()
+        for i, index in enumerate(unique_indexes):
+            index_name = 'idx_uniq_%s_%d' % (table_name, i)
+            index_columns = index.split(',')
+            create_index(conn, table_name, index_name, index_columns,
+                         unique=True)
+        for i, index in enumerate(indexes):
+            index_name = 'idx_%s_%d' % (table_name, i)
+            index_columns = index.split(',')
+            create_index(conn, table_name, index_name, index_columns)
+        c = conn.cursor()
+        tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
+                           col_idx=col_idx, filters=filters)
+        for linenum, fields in enumerate(tr):
+            data_lines += 1
+            try:
+                vals = [col_func[i](x)
+                        if x else None for i, x in enumerate(fields)]
+                c.execute(insert_stmt, vals)
+            except Exception as e:
+                print('Failed at line: %d err: %s' % (linenum, e),
+                      file=sys.stderr)
+        conn.commit()
+        c.close()
+    except Exception as e:
+        print('Failed: %s' % (e), file=sys.stderr)
+        exit(1)
+
+
+def create_index(conn, table_name, index_name, index_columns, unique=False):
+    index_def = "CREATE %s INDEX %s on %s(%s)" % (
+                'UNIQUE' if unique else '', index_name,
+                table_name, ','.join(index_columns))
+    c = conn.cursor()
+    c.execute(index_def)
+    conn.commit()
+    c.close()
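Note: load_db.py now owns the column-type sniffing and table loading that used to live inline in query_tabular.py. A minimal sketch of loading one tabular file into an SQLite database with create_table (Python 2 as above; the paths, table name, and column names are illustrative and echo the help example)::

    import sqlite3 as sqlite

    from load_db import create_table

    conn = sqlite.connect('workdb.sqlite')     # placeholder database path
    # sniffs column types from the first rows, creates the table and indexes,
    # then bulk-inserts the tabular rows
    create_table(conn, 'customers.tsv', 'customers',
                 comment_char='#',
                 column_names='CustomerID,FirstName,LastName,Email,DOB,Phone',
                 unique_indexes=['CustomerID'],
                 indexes=['LastName,FirstName'])
    conn.close()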
diff -r 9d9ab2c69014 -r ab27c4bd14b9 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,383 @@
+<macros>
+  <token name="@LINEFILTERS@">
+<![CDATA[
+  ## set $input_filters from the line filter parameters
+  #set $input_filters = []
+  #for $fi in $linefilters:
+    #if $fi.filter.filter_type == 'skip':
+      #set $skip_lines = None
+      #if str($fi.filter.skip_lines) != '':
+        #set $skip_lines = int($fi.filter.skip_lines)
+      #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
+        #set $skip_lines = int($tbl.table.metadata.comment_lines)
+      #end if
+      #if $skip_lines is not None:
+        #set $filter_dict = dict()
+        #set $filter_dict['filter'] = str($fi.filter.filter_type)
+        #set $filter_dict['count'] = $skip_lines
+        #silent $input_filters.append($filter_dict)
+      #end if
+    #elif $fi.filter.filter_type == 'comment':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = 'regex'
+      #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join([chr(int(x)).replace('|','[|]') for x in (str($fi.filter.comment_char)).split(',')])
+      #set $filter_dict['action'] = 'exclude_match'
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'regex':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
+      #set $filter_dict['action'] = str($fi.filter.regex_action)
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'select_columns':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'replace':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['column'] = int(str($fi.filter.column).replace('c',''))
+      #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
+      #set $filter_dict['replace'] = str($fi.filter.regex_replace)
+      #silent $input_filters.append($filter_dict)
+    #elif str($fi.filter.filter_type).endswith('pend_line_num'):
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #silent $input_filters.append($filter_dict)
+    #elif str($fi.filter.filter_type).endswith('pend_text'):
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['column_text'] = str($fi.filter.column_text)
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'normalize':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
+      #set $filter_dict['separator'] = str($fi.filter.separator)
+      #silent $input_filters.append($filter_dict)
+    #end if
+  #end for
+]]>
+  </token>
+  <xml name="macro_line_filters">
+                <repeat name="linefilters" title="Filter Tabular Input Lines">
+                    <conditional name="filter">
+                        <param name="filter_type" type="select" label="Filter By">
+                            <option value="skip">skip leading lines</option>
+                            <option value="comment">comment char</option>
+                            <option value="regex">by regex expression matching</option>
+                            <option value="select_columns">select columns</option>
+                            <option value="replace">regex replace value in column</option>
+                            <option value="prepend_line_num">prepend a line number column</option>
+                            <option value="append_line_num">append a line number column</option>
[...]
+    =========== ========== ========== ===================== ========== ============
+    #CustomerID FirstName  LastName   Email                 DOB        Phone
+    =========== ========== ========== ===================== ========== ============
+    1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
+    2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
+    3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
+    4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
+    =========== ========== ========== ===================== ========== ============
+
+   Dataset *sales*
+
+    Table name: "sales"
+
+    Column names: "CustomerID,Date,SaleAmount"
+
+    =============  ============  ============
+      #CustomerID    Date          SaleAmount
+    =============  ============  ============
+               2    2004-05-06         100.22
+               1    2004-05-07          99.95
+               3    2004-05-07         122.95
+               3    2004-05-13         100.00
+               4    2004-05-22         555.55
+    =============  ============  ============
+
+  The query
+
+  ::
+
+    SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales"
+    FROM customers join sales on customers.CustomerID = sales.CustomerID
+    GROUP BY customers.CustomerID ORDER BY TotalSales DESC;
+
+  Produces this tabular output:
+
+    ========== ======== ==========
+    #FirstName LastName TotalSales
+    ========== ======== ==========
+    James      Smith    555.55
+    Paula      Brown    222.95
+    Steven     Goldfish 100.22
+    John       Smith    99.95
+    ========== ======== ==========
+
+  If the optional Table name and Column names inputs are not used, the query would be:
+
+  ::
+
+    SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales"
+    FROM t1 join t2 on t1.c1 = t2.c1
+    GROUP BY t1.c1 ORDER BY TotalSales DESC;
+
+  You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5:
+
+    Column names: ,FirstName,LastName,,BirthDate
+
+    Results in the following database table
+
+    =========== ========== ========== ===================== ========== ============
+    #c1         FirstName  LastName   c4                    BirthDate  c6
+    =========== ========== ========== ===================== ========== ============
+    1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
+    2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
+    3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
+    4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
+    =========== ========== ========== ===================== ========== ============
+
+  Regular_expression_ functions are included for:
+
+  ::
+
+    matching:      re_match('pattern',column)
+
+    SELECT t1.FirstName, t1.LastName
+    FROM t1
+    WHERE re_match('^.*\.(net|org)$',c4)
+
+  Results:
+
+    =========== ==========
+    #FirstName  LastName
+    =========== ==========
+    Steven      Goldfish
+    Paula       Brown
+    =========== ==========
+
+  ::
+
+    searching:     re_search('pattern',column)
+    substituting:  re_sub('pattern','replacement',column)
+
+    SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB"
+    FROM t1
+    WHERE re_search('[hp]er',c4)
+
+  Results:
+
+    =========== ========== ==========
+    #FirstName  LastName   DOB
+    =========== ========== ==========
+    Steven      Goldfish   04/04/74
+    Paula       Brown      24/05/78
+    James       Smith      20/10/80
+    =========== ========== ==========
+
+.. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
+.. _SQLite: http://www.sqlite.org/index.html
+.. _SQLite_functions: http://www.sqlite.org/docs.html
+
+]]>
+  </token>
+
+</macros>
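Note: the re_sub example in the help above rewrites ISO dates as DD/MM/YY; the same pattern can be exercised directly with Python's re module, which is what backs the SQL function registered in query_db.py::

    import re

    # mirrors re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) from the help
    print(re.sub(r'^\d{2}(\d{2})-(\d\d)-(\d\d)', r'\3/\2/\1', '1974-04-04'))
    # -> 04/04/74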
diff -r 9d9ab2c69014 -r ab27c4bd14b9 query_db.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/query_db.py Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import re
+import sqlite3 as sqlite
+import sys
+
+
+TABLE_QUERY = \
+    """
+    SELECT name, sql
+    FROM sqlite_master
+    WHERE type='table'
+    ORDER BY name
+    """
+
+
+def regex_match(expr, item):
+    return re.match(expr, item) is not None
+
+
+def regex_search(expr, item):
+    return re.search(expr, item) is not None
+
+
+def regex_sub(expr, replace, item):
+    return re.sub(expr, replace, item)
+
+
+def get_connection(sqlitedb_path, addfunctions=False):
+    conn = sqlite.connect(sqlitedb_path)
+    if addfunctions:
+        conn.create_function("re_match", 2, regex_match)
+        conn.create_function("re_search", 2, regex_search)
+        conn.create_function("re_sub", 3, regex_sub)
+    return conn
+
+
+def describe_tables(conn, outputFile):
+    try:
+        c = conn.cursor()
+        tables_query = TABLE_QUERY
+        rslt = c.execute(tables_query).fetchall()
+        for table, sql in rslt:
+            print("Table %s:" % table, file=sys.stderr)
+            try:
+                col_query = 'SELECT * FROM %s LIMIT 0' % table
+                cur = conn.cursor().execute(col_query)
+                cols = [col[0] for col in cur.description]
+                print(" Columns: %s" % cols, file=sys.stderr)
+            except Exception as exc:
+                print("Error: %s" % exc, file=sys.stderr)
+    except Exception as exc:
+        print("Error: %s" % exc, file=sys.stderr)
+    exit(0)
+
+
+def run_query(conn, query, outputFile, no_header=False):
+    cur = conn.cursor()
+    results = cur.execute(query)
+    if not no_header:
+        outputFile.write("#%s\n" % '\t'.join(
+            [str(col[0]) for col in cur.description]))
+        # yield [col[0] for col in cur.description]
+    for i, row in enumerate(results):
+        # yield [val for val in row]
+        outputFile.write("%s\n" % '\t'.join(
+            [str(val) if val is not None else '' for val in row]))
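Note: query_db.py concentrates the SQLite access for both tools; get_connection() can register re_match, re_search, and re_sub as SQL functions, describe_tables() lists each table and its columns, and run_query() streams result rows as tab-separated lines. A minimal sketch (the database path, table, and query are illustrative)::

    import sys

    from query_db import get_connection, run_query

    conn = get_connection('workdb.sqlite', addfunctions=True)   # placeholder path
    # uses the registered re_search() SQL function; assumes a 'customers' table
    run_query(conn,
              "SELECT FirstName, LastName FROM customers "
              "WHERE re_search('[hp]er', Email)",
              sys.stdout)
    conn.close()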
diff -r 9d9ab2c69014 -r ab27c4bd14b9 query_tabular.py
--- a/query_tabular.py Wed Jul 05 11:51:50 2017 -0400
+++ b/query_tabular.py Fri Jul 14 11:39:27 2017 -0400
@@ -1,54 +1,37 @@
 #!/usr/bin/env python
-"""
-"""
+
+from __future__ import print_function
+
+import json
+import optparse
+import os.path
 import sys
-import re
-import os.path
-import json
-import sqlite3 as sqlite
-import optparse
-from optparse import OptionParser
+
+from load_db import create_table
+
+from query_db import describe_tables, get_connection, run_query
+
[the TODO list is removed from the module docstring and the JSON-config example in it is reflowed; whitespace changes only]
@@ -59,331 +42,75 @@
 """
 
 
[the inline LineFilter and TabularReader classes and the getValueType, get_column_def, create_table, create_index and regex SQL-function helpers are deleted from this file; they now live in filters.py, load_db.py and query_db.py]
[...]
[a local _create_table(ti, table) helper reads each per-table setting and calls:]
+        create_table(get_connection(options.sqlitedb), path, table_name,
+                     pkey_autoincr=pkey_autoincr,
+                     column_names=column_names,
+                     skip=comment_lines,
+                     comment_char=comment_char,
+                     load_named_columns=load_named_columns,
+                     filters=filters,
+                     unique_indexes=unique_indexes,
+                     indexes=indexes)
+
     if options.jsonfile:
         try:
             fh = open(options.jsonfile)
             tdef = json.load(fh)
             if 'tables' in tdef:
                 for ti, table in enumerate(tdef['tables']):
[the old per-table option parsing and direct create_table(conn, ...) call are removed and replaced by:]
+                    _create_table(ti, table)
-        except Exception, exc:
-            print >> sys.stderr, "Error: %s" % exc
-    conn.close()
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
 
     query = None
     if (options.query_file is not None):
@@ -395,32 +122,18 @@
         query = options.query
 
     if (query is None):
[the inline sqlite_master table listing, the commented-out read-only check and the bare run_query(query, outputFile) call are removed and replaced by:]
+        try:
+            describe_tables(get_connection(options.sqlitedb), outputFile)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+    else:
         try:
+            run_query(get_connection(options.sqlitedb), query, outputFile,
+                      no_header=options.no_header)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+            exit(1)
+
 
 if __name__ == "__main__":
     __main__()
diff -r 9d9ab2c69014 -r ab27c4bd14b9 query_tabular.xml
--- a/query_tabular.xml Wed Jul 05 11:51:50 2017 -0400
+++ b/query_tabular.xml Fri Jul 14 11:39:27 2017 -0400
@@ -1,6 +1,10 @@
-<tool id="query_tabular" name="Query Tabular" version="4.0.0">
+<tool id="query_tabular" name="Query Tabular" version="5.0.0">
     <description>using sqlite sql</description>
 
+    <macros>
+         <import>macros.xml</import>
+    </macros>
+
     <requirements>
     </requirements>
     <stdio>
@@ -76,62 +80,8 @@
   #if len($idx_non) > 0:
     #set $jtbl['index'] = $idx_non
   #end if
[the inline Cheetah block that built $input_filters from the line-filter repeat is removed and replaced by the shared macro:]
+  #set $linefilters = $tbl.input_opts.linefilters
+  @LINEFILTERS@
   #if $input_filters:
     #set $jtbl['filters'] = $input_filters
   #end if
@@ -149,99 +99,7 @@
         <repeat name="tables" title="Database Table" min="0">
             <param name="table" type="data" format="tabular" label="Tabular Dataset for Table"/>
             <section name="input_opts" expanded="false" title="Filter Dataset Input">
[the inline <repeat name="linefilters"> parameter block and the step-by-step "Filter 1..6" walkthrough in the help are removed; the help now uses the shared token:]
[...]
+@LINEFILTERS_HELP_EXAMPLE@
 
 
   Table name: pets
@@ -634,6 +297,13 @@
 
 **Normalizing by Line Filtering into 2 Tables** 
 
+*Relational database operations work with single-valued column entries.
+To apply relational operations to tabular files that contain fields with lists of values,
+we need to "normalize" those fields, duplicating lines for each item in the list.
+In this example we create 2 tables, one for single-valued fields and a second with list-valued fields normalized.
+Because we add a line number first for each table, we can join the 2 tables on the line number column.*
+https://en.wikipedia.org/wiki/First_normal_form
+
     *People Table*
 
       ::
@@ -679,7 +349,7 @@
       ==  ========  ========
 
 
-    Query: SELECT FirstName,LastName,PetName FROM People join Pet on People.id = Pet.id WHERE PetType = 'cat';
+    Query: SELECT FirstName,LastName,PetName FROM People JOIN Pet ON People.id = Pet.id WHERE PetType = 'cat';
 
     Result:
 
@@ -690,8 +360,6 @@
      Steven     Jones     Allie   
      =========  ========  ========
 
-.. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
-.. _SQLite: http://www.sqlite.org/index.html
 
     ]]></help>
 </tool>
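Note: the normalization help splits list-valued columns into separate rows, keyed by an added line-number column, so the two tables can be joined back together. A standalone sketch of that join in sqlite3, with the table contents abbreviated from the help example::

    import sqlite3

    conn = sqlite3.connect(':memory:')
    c = conn.cursor()
    c.execute('CREATE TABLE People (id INTEGER, FirstName TEXT, LastName TEXT)')
    c.execute('CREATE TABLE Pet (id INTEGER, PetName TEXT, PetType TEXT)')
    c.executemany('INSERT INTO People VALUES (?,?,?)',
                  [(1, 'Paula', 'Brown'), (2, 'Steven', 'Jones')])
    c.executemany('INSERT INTO Pet VALUES (?,?,?)',
                  [(1, 'Rex', 'dog'), (1, 'Fluff', 'cat'), (2, 'Allie', 'cat')])
    for row in c.execute("SELECT FirstName, LastName, PetName FROM People "
                         "JOIN Pet ON People.id = Pet.id WHERE PetType = 'cat'"):
        print(row)   # ('Paula', 'Brown', 'Fluff') then ('Steven', 'Jones', 'Allie')
    conn.close()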
diff -r 9d9ab2c69014 -r ab27c4bd14b9 sqlite_to_tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sqlite_to_tabular.py Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import optparse
+import os.path
+import sys
+
+from query_db import describe_tables, get_connection, run_query
+
+
+def __main__():
+    # Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('-s', '--sqlitedb', dest='sqlitedb', default=None,
+                      help='The SQLite Database')
+    parser.add_option('-q', '--query', dest='query', default=None,
+                      help='SQL query')
+    parser.add_option('-Q', '--query_file', dest='query_file', default=None,
+                      help='SQL query file')
+    parser.add_option('-n', '--no_header', dest='no_header', default=False,
+                      action='store_true',
+                      help='Omit the column headers line')
+    parser.add_option('-o', '--output', dest='output', default=None,
+                      help='Output file for query results')
+    (options, args) = parser.parse_args()
+
+    # determine output destination
+    if options.output is not None:
+        try:
+            outputPath = os.path.abspath(options.output)
+            outputFile = open(outputPath, 'w')
+        except Exception as e:
+            print("failed: %s" % e, file=sys.stderr)
+            exit(3)
+    else:
+        outputFile = sys.stdout
+
+    query = None
+    if (options.query_file is not None):
+        with open(options.query_file, 'r') as fh:
+            query = ''
+            for line in fh:
+                query += line
+    elif (options.query is not None):
+        query = options.query
+
+    if (query is None):
+        try:
+            describe_tables(get_connection(options.sqlitedb), outputFile)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+        exit(0)
+    else:
+        try:
+            run_query(get_connection(options.sqlitedb), query, outputFile,
+                      no_header=options.no_header)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+            exit(1)
+
+
+if __name__ == "__main__":
+    __main__()
diff -r 9d9ab2c69014 -r ab27c4bd14b9 sqlite_to_tabular.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sqlite_to_tabular.xml Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,66 @@
+<?xml version="1.0"?>
+<tool id="sqlite_to_tabular" name="SQLite to tabular" version="5.0.0">
+    <description>for SQL query</description>
+
+    <macros>
+         <import>macros.xml</import>
+    </macros>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Error" />
+    </stdio>
+    <command><![CDATA[
+    python $__tool_directory__/sqlite_to_tabular.py 
+    --sqlitedb="$sqlitedb" 
+    --query_file="$query_file"
+    $no_header 
+    --output="$query_results"
+    ]]></command>
+    <configfiles>
+        <configfile name="query_file">
+$sqlquery
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="sqlitedb" type="data" format="sqlite" label="SQLite Database"/>
+        <param name="sqlquery" type="text" area="True" size="120x20" label="SQL query">
+            <validator type="regex">(?ims)^\s*SELECT\s.*\sFROM\s.*$</validator>
+            <sanitizer sanitize="False"/>
+        </param>
+        <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers"/>
+    </inputs>
+    <outputs>
+        <data name="query_results" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="sqlitedb" ftype="sqlite" value="testdb.sqlite" />
+            <param name="sqlquery" value="SELECT first_name, last_name, age FROM contacts WHERE first_name = 'Sam'" />
+            <output name="query_results">
+                <assert_contents>
+                    <has_text text="Smith" />
+                    <not_has_text text="Doe" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+=================
+SQLite to Tabular
+=================
+
+**Inputs**
+
+  An existing SQLite_ database.
+
+
+**Outputs**
+
+  The results of a SQL query are output to the history as a tabular file.
+
+
+
+@QUERY_HELP@
+
+    ]]></help>
+</tool>
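Note: the sqlquery parameter is guarded by a regex validator that only accepts statements of the form SELECT ... FROM ...; the check can be reproduced with Python's re module::

    import re

    validator = re.compile(r'(?ims)^\s*SELECT\s.*\sFROM\s.*$')
    print(bool(validator.match("SELECT first_name, last_name FROM contacts")))  # True
    print(bool(validator.match("DROP TABLE contacts")))                         # False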
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._IEDB.tsv
Binary file test-data/._IEDB.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._filtered_people_results.tsv
Binary file test-data/._filtered_people_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._filtered_pets_results.tsv
Binary file test-data/._filtered_pets_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._netMHC_summary.tsv
Binary file test-data/._netMHC_summary.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._pet_normalized_query_results.tsv
Binary file test-data/._pet_normalized_query_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._query_results.tsv
Binary file test-data/._query_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._regex_results.tsv
Binary file test-data/._regex_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._sales_results.tsv
Binary file test-data/._sales_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/filtered_people_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered_people_results.tsv Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,5 @@
+1 FirstName LastName DOB Pets
+2 Paula Brown 1978-05-24 2
+3 Steven Jones 1974-04-04 1
+4 Jane Doe 1978-05-24 0
+5 James Smith 1980-10-20 1
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/filtered_pets_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered_pets_results.tsv Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,6 @@
+1 PetNames PetType
+2 Rex dog
+2 Fluff cat
+3 Allie cat
+4
+5 Spot
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/testdb.sqlite
Binary file test-data/testdb.sqlite has changed