Repository 'query_tabular'
hg clone https://toolshed.g2.bx.psu.edu/repos/jjohnson/query_tabular

Changeset 20:ab27c4bd14b9 (2017-07-14)
Previous changeset 19:9d9ab2c69014 (2017-07-05) Next changeset 21:357fe86f245d (2017-07-14)
Commit message:
Uploaded
modified:
query_tabular.py
query_tabular.xml
added:
filter_tabular.py
filter_tabular.xml
filters.py
load_db.py
macros.xml
query_db.py
sqlite_to_tabular.py
sqlite_to_tabular.xml
test-data/._IEDB.tsv
test-data/._filtered_people_results.tsv
test-data/._filtered_pets_results.tsv
test-data/._netMHC_summary.tsv
test-data/._pet_normalized_query_results.tsv
test-data/._query_results.tsv
test-data/._regex_results.tsv
test-data/._sales_results.tsv
test-data/filtered_people_results.tsv
test-data/filtered_pets_results.tsv
test-data/testdb.sqlite
diff -r 9d9ab2c69014 -r ab27c4bd14b9 filter_tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_tabular.py Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import json
+import optparse
+import os.path
+import sys
+
+from filters import filter_file
+
+
+def __main__():
+    # Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('-i', '--input', dest='input', default=None,
+                      help='Input file for filtering')
+    parser.add_option('-j', '--jsonfile', dest='jsonfile', default=None,
+                      help='JSON array of filter specifications')
+    parser.add_option('-o', '--output', dest='output', default=None,
+                      help='Output file for query results')
+    parser.add_option('-v', '--verbose', dest='verbose', default=False,
+                      action='store_true',
+                      help='verbose')
+    (options, args) = parser.parse_args()
+
+    if options.input is not None:
+        try:
+            inputPath = os.path.abspath(options.input)
+            inputFile = open(inputPath, 'r')
+        except Exception as e:
+            print("failed: %s" % e, file=sys.stderr)
+            exit(3)
+    else:
+        inputFile = sys.stdin
+
+    if options.output is not None:
+        try:
+            outputPath = os.path.abspath(options.output)
+            outputFile = open(outputPath, 'w')
+        except Exception as e:
+            print("failed: %s" % e, file=sys.stderr)
+            exit(3)
+    else:
+        outputFile = sys.stdout
+
+    filters = None
+    if options.jsonfile:
+        try:
+            fh = open(options.jsonfile)
+            filters = json.load(fh)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+
+    if options.verbose and filters:
+        for f in filters:
+            print('%s  %s' % (f['filter'],
+                  ', '.join(
+                  ['%s: %s' % (k, f[k])
+                   for k in set(f.keys()) - set(['filter'])])),
+                  file=sys.stdout)
+
+    try:
+        filter_file(inputFile, outputFile, filters=filters)
+    except Exception as exc:
+        print("Error: %s" % exc, file=sys.stderr)
+        exit(1)
+
+
+if __name__ == "__main__":
+    __main__()
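Note: filter_tabular.py simply feeds the JSON filter specifications to filter_file() from the new filters.py. A minimal sketch of calling that function directly (Python 2, since TabularReader tests isinstance(input_file, file); the path and filter values are illustrative only)::

    from __future__ import print_function

    import sys

    from filters import filter_file

    # illustrative filter chain; 'pets.tsv' is a placeholder path
    filters = [
        {'filter': 'skip', 'count': 1},                  # drop one leading line
        {'filter': 'regex', 'pattern': r'^\s*$',
         'action': 'exclude_match'},                     # drop blank lines
        {'filter': 'append_line_num'},                   # add a line-number column
    ]

    # comment_char='#' (the default) also drops '#' comment lines
    filter_file('pets.tsv', sys.stdout, filters=filters)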
diff -r 9d9ab2c69014 -r ab27c4bd14b9 filter_tabular.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_tabular.xml Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,102 @@
+<tool id="filter_tabular" name="Filter Tabular" version="5.0.0">
+    <description></description>
+
+    <macros>
+         <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command><![CDATA[
+        python $__tool_directory__/filter_tabular.py 
+        -i '$input'
+        -j '$filter_json'
+        -o '$output'
+    ]]></command>
+    <configfiles>
+        <configfile name="filter_json">
+#import json
+@LINEFILTERS@
+#if $input_filters:
+#echo $json.dumps($input_filters)
+#end if
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="Tabular Dataset to filter"/>
+        <expand macro="macro_line_filters" />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" ftype="tabular" value="pets.tsv"/>
+            <repeat name="linefilters">
+                <param name="filter_type" value="comment"/>
+                <param name="comment_char" value="35"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="append_line_num"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="select_columns"/>
+                <param name="columns" value="7,2,3,4,1"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="replace"/>
+                <param name="column" value="c4"/>
+                <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/>
+                <param name="regex_replace" value="19\3-\2-\1"/>
+            </repeat>
+            <output name="output" file="filtered_people_results.tsv"/>
+        </test>
+        <test>
+            <param name="input" ftype="tabular" value="pets.tsv"/>
+            <repeat name="linefilters">
+                <param name="filter_type" value="comment"/>
+                <param name="comment_char" value="35"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="append_line_num"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="select_columns"/>
+                <param name="columns" value="c7,c5,c6"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="normalize"/>
+                <param name="columns" value="c2,c3"/>
+                <param name="separator" value=","/>
+            </repeat>
+            <output name="output" file="filtered_pets_results.tsv"/>
+        </test>
+
+    </tests>
+    <help><![CDATA[
+==============
+Filter Tabular
+==============
+
+  Filter a tabular dataset by applying line filters as it is being read.
+  Multiple filters may be chained; each filter operates on the output of the previous one.
+
+**Inputs**
+
+  A tabular dataset.
+
+
+**Outputs**
+
+  A filtered tabular dataset.
+
+
+@LINEFILTERS_HELP@
+
+@LINEFILTERS_HELP_EXAMPLE@
+
+    ]]></help>
+</tool>
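Note: for the first test case above, the filter_json configfile should expand (via the @LINEFILTERS@ macro added in macros.xml below) to roughly the following JSON array, which is what filter_tabular.py receives through -j. Shown as a Python literal for readability; this is an approximation, not the literal configfile output::

    # approximate content of $filter_json for the first test case
    input_filters = [
        {"filter": "regex", "pattern": "^(#).*$",
         "action": "exclude_match"},                      # comment char 35 == '#'
        {"filter": "append_line_num"},
        {"filter": "select_columns", "columns": [7, 2, 3, 4, 1]},
        {"filter": "replace", "column": 4,
         "pattern": "(\\d+)/(\\d+)/(\\d+)",
         "replace": "19\\3-\\2-\\1"},                     # dd/mm/yy -> 19yy-mm-dd
    ]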
diff -r 9d9ab2c69014 -r ab27c4bd14b9 filters.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filters.py Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import re
+import sys
+
+
+class LineFilter(object):
+    def __init__(self, source, filter_dict):
+        self.source = source
+        self.filter_dict = filter_dict
+        self.func = lambda i, l: l.rstrip('\r\n') if l else None
+        self.src_lines = []
+        self.src_line_cnt = 0
+        if not filter_dict:
+            return
+        if filter_dict['filter'] == 'regex':
+            rgx = re.compile(filter_dict['pattern'])
+            if filter_dict['action'] == 'exclude_match':
+                self.func = lambda i, l: l if not rgx.match(l) else None
+            elif filter_dict['action'] == 'include_match':
+                self.func = lambda i, l: l if rgx.match(l) else None
+            elif filter_dict['action'] == 'exclude_find':
+                self.func = lambda i, l: l if not rgx.search(l) else None
+            elif filter_dict['action'] == 'include_find':
+                self.func = lambda i, l: l if rgx.search(l) else None
+        elif filter_dict['filter'] == 'select_columns':
+            cols = [int(c) - 1 for c in filter_dict['columns']]
+            self.func = lambda i, l: self.select_columns(l, cols)
+        elif filter_dict['filter'] == 'replace':
+            p = filter_dict['pattern']
+            r = filter_dict['replace']
+            c = int(filter_dict['column']) - 1
+            self.func = lambda i, l: '\t'.join(
+                [x if j != c else re.sub(p, r, x) for j, x in enumerate(l.split('\t'))])
+        elif filter_dict['filter'] == 'prepend_line_num':
+            self.func = lambda i, l: '%d\t%s' % (i, l)
+        elif filter_dict['filter'] == 'append_line_num':
+            self.func = lambda i, l: '%s\t%d' % (l.rstrip('\r\n'), i)
+        elif filter_dict['filter'] == 'prepend_text':
+            s = filter_dict['column_text']
+            self.func = lambda i, l: '%s\t%s' % (s, l)
+        elif filter_dict['filter'] == 'append_text':
+            s = filter_dict['column_text']
+            self.func = lambda i, l: '%s\t%s' % (l.rstrip('\r\n'), s)
+        elif filter_dict['filter'] == 'skip':
+            cnt = filter_dict['count']
+            self.func = lambda i, l: l if i > cnt else None
+        elif filter_dict['filter'] == 'normalize':
+            cols = [int(c) - 1 for c in filter_dict['columns']]
+            sep = filter_dict['separator']
+            self.func = lambda i, l: self.normalize(l, cols, sep)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.next()
+
+    def next(self):
+        if not self.src_lines:
+            self.get_lines()
+        if self.src_lines:
+            return self.src_lines.pop(0)
+        raise StopIteration
+
+    def select_columns(self, line, cols):
+        fields = line.split('\t')
+        return '\t'.join([fields[x] for x in cols])
+
+    def normalize(self, line, split_cols, sep):
+        lines = []
+        fields = line.rstrip('\r\n').split('\t')
+        split_fields = dict()
+        cnt = 0
+        for c in split_cols:
+            if c < len(fields):
+                split_fields[c] = fields[c].split(sep)
+                cnt = max(cnt, len(split_fields[c]))
+        if cnt == 0:
+            lines.append('\t'.join(fields))
+        else:
+            for n in range(0, cnt):
+                flds = [x if c not in split_cols else split_fields[c][n]
+                        if n < len(split_fields[c])
+                        else '' for (c, x) in enumerate(fields)]
+                lines.append('\t'.join(flds))
+        return lines
+
+    def get_lines(self):
+        for i, next_line in enumerate(self.source):
+            self.src_line_cnt += 1
+            line = self.func(self.src_line_cnt, next_line)
+            if line:
+                if isinstance(line, list):
+                    self.src_lines.extend(line)
+                else:
+                    self.src_lines.append(line)
+                return
+
+
+class TabularReader:
+    """
+    Tabular file iterator. Returns a list
+    """
+    def __init__(self, input_file, skip=0, comment_char=None, col_idx=None,
+                 filters=None):
+        self.skip = skip
+        self.comment_char = comment_char
+        self.col_idx = col_idx
+        self.filters = filters
+        self.tsv_file = \
+            input_file if isinstance(input_file, file) else open(input_file)
+        if skip and skip > 0:
+            for i in range(skip):
+                if not self.tsv_file.readline():
+                    break
+        source = LineFilter(self.tsv_file, None)
+        if comment_char:
+            source = LineFilter(source,
+                                {"filter": "regex", "pattern": comment_char,
+                                 "action": "exclude_match"})
+        if filters:
+            for f in filters:
+                source = LineFilter(source, f)
+        self.source = source
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.next()
+
+    def next(self):
+        ''' Iteration '''
+        for i, line in enumerate(self.source):
+            fields = line.rstrip('\r\n').split('\t')
+            if self.col_idx:
+                fields = [fields[i] for i in self.col_idx]
+            return fields
+        raise StopIteration
+
+
+def filter_file(input_file, output, skip=0, comment_char='#', filters=None):
+    data_lines = 0
+    try:
+        tr = TabularReader(input_file, skip=skip, comment_char=comment_char,
+                           filters=filters)
+        for linenum, fields in enumerate(tr):
+            data_lines += 1
+            try:
+                output.write('%s\n' % '\t'.join(fields))
+            except Exception as e:
+                print('Failed at line: %d err: %s' % (linenum, e),
+                      file=sys.stderr)
+    except Exception as e:
+        print('Failed: %s' % (e), file=sys.stderr)
+        exit(1)
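Note: both tools share these helpers; TabularReader iterates a tabular source and yields each surviving line as a list of fields after the skip, comment, and user-supplied filters have been applied. A minimal sketch of using it directly (Python 2 as above; 'input.tsv' is a placeholder path)::

    from filters import TabularReader

    # skip two leading lines, drop '#' comment lines, and keep only the
    # first and third fields of each remaining line
    tr = TabularReader('input.tsv', skip=2, comment_char='#', col_idx=[0, 2])
    for fields in tr:
        print('\t'.join(fields))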
diff -r 9d9ab2c69014 -r ab27c4bd14b9 load_db.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/load_db.py Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import sys
+
+from filters import TabularReader
+
+
+def getValueType(val):
+    if val or 0. == val:
+        try:
+            int(val)
+            return 'INTEGER'
+        except:
+            try:
+                float(val)
+                return 'REAL'
+            except:
+                return 'TEXT'
+    return None
+
+
+def get_column_def(file_path, table_name, skip=0, comment_char='#',
+                   column_names=None, max_lines=100, load_named_columns=False,
+                   filters=None):
+    col_pref = ['TEXT', 'REAL', 'INTEGER', None]
+    col_types = []
+    col_idx = None
+    try:
+        tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
+                           col_idx=None, filters=filters)
+        for linenum, fields in enumerate(tr):
+            if linenum > max_lines:
+                break
+            try:
+                while len(col_types) < len(fields):
+                    col_types.append(None)
+                for i, val in enumerate(fields):
+                    colType = getValueType(val)
+                    if col_pref.index(colType) < col_pref.index(col_types[i]):
+                        col_types[i] = colType
+            except Exception as e:
+                print('Failed at line: %d err: %s' % (linenum, e),
+                      file=sys.stderr)
+    except Exception as e:
+        print('Failed: %s' % (e), file=sys.stderr)
+    for i, col_type in enumerate(col_types):
+        if not col_type:
+            col_types[i] = 'TEXT'
+    if column_names:
+        col_names = []
+        if load_named_columns:
+            col_idx = []
+            for i, cname in enumerate(
+                    [cn.strip() for cn in column_names.split(',')]):
+                if cname != '':
+                    col_idx.append(i)
+                    col_names.append(cname)
+            col_types = [col_types[i] for i in col_idx]
+        else:
+            col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
+            for i, cname in enumerate(
+                    [cn.strip() for cn in column_names.split(',')]):
+                if cname and i < len(col_names):
+                    col_names[i] = cname
+    else:
+        col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
+    col_def = []
+    for i, col_name in enumerate(col_names):
+        col_def.append('%s %s' % (col_names[i], col_types[i]))
+    return col_names, col_types, col_def, col_idx
+
+
+def create_table(conn, file_path, table_name, skip=0, comment_char='#',
+                 pkey_autoincr=None, column_names=None,
+                 load_named_columns=False, filters=None,
+                 unique_indexes=[], indexes=[]):
+    col_names, col_types, col_def, col_idx = \
+        get_column_def(file_path, table_name, skip=skip,
+                       comment_char=comment_char, column_names=column_names,
+                       load_named_columns=load_named_columns, filters=filters)
+    col_func = [float if t == 'REAL' else int
+                if t == 'INTEGER' else str for t in col_types]
+    table_def = 'CREATE TABLE %s (\n    %s%s\n);' % (
+                table_name,
+                '%s INTEGER PRIMARY KEY AUTOINCREMENT,' %
+                pkey_autoincr if pkey_autoincr else '',
+                ', \n    '.join(col_def))
+    # print >> sys.stdout, table_def
+    insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (
+                  table_name, ','.join(col_names),
+                  ','.join(["?" for x in col_names]))
+    # print >> sys.stdout, insert_stmt
+    data_lines = 0
+    try:
+        c = conn.cursor()
+        c.execute(table_def)
+        conn.commit()
+        c.close()
+        for i, index in enumerate(unique_indexes):
+            index_name = 'idx_uniq_%s_%d' % (table_name, i)
+            index_columns = index.split(',')
+            create_index(conn, table_name, index_name, index_columns,
+                         unique=True)
+        for i, index in enumerate(indexes):
+            index_name = 'idx_%s_%d' % (table_name, i)
+            index_columns = index.split(',')
+            create_index(conn, table_name, index_name, index_columns)
+        c = conn.cursor()
+        tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
+                           col_idx=col_idx, filters=filters)
+        for linenum, fields in enumerate(tr):
+            data_lines += 1
+            try:
+                vals = [col_func[i](x)
+                        if x else None for i, x in enumerate(fields)]
+                c.execute(insert_stmt, vals)
+            except Exception as e:
+                print('Failed at line: %d err: %s' % (linenum, e),
+                      file=sys.stderr)
+        conn.commit()
+        c.close()
+    except Exception as e:
+        print('Failed: %s' % (e), file=sys.stderr)
+        exit(1)
+
+
+def create_index(conn, table_name, index_name, index_columns, unique=False):
+    index_def = "CREATE %s INDEX %s on %s(%s)" % (
+                'UNIQUE' if unique else '', index_name,
+                table_name, ','.join(index_columns))
+    c = conn.cursor()
+    c.execute(index_def)
+    conn.commit()
+    c.close()
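Note: load_db.py now owns the column-type sniffing and table loading that used to live inline in query_tabular.py. A minimal sketch of loading one tabular file into an SQLite database with create_table (Python 2 as above; the paths, table name, and column names are illustrative and echo the help example)::

    import sqlite3 as sqlite

    from load_db import create_table

    conn = sqlite.connect('workdb.sqlite')     # placeholder database path
    # sniffs column types from the first rows, creates the table and indexes,
    # then bulk-inserts the tabular rows
    create_table(conn, 'customers.tsv', 'customers',
                 comment_char='#',
                 column_names='CustomerID,FirstName,LastName,Email,DOB,Phone',
                 unique_indexes=['CustomerID'],
                 indexes=['LastName,FirstName'])
    conn.close()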
diff -r 9d9ab2c69014 -r ab27c4bd14b9 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,383 @@
+<macros>
+  <token name="@LINEFILTERS@">
+<![CDATA[
+  ## set $input_filters from the line filter parameters
+  #set $input_filters = []
+  #for $fi in $linefilters:
+    #if $fi.filter.filter_type == 'skip':
+      #set $skip_lines = None
+      #if str($fi.filter.skip_lines) != '':
+        #set $skip_lines = int($fi.filter.skip_lines)
+      #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
+        #set $skip_lines = int($tbl.table.metadata.comment_lines)
+      #end if
+      #if $skip_lines is not None:
+        #set $filter_dict = dict()
+        #set $filter_dict['filter'] = str($fi.filter.filter_type)
+        #set $filter_dict['count'] = $skip_lines
+        #silent $input_filters.append($filter_dict)
+      #end if
+    #elif $fi.filter.filter_type == 'comment':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = 'regex'
+      #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join([chr(int(x)).replace('|','[|]') for x in (str($fi.filter.comment_char)).split(',')])
+      #set $filter_dict['action'] = 'exclude_match'
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'regex':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
+      #set $filter_dict['action'] = str($fi.filter.regex_action)
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'select_columns':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'replace':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['column'] = int(str($fi.filter.column).replace('c',''))
+      #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
+      #set $filter_dict['replace'] = str($fi.filter.regex_replace)
+      #silent $input_filters.append($filter_dict)
+    #elif str($fi.filter.filter_type).endswith('pend_line_num'):
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #silent $input_filters.append($filter_dict)
+    #elif str($fi.filter.filter_type).endswith('pend_text'):
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['column_text'] = str($fi.filter.column_text)
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'normalize':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
+      #set $filter_dict['separator'] = str($fi.filter.separator)
+      #silent $input_filters.append($filter_dict)
+    #end if
+  #end for
+]]>
+  </token>
+  <xml name="macro_line_filters">
+                <repeat name="linefilters" title="Filter Tabular Input Lines">
+                    <conditional name="filter">
+                        <param name="filter_type" type="select" label="Filter By">
+                            <option value="skip">skip leading lines</option>
+                            <option value="comment">comment char</option>
+                            <option value="regex">by regex expression matching</option>
+                            <option value="select_columns">select columns</option>
+                            <option value="replace">regex replace value in column</option>
+                            <option value="prepend_line_num">prepend a line number column</option>
+                            <option value="append_line_num">append a line number column</option>
[...]
+    =========== ========== ========== ===================== ========== ============
+    #CustomerID FirstName  LastName   Email                 DOB        Phone
+    =========== ========== ========== ===================== ========== ============
+    1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
+    2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
+    3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
+    4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
+    =========== ========== ========== ===================== ========== ============
+
+   Dataset *sales*
+
+    Table name: "sales"
+
+    Column names: "CustomerID,Date,SaleAmount"
+
+    =============  ============  ============
+      #CustomerID    Date          SaleAmount
+    =============  ============  ============
+               2    2004-05-06         100.22
+               1    2004-05-07          99.95
+               3    2004-05-07         122.95
+               3    2004-05-13         100.00
+               4    2004-05-22         555.55
+    =============  ============  ============
+
+  The query
+
+  ::
+
+    SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales"
+    FROM customers join sales on customers.CustomerID = sales.CustomerID
+    GROUP BY customers.CustomerID ORDER BY TotalSales DESC;
+
+  Produces this tabular output:
+
+    ========== ======== ==========
+    #FirstName LastName TotalSales
+    ========== ======== ==========
+    James      Smith    555.55
+    Paula      Brown    222.95
+    Steven     Goldfish 100.22
+    John       Smith    99.95
+    ========== ======== ==========
+
+  If the optional Table name and Column names inputs are not used, the query would be:
+
+  ::
+
+    SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales"
+    FROM t1 join t2 on t1.c1 = t2.c1
+    GROUP BY t1.c1 ORDER BY TotalSales DESC;
+
+  You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5:
+
+    Column names: ,FirstName,LastName,,BirthDate
+
+    Results in the following database table
+
+    =========== ========== ========== ===================== ========== ============
+    #c1         FirstName  LastName   c4                    BirthDate  c6
+    =========== ========== ========== ===================== ========== ============
+    1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
+    2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
+    3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
+    4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
+    =========== ========== ========== ===================== ========== ============
+
+  Regular_expression_ functions are included for:
+
+  ::
+
+    matching:      re_match('pattern',column)
+
+    SELECT t1.FirstName, t1.LastName
+    FROM t1
+    WHERE re_match('^.*\.(net|org)$',c4)
+
+  Results:
+
+    =========== ==========
+    #FirstName  LastName
+    =========== ==========
+    Steven      Goldfish
+    Paula       Brown
+    =========== ==========
+
+  ::
+
+    searching:     re_search('pattern',column)
+    substituting:  re_sub('pattern','replacement',column)
+
+    SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB"
+    FROM t1
+    WHERE re_search('[hp]er',c4)
+
+  Results:
+
+    =========== ========== ==========
+    #FirstName  LastName   DOB
+    =========== ========== ==========
+    Steven      Goldfish   04/04/74
+    Paula       Brown      24/05/78
+    James       Smith      20/10/80
+    =========== ========== ==========
+
+.. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
+.. _SQLite: http://www.sqlite.org/index.html
+.. _SQLite_functions: http://www.sqlite.org/docs.html
+
+]]>
+  </token>
+
+</macros>
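Note: the re_sub example in the help above rewrites ISO dates as DD/MM/YY; the same pattern can be exercised directly with Python's re module, which is what backs the SQL function registered in query_db.py::

    import re

    # mirrors re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) from the help
    print(re.sub(r'^\d{2}(\d{2})-(\d\d)-(\d\d)', r'\3/\2/\1', '1974-04-04'))
    # -> 04/04/74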
diff -r 9d9ab2c69014 -r ab27c4bd14b9 query_db.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/query_db.py Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import re
+import sqlite3 as sqlite
+import sys
+
+
+TABLE_QUERY = \
+    """
+    SELECT name, sql
+    FROM sqlite_master
+    WHERE type='table'
+    ORDER BY name
+    """
+
+
+def regex_match(expr, item):
+    return re.match(expr, item) is not None
+
+
+def regex_search(expr, item):
+    return re.search(expr, item) is not None
+
+
+def regex_sub(expr, replace, item):
+    return re.sub(expr, replace, item)
+
+
+def get_connection(sqlitedb_path, addfunctions=False):
+    conn = sqlite.connect(sqlitedb_path)
+    if addfunctions:
+        conn.create_function("re_match", 2, regex_match)
+        conn.create_function("re_search", 2, regex_search)
+        conn.create_function("re_sub", 3, regex_sub)
+    return conn
+
+
+def describe_tables(conn, outputFile):
+    try:
+        c = conn.cursor()
+        tables_query = TABLE_QUERY
+        rslt = c.execute(tables_query).fetchall()
+        for table, sql in rslt:
+            print("Table %s:" % table, file=sys.stderr)
+            try:
+                col_query = 'SELECT * FROM %s LIMIT 0' % table
+                cur = conn.cursor().execute(col_query)
+                cols = [col[0] for col in cur.description]
+                print(" Columns: %s" % cols, file=sys.stderr)
+            except Exception as exc:
+                print("Error: %s" % exc, file=sys.stderr)
+    except Exception as exc:
+        print("Error: %s" % exc, file=sys.stderr)
+    exit(0)
+
+
+def run_query(conn, query, outputFile, no_header=False):
+    cur = conn.cursor()
+    results = cur.execute(query)
+    if not no_header:
+        outputFile.write("#%s\n" % '\t'.join(
+            [str(col[0]) for col in cur.description]))
+        # yield [col[0] for col in cur.description]
+    for i, row in enumerate(results):
+        # yield [val for val in row]
+        outputFile.write("%s\n" % '\t'.join(
+            [str(val) if val is not None else '' for val in row]))
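Note: query_db.py concentrates the SQLite access for both tools; get_connection() can register re_match, re_search, and re_sub as SQL functions, describe_tables() lists each table and its columns, and run_query() streams result rows as tab-separated lines. A minimal sketch (the database path, table, and query are illustrative)::

    import sys

    from query_db import get_connection, run_query

    conn = get_connection('workdb.sqlite', addfunctions=True)   # placeholder path
    # uses the registered re_search() SQL function; assumes a 'customers' table
    run_query(conn,
              "SELECT FirstName, LastName FROM customers "
              "WHERE re_search('[hp]er', Email)",
              sys.stdout)
    conn.close()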
diff -r 9d9ab2c69014 -r ab27c4bd14b9 query_tabular.py
--- a/query_tabular.py Wed Jul 05 11:51:50 2017 -0400
+++ b/query_tabular.py Fri Jul 14 11:39:27 2017 -0400
@@ -1,54 +1,37 @@
 #!/usr/bin/env python
-"""
-"""
+
+from __future__ import print_function
+
+import json
+import optparse
+import os.path
 import sys
-import re
-import os.path
-import json
-import sqlite3 as sqlite
-import optparse
-from optparse import OptionParser
+
+from load_db import create_table
+
+from query_db import describe_tables, get_connection, run_query
+
[the TODO list is removed from the module docstring and the JSON-config example in it is reflowed; whitespace changes only]
@@ -59,331 +42,75 @@
 """
 
 
[the inline LineFilter and TabularReader classes and the getValueType, get_column_def, create_table, create_index and regex SQL-function helpers are deleted from this file; they now live in filters.py, load_db.py and query_db.py]
[...]
[a local _create_table(ti, table) helper reads each per-table setting and calls:]
+        create_table(get_connection(options.sqlitedb), path, table_name,
+                     pkey_autoincr=pkey_autoincr,
+                     column_names=column_names,
+                     skip=comment_lines,
+                     comment_char=comment_char,
+                     load_named_columns=load_named_columns,
+                     filters=filters,
+                     unique_indexes=unique_indexes,
+                     indexes=indexes)
+
     if options.jsonfile:
         try:
             fh = open(options.jsonfile)
             tdef = json.load(fh)
             if 'tables' in tdef:
                 for ti, table in enumerate(tdef['tables']):
[the old per-table option parsing and direct create_table(conn, ...) call are removed and replaced by:]
+                    _create_table(ti, table)
-        except Exception, exc:
-            print >> sys.stderr, "Error: %s" % exc
-    conn.close()
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
 
     query = None
     if (options.query_file is not None):
@@ -395,32 +122,18 @@
         query = options.query
 
     if (query is None):
[the inline sqlite_master table listing, the commented-out read-only check and the bare run_query(query, outputFile) call are removed and replaced by:]
+        try:
+            describe_tables(get_connection(options.sqlitedb), outputFile)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+    else:
         try:
+            run_query(get_connection(options.sqlitedb), query, outputFile,
+                      no_header=options.no_header)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+            exit(1)
+
 
 if __name__ == "__main__":
     __main__()
diff -r 9d9ab2c69014 -r ab27c4bd14b9 query_tabular.xml
--- a/query_tabular.xml Wed Jul 05 11:51:50 2017 -0400
+++ b/query_tabular.xml Fri Jul 14 11:39:27 2017 -0400
@@ -1,6 +1,10 @@
-<tool id="query_tabular" name="Query Tabular" version="4.0.0">
+<tool id="query_tabular" name="Query Tabular" version="5.0.0">
     <description>using sqlite sql</description>
 
+    <macros>
+         <import>macros.xml</import>
+    </macros>
+
     <requirements>
     </requirements>
     <stdio>
@@ -76,62 +80,8 @@
   #if len($idx_non) > 0:
     #set $jtbl['index'] = $idx_non
   #end if
[the inline Cheetah block that built $input_filters from the line-filter repeat is removed and replaced by the shared macro:]
+  #set $linefilters = $tbl.input_opts.linefilters
+  @LINEFILTERS@
   #if $input_filters:
     #set $jtbl['filters'] = $input_filters
   #end if
@@ -149,99 +99,7 @@
         <repeat name="tables" title="Database Table" min="0">
             <param name="table" type="data" format="tabular" label="Tabular Dataset for Table"/>
             <section name="input_opts" expanded="false" title="Filter Dataset Input">
[the inline <repeat name="linefilters"> parameter block and the step-by-step "Filter 1..6" walkthrough in the help are removed; the help now uses the shared token:]
[...]
+@LINEFILTERS_HELP_EXAMPLE@
 
 
   Table name: pets
@@ -634,6 +297,13 @@
 
 **Normalizing by Line Filtering into 2 Tables** 
 
+*Relational database operations work with single-valued column entries.
+To apply relational operations to tabular files that contain fields with lists of values,
+we need to "normalize" those fields, duplicating lines for each item in the list.
+In this example we create 2 tables, one for single-valued fields and a second with list-valued fields normalized.
+Because we add a line number first for each table, we can join the 2 tables on the line number column.*
+https://en.wikipedia.org/wiki/First_normal_form
+
     *People Table*
 
       ::
@@ -679,7 +349,7 @@
       ==  ========  ========
 
 
-    Query: SELECT FirstName,LastName,PetName FROM People join Pet on People.id = Pet.id WHERE PetType = 'cat';
+    Query: SELECT FirstName,LastName,PetName FROM People JOIN Pet ON People.id = Pet.id WHERE PetType = 'cat';
 
     Result:
 
@@ -690,8 +360,6 @@
      Steven     Jones     Allie   
      =========  ========  ========
 
-.. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
-.. _SQLite: http://www.sqlite.org/index.html
 
     ]]></help>
 </tool>
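Note: the normalization help splits list-valued columns into separate rows, keyed by an added line-number column, so the two tables can be joined back together. A standalone sketch of that join in sqlite3, with the table contents abbreviated from the help example::

    import sqlite3

    conn = sqlite3.connect(':memory:')
    c = conn.cursor()
    c.execute('CREATE TABLE People (id INTEGER, FirstName TEXT, LastName TEXT)')
    c.execute('CREATE TABLE Pet (id INTEGER, PetName TEXT, PetType TEXT)')
    c.executemany('INSERT INTO People VALUES (?,?,?)',
                  [(1, 'Paula', 'Brown'), (2, 'Steven', 'Jones')])
    c.executemany('INSERT INTO Pet VALUES (?,?,?)',
                  [(1, 'Rex', 'dog'), (1, 'Fluff', 'cat'), (2, 'Allie', 'cat')])
    for row in c.execute("SELECT FirstName, LastName, PetName FROM People "
                         "JOIN Pet ON People.id = Pet.id WHERE PetType = 'cat'"):
        print(row)   # ('Paula', 'Brown', 'Fluff') then ('Steven', 'Jones', 'Allie')
    conn.close()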
diff -r 9d9ab2c69014 -r ab27c4bd14b9 sqlite_to_tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sqlite_to_tabular.py Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import optparse
+import os.path
+import sys
+
+from query_db import describe_tables, get_connection, run_query
+
+
+def __main__():
+    # Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('-s', '--sqlitedb', dest='sqlitedb', default=None,
+                      help='The SQLite Database')
+    parser.add_option('-q', '--query', dest='query', default=None,
+                      help='SQL query')
+    parser.add_option('-Q', '--query_file', dest='query_file', default=None,
+                      help='SQL query file')
+    parser.add_option('-n', '--no_header', dest='no_header', default=False,
+                      action='store_true',
+                      help='Omit the column headers line')
+    parser.add_option('-o', '--output', dest='output', default=None,
+                      help='Output file for query results')
+    (options, args) = parser.parse_args()
+
+    # determine output destination
+    if options.output is not None:
+        try:
+            outputPath = os.path.abspath(options.output)
+            outputFile = open(outputPath, 'w')
+        except Exception as e:
+            print("failed: %s" % e, file=sys.stderr)
+            exit(3)
+    else:
+        outputFile = sys.stdout
+
+    query = None
+    if (options.query_file is not None):
+        with open(options.query_file, 'r') as fh:
+            query = ''
+            for line in fh:
+                query += line
+    elif (options.query is not None):
+        query = options.query
+
+    if (query is None):
+        try:
+            describe_tables(get_connection(options.sqlitedb), outputFile)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+        exit(0)
+    else:
+        try:
+            run_query(get_connection(options.sqlitedb), query, outputFile,
+                      no_header=options.no_header)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+            exit(1)
+
+
+if __name__ == "__main__":
+    __main__()
diff -r 9d9ab2c69014 -r ab27c4bd14b9 sqlite_to_tabular.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sqlite_to_tabular.xml Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,66 @@
+<?xml version="1.0"?>
+<tool id="sqlite_to_tabular" name="SQLite to tabular" version="5.0.0">
+    <description>for SQL query</description>
+
+    <macros>
+         <import>macros.xml</import>
+    </macros>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Error" />
+    </stdio>
+    <command><![CDATA[
+    python $__tool_directory__/sqlite_to_tabular.py 
+    --sqlitedb="$sqlitedb" 
+    --query_file="$query_file"
+    $no_header 
+    --output="$query_results"
+    ]]></command>
+    <configfiles>
+        <configfile name="query_file">
+$sqlquery
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="sqlitedb" type="data" format="sqlite" label="SQLite Database"/>
+        <param name="sqlquery" type="text" area="True" size="120x20" label="SQL query">
+            <validator type="regex">(?ims)^\s*SELECT\s.*\sFROM\s.*$</validator>
+            <sanitizer sanitize="False"/>
+        </param>
+        <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers"/>
+    </inputs>
+    <outputs>
+        <data name="query_results" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="sqlitedb" ftype="sqlite" value="testdb.sqlite" />
+            <param name="sqlquery" value="SELECT first_name, last_name, age FROM contacts WHERE first_name = 'Sam'" />
+            <output name="query_results">
+                <assert_contents>
+                    <has_text text="Smith" />
+                    <not_has_text text="Doe" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+=================
+SQLite to Tabular
+=================
+
+**Inputs**
+
+  An existing SQLite_ database.
+
+
+**Outputs**
+
+  The results of a SQL query are output to the history as a tabular file.
+
+
+
+@QUERY_HELP@
+
+    ]]></help>
+</tool>
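Note: the sqlquery parameter is guarded by a regex validator that only accepts statements of the form SELECT ... FROM ...; the check can be reproduced with Python's re module::

    import re

    validator = re.compile(r'(?ims)^\s*SELECT\s.*\sFROM\s.*$')
    print(bool(validator.match("SELECT first_name, last_name FROM contacts")))  # True
    print(bool(validator.match("DROP TABLE contacts")))                         # False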
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._IEDB.tsv
Binary file test-data/._IEDB.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._filtered_people_results.tsv
Binary file test-data/._filtered_people_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._filtered_pets_results.tsv
Binary file test-data/._filtered_pets_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._netMHC_summary.tsv
Binary file test-data/._netMHC_summary.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._pet_normalized_query_results.tsv
Binary file test-data/._pet_normalized_query_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._query_results.tsv
Binary file test-data/._query_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._regex_results.tsv
Binary file test-data/._regex_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/._sales_results.tsv
Binary file test-data/._sales_results.tsv has changed
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/filtered_people_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered_people_results.tsv Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,5 @@
+1 FirstName LastName DOB Pets
+2 Paula Brown 1978-05-24 2
+3 Steven Jones 1974-04-04 1
+4 Jane Doe 1978-05-24 0
+5 James Smith 1980-10-20 1
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/filtered_pets_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered_pets_results.tsv Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,6 @@
+1 PetNames PetType
+2 Rex dog
+2 Fluff cat
+3 Allie cat
+4
+5 Spot
diff -r 9d9ab2c69014 -r ab27c4bd14b9 test-data/testdb.sqlite
Binary file test-data/testdb.sqlite has changed