Mercurial > repos > jjohnson > query_tabular

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_tabular.py	Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import json
+import optparse
+import os.path
+import sys
+
+from filters import filter_file
+
+
+def __main__():
+    # Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('-i', '--input', dest='input', default=None,
+                      help='Input file for filtering')
+    parser.add_option('-j', '--jsonfile', dest='jsonfile', default=None,
+                      help='JSON array of filter specifications')
+    parser.add_option('-o', '--output', dest='output', default=None,
+                      help='Output file for query results')
+    parser.add_option('-v', '--verbose', dest='verbose', default=False,
+                      action='store_true',
+                      help='verbose')
+    (options, args) = parser.parse_args()
+
+    if options.input is not None:
+        try:
+            inputPath = os.path.abspath(options.input)
+            inputFile = open(inputPath, 'r')
+        except Exception as e:
+            print("failed: %s" % e, file=sys.stderr)
+            exit(3)
+    else:
+        inputFile = sys.stdin
+
+    if options.output is not None:
+        try:
+            outputPath = os.path.abspath(options.output)
+            outputFile = open(outputPath, 'w')
+        except Exception as e:
+            print("failed: %s" % e, file=sys.stderr)
+            exit(3)
+    else:
+        outputFile = sys.stdout
+
+    filters = None
+    if options.jsonfile:
+        try:
+            fh = open(options.jsonfile)
+            filters = json.load(fh)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+
+    if options.verbose and filters:
+        for f in filters:
+            print('%s  %s' % (f['filter'],
+                  ', '.join(
+                  ['%s: %s' % (k, f[k])
+                   for k in set(f.keys()) - set(['filter'])])),
+                  file=sys.stdout)
+
+    try:
+        filter_file(inputFile, outputFile, filters=filters)
+    except Exception as exc:
+        print("Error: %s" % exc, file=sys.stderr)
+        exit(1)
+
+
+if __name__ == "__main__":
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_tabular.xml	Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,102 @@
+<tool id="filter_tabular" name="Filter Tabular" version="5.0.0">
+    <description></description>
+
+    <macros>
+         <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command><![CDATA[
+        python $__tool_directory__/filter_tabular.py
+        -i '$input'
+        -j '$filter_json'
+        -o '$output'
+    ]]></command>
+    <configfiles>
+        <configfile name="filter_json">
+#import json
+@LINEFILTERS@
+#if $input_filters:
+#echo $json.dumps($input_filters)
+#end if
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="Tabular Dataset to filter"/>
+        <expand macro="macro_line_filters" />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" ftype="tabular" value="pets.tsv"/>
+            <repeat name="linefilters">
+                <param name="filter_type" value="comment"/>
+                <param name="comment_char" value="35"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="append_line_num"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="select_columns"/>
+                <param name="columns" value="7,2,3,4,1"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="replace"/>
+                <param name="column" value="c4"/>
+                <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/>
+                <param name="regex_replace" value="19\3-\2-\1"/>
+            </repeat>
+            <output name="output" file="filtered_people_results.tsv"/>
+        </test>
+        <test>
+            <param name="input" ftype="tabular" value="pets.tsv"/>
+            <repeat name="linefilters">
+                <param name="filter_type" value="comment"/>
+                <param name="comment_char" value="35"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="append_line_num"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="select_columns"/>
+                <param name="columns" value="c7,c5,c6"/>
+            </repeat>
+            <repeat name="linefilters">
+                <param name="filter_type" value="normalize"/>
+                <param name="columns" value="c2,c3"/>
+                <param name="separator" value=","/>
+            </repeat>
+            <output name="output" file="filtered_pets_results.tsv"/>
+        </test>
+
+    </tests>
+    <help><![CDATA[
+==============
+Filter Tabular
+==============
+
+  Filter a tabular dataset by applying line filters as it is being read.
+  Multiple filters may be used with each filter using the result of the previous filter.
+
+**Inputs**
+
+  A tabular dataset.
+
+
+**Outputs**
+
+  A filtered tabular dataset.
+
+
+@LINEFILTERS_HELP@
+
+@LINEFILTERS_HELP_EXAMPLE@
+
+    ]]></help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filters.py	Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,159 @@
+#!/usr/binsenv python
+
+from __future__ import print_function
+
+import re
+import sys
+
+
+class LineFilter(object):
+    def __init__(self, source, filter_dict):
+        self.source = source
+        self.filter_dict = filter_dict
+        self.func = lambda i, l: l.rstrip('\r\n') if l else None
+        self.src_lines = []
+        self.src_line_cnt = 0
+        if not filter_dict:
+            return
+        if filter_dict['filter'] == 'regex':
+            rgx = re.compile(filter_dict['pattern'])
+            if filter_dict['action'] == 'exclude_match':
+                self.func = lambda i, l: l if not rgx.match(l) else None
+            elif filter_dict['action'] == 'include_match':
+                self.func = lambda i, l: l if rgx.match(l) else None
+            elif filter_dict['action'] == 'exclude_find':
+                self.func = lambda i, l: l if not rgx.search(l) else None
+            elif filter_dict['action'] == 'include_find':
+                self.func = lambda i, l: l if rgx.search(l) else None
+        elif filter_dict['filter'] == 'select_columns':
+            cols = [int(c) - 1 for c in filter_dict['columns']]
+            self.func = lambda i, l: self.select_columns(l, cols)
+        elif filter_dict['filter'] == 'replace':
+            p = filter_dict['pattern']
+            r = filter_dict['replace']
+            c = int(filter_dict['column']) - 1
+            self.func = lambda i, l: '\t'.join(
+                [x if j != c else re.sub(p, r, x) for j, x in enumerate(l.split('\t'))])
+        elif filter_dict['filter'] == 'prepend_line_num':
+            self.func = lambda i, l: '%d\t%s' % (i, l)
+        elif filter_dict['filter'] == 'append_line_num':
+            self.func = lambda i, l: '%s\t%d' % (l.rstrip('\r\n'), i)
+        elif filter_dict['filter'] == 'prepend_text':
+            s = filter_dict['column_text']
+            self.func = lambda i, l: '%s\t%s' % (s, l)
+        elif filter_dict['filter'] == 'append_text':
+            s = filter_dict['column_text']
+            self.func = lambda i, l: '%s\t%s' % (l.rstrip('\r\n'), s)
+        elif filter_dict['filter'] == 'skip':
+            cnt = filter_dict['count']
+            self.func = lambda i, l: l if i > cnt else None
+        elif filter_dict['filter'] == 'normalize':
+            cols = [int(c) - 1 for c in filter_dict['columns']]
+            sep = filter_dict['separator']
+            self.func = lambda i, l: self.normalize(l, cols, sep)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return next(self)
+
+    def next(self):
+        if not self.src_lines:
+            self.get_lines()
+        if self.src_lines:
+            return self.src_lines.pop(0)
+        raise StopIteration
+
+    def select_columns(self, line, cols):
+        fields = line.split('\t')
+        return '\t'.join([fields[x] for x in cols])
+
+    def normalize(self, line, split_cols, sep):
+        lines = []
+        fields = line.rstrip('\r\n').split('\t')
+        split_fields = dict()
+        cnt = 0
+        for c in split_cols:
+            if c < len(fields):
+                split_fields[c] = fields[c].split(sep)
+                cnt = max(cnt, len(split_fields[c]))
+        if cnt == 0:
+            lines.append('\t'.join(fields))
+        else:
+            for n in range(0, cnt):
+                flds = [x if c not in split_cols else split_fields[c][n]
+                        if n < len(split_fields[c])
+                        else '' for (c, x) in enumerate(fields)]
+                lines.append('\t'.join(flds))
+        return lines
+
+    def get_lines(self):
+        for i, next_line in enumerate(self.source):
+            self.src_line_cnt += 1
+            line = self.func(self.src_line_cnt, next_line)
+            if line:
+                if isinstance(line, list):
+                    self.src_lines.extend(line)
+                else:
+                    self.src_lines.append(line)
+                return
+
+
+class TabularReader:
+    """
+    Tabular file iterator. Returns a list
+    """
+    def __init__(self, input_file, skip=0, comment_char=None, col_idx=None,
+                 filters=None):
+        self.skip = skip
+        self.comment_char = comment_char
+        self.col_idx = col_idx
+        self.filters = filters
+        self.tsv_file = \
+            input_file if isinstance(input_file, file) else open(input_file)
+        if skip and skip > 0:
+            for i in range(skip):
+                if not self.tsv_file.readline():
+                    break
+        source = LineFilter(self.tsv_file, None)
+        if comment_char:
+            source = LineFilter(source,
+                                {"filter": "regex", "pattern": comment_char,
+                                 "action": "exclude_match"})
+        if filters:
+            for f in filters:
+                source = LineFilter(source, f)
+        self.source = source
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return next(self)
+
+    def next(self):
+        ''' Iteration '''
+        for i, line in enumerate(self.source):
+            fields = line.rstrip('\r\n').split('\t')
+            if self.col_idx:
+                fields = [fields[i] for i in self.col_idx]
+            return fields
+        raise StopIteration
+
+
+def filter_file(input_file, output, skip=0, comment_char='#', filters=None):
+    data_lines = 0
+    try:
+        tr = TabularReader(input_file, skip=skip, comment_char=comment_char,
+                           filters=filters)
+        for linenum, fields in enumerate(tr):
+            data_lines += 1
+            try:
+                output.write('%s\n' % '\t'.join(fields))
+            except Exception as e:
+                print('Failed at line: %d err: %s' % (linenum, e),
+                      file=sys.stderr)
+    except Exception as e:
+        print('Failed: %s' % (e), file=sys.stderr)
+        exit(1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/load_db.py	Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import sys
+
+from filters import TabularReader
+
+
+def getValueType(val):
+    if val or 0. == val:
+        try:
+            int(val)
+            return 'INTEGER'
+        except:
+            try:
+                float(val)
+                return 'REAL'
+            except:
+                return 'TEXT'
+    return None
+
+
+def get_column_def(file_path, table_name, skip=0, comment_char='#',
+                   column_names=None, max_lines=100, load_named_columns=False,
+                   filters=None):
+    col_pref = ['TEXT', 'REAL', 'INTEGER', None]
+    col_types = []
+    col_idx = None
+    try:
+        tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
+                           col_idx=None, filters=filters)
+        for linenum, fields in enumerate(tr):
+            if linenum > max_lines:
+                break
+            try:
+                while len(col_types) < len(fields):
+                    col_types.append(None)
+                for i, val in enumerate(fields):
+                    colType = getValueType(val)
+                    if col_pref.index(colType) < col_pref.index(col_types[i]):
+                        col_types[i] = colType
+            except Exception as e:
+                print('Failed at line: %d err: %s' % (linenum, e),
+                      file=sys.stderr)
+    except Exception as e:
+        print('Failed: %s' % (e), file=sys.stderr)
+    for i, col_type in enumerate(col_types):
+        if not col_type:
+            col_types[i] = 'TEXT'
+    if column_names:
+        col_names = []
+        if load_named_columns:
+            col_idx = []
+            for i, cname in enumerate(
+                    [cn.strip() for cn in column_names.split(',')]):
+                if cname != '':
+                    col_idx.append(i)
+                    col_names.append(cname)
+            col_types = [col_types[i] for i in col_idx]
+        else:
+            col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
+            for i, cname in enumerate(
+                    [cn.strip() for cn in column_names.split(',')]):
+                if cname and i < len(col_names):
+                    col_names[i] = cname
+    else:
+        col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
+    col_def = []
+    for i, col_name in enumerate(col_names):
+        col_def.append('%s %s' % (col_names[i], col_types[i]))
+    return col_names, col_types, col_def, col_idx
+
+
+def create_table(conn, file_path, table_name, skip=0, comment_char='#',
+                 pkey_autoincr=None, column_names=None,
+                 load_named_columns=False, filters=None,
+                 unique_indexes=[], indexes=[]):
+    col_names, col_types, col_def, col_idx = \
+        get_column_def(file_path, table_name, skip=skip,
+                       comment_char=comment_char, column_names=column_names,
+                       load_named_columns=load_named_columns, filters=filters)
+    col_func = [float if t == 'REAL' else int
+                if t == 'INTEGER' else str for t in col_types]
+    table_def = 'CREATE TABLE %s (\n    %s%s\n);' % (
+                table_name,
+                '%s INTEGER PRIMARY KEY AUTOINCREMENT,' %
+                pkey_autoincr if pkey_autoincr else '',
+                ', \n    '.join(col_def))
+    # print >> sys.stdout, table_def
+    insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (
+                  table_name, ','.join(col_names),
+                  ','.join(["?" for x in col_names]))
+    # print >> sys.stdout, insert_stmt
+    data_lines = 0
+    try:
+        c = conn.cursor()
+        c.execute(table_def)
+        conn.commit()
+        c.close()
+        for i, index in enumerate(unique_indexes):
+            index_name = 'idx_uniq_%s_%d' % (table_name, i)
+            index_columns = index.split(',')
+            create_index(conn, table_name, index_name, index_columns,
+                         unique=True)
+        for i, index in enumerate(indexes):
+            index_name = 'idx_%s_%d' % (table_name, i)
+            index_columns = index.split(',')
+            create_index(conn, table_name, index_name, index_columns)
+        c = conn.cursor()
+        tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
+                           col_idx=col_idx, filters=filters)
+        for linenum, fields in enumerate(tr):
+            data_lines += 1
+            try:
+                vals = [col_func[i](x)
+                        if x else None for i, x in enumerate(fields)]
+                c.execute(insert_stmt, vals)
+            except Exception as e:
+                print('Failed at line: %d err: %s' % (linenum, e),
+                      file=sys.stderr)
+        conn.commit()
+        c.close()
+    except Exception as e:
+        print('Failed: %s' % (e), file=sys.stderr)
+        exit(1)
+
+
+def create_index(conn, table_name, index_name, index_columns, unique=False):
+    index_def = "CREATE %s INDEX %s on %s(%s)" % (
+                'UNIQUE' if unique else '', index_name,
+                table_name, ','.join(index_columns))
+    c = conn.cursor()
+    c.execute(index_def)
+    conn.commit()
+    c.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,383 @@
+<macros>
+  <token name="@LINEFILTERS@">
+<![CDATA[
+  ## set linefilters to the
+  #set $input_filters = []
+  #for $fi in $linefilters:
+    #if $fi.filter.filter_type == 'skip':
+      #set $skip_lines = None
+      #if str($fi.filter.skip_lines) != '':
+        #set $skip_lines = int($fi.filter.skip_lines)
+      #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
+        #set $skip_lines = int($tbl.table.metadata.comment_lines)
+      #end if
+      #if $skip_lines is not None:
+        #set $filter_dict = dict()
+        #set $filter_dict['filter'] = str($fi.filter.filter_type)
+        #set $filter_dict['count'] = $skip_lines
+        #silent $input_filters.append($filter_dict)
+      #end if
+    #elif $fi.filter.filter_type == 'comment':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = 'regex'
+      #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join([chr(int(x)).replace('|','[|]') for x in (str($fi.filter.comment_char)).split(',')])
+      #set $filter_dict['action'] = 'exclude_match'
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'regex':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
+      #set $filter_dict['action'] = str($fi.filter.regex_action)
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'select_columns':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'replace':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['column'] = int(str($fi.filter.column).replace('c',''))
+      #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
+      #set $filter_dict['replace'] = str($fi.filter.regex_replace)
+      #silent $input_filters.append($filter_dict)
+    #elif str($fi.filter.filter_type).endswith('pend_line_num'):
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #silent $input_filters.append($filter_dict)
+    #elif str($fi.filter.filter_type).endswith('pend_text'):
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['column_text'] = str($fi.filter.column_text)
+      #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'normalize':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
+      #set $filter_dict['separator'] = str($fi.filter.separator)
+      #silent $input_filters.append($filter_dict)
+    #end if
+  #end for
+]]>
+  </token>
+  <xml name="macro_line_filters">
+                <repeat name="linefilters" title="Filter Tabular Input Lines">
+                    <conditional name="filter">
+                        <param name="filter_type" type="select" label="Filter By">
+                            <option value="skip">skip leading lines</option>
+                            <option value="comment">comment char</option>
+                            <option value="regex">by regex expression matching</option>
+                            <option value="select_columns">select columns</option>
+                            <option value="replace">regex replace value in column</option>
+                            <option value="prepend_line_num">prepend a line number column</option>
+                            <option value="append_line_num">append a line number column</option>
+                            <option value="prepend_text">prepend a column with the given text</option>
+                            <option value="append_text">append a column with the given text</option>
+                            <option value="normalize">normalize list columns, replicates row for each item in list</option>
+                        </param>
+                        <when value="skip">
+                             <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines"
+                                 help="Leave blank to use the comment lines metadata for this dataset" />
+                        </when>
+                        <when value="comment">
+                            <param name="comment_char" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are skipped">
+                                <option value="62">&gt;</option>
+                                <option value="64">@</option>
+                                <option value="43">+</option>
+                                <option value="60">&lt;</option>
+                                <option value="42">*</option>
+                                <option value="45">-</option>
+                                <option value="61">=</option>
+                                <option value="124">|</option>
+                                <option value="63">?</option>
+                                <option value="36">$</option>
+                                <option value="46">.</option>
+                                <option value="58">:</option>
+                                <option value="38">&amp;</option>
+                                <option value="37">%</option>
+                                <option value="94">^</option>
+                                <option value="35">&#35;</option>
+                                <option value="33">!</option>
+                            </param>
+                        </when>
+                        <when value="prepend_line_num"/>
+                        <when value="append_line_num"/>
+                        <when value="prepend_text">
+                            <param name="column_text" type="text" value="" label="text for column">
+                            </param>
+                        </when>
+                        <when value="append_text">
+                            <param name="column_text" type="text" value="" label="text for column">
+                            </param>
+                        </when>
+                        <when value="regex">
+                            <param name="regex_pattern" type="text" value="" label="regex pattern">
+                                <sanitizer sanitize="False"/>
+                            </param>
+                            <param name="regex_action" type="select" label="action for regex match">
+                                <option value="exclude_match">exclude line on pattern match</option>
+                                <option value="include_match">include line on pattern match</option>
+                                <option value="exclude_find">exclude line if pattern found</option>
+                                <option value="include_find">include line if pattern found</option>
+                            </param>
+                        </when>
+                        <when value="select_columns">
+                            <param name="columns" type="text" value="" label="enter column numbers to keep"
+                                help="example: 1,4,2 or c1,c4,c2(selects the first,fourth, and second columns)">
+                                <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator>
+                            </param>
+                        </when>
+                        <when value="replace">
+                            <param name="column" type="text" value="" label="enter column number to replace"
+                                help="example: 1 or c1 (selects the first column)">
+                                <validator type="regex" message="Column ordinal position separated by commas">^(c?[1-9]\d*)$</validator>
+                            </param>
+                            <param name="regex_pattern" type="text" value="" label="regex pattern">
+                                <sanitizer sanitize="False"/>
+                            </param>
+                            <param name="regex_replace" type="text" value="" label="replacement expression">
+                                <sanitizer sanitize="False"/>
+                            </param>
+                        </when>
+                        <when value="normalize">
+                            <param name="columns" type="text" value="" label="enter column numbers to normalize">
+                                <help><![CDATA[
+                                example: 2,4 or c2,c4 (selects the second, and fourth columns)
+                                If multiple columns are selected, they should have the same length and separator on each line
+                                ]]></help>
+                                <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator>
+                            </param>
+                            <param name="separator" type="text" value="," label="List item delimiter in column">
+                                <sanitizer sanitize="False"/>
+                                <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator>
+                            </param>
+                        </when>
+                    </conditional>
+                </repeat>
+  </xml>
+
+  <token name="@LINEFILTERS_HELP@">
+<![CDATA[
+**Input Line Filters**
+
+  As a tabular file is being read, line filters may be applied.
+
+  ::
+
+  - skip leading lines              skip the first *number* of lines
+  - comment char                    omit any lines that start with the specified comment character
+  - by regex expression matching    *include/exclude* lines the match the regex expression
+  - select columns                  choose to include only selected columns in the order specified
+  - regex replace value in column   replace a field in a column using a regex substitution (good for date reformatting)
+  - prepend a line number column    each line has the ordinal value of the line read by this filter as the first column
+  - append a line number column     each line has the ordinal value of the line read by this filter as the last column
+  - prepend a text column           each line has the text string as the first column
+  - append a text column            each line has the text string as the last column
+  - normalize list columns          replicates the line for each item in the specified list *columns*
+]]>
+  </token>
+
+  <token name="@LINEFILTERS_HELP_EXAMPLE@">
+<![CDATA[
+**Line Filtering Example**
+  *(Six filters are applied as the following file is read)*
+
+  ::
+
+    Input Tabular File:
+
+    #People with pets
+    Pets FirstName           LastName   DOB       PetNames  PetType
+    2    Paula               Brown      24/05/78  Rex,Fluff dog,cat
+    1    Steven              Jones      04/04/74  Allie     cat
+    0    Jane                Doe        24/05/78
+    1    James               Smith      20/10/80  Spot
+
+
+    Filter 1 - append a line number column:
+
+    #People with pets                                                 1
+    Pets FirstName           LastName   DOB       PetNames  PetType   2
+    2    Paula               Brown      24/05/78  Rex,Fluff dog,cat   3
+    1    Steven              Jones      04/04/74  Allie     cat       4
+    0    Jane                Doe        24/05/78                      5
+    1    James               Smith      20/10/80  Spot                6
+
+    Filter 2 - by regex expression matching [include]: '^\d+' (include lines that start with a number)
+
+    2    Paula               Brown      24/05/78  Rex,Fluff dog,cat   3
+    1    Steven              Jones      04/04/74  Allie     cat       4
+    0    Jane                Doe        24/05/78                      5
+    1    James               Smith      20/10/80  Spot                6
+
+    Filter 3 - append a line number column:
+
+    2    Paula               Brown      24/05/78  Rex,Fluff dog,cat   3  1
+    1    Steven              Jones      04/04/74  Allie     cat       4  2
+    0    Jane                Doe        24/05/78                      5  3
+    1    James               Smith      20/10/80  Spot                6  4
+
+    Filter 4 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format)
+
+    2    Paula               Brown      1978-05-24  Rex,Fluff dog,cat   3  1
+    1    Steven              Jones      1974-04-04  Allie     cat       4  2
+    0    Jane                Doe        1978-05-24                      5  3
+    1    James               Smith      1980-10-20  Spot                6  4
+
+    Filter 5 - normalize list columns[5,6]:
+
+    2    Paula               Brown      1978-05-24  Rex       dog       3  1
+    2    Paula               Brown      1978-05-24  Fluff     cat       3  1
+    1    Steven              Jones      1974-04-04  Allie     cat       4  2
+    0    Jane                Doe        1978-05-24                      5  3
+    1    James               Smith      1980-10-20  Spot                6  4
+
+    Filter 6 - append a line number column:
+
+    2    Paula               Brown      1978-05-24  Rex       dog       3  1  1
+    2    Paula               Brown      1978-05-24  Fluff     cat       3  1  2
+    1    Steven              Jones      1974-04-04  Allie     cat       4  2  3
+    0    Jane                Doe        1978-05-24                      5  3  4
+    1    James               Smith      1980-10-20  Spot                6  4  5
+
+]]>
+  </token>
+
+  <token name="@QUERY_HELP@">
+<![CDATA[
+
+For help in using SQLite_ see:  http://www.sqlite.org/docs.html
+
+**NOTE:** input for SQLite dates input field must be in the format: *YYYY-MM-DD* for example: 2015-09-30
+
+See: http://www.sqlite.org/lang_datefunc.html
+
+**Example**
+
+  Given 2 tabular datasets: *customers* and *sales*
+
+   Dataset *customers*
+
+    Table name: "customers"
+
+    Column names: "CustomerID,FirstName,LastName,Email,DOB,Phone"
+
+    =========== ========== ========== ===================== ========== ============
+    #CustomerID FirstName  LastName   Email                 DOB        Phone
+    =========== ========== ========== ===================== ========== ============
+    1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
+    2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
+    3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
+    4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
+    =========== ========== ========== ===================== ========== ============
+
+   Dataset *sales*
+
+    Table name: "sales"
+
+    Column names: "CustomerID,Date,SaleAmount"
+
+    =============  ============  ============
+      #CustomerID    Date          SaleAmount
+    =============  ============  ============
+               2    2004-05-06         100.22
+               1    2004-05-07          99.95
+               3    2004-05-07         122.95
+               3    2004-05-13         100.00
+               4    2004-05-22         555.55
+    =============  ============  ============
+
+  The query
+
+  ::
+
+    SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales"
+    FROM customers join sales on customers.CustomerID = sales.CustomerID
+    GROUP BY customers.CustomerID ORDER BY TotalSales DESC;
+
+  Produces this tabular output:
+
+    ========== ======== ==========
+    #FirstName LastName TotalSales
+    ========== ======== ==========
+    James      Smith    555.55
+    Paula      Brown    222.95
+    Steven     Goldfish 100.22
+    John       Smith    99.95
+    ========== ======== ==========
+
+
+  If the optional Table name and Column names inputs are not used, the query would be:
+
+  ::
+
+    SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales"
+    FROM t1 join t2 on t1.c1 = t2.c1
+    GROUP BY t1.c1 ORDER BY TotalSales DESC;
+
+  You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5:
+
+    Column names: ,FirstName,LastName,,BirthDate
+
+    Results in the following data base table
+
+    =========== ========== ========== ===================== ========== ============
+    #c1         FirstName  LastName   c4                    BirthDate  c6
+    =========== ========== ========== ===================== ========== ============
+    1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
+    2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
+    3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
+    4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
+    =========== ========== ========== ===================== ========== ============
+
+
+  Regular_expression_ functions are included for:
+
+  ::
+
+    matching:      re_match('pattern',column)
+
+    SELECT t1.FirstName, t1.LastName
+    FROM t1
+    WHERE re_match('^.*\.(net|org)$',c4)
+
+  Results:
+
+    =========== ==========
+    #FirstName  LastName
+    =========== ==========
+    Steven      Goldfish
+    Paula       Brown
+    =========== ==========
+
+
+  ::
+
+    searching:     re_search('pattern',column)
+    substituting:  re_sub('pattern','replacement,column)
+
+    SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB"
+    FROM t1
+    WHERE re_search('[hp]er',c4)
+
+  Results:
+
+
+    =========== ========== ==========
+    #FirstName  LastName   DOB
+    =========== ========== ==========
+    Steven      Goldfish   04/04/74
+    Paula       Brown      24/05/78
+    James       Smith      20/10/80
+    =========== ========== ==========
+
+.. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
+.. _SQLite: http://www.sqlite.org/index.html
+.. _SQLite_functions: http://www.sqlite.org/docs.html
+
+
+]]>
+  </token>
+
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/query_db.py	Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import re
+import sqlite3 as sqlite
+import sys
+
+
+TABLE_QUERY = \
+    """
+    SELECT name, sql
+    FROM sqlite_master
+    WHERE type='table'
+    ORDER BY name
+    """
+
+
+def regex_match(expr, item):
+    return re.match(expr, item) is not None
+
+
+def regex_search(expr, item):
+    return re.search(expr, item) is not None
+
+
+def regex_sub(expr, replace, item):
+    return re.sub(expr, replace, item)
+
+
+def get_connection(sqlitedb_path, addfunctions=False):
+    conn = sqlite.connect(sqlitedb_path)
+    if addfunctions:
+        conn.create_function("re_match", 2, regex_match)
+        conn.create_function("re_search", 2, regex_search)
+        conn.create_function("re_sub", 3, regex_sub)
+    return conn
+
+
+def describe_tables(conn, outputFile):
+    try:
+        c = conn.cursor()
+        tables_query = TABLE_QUERY
+        rslt = c.execute(tables_query).fetchall()
+        for table, sql in rslt:
+            print("Table %s:" % table, file=sys.stderr)
+            try:
+                col_query = 'SELECT * FROM %s LIMIT 0' % table
+                cur = conn.cursor().execute(col_query)
+                cols = [col[0] for col in cur.description]
+                print(" Columns: %s" % cols, file=sys.stderr)
+            except Exception as exc:
+                print("Error: %s" % exc, file=sys.stderr)
+    except Exception as exc:
+        print("Error: %s" % exc, file=sys.stderr)
+    exit(0)
+
+
+def run_query(conn, query, outputFile, no_header=False):
+    cur = conn.cursor()
+    results = cur.execute(query)
+    if not no_header:
+        outputFile.write("#%s\n" % '\t'.join(
+            [str(col[0]) for col in cur.description]))
+        # yield [col[0] for col in cur.description]
+    for i, row in enumerate(results):
+        # yield [val for val in row]
+        outputFile.write("%s\n" % '\t'.join(
+            [str(val) if val is not None else '' for val in row]))
--- a/query_tabular.py	Wed Jul 05 11:51:50 2017 -0400
+++ b/query_tabular.py	Fri Jul 14 11:39:27 2017 -0400
@@ -1,54 +1,37 @@
 #!/usr/bin/env python
-"""
-"""
+
+from __future__ import print_function
+
+import json
+import optparse
+import os.path
 import sys
-import re
-import os.path
-import json
-import sqlite3 as sqlite
-import optparse
-from optparse import OptionParser
+
+from load_db import create_table
+
+from query_db import describe_tables, get_connection, run_query
+

 """
-TODO:
-- could read column names from comment lines, but issues with legal names
-- could add some transformations on tabular columns,
-  filter - skip_regex
-  e.g. a regex to format date/time strings
-    format: {
-      c2 : re.sub('pat', 'sub', c2)
-      c3 : len(c3)
-    }
-    def format(colname,val, expr):
-  normalize input list columns
-    iterate over list values creating one row per iteration
-      option for input line_num column
-    create associated table
-      fk, name, value  # e.g. PSM table with list of proteins containing peptide
-      fk, name, value[, value] # if multiple columns similarly indexed, e.g. vcf
-- column_defs dict of columns to create from tabular input
-    column_defs : { 'name1' : 'expr', 'name2' : 'expr'}
-- allow multiple queries and outputs
-  repeat min - max with up to max conditional outputs
-
-- add a --json input for table definitions (or yaml)
 JSON config:
 { tables : [
     { file_path : '/home/galaxy/dataset_101.dat',
             table_name : 't1',
-            column_names : ['c1', 'c2', 'c3'],
+            column_names : ['c1','c2','c3'],
             pkey_autoincr : 'id'
             comment_lines : 1
             unique: ['c1'],
-            index: ['c2','c3']
+            index: ['c2', 'c3']
     },
     { file_path : '/home/galaxy/dataset_102.dat',
             table_name : 'gff',
             column_names : ['seqname',,'date','start','end']
             comment_lines : 1
             load_named_columns : True
-            filters : [{'filter': 'regex', 'pattern': '#peptide', 'action': 'exclude_match'},
-                       {'filter': 'replace', 'column': 3, 'replace': 'gi[|]', 'pattern': ''}]
+            filters : [{'filter': 'regex', 'pattern': '#peptide',
+                        'action': 'exclude_match'},
+                       {'filter': 'replace', 'column': 3,
+                        'replace': 'gi[|]', 'pattern': ''}]
     },
     { file_path : '/home/galaxy/dataset_103.dat',
             table_name : 'test',
@@ -59,331 +42,75 @@
 """


-class LineFilter( object ):
-    def __init__(self,source,filter_dict):
-        self.source = source
-        self.filter_dict = filter_dict
-        # print >> sys.stderr, 'LineFilter %s' % filter_dict if filter_dict else 'NONE'
-        self.func = lambda i,l: l.rstrip('\r\n') if l else None
-        self.src_lines = []
-        self.src_line_cnt = 0
-        if not filter_dict:
-            return
-        if filter_dict['filter'] == 'regex':
-            rgx = re.compile(filter_dict['pattern'])
-            if filter_dict['action'] == 'exclude_match':
-                self.func = lambda i,l: l if not rgx.match(l) else None
-            elif filter_dict['action'] == 'include_match':
-                self.func = lambda i,l: l if rgx.match(l) else None
-            elif filter_dict['action'] == 'exclude_find':
-                self.func = lambda i,l: l if not rgx.search(l) else None
-            elif filter_dict['action'] == 'include_find':
-                self.func = lambda i,l: l if rgx.search(l) else None
-        elif filter_dict['filter'] == 'select_columns':
-            cols = [int(c) - 1 for c in filter_dict['columns']]
-            self.func = lambda i,l: self.select_columns(l,cols)
-        elif filter_dict['filter'] == 'replace':
-            p = filter_dict['pattern']
-            r = filter_dict['replace']
-            c = int(filter_dict['column']) - 1
-            self.func = lambda i,l: '\t'.join([x if i != c else re.sub(p,r,x) for i,x in enumerate(l.split('\t'))])
-        elif filter_dict['filter'] == 'prepend_line_num':
-            self.func = lambda i,l: '%d\t%s' % (i,l)
-        elif filter_dict['filter'] == 'append_line_num':
-            self.func = lambda i,l: '%s\t%d' % (l.rstrip('\r\n'),i)
-        elif filter_dict['filter'] == 'prepend_text':
-            s = filter_dict['column_text']
-            self.func = lambda i,l: '%s\t%s' % (s,l)
-        elif filter_dict['filter'] == 'append_text':
-            s = filter_dict['column_text']
-            self.func = lambda i,l: '%s\t%s' % (l.rstrip('\r\n'),s)
-        elif filter_dict['filter'] == 'skip':
-            cnt = filter_dict['count']
-            self.func = lambda i,l: l if i > cnt else None
-        elif filter_dict['filter'] == 'normalize':
-            cols = [int(c) - 1 for c in filter_dict['columns']]
-            sep = filter_dict['separator']
-            self.func = lambda i,l: self.normalize(l,cols,sep)
-    def __iter__(self):
-        return self
-    def select_columns(self,line,cols):
-        fields = line.split('\t')
-        return '\t'.join([fields[x] for x in cols])
-    def normalize(self,line,split_cols,sep):
-        lines = []
-        fields = line.rstrip('\r\n').split('\t')
-        split_fields = dict()
-        cnt = 0
-        for c in split_cols:
-            if c < len(fields):
-                split_fields[c] = fields[c].split(sep)
-                cnt = max(cnt, len(split_fields[c]))
-        if cnt == 0:
-            lines.append('\t'.join(fields))
-        else:
-            for n in range(0, cnt):
-                flds = [x if c not in split_cols else split_fields[c][n] if n < len(split_fields[c]) else ''  for (c, x) in enumerate(fields)]
-                lines.append('\t'.join(flds))
-        return lines
-    def get_lines(self):
-        for i,next_line in enumerate(self.source):
-            self.src_line_cnt += 1
-            line = self.func(self.src_line_cnt,next_line)
-            # print >> sys.stderr, 'LineFilter %s: %d %s' % (str(self.filter_dict),self.src_line_cnt,line)
-            if line:
-               if isinstance(line,list):
-                   self.src_lines.extend(line)
-               else:
-                   self.src_lines.append(line)
-               return
-    def next(self):
-        if not self.src_lines:
-            self.get_lines()
-        if self.src_lines:
-                return self.src_lines.pop(0)
-        raise StopIteration
-
-
-class TabularReader:
-    """
-    Tabular file iterator. Returns a list
-    """
-    def __init__(self, file_path, skip=0, comment_char=None, col_idx=None, filters=None):
-        self.skip = skip
-        self.comment_char = comment_char
-        self.col_idx = col_idx
-        self.filters = filters
-        self.tsv_file = open(file_path)
-        if skip and skip > 0:
-            for i in range(skip):
-                if not self.tsv_file.readline():
-                    break
-        source = LineFilter(self.tsv_file,None)
-        if comment_char:
-            source = LineFilter(source,{"filter": "regex", "pattern": comment_char, "action": "exclude_match"})
-        if filters:
-            for f in filters:
-                source = LineFilter(source,f)
-        self.source = source
-    def __iter__(self):
-        return self
-    def next(self):
-        ''' Iteration '''
-        for i,line in enumerate(self.source):
-            fields = line.rstrip('\r\n').split('\t')
-            if self.col_idx:
-                fields = [fields[i] for i in self.col_idx]
-            return fields
-        raise StopIteration
-
-
-def getValueType(val):
-    if val or 0. == val:
-        try:
-            int(val)
-            return 'INTEGER'
-        except:
-            try:
-                float(val)
-                return 'REAL'
-            except:
-                return 'TEXT'
-    return None
-
-
-def get_column_def(file_path, table_name, skip=0, comment_char='#',
-                   column_names=None, max_lines=100,load_named_columns=False,filters=None):
-    col_pref = ['TEXT', 'REAL', 'INTEGER', None]
-    col_types = []
-    col_idx = None
-    data_lines = 0
-    try:
-        tr = TabularReader(file_path,skip=skip, comment_char=comment_char, col_idx=None, filters=filters)
-        for linenum, fields in enumerate(tr):
-            if linenum > max_lines:
-                break
-            try:
-                while len(col_types) < len(fields):
-                    col_types.append(None)
-                for i, val in enumerate(fields):
-                    colType = getValueType(val)
-                    if col_pref.index(colType) < col_pref.index(col_types[i]):
-                        col_types[i] = colType
-            except Exception, e:
-                print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e)
-    except Exception, e:
-        print >> sys.stderr, 'Failed: %s' % (e)
-    for i,col_type in enumerate(col_types):
-        if not col_type:
-            col_types[i] = 'TEXT'
-    if column_names:
-        col_names = []
-        if load_named_columns:
-            col_idx = []
-            for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]):
-                if cname != '':
-                    col_idx.append(i)
-                    col_names.append(cname)
-            col_types = [col_types[i] for i in col_idx]
-        else:
-            col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
-            for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]):
-                if cname and i < len(col_names):
-                    col_names[i] = cname
-    else:
-        col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
-    col_def = []
-    for i, col_name in enumerate(col_names):
-        col_def.append('%s %s' % (col_names[i], col_types[i]))
-    return col_names, col_types, col_def, col_idx
-
-
-def create_table(conn, file_path, table_name, skip=0, comment_char='#', pkey_autoincr=None, column_names=None,load_named_columns=False,filters=None,unique_indexes=[],indexes=[]):
-
-    col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char,
-        column_names=column_names,load_named_columns=load_named_columns,filters=filters)
-    col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types]
-    table_def = 'CREATE TABLE %s (\n    %s%s\n);' % (
-                table_name,
-                '%s INTEGER PRIMARY KEY AUTOINCREMENT,' % pkey_autoincr if pkey_autoincr else '',
-                ', \n    '.join(col_def))
-    # print >> sys.stdout, table_def
-    insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names]))
-    # print >> sys.stdout, insert_stmt
-    data_lines = 0
-    try:
-        c = conn.cursor()
-        c.execute(table_def)
-        conn.commit()
-        c.close()
-        for i,index in enumerate(unique_indexes):
-            index_name='idx_uniq_%s_%d' % (table_name,i)
-            index_columns = index.split(',')
-            create_index(conn, table_name, index_name, index_columns,unique=True)
-        for i,index in enumerate(indexes):
-            index_name='idx_%s_%d' % (table_name,i)
-            index_columns = index.split(',')
-            create_index(conn, table_name, index_name, index_columns)
-        c = conn.cursor()
-        tr = TabularReader(file_path,skip=skip, comment_char=comment_char, col_idx=col_idx, filters=filters)
-        for linenum, fields in enumerate(tr):
-            data_lines += 1
-            try:
-                vals = [col_func[i](x) if x else None for i, x in enumerate(fields)]
-                c.execute(insert_stmt, vals)
-            except Exception, e:
-                print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e)
-        conn.commit()
-        c.close()
-    except Exception, e:
-        print >> sys.stderr, 'Failed: %s' % (e)
-        exit(1)
-
-
-def create_index(conn, table_name, index_name, index_columns, unique=False):
-    index_def = "CREATE %s INDEX %s on %s(%s)" % ('UNIQUE' if unique else '', index_name, table_name, ','.join(index_columns))
-    c = conn.cursor()
-    c.execute(index_def)
-    conn.commit()
-    c.close()
-
-
-def regex_match(expr, item):
-    return re.match(expr, item) is not None
-
-
-def regex_search(expr, item):
-    return re.search(expr, item) is not None
-
-
-def regex_sub(expr, replace, item):
-    return re.sub(expr, replace, item)
-
-
-def get_connection(sqlitedb_path, addfunctions=False):
-    conn = sqlite.connect(sqlitedb_path)
-    if addfunctions:
-        conn.create_function("re_match", 2, regex_match)
-        conn.create_function("re_search", 2, regex_search)
-        conn.create_function("re_sub", 3, regex_sub)
-    return conn
-
-
 def __main__():
     # Parse Command Line
     parser = optparse.OptionParser()
-    parser.add_option('-s', '--sqlitedb', dest='sqlitedb', default=None, help='The SQLite Database')
-    parser.add_option('-t', '--table', dest='tables', action="append", default=[], help='Tabular file: file_path[=table_name[:column_name, ...]')
-    parser.add_option('-j', '--jsonfile', dest='jsonfile', default=None, help='Tabular file: file_path[=table_name[:column_name, ...]')
-    parser.add_option('-q', '--query', dest='query', default=None, help='SQL query')
-    parser.add_option('-Q', '--query_file', dest='query_file', default=None, help='SQL query file')
-    parser.add_option('-n', '--no_header', dest='no_header', action='store_true', default=False, help='Include a column headers line')
-    parser.add_option('-o', '--output', dest='output', default=None, help='Output file for query results')
+    parser.add_option('-s', '--sqlitedb', dest='sqlitedb', default=None,
+                      help='The SQLite Database')
+    parser.add_option('-j', '--jsonfile', dest='jsonfile', default=None,
+                      help='JSON dict of table specifications')
+    parser.add_option('-q', '--query', dest='query', default=None,
+                      help='SQL query')
+    parser.add_option('-Q', '--query_file', dest='query_file', default=None,
+                      help='SQL query file')
+    parser.add_option('-n', '--no_header', dest='no_header', default=False,
+                      action='store_true',
+                      help='Include a column headers line')
+    parser.add_option('-o', '--output', dest='output', default=None,
+                      help='Output file for query results')
     (options, args) = parser.parse_args()

-    def run_query(query,outputFile):
-        conn = get_connection(options.sqlitedb, addfunctions=True)
-        cur = conn.cursor()
-        results = cur.execute(query)
-        if not options.no_header:
-            outputFile.write("#%s\n" % '\t'.join([str(col[0]) for col in cur.description]))
-            # yield [col[0] for col in cur.description]
-        for i, row in enumerate(results):
-            # yield [val for val in row]
-            outputFile.write("%s\n" % '\t'.join([str(val) if val is not None else '' for val in row]))
-
-    # open sqlite connection
-    conn = get_connection(options.sqlitedb)
     # determine output destination
     if options.output is not None:
         try:
             outputPath = os.path.abspath(options.output)
             outputFile = open(outputPath, 'w')
-        except Exception, e:
-            print >> sys.stderr, "failed: %s" % e
+        except Exception as e:
+            print("failed: %s" % e, file=sys.stderr)
             exit(3)
     else:
         outputFile = sys.stdout

-    # get table defs
-    if options.tables:
-        for ti, table in enumerate(options.tables):
-            table_name = 't%d' % (ti + 1)
-            column_names = None
-            fields = table.split('=')
-            path = fields[0]
-            if len(fields) > 1:
-                names = fields[1].split(':')
-                table_name = names[0] if names[0] else table_name
-                if len(names) > 1:
-                    column_names = names[1]
-            # print >> sys.stdout, '%s %s' % (table_name, path)
-            create_table(conn, path, table_name, column_names=column_names)
+    def _create_table(ti, table):
+        path = table['file_path']
+        table_name =\
+            table['table_name'] if 'table_name' in table else 't%d' % (ti + 1)
+        comment_lines =\
+            table['comment_lines'] if 'comment_lines' in table else 0
+        comment_char =\
+            table['comment_char'] if 'comment_char' in table else None
+        column_names =\
+            table['column_names'] if 'column_names' in table else None
+        if column_names:
+            load_named_columns =\
+                table['load_named_columns']\
+                if 'load_named_columns' in table else False
+        else:
+            load_named_columns = False
+        unique_indexes = table['unique'] if 'unique' in table else []
+        indexes = table['index'] if 'index' in table else []
+        filters = table['filters'] if 'filters' in table else None
+        pkey_autoincr = \
+            table['pkey_autoincr'] if 'pkey_autoincr' in table else None
+        create_table(get_connection(options.sqlitedb), path, table_name,
+                     pkey_autoincr=pkey_autoincr,
+                     column_names=column_names,
+                     skip=comment_lines,
+                     comment_char=comment_char,
+                     load_named_columns=load_named_columns,
+                     filters=filters,
+                     unique_indexes=unique_indexes,
+                     indexes=indexes)
+
     if options.jsonfile:
         try:
             fh = open(options.jsonfile)
             tdef = json.load(fh)
             if 'tables' in tdef:
                 for ti, table in enumerate(tdef['tables']):
-                    path = table['file_path']
-                    table_name = table['table_name'] if 'table_name' in table else 't%d' % (ti + 1)
-                    comment_lines = table['comment_lines'] if 'comment_lines' in table else 0
-                    comment_char = table['comment_char'] if 'comment_char' in table else None
-                    column_names = table['column_names'] if 'column_names' in table else None
-                    if column_names:
-                        load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False
-                    else:
-                        load_named_columns = False
-                    unique_indexes = table['unique'] if 'unique' in table else []
-                    indexes = table['index'] if 'index' in table else []
-                    filters = table['filters'] if 'filters' in table else None
-                    pkey_autoincr = table['pkey_autoincr'] if 'pkey_autoincr' in table else None
-                    create_table(conn, path, table_name, pkey_autoincr=pkey_autoincr, column_names=column_names,
-                                 skip=comment_lines, comment_char=comment_char, load_named_columns=load_named_columns,
-                                 filters=filters,unique_indexes=unique_indexes, indexes=indexes)
-        except Exception, exc:
-            print >> sys.stderr, "Error: %s" % exc
-    conn.close()
+                    _create_table(ti, table)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)

     query = None
     if (options.query_file is not None):
@@ -395,32 +122,18 @@
         query = options.query

     if (query is None):
-        tables_query = \
-            "SELECT name, sql FROM sqlite_master WHERE type='table' ORDER BY name"
+        try:
+            describe_tables(get_connection(options.sqlitedb), outputFile)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+    else:
         try:
-            conn = get_connection(options.sqlitedb)
-            c = conn.cursor()
-            rslt = c.execute(tables_query).fetchall()
-            for table, sql in rslt:
-                print >> sys.stderr, "Table %s:" % table
-                try:
-                    col_query = 'SELECT * FROM %s LIMIT 0' % table
-                    cur = conn.cursor().execute(col_query)
-                    cols = [col[0] for col in cur.description]
-                    print >> sys.stderr, " Columns: %s" % cols
-                except Exception, exc:
-                    print >> sys.stderr, "Error: %s" % exc
-        except Exception, exc:
-            print >> sys.stderr, "Error: %s" % exc
-        exit(0)
-    # if not sqlite.is_read_only_query(query):
-    #    print >> sys.stderr, "Error: Must be a read only query"
-    #    exit(2)
-    try:
-        run_query(query,outputFile)
-    except Exception, exc:
-        print >> sys.stderr, "Error: %s" % exc
-        exit(1)
+            run_query(get_connection(options.sqlitedb), query, outputFile,
+                      no_header=options.no_header)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+            exit(1)
+

 if __name__ == "__main__":
     __main__()
--- a/query_tabular.xml	Wed Jul 05 11:51:50 2017 -0400
+++ b/query_tabular.xml	Fri Jul 14 11:39:27 2017 -0400
@@ -1,6 +1,10 @@
-<tool id="query_tabular" name="Query Tabular" version="4.0.0">
+<tool id="query_tabular" name="Query Tabular" version="5.0.0">
     <description>using sqlite sql</description>

+    <macros>
+         <import>macros.xml</import>
+    </macros>
+
     <requirements>
     </requirements>
     <stdio>
@@ -76,62 +80,8 @@
   #if len($idx_non) > 0:
     #set $jtbl['index'] = $idx_non
   #end if
-  #set $input_filters = []
-  #for $fi in $tbl.input_opts.linefilters:
-    #if $fi.filter.filter_type == 'skip':
-      #set $skip_lines = None
-      #if str($fi.filter.skip_lines) != '':
-        #set $skip_lines = int($fi.filter.skip_lines)
-      #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
-        #set $skip_lines = int($tbl.table.metadata.comment_lines)
-      #end if
-      #if $skip_lines is not None:
-        #set $filter_dict = dict()
-        #set $filter_dict['filter'] = str($fi.filter.filter_type)
-        #set $filter_dict['count'] = $skip_lines
-        #silent $input_filters.append($filter_dict)
-      #end if
-    #elif $fi.filter.filter_type == 'comment':
-      #set $filter_dict = dict()
-      #set $filter_dict['filter'] = 'regex'
-      #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join([chr(int(x)).replace('|','[|]') for x in (str($fi.filter.comment_char)).split(',')])
-      #set $filter_dict['action'] = 'exclude_match'
-      #silent $input_filters.append($filter_dict)
-    #elif $fi.filter.filter_type == 'regex':
-      #set $filter_dict = dict()
-      #set $filter_dict['filter'] = str($fi.filter.filter_type)
-      #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
-      #set $filter_dict['action'] = str($fi.filter.regex_action)
-      #silent $input_filters.append($filter_dict)
-    #elif $fi.filter.filter_type == 'select_columns':
-      #set $filter_dict = dict()
-      #set $filter_dict['filter'] = str($fi.filter.filter_type)
-      #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
-      #silent $input_filters.append($filter_dict)
-    #elif $fi.filter.filter_type == 'replace':
-      #set $filter_dict = dict()
-      #set $filter_dict['filter'] = str($fi.filter.filter_type)
-      #set $filter_dict['column'] = int(str($fi.filter.column).replace('c',''))
-      #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
-      #set $filter_dict['replace'] = str($fi.filter.regex_replace)
-      #silent $input_filters.append($filter_dict)
-    #elif str($fi.filter.filter_type).endswith('pend_line_num'):
-      #set $filter_dict = dict()
-      #set $filter_dict['filter'] = str($fi.filter.filter_type)
-      #silent $input_filters.append($filter_dict)
-    #elif str($fi.filter.filter_type).endswith('pend_text'):
-      #set $filter_dict = dict()
-      #set $filter_dict['filter'] = str($fi.filter.filter_type)
-      #set $filter_dict['column_text'] = str($fi.filter.column_text)
-      #silent $input_filters.append($filter_dict)
-    #elif $fi.filter.filter_type == 'normalize':
-      #set $filter_dict = dict()
-      #set $filter_dict['filter'] = str($fi.filter.filter_type)
-      #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
-      #set $filter_dict['separator'] = str($fi.filter.separator)
-      #silent $input_filters.append($filter_dict)
-    #end if
-  #end for
+  #set $linefilters = $tbl.input_opts.linefilters
+  @LINEFILTERS@
   #if $input_filters:
     #set $jtbl['filters'] = $input_filters
   #end if
@@ -149,99 +99,7 @@
         <repeat name="tables" title="Database Table" min="0">
             <param name="table" type="data" format="tabular" label="Tabular Dataset for Table"/>
             <section name="input_opts" expanded="false" title="Filter Dataset Input">
-                <repeat name="linefilters" title="Filter Tabular Input Lines">
-                    <conditional name="filter">
-                        <param name="filter_type" type="select" label="Filter By">
-                            <option value="skip">skip leading lines</option>
-                            <option value="comment">comment char</option>
-                            <option value="regex">by regex expression matching</option>
-                            <option value="select_columns">select columns</option>
-                            <option value="replace">regex replace value in column</option>
-                            <option value="prepend_line_num">prepend a line number column</option>
-                            <option value="append_line_num">append a line number column</option>
-                            <option value="prepend_text">prepend a column with the given text</option>
-                            <option value="append_text">append a column with the given text</option>
-                            <option value="normalize">normalize list columns, replicates row for each item in list</option>
-                        </param>
-                        <when value="skip">
-                             <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines"
-                                 help="Leave blank to use the comment lines metadata for this dataset" />
-                        </when>
-                        <when value="comment">
-                            <param name="comment_char" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are skipped">
-                                <option value="62">&gt;</option>
-                                <option value="64">@</option>
-                                <option value="43">+</option>
-                                <option value="60">&lt;</option>
-                                <option value="42">*</option>
-                                <option value="45">-</option>
-                                <option value="61">=</option>
-                                <option value="124">|</option>
-                                <option value="63">?</option>
-                                <option value="36">$</option>
-                                <option value="46">.</option>
-                                <option value="58">:</option>
-                                <option value="38">&amp;</option>
-                                <option value="37">%</option>
-                                <option value="94">^</option>
-                                <option value="35">&#35;</option>
-                                <option value="33">!</option>
-                            </param>
-                        </when>
-                        <when value="prepend_line_num"/>
-                        <when value="append_line_num"/>
-                        <when value="prepend_text">
-                            <param name="column_text" type="text" value="" label="text for column">
-                            </param>
-                        </when>
-                        <when value="append_text">
-                            <param name="column_text" type="text" value="" label="text for column">
-                            </param>
-                        </when>
-                        <when value="regex">
-                            <param name="regex_pattern" type="text" value="" label="regex pattern">
-                                <sanitizer sanitize="False"/>
-                            </param>
-                            <param name="regex_action" type="select" label="action for regex match">
-                                <option value="exclude_match">exclude line on pattern match</option>
-                                <option value="include_match">include line on pattern match</option>
-                                <option value="exclude_find">exclude line if pattern found</option>
-                                <option value="include_find">include line if pattern found</option>
-                            </param>
-                        </when>
-                        <when value="select_columns">
-                            <param name="columns" type="text" value="" label="enter column numbers to keep"
-                                help="example: 1,4,2 or c1,c4,c2(selects the first,fourth, and second columns)">
-                                <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator>
-                            </param>
-                        </when>
-                        <when value="replace">
-                            <param name="column" type="text" value="" label="enter column number to replace"
-                                help="example: 1 or c1 (selects the first column)">
-                                <validator type="regex" message="Column ordinal position separated by commas">^(c?[1-9]\d*)$</validator>
-                            </param>
-                            <param name="regex_pattern" type="text" value="" label="regex pattern">
-                                <sanitizer sanitize="False"/>
-                            </param>
-                            <param name="regex_replace" type="text" value="" label="replacement expression">
-                                <sanitizer sanitize="False"/>
-                            </param>
-                        </when>
-                        <when value="normalize">
-                            <param name="columns" type="text" value="" label="enter column numbers to normalize">
-                                <help><![CDATA[
-                                example: 2,4 or c2,c4 (selects the second, and fourth columns)
-                                If multiple columns are selected, they should have the same length and separator on each line
-                                ]]></help>
-                                <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator>
-                            </param>
-                            <param name="separator" type="text" value="," label="List item delimiter in column">
-                                <sanitizer sanitize="False"/>
-                                <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator>
-                            </param>
-                        </when>
-                    </conditional>
-                </repeat>
+                <expand macro="macro_line_filters" />
             </section>
             <section name="tbl_opts" expanded="false" title="Table Options">
                 <param name="table_name" type="text" value="" optional="true" label="Specify Name for Table">
@@ -269,7 +127,8 @@
                 </repeat>
             </section>
         </repeat>
-        <param name="save_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save the sqlite database in your history"/>
+        <param name="save_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save the sqlite database in your history"
+            help="SQLite to tabular tool can run additional queries on this database"/>
         <param name="sqlquery" type="text" area="true" size="20x80" value="" optional="true" label="SQL Query to generate tabular output">
                 <help>By default: tables are named: t1,t2,...,tn and columns in each table: c1,c2,...,cn</help>
                 <sanitizer sanitize="False"/>
@@ -279,10 +138,10 @@
     </inputs>
     <outputs>
         <data format="sqlite" name="sqlitedb" label="sqlite db of ${on_string}">
-            <filter>save_db or not (sqlquery and len(sqlquery) > 0)</filter>
+            <filter>save_db</filter>
         </data>
         <data format="tabular" name="output" label="query results on ${on_string}">
-            <filter>sqlquery and len(sqlquery) > 0</filter>
+            <filter>not save_db or (sqlquery and len(sqlquery.strip()) > 0)</filter>
         </data>
     </outputs>
     <tests>
@@ -400,20 +259,7 @@
   An existing SQLite_ data base can be used as input, and any selected tabular datasets will be added as new tables in that data base.


-**Input Line Filters**
-
-  As a tabular file is being read, line filters may be applied.
-
-  ::
-
-  - skip leading lines              skip the first *number* of lines
-  - comment char                    omit any lines that start with the specified comment character
-  - by regex expression matching    *include/exclude* lines the match the regex expression
-  - select columns                  choose to include only selected columns in the order specified
-  - regex replace value in column   replace a field in a column using a regex substitution (good for date reformatting)
-  - prepend a line number column    each line has the ordinal value of the line read by this filter as the first column
-  - append a line number column     each line has the ordinal value of the line read by this filter as the last column
-  - normalize list columns          replicates the line for each item in the specified list *columns*
+@LINEFILTERS_HELP@


 **Outputs**
@@ -425,192 +271,9 @@
     *(The* **SQLite to tabular** *tool can run additional queries on this database.)*


-For help in using SQLite_ see:  http://www.sqlite.org/docs.html
-
-**NOTE:** input for SQLite dates input field must be in the format: *YYYY-MM-DD* for example: 2015-09-30
-
-See: http://www.sqlite.org/lang_datefunc.html
-
-**Example**
-
-  Given 2 tabular datasets: *customers* and *sales*
-
-   Dataset *customers*
-
-    Table name: "customers"
-
-    Column names: "CustomerID,FirstName,LastName,Email,DOB,Phone"
-
-    =========== ========== ========== ===================== ========== ============
-    #CustomerID FirstName  LastName   Email                 DOB        Phone
-    =========== ========== ========== ===================== ========== ============
-    1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
-    2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
-    3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
-    4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
-    =========== ========== ========== ===================== ========== ============
-
-   Dataset *sales*
-
-    Table name: "sales"
-
-    Column names: "CustomerID,Date,SaleAmount"
-
-    =============  ============  ============
-      #CustomerID    Date          SaleAmount
-    =============  ============  ============
-               2    2004-05-06         100.22
-               1    2004-05-07          99.95
-               3    2004-05-07         122.95
-               3    2004-05-13         100.00
-               4    2004-05-22         555.55
-    =============  ============  ============
-
-  The query
-
-  ::
-
-    SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales"
-    FROM customers join sales on customers.CustomerID = sales.CustomerID
-    GROUP BY customers.CustomerID ORDER BY TotalSales DESC;
-
-  Produces this tabular output:
-
-    ========== ======== ==========
-    #FirstName LastName TotalSales
-    ========== ======== ==========
-    James      Smith    555.55
-    Paula      Brown    222.95
-    Steven     Goldfish 100.22
-    John       Smith    99.95
-    ========== ======== ==========
-
-
-  If the optional Table name and Column names inputs are not used, the query would be:
-
-  ::
-
-    SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales"
-    FROM t1 join t2 on t1.c1 = t2.c1
-    GROUP BY t1.c1 ORDER BY TotalSales DESC;
-
-  You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5:
-
-    Column names: ,FirstName,LastName,,BirthDate
-
-    Results in the following data base table
-
-    =========== ========== ========== ===================== ========== ============
-    #c1         FirstName  LastName   c4                    BirthDate  c6
-    =========== ========== ========== ===================== ========== ============
-    1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
-    2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
-    3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
-    4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
-    =========== ========== ========== ===================== ========== ============
-
-
-  Regular_expression_ functions are included for:
-
-  ::
-
-    matching:      re_match('pattern',column)
+@QUERY_HELP@

-    SELECT t1.FirstName, t1.LastName
-    FROM t1
-    WHERE re_match('^.*\.(net|org)$',c4)
-
-  Results:
-
-    =========== ==========
-    #FirstName  LastName
-    =========== ==========
-    Steven      Goldfish
-    Paula       Brown
-    =========== ==========
-
-
-  ::
-
-    searching:     re_search('pattern',column)
-    substituting:  re_sub('pattern','replacement,column)
-
-    SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB"
-    FROM t1
-    WHERE re_search('[hp]er',c4)
-
-  Results:
-
-
-    =========== ========== ==========
-    #FirstName  LastName   DOB
-    =========== ========== ==========
-    Steven      Goldfish   04/04/74
-    Paula       Brown      24/05/78
-    James       Smith      20/10/80
-    =========== ========== ==========
-
-
-**Line Filtering Example**
-  *(Six filters are applied as the following file is read)*
-
-  ::
-
-    Input Tabular File:
-
-    #People with pets
-    Pets FirstName           LastName   DOB       PetNames  PetType
-    2    Paula               Brown      24/05/78  Rex,Fluff dog,cat
-    1    Steven              Jones      04/04/74  Allie     cat
-    0    Jane                Doe        24/05/78
-    1    James               Smith      20/10/80  Spot
-
-
-    Filter 1 - append a line number column:
-
-    #People with pets                                                 1
-    Pets FirstName           LastName   DOB       PetNames  PetType   2
-    2    Paula               Brown      24/05/78  Rex,Fluff dog,cat   3
-    1    Steven              Jones      04/04/74  Allie     cat       4
-    0    Jane                Doe        24/05/78                      5
-    1    James               Smith      20/10/80  Spot                6
-
-    Filter 2 - by regex expression matching [include]: '^\d+' (include lines that start with a number)
-
-    2    Paula               Brown      24/05/78  Rex,Fluff dog,cat   3
-    1    Steven              Jones      04/04/74  Allie     cat       4
-    0    Jane                Doe        24/05/78                      5
-    1    James               Smith      20/10/80  Spot                6
-
-    Filter 3 - append a line number column:
-
-    2    Paula               Brown      24/05/78  Rex,Fluff dog,cat   3  1
-    1    Steven              Jones      04/04/74  Allie     cat       4  2
-    0    Jane                Doe        24/05/78                      5  3
-    1    James               Smith      20/10/80  Spot                6  4
-
-    Filter 4 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format)
-
-    2    Paula               Brown      1978-05-24  Rex,Fluff dog,cat   3  1
-    1    Steven              Jones      1974-04-04  Allie     cat       4  2
-    0    Jane                Doe        1978-05-24                      5  3
-    1    James               Smith      1980-10-20  Spot                6  4
-
-    Filter 5 - normalize list columns[5,6]:
-
-    2    Paula               Brown      1978-05-24  Rex       dog       3  1
-    2    Paula               Brown      1978-05-24  Fluff     cat       3  1
-    1    Steven              Jones      1974-04-04  Allie     cat       4  2
-    0    Jane                Doe        1978-05-24                      5  3
-    1    James               Smith      1980-10-20  Spot                6  4
-
-    Filter 6 - append a line number column:
-
-    2    Paula               Brown      1978-05-24  Rex       dog       3  1  1
-    2    Paula               Brown      1978-05-24  Fluff     cat       3  1  2
-    1    Steven              Jones      1974-04-04  Allie     cat       4  2  3
-    0    Jane                Doe        1978-05-24                      5  3  4
-    1    James               Smith      1980-10-20  Spot                6  4  5
+@LINEFILTERS_HELP_EXAMPLE@


   Table name: pets
@@ -634,6 +297,13 @@

 **Normalizing by Line Filtering into 2 Tables**

+*Relational database opertions work with single-valued column entries.
+To apply relational operations to tabular files that contain fields with lists of values,
+we need to "normalize" those fields, duplicating lines for each item in the list.
+In this example we create 2 tables, one for single-valued fields and a second with list-valued fields normalized.
+Becauce we add a line number first for each table, we can join the 2 tables on the line number column.*
+https://en.wikipedia.org/wiki/First_normal_form
+
     *People Table*

       ::
@@ -679,7 +349,7 @@
       ==  ========  ========


-    Query: SELECT FirstName,LastName,PetName FROM People join Pet on People.id = Pet.id WHERE PetType = 'cat';
+    Query: SELECT FirstName,LastName,PetName FROM People JOIN Pet ON People.id = Pet.id WHERE PetType = 'cat';

     Result:

@@ -690,8 +360,6 @@
      Steven     Jones     Allie
      =========  ========  ========

-.. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
-.. _SQLite: http://www.sqlite.org/index.html

     ]]></help>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sqlite_to_tabular.py	Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import optparse
+import os.path
+import sys
+
+from query_db import describe_tables, get_connection, run_query
+
+
+def __main__():
+    # Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('-s', '--sqlitedb', dest='sqlitedb', default=None,
+                      help='The SQLite Database')
+    parser.add_option('-q', '--query', dest='query', default=None,
+                      help='SQL query')
+    parser.add_option('-Q', '--query_file', dest='query_file', default=None,
+                      help='SQL query file')
+    parser.add_option('-n', '--no_header', dest='no_header', default=False,
+                      action='store_true',
+                      help='Include a column headers line')
+    parser.add_option('-o', '--output', dest='output', default=None,
+                      help='Output file for query results')
+    (options, args) = parser.parse_args()
+
+    # determine output destination
+    if options.output is not None:
+        try:
+            outputPath = os.path.abspath(options.output)
+            outputFile = open(outputPath, 'w')
+        except Exception as e:
+            print("failed: %s" % e, file=sys.stderr)
+            exit(3)
+    else:
+        outputFile = sys.stdout
+
+    query = None
+    if (options.query_file is not None):
+        with open(options.query_file, 'r') as fh:
+            query = ''
+            for line in fh:
+                query += line
+    elif (options.query is not None):
+        query = options.query
+
+    if (query is None):
+        try:
+            describe_tables(get_connection(options.sqlitedb), outputFile)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+        exit(0)
+    else:
+        try:
+            run_query(get_connection(options.sqlitedb), query, outputFile,
+                      no_header=options.no_header)
+        except Exception as exc:
+            print("Error: %s" % exc, file=sys.stderr)
+            exit(1)
+
+
+if __name__ == "__main__":
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sqlite_to_tabular.xml	Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,66 @@
+<?xml version="1.0"?>
+<tool id="sqlite_to_tabular" name="SQLite to tabular" version="5.0.0">
+    <description>for SQL query</description>
+
+    <macros>
+         <import>macros.xml</import>
+    </macros>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Error" />
+    </stdio>
+    <command><![CDATA[
+    python $__tool_directory__/sqlite_to_tabular.py
+    --sqlitedb="$sqlitedb"
+    --query_file="$query_file"
+    $no_header
+    --output="$query_results"
+    ]]></command>
+    <configfiles>
+        <configfile name="query_file">
+$sqlquery
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="sqlitedb" type="data" format="sqlite" label="SQLite Database"/>
+        <param name="sqlquery" type="text" area="True" size="120x20" label="SQL query">
+            <validator type="regex">(?ims)^\s*SELECT\s.*\sFROM\s.*$</validator>
+            <sanitizer sanitize="False"/>
+        </param>
+        <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers"/>
+    </inputs>
+    <outputs>
+        <data name="query_results" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="sqlitedb" ftype="sqlite" value="testdb.sqlite" />
+            <param name="sqlquery" value="SELECT first_name, last_name, age FROM contacts WHERE first_name = 'Sam'" />
+            <output name="query_results">
+                <assert_contents>
+                    <has_text text="Smith" />
+                    <not_has_text text="Doe" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+=================
+SQLite to Tabular
+=================
+
+**Inputs**
+
+  An existing SQLite_ data base.
+
+
+**Outputs**
+
+  The results of a SQL query are output to the history as a tabular file.
+
+
+
+@QUERY_HELP@
+
+    ]]></help>
+</tool>
Binary file test-data/._IEDB.tsv has changed
Binary file test-data/._filtered_people_results.tsv has changed
Binary file test-data/._filtered_pets_results.tsv has changed
Binary file test-data/._netMHC_summary.tsv has changed
Binary file test-data/._pet_normalized_query_results.tsv has changed
Binary file test-data/._query_results.tsv has changed
Binary file test-data/._regex_results.tsv has changed
Binary file test-data/._sales_results.tsv has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered_people_results.tsv	Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,5 @@
+1	FirstName	LastName	DOB	Pets
+2	Paula	Brown	1978-05-24	2
+3	Steven	Jones	1974-04-04	1
+4	Jane	Doe	1978-05-24	0
+5	James	Smith	1980-10-20	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered_pets_results.tsv	Fri Jul 14 11:39:27 2017 -0400
@@ -0,0 +1,6 @@
+1	PetNames	PetType
+2	Rex	dog
+2	Fluff	cat
+3	Allie	cat
+4
+5	Spot
Binary file test-data/testdb.sqlite has changed