# HG changeset patch
# User iuc
# Date 1500383246 14400
# Node ID 859064f07be491736fb5b9d2b93ff709fae89cf2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
diff -r 000000000000 -r 859064f07be4 filter_tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_tabular.py Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import json
+import optparse
+import os.path
+import sys
+
+from filters import filter_file
+
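+# Example filter specification (an illustrative sketch, not shipped with the
+# tool): the --jsonfile option expects a JSON array of filter dicts whose keys
+# match the cases handled in filters.LineFilter, e.g.
+#
+#   [
+#     {"filter": "skip", "count": 1},
+#     {"filter": "regex", "pattern": "^#", "action": "exclude_match"},
+#     {"filter": "select_columns", "columns": [1, 3, 2]}
+#   ]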
+
+def __main__():
+ # Parse Command Line
+ parser = optparse.OptionParser()
+ parser.add_option('-i', '--input', dest='input', default=None,
+ help='Input file for filtering')
+ parser.add_option('-j', '--jsonfile', dest='jsonfile', default=None,
+ help='JSON array of filter specifications')
+ parser.add_option('-o', '--output', dest='output', default=None,
+ help='Output file for the filtered tabular data')
+ parser.add_option('-v', '--verbose', dest='verbose', default=False,
+ action='store_true',
+ help='verbose')
+ (options, args) = parser.parse_args()
+
+ if options.input is not None:
+ try:
+ inputPath = os.path.abspath(options.input)
+ inputFile = open(inputPath, 'r')
+ except Exception as e:
+ exit('Error: %s' % (e))
+ else:
+ inputFile = sys.stdin
+
+ if options.output is not None:
+ try:
+ outputPath = os.path.abspath(options.output)
+ outputFile = open(outputPath, 'w')
+ except Exception as e:
+ exit('Error: %s' % (e))
+ else:
+ outputFile = sys.stdout
+
+ filters = None
+ if options.jsonfile:
+ try:
+ with open(options.jsonfile) as fh:
+ filters = json.load(fh)
+ except Exception as e:
+ exit('Error: %s' % (e))
+
+ if options.verbose and filters:
+ for f in filters:
+ print('%s %s' % (f['filter'],
+ ', '.join(
+ ['%s: %s' % (k, f[k])
+ for k in set(f.keys()) - set(['filter'])])),
+ file=sys.stdout)
+
+ try:
+ filter_file(inputFile, outputFile, filters=filters)
+ except Exception as e:
+ exit('Error: %s' % (e))
+
+
+if __name__ == "__main__":
+ __main__()
diff -r 000000000000 -r 859064f07be4 filters.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filters.py Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import re
+import sys
+
+
+class LineFilter(object):
+ def __init__(self, source, filter_dict):
+ self.source = source
+ self.filter_dict = filter_dict
+ self.func = lambda i, l: l.rstrip('\r\n') if l else None
+ self.src_lines = []
+ self.src_line_cnt = 0
+ if not filter_dict:
+ return
+ if filter_dict['filter'] == 'regex':
+ rgx = re.compile(filter_dict['pattern'])
+ if filter_dict['action'] == 'exclude_match':
+ self.func = lambda i, l: l if not rgx.match(l) else None
+ elif filter_dict['action'] == 'include_match':
+ self.func = lambda i, l: l if rgx.match(l) else None
+ elif filter_dict['action'] == 'exclude_find':
+ self.func = lambda i, l: l if not rgx.search(l) else None
+ elif filter_dict['action'] == 'include_find':
+ self.func = lambda i, l: l if rgx.search(l) else None
+ elif filter_dict['filter'] == 'select_columns':
+ cols = [int(c) - 1 for c in filter_dict['columns']]
+ self.func = lambda i, l: self.select_columns(l, cols)
+ elif filter_dict['filter'] == 'replace':
+ p = filter_dict['pattern']
+ r = filter_dict['replace']
+ c = int(filter_dict['column']) - 1
+ self.func = lambda i, l: '\t'.join(
+ [x if j != c else re.sub(p, r, x) for j, x in enumerate(l.split('\t'))])
+ elif filter_dict['filter'] == 'prepend_line_num':
+ self.func = lambda i, l: '%d\t%s' % (i, l)
+ elif filter_dict['filter'] == 'append_line_num':
+ self.func = lambda i, l: '%s\t%d' % (l.rstrip('\r\n'), i)
+ elif filter_dict['filter'] == 'prepend_text':
+ s = filter_dict['column_text']
+ self.func = lambda i, l: '%s\t%s' % (s, l)
+ elif filter_dict['filter'] == 'append_text':
+ s = filter_dict['column_text']
+ self.func = lambda i, l: '%s\t%s' % (l.rstrip('\r\n'), s)
+ elif filter_dict['filter'] == 'skip':
+ cnt = filter_dict['count']
+ self.func = lambda i, l: l if i > cnt else None
+ elif filter_dict['filter'] == 'normalize':
+ cols = [int(c) - 1 for c in filter_dict['columns']]
+ sep = filter_dict['separator']
+ self.func = lambda i, l: self.normalize(l, cols, sep)
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ if not self.src_lines:
+ self.get_lines()
+ if self.src_lines:
+ return self.src_lines.pop(0)
+ raise StopIteration
+
+ next = __next__
+
+ def select_columns(self, line, cols):
+ fields = line.split('\t')
+ return '\t'.join([fields[x] for x in cols])
+
+ def normalize(self, line, split_cols, sep):
+ lines = []
+ fields = line.rstrip('\r\n').split('\t')
+ split_fields = dict()
+ cnt = 0
+ for c in split_cols:
+ if c < len(fields):
+ split_fields[c] = fields[c].split(sep)
+ cnt = max(cnt, len(split_fields[c]))
+ if cnt == 0:
+ lines.append('\t'.join(fields))
+ else:
+ for n in range(0, cnt):
+ flds = [x if c not in split_cols else split_fields[c][n]
+ if n < len(split_fields[c])
+ else '' for (c, x) in enumerate(fields)]
+ lines.append('\t'.join(flds))
+ return lines
+
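+ # Worked example for normalize() above (illustrative, mirrors a row of
+ # test-data/pets.tsv): with split_cols=[4, 5] and sep=',', the line
+ #   "2\tPaula\tBrown\t24/05/78\tRex,Fluff\tdog,cat"
+ # expands into two lines, pairing the split values by position:
+ #   "2\tPaula\tBrown\t24/05/78\tRex\tdog"
+ #   "2\tPaula\tBrown\t24/05/78\tFluff\tcat"
+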
+ def get_lines(self):
+ for i, next_line in enumerate(self.source):
+ self.src_line_cnt += 1
+ line = self.func(self.src_line_cnt, next_line)
+ if line:
+ if isinstance(line, list):
+ self.src_lines.extend(line)
+ else:
+ self.src_lines.append(line)
+ return
+
+
+class TabularReader:
+ """
+ Tabular file iterator. Returns a list of fields for each line.
+ """
+ def __init__(self, input_file, skip=0, comment_char=None, col_idx=None,
+ filters=None):
+ self.skip = skip
+ self.comment_char = comment_char
+ self.col_idx = col_idx
+ self.filters = filters
+ self.tsv_file = \
+ input_file if hasattr(input_file, 'readline') else open(input_file)
+ if skip and skip > 0:
+ for i in range(skip):
+ if not self.tsv_file.readline():
+ break
+ source = LineFilter(self.tsv_file, None)
+ if comment_char:
+ source = LineFilter(source,
+ {"filter": "regex", "pattern": comment_char,
+ "action": "exclude_match"})
+ if filters:
+ for f in filters:
+ source = LineFilter(source, f)
+ self.source = source
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ ''' Iteration '''
+ for i, line in enumerate(self.source):
+ fields = line.rstrip('\r\n').split('\t')
+ if self.col_idx:
+ fields = [fields[i] for i in self.col_idx]
+ return fields
+ raise StopIteration
+
+ next = __next__
+
+
+def filter_file(input_file, output, skip=0, comment_char='#', filters=None):
+ data_lines = 0
+ try:
+ tr = TabularReader(input_file, skip=skip, comment_char=comment_char,
+ filters=filters)
+ for linenum, fields in enumerate(tr):
+ data_lines += 1
+ try:
+ output.write('%s\n' % '\t'.join(fields))
+ except Exception as e:
+ print('Failed at line: %d err: %s' % (linenum, e),
+ file=sys.stderr)
+ except Exception as e:
+ exit('Error: %s' % (e))
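+
+
+# Minimal usage sketch (illustrative only): chain a couple of filters over the
+# pets.tsv fixture using the dict keys handled in LineFilter.__init__ above.
+# The 'skip' filter drops the column-header row and 'select_columns' keeps the
+# first- and last-name columns.
+#
+#   import sys
+#   from filters import filter_file
+#   filter_file(open('test-data/pets.tsv'), sys.stdout,
+#               filters=[{'filter': 'skip', 'count': 1},
+#                        {'filter': 'select_columns', 'columns': [2, 3]}])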
diff -r 000000000000 -r 859064f07be4 load_db.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/load_db.py Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import sys
+
+from filters import TabularReader
+
+
+def getValueType(val):
+ if val or 0. == val:
+ try:
+ int(val)
+ return 'INTEGER'
+ except ValueError:
+ try:
+ float(val)
+ return 'REAL'
+ except ValueError:
+ return 'TEXT'
+ return None
+
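+# Illustrative behaviour of getValueType:
+#   getValueType('12')   -> 'INTEGER'
+#   getValueType('3.14') -> 'REAL'
+#   getValueType('abc')  -> 'TEXT'
+#   getValueType('')     -> None  (empty values do not influence a column type)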
+
+def get_column_def(file_path, table_name, skip=0, comment_char='#',
+ column_names=None, max_lines=100, load_named_columns=False,
+ filters=None):
+ col_pref = ['TEXT', 'REAL', 'INTEGER', None]
+ col_types = []
+ col_idx = None
+ try:
+ tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
+ col_idx=None, filters=filters)
+ for linenum, fields in enumerate(tr):
+ if linenum > max_lines:
+ break
+ try:
+ while len(col_types) < len(fields):
+ col_types.append(None)
+ for i, val in enumerate(fields):
+ colType = getValueType(val)
+ if col_pref.index(colType) < col_pref.index(col_types[i]):
+ col_types[i] = colType
+ except Exception as e:
+ print('Failed at line: %d err: %s' % (linenum, e),
+ file=sys.stderr)
+ except Exception as e:
+ print('Failed: %s' % (e), file=sys.stderr)
+ for i, col_type in enumerate(col_types):
+ if not col_type:
+ col_types[i] = 'TEXT'
+ if column_names:
+ col_names = []
+ if load_named_columns:
+ col_idx = []
+ for i, cname in enumerate(
+ [cn.strip() for cn in column_names.split(',')]):
+ if cname != '':
+ col_idx.append(i)
+ col_names.append(cname)
+ col_types = [col_types[i] for i in col_idx]
+ else:
+ col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
+ for i, cname in enumerate(
+ [cn.strip() for cn in column_names.split(',')]):
+ if cname and i < len(col_names):
+ col_names[i] = cname
+ else:
+ col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
+ col_def = []
+ for i, col_name in enumerate(col_names):
+ col_def.append('%s %s' % (col_names[i], col_types[i]))
+ return col_names, col_types, col_def, col_idx
+
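+# For example (illustrative): data rows like ['1', 'John', '1968-02-04'] are
+# typed column by column, giving
+#   col_names = ['c1', 'c2', 'c3']
+#   col_types = ['INTEGER', 'TEXT', 'TEXT']
+#   col_def   = ['c1 INTEGER', 'c2 TEXT', 'c3 TEXT']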
+
+def create_table(conn, file_path, table_name, skip=0, comment_char='#',
+ pkey_autoincr=None, column_names=None,
+ load_named_columns=False, filters=None,
+ unique_indexes=[], indexes=[]):
+ col_names, col_types, col_def, col_idx = \
+ get_column_def(file_path, table_name, skip=skip,
+ comment_char=comment_char, column_names=column_names,
+ load_named_columns=load_named_columns, filters=filters)
+ col_func = [float if t == 'REAL' else int
+ if t == 'INTEGER' else str for t in col_types]
+ table_def = 'CREATE TABLE %s (\n %s%s\n);' % (
+ table_name,
+ '%s INTEGER PRIMARY KEY AUTOINCREMENT,' %
+ pkey_autoincr if pkey_autoincr else '',
+ ', \n '.join(col_def))
+ # print >> sys.stdout, table_def
+ insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (
+ table_name, ','.join(col_names),
+ ','.join(["?" for x in col_names]))
+ # print >> sys.stdout, insert_stmt
+ data_lines = 0
+ try:
+ c = conn.cursor()
+ c.execute(table_def)
+ conn.commit()
+ c.close()
+ for i, index in enumerate(unique_indexes):
+ index_name = 'idx_uniq_%s_%d' % (table_name, i)
+ index_columns = index.split(',')
+ create_index(conn, table_name, index_name, index_columns,
+ unique=True)
+ for i, index in enumerate(indexes):
+ index_name = 'idx_%s_%d' % (table_name, i)
+ index_columns = index.split(',')
+ create_index(conn, table_name, index_name, index_columns)
+ c = conn.cursor()
+ tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
+ col_idx=col_idx, filters=filters)
+ for linenum, fields in enumerate(tr):
+ data_lines += 1
+ try:
+ vals = [col_func[i](x)
+ if x else None for i, x in enumerate(fields)]
+ c.execute(insert_stmt, vals)
+ except Exception as e:
+ print('Failed at line: %d err: %s' % (linenum, e),
+ file=sys.stderr)
+ conn.commit()
+ c.close()
+ except Exception as e:
+ exit('Error: %s' % (e))
+
+
+def create_index(conn, table_name, index_name, index_columns, unique=False):
+ index_def = "CREATE %s INDEX %s on %s(%s)" % (
+ 'UNIQUE' if unique else '', index_name,
+ table_name, ','.join(index_columns))
+ c = conn.cursor()
+ c.execute(index_def)
+ conn.commit()
+ c.close()
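+
+
+# For example (illustrative): create_index(conn, 't1', 'idx_t1_0', ['c2', 'c3'])
+# executes  CREATE  INDEX idx_t1_0 on t1(c2,c3)  (the doubled space is the
+# empty UNIQUE slot for non-unique indexes).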
diff -r 000000000000 -r 859064f07be4 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,383 @@
+
+
+ 0:
+ #set $skip_lines = int($tbl.table.metadata.comment_lines)
+ #end if
+ #if $skip_lines is not None:
+ #set $filter_dict = dict()
+ #set $filter_dict['filter'] = str($fi.filter.filter_type)
+ #set $filter_dict['count'] = $skip_lines
+ #silent $input_filters.append($filter_dict)
+ #end if
+ #elif $fi.filter.filter_type == 'comment':
+ #set $filter_dict = dict()
+ #set $filter_dict['filter'] = 'regex'
+ #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join([chr(int(x)).replace('|','[|]') for x in (str($fi.filter.comment_char)).split(',')])
+ #set $filter_dict['action'] = 'exclude_match'
+ #silent $input_filters.append($filter_dict)
+ #elif $fi.filter.filter_type == 'regex':
+ #set $filter_dict = dict()
+ #set $filter_dict['filter'] = str($fi.filter.filter_type)
+ #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
+ #set $filter_dict['action'] = str($fi.filter.regex_action)
+ #silent $input_filters.append($filter_dict)
+ #elif $fi.filter.filter_type == 'select_columns':
+ #set $filter_dict = dict()
+ #set $filter_dict['filter'] = str($fi.filter.filter_type)
+ #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
+ #silent $input_filters.append($filter_dict)
+ #elif $fi.filter.filter_type == 'replace':
+ #set $filter_dict = dict()
+ #set $filter_dict['filter'] = str($fi.filter.filter_type)
+ #set $filter_dict['column'] = int(str($fi.filter.column).replace('c',''))
+ #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
+ #set $filter_dict['replace'] = str($fi.filter.regex_replace)
+ #silent $input_filters.append($filter_dict)
+ #elif str($fi.filter.filter_type).endswith('pend_line_num'):
+ #set $filter_dict = dict()
+ #set $filter_dict['filter'] = str($fi.filter.filter_type)
+ #silent $input_filters.append($filter_dict)
+ #elif str($fi.filter.filter_type).endswith('pend_text'):
+ #set $filter_dict = dict()
+ #set $filter_dict['filter'] = str($fi.filter.filter_type)
+ #set $filter_dict['column_text'] = str($fi.filter.column_text)
+ #silent $input_filters.append($filter_dict)
+ #elif $fi.filter.filter_type == 'normalize':
+ #set $filter_dict = dict()
+ #set $filter_dict['filter'] = str($fi.filter.filter_type)
+ #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
+ #set $filter_dict['separator'] = str($fi.filter.separator)
+ #silent $input_filters.append($filter_dict)
+ #end if
+ #end for
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ^(c?[1-9]\d*)(,c?[1-9]\d*)*$
+
+
+
+
+ ^(c?[1-9]\d*)$
+
+
+
+
+
+
+
+
+
+
+
+ ^(c?[1-9]\d*)(,c?[1-9]\d*)*$
+
+
+
+ ^[^\t\n\r\f\v]+$
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 859064f07be4 query_db.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/query_db.py Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import re
+import sqlite3 as sqlite
+import sys
+
+
+TABLE_QUERY = \
+ """
+ SELECT name, sql
+ FROM sqlite_master
+ WHERE type='table'
+ ORDER BY name
+ """
+
+
+def regex_match(expr, item):
+ return re.match(expr, item) is not None
+
+
+def regex_search(expr, item):
+ return re.search(expr, item) is not None
+
+
+def regex_sub(expr, replace, item):
+ return re.sub(expr, replace, item)
+
+
+def get_connection(sqlitedb_path, addfunctions=True):
+ conn = sqlite.connect(sqlitedb_path)
+ if addfunctions:
+ conn.create_function("re_match", 2, regex_match)
+ conn.create_function("re_search", 2, regex_search)
+ conn.create_function("re_sub", 3, regex_sub)
+ return conn
+
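+# Usage sketch (illustrative; table and column names are placeholders): once
+# registered on the connection, the helpers can be called directly from SQL:
+#   SELECT c1, re_sub('-', '_', c2) FROM t1 WHERE re_search('^HLA', c2)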
+
+def describe_tables(conn, outputFile):
+ try:
+ c = conn.cursor()
+ tables_query = TABLE_QUERY
+ rslt = c.execute(tables_query).fetchall()
+ for table, sql in rslt:
+ print("Table %s:" % table, file=outputFile)
+ try:
+ col_query = 'SELECT * FROM %s LIMIT 0' % table
+ cur = conn.cursor().execute(col_query)
+ cols = [col[0] for col in cur.description]
+ print(" Columns: %s" % cols, file=outputFile)
+ except Exception as exc:
+ print("Warning: %s" % exc, file=sys.stderr)
+ except Exception as e:
+ exit('Error: %s' % (e))
+ exit(0)
+
+
+def run_query(conn, query, outputFile, no_header=False):
+ cur = conn.cursor()
+ results = cur.execute(query)
+ if not no_header:
+ outputFile.write("#%s\n" % '\t'.join(
+ [str(col[0]) for col in cur.description]))
+ for i, row in enumerate(results):
+ outputFile.write("%s\n" % '\t'.join(
+ [str(val) if val is not None else '' for val in row]))
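+
+
+# Output sketch (illustrative, mirrors test-data/sales_results.tsv): unless
+# no_header is set, the first line is the column names prefixed with '#',
+# followed by one tab-separated line per row, with SQL NULLs written as empty
+# strings:
+#   #FirstName  LastName  TotalSales
+#   James       Smith     555.55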
diff -r 000000000000 -r 859064f07be4 query_tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/query_tabular.py Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import json
+import optparse
+import os.path
+import sys
+
+from load_db import create_table
+
+from query_db import describe_tables, get_connection, run_query
+
+
+"""
+JSON config:
+{ tables : [
+ { file_path : '/home/galaxy/dataset_101.dat',
+ table_name : 't1',
+ column_names : ['c1','c2','c3'],
+ pkey_autoincr : 'id'
+ comment_lines : 1
+ unique: ['c1'],
+ index: ['c2', 'c3']
+ },
+ { file_path : '/home/galaxy/dataset_102.dat',
+ table_name : 'gff',
+ column_names : ['seqname',,'date','start','end']
+ comment_lines : 1
+ load_named_columns : True
+ filters : [{'filter': 'regex', 'pattern': '#peptide',
+ 'action': 'exclude_match'},
+ {'filter': 'replace', 'column': 3,
+ 'replace': 'gi[|]', 'pattern': ''}]
+ },
+ { file_path : '/home/galaxy/dataset_103.dat',
+ table_name : 'test',
+ column_names : ['c1', 'c2', 'c3']
+ }
+ ]
+}
+"""
+
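+# The block above is pseudo-JSON for illustration; note that with
+# load_named_columns an empty entry in column_names (as in the 'gff' table)
+# drops that column from the loaded table. A typical invocation might look
+# like this (file names are placeholders):
+#   python query_tabular.py -s workdb.sqlite -j tables.json \
+#       -q 'SELECT c1, c2 FROM t1 ORDER BY c2' -o results.tsv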
+
+def __main__():
+ # Parse Command Line
+ parser = optparse.OptionParser()
+ parser.add_option('-s', '--sqlitedb', dest='sqlitedb', default=None,
+ help='The SQLite Database')
+ parser.add_option('-j', '--jsonfile', dest='jsonfile', default=None,
+ help='JSON dict of table specifications')
+ parser.add_option('-q', '--query', dest='query', default=None,
+ help='SQL query')
+ parser.add_option('-Q', '--query_file', dest='query_file', default=None,
+ help='SQL query file')
+ parser.add_option('-n', '--no_header', dest='no_header', default=False,
+ action='store_true',
+ help='Omit the column headers line from the output')
+ parser.add_option('-o', '--output', dest='output', default=None,
+ help='Output file for query results')
+ (options, args) = parser.parse_args()
+
+ # determine output destination
+ if options.output is not None:
+ try:
+ outputPath = os.path.abspath(options.output)
+ outputFile = open(outputPath, 'w')
+ except Exception as e:
+ exit('Error: %s' % (e))
+ else:
+ outputFile = sys.stdout
+
+ def _create_table(ti, table):
+ path = table['file_path']
+ table_name =\
+ table['table_name'] if 'table_name' in table else 't%d' % (ti + 1)
+ comment_lines =\
+ table['comment_lines'] if 'comment_lines' in table else 0
+ comment_char =\
+ table['comment_char'] if 'comment_char' in table else None
+ column_names =\
+ table['column_names'] if 'column_names' in table else None
+ if column_names:
+ load_named_columns =\
+ table['load_named_columns']\
+ if 'load_named_columns' in table else False
+ else:
+ load_named_columns = False
+ unique_indexes = table['unique'] if 'unique' in table else []
+ indexes = table['index'] if 'index' in table else []
+ filters = table['filters'] if 'filters' in table else None
+ pkey_autoincr = \
+ table['pkey_autoincr'] if 'pkey_autoincr' in table else None
+ create_table(get_connection(options.sqlitedb), path, table_name,
+ pkey_autoincr=pkey_autoincr,
+ column_names=column_names,
+ skip=comment_lines,
+ comment_char=comment_char,
+ load_named_columns=load_named_columns,
+ filters=filters,
+ unique_indexes=unique_indexes,
+ indexes=indexes)
+
+ if options.jsonfile:
+ try:
+ with open(options.jsonfile) as fh:
+ tdef = json.load(fh)
+ if 'tables' in tdef:
+ for ti, table in enumerate(tdef['tables']):
+ _create_table(ti, table)
+ except Exception as e:
+ exit('Error: %s' % (e))
+
+ query = None
+ if options.query_file is not None:
+ with open(options.query_file, 'r') as fh:
+ query = fh.read()
+ elif options.query is not None:
+ query = options.query
+
+ if query is None:
+ try:
+ describe_tables(get_connection(options.sqlitedb), outputFile)
+ except Exception as e:
+ exit('Error: %s' % (e))
+ else:
+ try:
+ run_query(get_connection(options.sqlitedb), query, outputFile,
+ no_header=options.no_header)
+ except Exception as e:
+ exit('Error: %s' % (e))
+
+
+if __name__ == "__main__":
+ __main__()
diff -r 000000000000 -r 859064f07be4 sqlite_to_tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sqlite_to_tabular.py Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import optparse
+import os.path
+import sys
+
+from query_db import describe_tables, get_connection, run_query
+
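+# Example invocation (illustrative; file and table names are placeholders):
+#   python sqlite_to_tabular.py -s testdb.sqlite \
+#       -q 'SELECT * FROM t1' -o results.tsv
+# Without -q or -Q the script just describes the tables in the database.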
+
+def __main__():
+ # Parse Command Line
+ parser = optparse.OptionParser()
+ parser.add_option('-s', '--sqlitedb', dest='sqlitedb', default=None,
+ help='The SQLite Database')
+ parser.add_option('-q', '--query', dest='query', default=None,
+ help='SQL query')
+ parser.add_option('-Q', '--query_file', dest='query_file', default=None,
+ help='SQL query file')
+ parser.add_option('-n', '--no_header', dest='no_header', default=False,
+ action='store_true',
+ help='Omit the column headers line from the output')
+ parser.add_option('-o', '--output', dest='output', default=None,
+ help='Output file for query results')
+ (options, args) = parser.parse_args()
+
+ # determine output destination
+ if options.output is not None:
+ try:
+ outputPath = os.path.abspath(options.output)
+ outputFile = open(outputPath, 'w')
+ except Exception as e:
+ exit('Error: %s' % (e))
+ else:
+ outputFile = sys.stdout
+
+ query = None
+ if options.query_file is not None:
+ with open(options.query_file, 'r') as fh:
+ query = fh.read()
+ elif options.query is not None:
+ query = options.query
+
+ if query is None:
+ try:
+ describe_tables(get_connection(options.sqlitedb), outputFile)
+ except Exception as e:
+ exit('Error: %s' % (e))
+ exit(0)
+ else:
+ try:
+ run_query(get_connection(options.sqlitedb), query, outputFile,
+ no_header=options.no_header)
+ except Exception as e:
+ exit('Error: %s' % (e))
+
+
+if __name__ == "__main__":
+ __main__()
diff -r 000000000000 -r 859064f07be4 sqlite_to_tabular.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sqlite_to_tabular.xml Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,63 @@
+
+
+ for SQL query
+
+
+ macros.xml
+
+
+
+
+
+$sqlquery
+
+
+
+
+
+ (?ims)^\s*SELECT\s.*\sFROM\s.*$
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 859064f07be4 test-data/IEDB.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IEDB.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,17 @@
+#ID allele seq_num start end length peptide method percentile_rank ann_ic50 ann_rank smm_ic50 smm_rank comblib_sidney2008_score comblib_sidney2008_rank netmhcpan_ic50 netmhcpan_rank
+PPAP2C HLA-A*02:01 1 3 11 9 GMYCMVFLV Consensus (ann/smm/comblib_sidney2008) 0.2 4 0.2 3.77 0.2 7.1e-06 0.5 - -
+PPAP2C HLA-A*23:01 1 1 9 9 SFGMYCMVF Consensus (ann/smm) 0.5 67 0.5 137.54 0.5 - - - -
+PPAP2C HLA-A*23:01 1 4 12 9 MYCMVFLVK Consensus (ann/smm) 0.65 146 0.7 160.11 0.6 - - - -
+PPAP2C HLA-A*02:01 1 2 10 9 FGMYCMVFL Consensus (ann/smm/comblib_sidney2008) 2.3 222 3.1 150.01 2.3 2.14e-05 1.3 - -
+PPAP2C HLA-A*23:01 1 3 11 9 GMYCMVFLV Consensus (ann/smm) 4.95 3256 4 2706.64 5.9 - - - -
+PPAP2C HLA-A*23:01 1 2 10 9 FGMYCMVFL Consensus (ann/smm) 6.55 4423 4.9 4144.10 8.2 - - - -
+PPAP2C HLA-A*02:01 1 1 9 9 SFGMYCMVF Consensus (ann/smm/comblib_sidney2008) 45 24390 45 44989.38 39 0.01 91 - -
+PPAP2C HLA-A*02:01 1 4 12 9 MYCMVFLVK Consensus (ann/smm/comblib_sidney2008) 54 23399 41 157801.09 54 0.01 86 - -
+ADAMTSL1 HLA-A*02:01 1 1 9 9 SLDMCISGL Consensus (ann/smm/comblib_sidney2008) 1 26 1 51.65 0.9 3.02e-05 1.7 - -
+ADAMTSL1 HLA-A*23:01 1 4 12 9 MCISGLCQL Consensus (ann/smm) 6.65 5781 5.9 3626.02 7.4 - - - -
+ADAMTSL1 HLA-A*02:01 1 4 12 9 MCISGLCQL Consensus (ann/smm/comblib_sidney2008) 14 1823 6.5 2612.82 14 0.00056 24 - -
+ADAMTSL1 HLA-A*23:01 1 1 9 9 SLDMCISGL Consensus (ann/smm) 30.5 27179 34 24684.82 27 - - - -
+ADAMTSL1 HLA-A*02:01 1 2 10 9 LDMCISGLC Consensus (ann/smm/comblib_sidney2008) 42 23677 42 53716.78 41 0.01 71 - -
+ADAMTSL1 HLA-A*23:01 1 3 11 9 DMCISGLCQ Consensus (ann/smm) 64.5 34451 73 118148.99 56 - - - -
+ADAMTSL1 HLA-A*23:01 1 2 10 9 LDMCISGLC Consensus (ann/smm) 76.0 33222 62 665932.18 90 - - - -
+ADAMTSL1 HLA-A*02:01 1 3 11 9 DMCISGLCQ Consensus (ann/smm/comblib_sidney2008) 97 31630 98 639896.89 71 0.03 97 - -
diff -r 000000000000 -r 859064f07be4 test-data/add_to_db_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/add_to_db_results.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,3 @@
+#id first last pets quote
+1 Paula Brown 2 Time flies like and arrow. Fruit flies like a banana.
+2 Steven Jones 1 I would have wrtten less if I had more time
diff -r 000000000000 -r 859064f07be4 test-data/customers.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/customers.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,5 @@
+#CustomerID FirstName LastName Email DOB Phone
+1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222
+2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545
+3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232
+4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888
diff -r 000000000000 -r 859064f07be4 test-data/filtered_people_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered_people_results.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,4 @@
+1 Paula Brown 1978-05-24 2
+2 Steven Jones 1974-04-04 1
+3 Jane Doe 1978-05-24 0
+4 James Smith 1980-10-20 1
diff -r 000000000000 -r 859064f07be4 test-data/filtered_pets_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered_pets_results.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,4 @@
+1 Rex dog
+1 Fluff cat
+2 Allie cat
+4 Spot
diff -r 000000000000 -r 859064f07be4 test-data/netMHC_summary.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/netMHC_summary.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,9 @@
+#pos peptide logscore affinity(nM) Bind Level Protein Name Allele
+2 GMYCMVFLV 0.858 4 SB PPAP2C HLA-A02:01
+1 FGMYCMVFL 0.501 222 WB PPAP2C HLA-A02:01
+3 MYCMVFLVK 0.070 23399 PPAP2C HLA-A02:01
+0 SFGMYCMVF 0.066 24390 PPAP2C HLA-A02:01
+0 SLDMCISGL 0.698 26 SB ADAMTSL1 HLA-A02:01
+3 MCISGLCQL 0.306 1823 ADAMTSL1 HLA-A02:01
+1 LDMCISGLC 0.069 23677 ADAMTSL1 HLA-A02:01
+2 DMCISGLCQ 0.042 31630 ADAMTSL1 HLA-A02:01
diff -r 000000000000 -r 859064f07be4 test-data/pet_normalized_query_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pet_normalized_query_results.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,3 @@
+#id first last dob name animal pets
+1 Paula Brown 1978-05-24 Fluff cat 2
+2 Steven Jones 1974-04-04 Allie cat 1
diff -r 000000000000 -r 859064f07be4 test-data/pets.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pets.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,7 @@
+#People with pets
+Pets FirstName LastName DOB PetNames PetType
+2 Paula Brown 24/05/78 Rex,Fluff dog,cat
+1 Steven Jones 04/04/74 Allie cat
+0 Jane Doe 24/05/78
+1 James Smith 20/10/80 Spot
+
diff -r 000000000000 -r 859064f07be4 test-data/query_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query_results.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,17 @@
+#ID peptide start end percentile_rank logscore affinity Bind_Level
+PPAP2C GMYCMVFLV 3 11 0.2 0.858 4 SB
+PPAP2C GMYCMVFLV 3 11 4.95 0.858 4 SB
+ADAMTSL1 SLDMCISGL 1 9 1.0 0.698 26 SB
+ADAMTSL1 SLDMCISGL 1 9 30.5 0.698 26 SB
+PPAP2C FGMYCMVFL 2 10 2.3 0.501 222 WB
+PPAP2C FGMYCMVFL 2 10 6.55 0.501 222 WB
+ADAMTSL1 MCISGLCQL 4 12 6.65 0.306 1823
+ADAMTSL1 MCISGLCQL 4 12 14.0 0.306 1823
+PPAP2C MYCMVFLVK 4 12 0.65 0.07 23399
+PPAP2C MYCMVFLVK 4 12 54.0 0.07 23399
+ADAMTSL1 LDMCISGLC 2 10 42.0 0.069 23677
+ADAMTSL1 LDMCISGLC 2 10 76.0 0.069 23677
+PPAP2C SFGMYCMVF 1 9 0.5 0.066 24390
+PPAP2C SFGMYCMVF 1 9 45.0 0.066 24390
+ADAMTSL1 DMCISGLCQ 3 11 64.5 0.042 31630
+ADAMTSL1 DMCISGLCQ 3 11 97.0 0.042 31630
diff -r 000000000000 -r 859064f07be4 test-data/regex_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regex_results.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,4 @@
+#FirstName LastName DOB
+Steven Goldfish 04/04/74
+Paula Brown 24/05/78
+James Smith 20/10/80
diff -r 000000000000 -r 859064f07be4 test-data/sales.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sales.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,6 @@
+#CustomerID Date SaleAmount
+2 2004-05-06 100.22
+1 2004-05-07 99.95
+3 2004-05-07 122.95
+3 2004-05-13 100.00
+4 2004-05-22 555.55
diff -r 000000000000 -r 859064f07be4 test-data/sales_results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sales_results.tsv Tue Jul 18 09:07:26 2017 -0400
@@ -0,0 +1,5 @@
+#FirstName LastName TotalSales
+James Smith 555.55
+Paula Brown 222.95
+Steven Goldfish 100.22
+John Smith 99.95
diff -r 000000000000 -r 859064f07be4 test-data/testdb.sqlite
Binary file test-data/testdb.sqlite has changed