comparison query_tabular.py @ 5:19ae309ec53c draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 29288f94a382686e263623cf6ddcd235ed5f2310-dirty
author jjohnson
date Wed, 20 Apr 2016 15:46:04 -0400
parents 9d73dca48178
children 03842a4f71c6
comparison
equal deleted inserted replaced
4:9d73dca48178 5:19ae309ec53c
12 """ 12 """
13 TODO: 13 TODO:
14 - could read column names from comment lines, but issues with legal names 14 - could read column names from comment lines, but issues with legal names
15 - could add some transformations on tabular columns, 15 - could add some transformations on tabular columns,
16 e.g. a regex to format date/time strings 16 e.g. a regex to format date/time strings
17 index: ['c2','c4,c5']
18 unique: ['c1']
19 format: { 17 format: {
20 c2 : re.sub('pat', 'sub', c2) 18 c2 : re.sub('pat', 'sub', c2)
21 c3 : len(c3) 19 c3 : len(c3)
22 } 20 }
23 def format(colname,val, expr): 21 def format(colname,val, expr):
24
25 - allow optional autoincrement id column - user supplied name?
26 autoincrement : 'id'
27 - column_defs dict of columns to create from tabular input 22 - column_defs dict of columns to create from tabular input
28 column_defs : { 'name1' : 'expr', 'name2' : 'expr'} 23 column_defs : { 'name1' : 'expr', 'name2' : 'expr'}
29 - allow multiple queries and outputs 24 - allow multiple queries and outputs
30 - add a --json input for table definitions (or yaml) 25 - add a --json input for table definitions (or yaml)
31 JSON config: 26 JSON config:
32 { tables : [ 27 { tables : [
33 { file_path : '/home/galaxy/dataset_101.dat', 28 { file_path : '/home/galaxy/dataset_101.dat',
34 table_name : 't1', 29 table_name : 't1',
35 column_names : ['c1', 'c2', 'c3'], 30 column_names : ['c1', 'c2', 'c3'],
31 pkey_autoincr : 'id'
36 comment_lines : 1 32 comment_lines : 1
33 unique: ['c1'],
34 index: ['c2','c3']
37 }, 35 },
38 { file_path : '/home/galaxy/dataset_102.dat', 36 { file_path : '/home/galaxy/dataset_102.dat',
39 table_name : 'gff', 37 table_name : 'gff',
40 column_names : ['seqname',,,'start','end'] 38 column_names : ['seqname',,,'start','end']
41 comment_lines : 1 39 comment_lines : 1
117 for i, col_name in enumerate(col_names): 115 for i, col_name in enumerate(col_names):
118 col_def.append('%s %s' % (col_names[i], col_types[i])) 116 col_def.append('%s %s' % (col_names[i], col_types[i]))
119 return col_names, col_types, col_def, col_idx 117 return col_names, col_types, col_def, col_idx
120 118
121 119
122 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]): 120 def create_table(conn, file_path, table_name, skip=0, comment_char='#', pkey_autoincr=None, column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]):
123 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) 121 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns)
124 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] 122 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types]
125 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) 123 table_def = 'CREATE TABLE %s (\n %s%s\n);' % (
124 table_name,
125 '%s INTEGER PRIMARY KEY AUTOINCREMENT,' % pkey_autoincr if pkey_autoincr else '',
126 ', \n '.join(col_def))
126 # print >> sys.stdout, table_def 127 # print >> sys.stdout, table_def
127 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) 128 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names]))
128 # print >> sys.stdout, insert_stmt 129 # print >> sys.stdout, insert_stmt
129 data_lines = 0 130 data_lines = 0
130 try: 131 try:
241 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False 242 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False
242 else: 243 else:
243 load_named_columns = False 244 load_named_columns = False
244 unique_indexes = table['unique'] if 'unique' in table else [] 245 unique_indexes = table['unique'] if 'unique' in table else []
245 indexes = table['index'] if 'index' in table else [] 246 indexes = table['index'] if 'index' in table else []
246 create_table(conn, path, table_name, column_names=column_names, 247 pkey_autoincr = table['pkey_autoincr'] if 'pkey_autoincr' in table else None
248 create_table(conn, path, table_name, pkey_autoincr=pkey_autoincr, column_names=column_names,
247 skip=comment_lines, load_named_columns=load_named_columns, 249 skip=comment_lines, load_named_columns=load_named_columns,
248 unique_indexes=unique_indexes, indexes=indexes) 250 unique_indexes=unique_indexes, indexes=indexes)
249 except Exception, exc: 251 except Exception, exc:
250 print >> sys.stderr, "Error: %s" % exc 252 print >> sys.stderr, "Error: %s" % exc
251 conn.close() 253 conn.close()