Mercurial > repos > jjohnson > query_tabular
comparison query_tabular.py @ 5:19ae309ec53c draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 29288f94a382686e263623cf6ddcd235ed5f2310-dirty
author | jjohnson |
---|---|
date | Wed, 20 Apr 2016 15:46:04 -0400 |
parents | 9d73dca48178 |
children | 03842a4f71c6 |
comparison
equal
deleted
inserted
replaced
4:9d73dca48178 | 5:19ae309ec53c |
---|---|
12 """ | 12 """ |
13 TODO: | 13 TODO: |
14 - could read column names from comment lines, but issues with legal names | 14 - could read column names from comment lines, but issues with legal names |
15 - could add some transformations on tabular columns, | 15 - could add some transformations on tabular columns, |
16 e.g. a regex to format date/time strings | 16 e.g. a regex to format date/time strings |
17 index: ['c2','c4,c5'] | |
18 unique: ['c1'] | |
19 format: { | 17 format: { |
20 c2 : re.sub('pat', 'sub', c2) | 18 c2 : re.sub('pat', 'sub', c2) |
21 c3 : len(c3) | 19 c3 : len(c3) |
22 } | 20 } |
23 def format(colname,val, expr): | 21 def format(colname,val, expr): |
24 | |
25 - allow optional autoincrement id column - user supplied name? | |
26 autoincrement : 'id' | |
27 - column_defs dict of columns to create from tabular input | 22 - column_defs dict of columns to create from tabular input |
28 column_defs : { 'name1' : 'expr', 'name2' : 'expr'} | 23 column_defs : { 'name1' : 'expr', 'name2' : 'expr'} |
29 - allow multiple queries and outputs | 24 - allow multiple queries and outputs |
30 - add a --json input for table definitions (or yaml) | 25 - add a --json input for table definitions (or yaml) |
31 JSON config: | 26 JSON config: |
32 { tables : [ | 27 { tables : [ |
33 { file_path : '/home/galaxy/dataset_101.dat', | 28 { file_path : '/home/galaxy/dataset_101.dat', |
34 table_name : 't1', | 29 table_name : 't1', |
35 column_names : ['c1', 'c2', 'c3'], | 30 column_names : ['c1', 'c2', 'c3'], |
31 pkey_autoincr : 'id' | |
36 comment_lines : 1 | 32 comment_lines : 1 |
33 unique: ['c1'], | |
34 index: ['c2','c3'] | |
37 }, | 35 }, |
38 { file_path : '/home/galaxy/dataset_102.dat', | 36 { file_path : '/home/galaxy/dataset_102.dat', |
39 table_name : 'gff', | 37 table_name : 'gff', |
40 column_names : ['seqname',,,'start','end'] | 38 column_names : ['seqname',,,'start','end'] |
41 comment_lines : 1 | 39 comment_lines : 1 |
117 for i, col_name in enumerate(col_names): | 115 for i, col_name in enumerate(col_names): |
118 col_def.append('%s %s' % (col_names[i], col_types[i])) | 116 col_def.append('%s %s' % (col_names[i], col_types[i])) |
119 return col_names, col_types, col_def, col_idx | 117 return col_names, col_types, col_def, col_idx |
120 | 118 |
121 | 119 |
122 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]): | 120 def create_table(conn, file_path, table_name, skip=0, comment_char='#', pkey_autoincr=None, column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]): |
123 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) | 121 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) |
124 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] | 122 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] |
125 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) | 123 table_def = 'CREATE TABLE %s (\n %s%s\n);' % ( |
124 table_name, | |
125 '%s INTEGER PRIMARY KEY AUTOINCREMENT,' % pkey_autoincr if pkey_autoincr else '', | |
126 ', \n '.join(col_def)) | |
126 # print >> sys.stdout, table_def | 127 # print >> sys.stdout, table_def |
127 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) | 128 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) |
128 # print >> sys.stdout, insert_stmt | 129 # print >> sys.stdout, insert_stmt |
129 data_lines = 0 | 130 data_lines = 0 |
130 try: | 131 try: |
241 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False | 242 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False |
242 else: | 243 else: |
243 load_named_columns = False | 244 load_named_columns = False |
244 unique_indexes = table['unique'] if 'unique' in table else [] | 245 unique_indexes = table['unique'] if 'unique' in table else [] |
245 indexes = table['index'] if 'index' in table else [] | 246 indexes = table['index'] if 'index' in table else [] |
246 create_table(conn, path, table_name, column_names=column_names, | 247 pkey_autoincr = table['pkey_autoincr'] if 'pkey_autoincr' in table else None |
248 create_table(conn, path, table_name, pkey_autoincr=pkey_autoincr, column_names=column_names, | |
247 skip=comment_lines, load_named_columns=load_named_columns, | 249 skip=comment_lines, load_named_columns=load_named_columns, |
248 unique_indexes=unique_indexes, indexes=indexes) | 250 unique_indexes=unique_indexes, indexes=indexes) |
249 except Exception, exc: | 251 except Exception, exc: |
250 print >> sys.stderr, "Error: %s" % exc | 252 print >> sys.stderr, "Error: %s" % exc |
251 conn.close() | 253 conn.close() |