Mercurial > repos > jjohnson > query_tabular
diff load_db.py @ 20:ab27c4bd14b9 draft
Uploaded
author | jjohnson |
---|---|
date | Fri, 14 Jul 2017 11:39:27 -0400 |
parents | |
children | bed5018e7ae3 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/load_db.py Fri Jul 14 11:39:27 2017 -0400 @@ -0,0 +1,136 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import sys + +from filters import TabularReader + + +def getValueType(val): + if val or 0. == val: + try: + int(val) + return 'INTEGER' + except: + try: + float(val) + return 'REAL' + except: + return 'TEXT' + return None + + +def get_column_def(file_path, table_name, skip=0, comment_char='#', + column_names=None, max_lines=100, load_named_columns=False, + filters=None): + col_pref = ['TEXT', 'REAL', 'INTEGER', None] + col_types = [] + col_idx = None + try: + tr = TabularReader(file_path, skip=skip, comment_char=comment_char, + col_idx=None, filters=filters) + for linenum, fields in enumerate(tr): + if linenum > max_lines: + break + try: + while len(col_types) < len(fields): + col_types.append(None) + for i, val in enumerate(fields): + colType = getValueType(val) + if col_pref.index(colType) < col_pref.index(col_types[i]): + col_types[i] = colType + except Exception as e: + print('Failed at line: %d err: %s' % (linenum, e), + file=sys.stderr) + except Exception as e: + print('Failed: %s' % (e), file=sys.stderr) + for i, col_type in enumerate(col_types): + if not col_type: + col_types[i] = 'TEXT' + if column_names: + col_names = [] + if load_named_columns: + col_idx = [] + for i, cname in enumerate( + [cn.strip() for cn in column_names.split(',')]): + if cname != '': + col_idx.append(i) + col_names.append(cname) + col_types = [col_types[i] for i in col_idx] + else: + col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] + for i, cname in enumerate( + [cn.strip() for cn in column_names.split(',')]): + if cname and i < len(col_names): + col_names[i] = cname + else: + col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] + col_def = [] + for i, col_name in enumerate(col_names): + col_def.append('%s %s' % (col_names[i], col_types[i])) + return col_names, col_types, col_def, col_idx + + +def create_table(conn, file_path, table_name, skip=0, comment_char='#', + pkey_autoincr=None, column_names=None, + load_named_columns=False, filters=None, + unique_indexes=[], indexes=[]): + col_names, col_types, col_def, col_idx = \ + get_column_def(file_path, table_name, skip=skip, + comment_char=comment_char, column_names=column_names, + load_named_columns=load_named_columns, filters=filters) + col_func = [float if t == 'REAL' else int + if t == 'INTEGER' else str for t in col_types] + table_def = 'CREATE TABLE %s (\n %s%s\n);' % ( + table_name, + '%s INTEGER PRIMARY KEY AUTOINCREMENT,' % + pkey_autoincr if pkey_autoincr else '', + ', \n '.join(col_def)) + # print >> sys.stdout, table_def + insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % ( + table_name, ','.join(col_names), + ','.join(["?" for x in col_names])) + # print >> sys.stdout, insert_stmt + data_lines = 0 + try: + c = conn.cursor() + c.execute(table_def) + conn.commit() + c.close() + for i, index in enumerate(unique_indexes): + index_name = 'idx_uniq_%s_%d' % (table_name, i) + index_columns = index.split(',') + create_index(conn, table_name, index_name, index_columns, + unique=True) + for i, index in enumerate(indexes): + index_name = 'idx_%s_%d' % (table_name, i) + index_columns = index.split(',') + create_index(conn, table_name, index_name, index_columns) + c = conn.cursor() + tr = TabularReader(file_path, skip=skip, comment_char=comment_char, + col_idx=col_idx, filters=filters) + for linenum, fields in enumerate(tr): + data_lines += 1 + try: + vals = [col_func[i](x) + if x else None for i, x in enumerate(fields)] + c.execute(insert_stmt, vals) + except Exception as e: + print('Failed at line: %d err: %s' % (linenum, e), + file=sys.stderr) + conn.commit() + c.close() + except Exception as e: + print('Failed: %s' % (e), file=sys.stderr) + exit(1) + + +def create_index(conn, table_name, index_name, index_columns, unique=False): + index_def = "CREATE %s INDEX %s on %s(%s)" % ( + 'UNIQUE' if unique else '', index_name, + table_name, ','.join(index_columns)) + c = conn.cursor() + c.execute(index_def) + conn.commit() + c.close()