comparison load_db.py @ 20:ab27c4bd14b9 draft

Uploaded
author jjohnson
date Fri, 14 Jul 2017 11:39:27 -0400
parents
children bed5018e7ae3
comparison
equal deleted inserted replaced
19:9d9ab2c69014 20:ab27c4bd14b9
1 #!/usr/bin/env python
2
3 from __future__ import print_function
4
5 import sys
6
7 from filters import TabularReader
8
9
10 def getValueType(val):
11 if val or 0. == val:
12 try:
13 int(val)
14 return 'INTEGER'
15 except:
16 try:
17 float(val)
18 return 'REAL'
19 except:
20 return 'TEXT'
21 return None
22
23
24 def get_column_def(file_path, table_name, skip=0, comment_char='#',
25 column_names=None, max_lines=100, load_named_columns=False,
26 filters=None):
27 col_pref = ['TEXT', 'REAL', 'INTEGER', None]
28 col_types = []
29 col_idx = None
30 try:
31 tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
32 col_idx=None, filters=filters)
33 for linenum, fields in enumerate(tr):
34 if linenum > max_lines:
35 break
36 try:
37 while len(col_types) < len(fields):
38 col_types.append(None)
39 for i, val in enumerate(fields):
40 colType = getValueType(val)
41 if col_pref.index(colType) < col_pref.index(col_types[i]):
42 col_types[i] = colType
43 except Exception as e:
44 print('Failed at line: %d err: %s' % (linenum, e),
45 file=sys.stderr)
46 except Exception as e:
47 print('Failed: %s' % (e), file=sys.stderr)
48 for i, col_type in enumerate(col_types):
49 if not col_type:
50 col_types[i] = 'TEXT'
51 if column_names:
52 col_names = []
53 if load_named_columns:
54 col_idx = []
55 for i, cname in enumerate(
56 [cn.strip() for cn in column_names.split(',')]):
57 if cname != '':
58 col_idx.append(i)
59 col_names.append(cname)
60 col_types = [col_types[i] for i in col_idx]
61 else:
62 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
63 for i, cname in enumerate(
64 [cn.strip() for cn in column_names.split(',')]):
65 if cname and i < len(col_names):
66 col_names[i] = cname
67 else:
68 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
69 col_def = []
70 for i, col_name in enumerate(col_names):
71 col_def.append('%s %s' % (col_names[i], col_types[i]))
72 return col_names, col_types, col_def, col_idx
73
74
75 def create_table(conn, file_path, table_name, skip=0, comment_char='#',
76 pkey_autoincr=None, column_names=None,
77 load_named_columns=False, filters=None,
78 unique_indexes=[], indexes=[]):
79 col_names, col_types, col_def, col_idx = \
80 get_column_def(file_path, table_name, skip=skip,
81 comment_char=comment_char, column_names=column_names,
82 load_named_columns=load_named_columns, filters=filters)
83 col_func = [float if t == 'REAL' else int
84 if t == 'INTEGER' else str for t in col_types]
85 table_def = 'CREATE TABLE %s (\n %s%s\n);' % (
86 table_name,
87 '%s INTEGER PRIMARY KEY AUTOINCREMENT,' %
88 pkey_autoincr if pkey_autoincr else '',
89 ', \n '.join(col_def))
90 # print >> sys.stdout, table_def
91 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (
92 table_name, ','.join(col_names),
93 ','.join(["?" for x in col_names]))
94 # print >> sys.stdout, insert_stmt
95 data_lines = 0
96 try:
97 c = conn.cursor()
98 c.execute(table_def)
99 conn.commit()
100 c.close()
101 for i, index in enumerate(unique_indexes):
102 index_name = 'idx_uniq_%s_%d' % (table_name, i)
103 index_columns = index.split(',')
104 create_index(conn, table_name, index_name, index_columns,
105 unique=True)
106 for i, index in enumerate(indexes):
107 index_name = 'idx_%s_%d' % (table_name, i)
108 index_columns = index.split(',')
109 create_index(conn, table_name, index_name, index_columns)
110 c = conn.cursor()
111 tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
112 col_idx=col_idx, filters=filters)
113 for linenum, fields in enumerate(tr):
114 data_lines += 1
115 try:
116 vals = [col_func[i](x)
117 if x else None for i, x in enumerate(fields)]
118 c.execute(insert_stmt, vals)
119 except Exception as e:
120 print('Failed at line: %d err: %s' % (linenum, e),
121 file=sys.stderr)
122 conn.commit()
123 c.close()
124 except Exception as e:
125 print('Failed: %s' % (e), file=sys.stderr)
126 exit(1)
127
128
129 def create_index(conn, table_name, index_name, index_columns, unique=False):
130 index_def = "CREATE %s INDEX %s on %s(%s)" % (
131 'UNIQUE' if unique else '', index_name,
132 table_name, ','.join(index_columns))
133 c = conn.cursor()
134 c.execute(index_def)
135 conn.commit()
136 c.close()