20
|
1 #!/usr/bin/env python
|
|
2
|
|
3 from __future__ import print_function
|
|
4
|
|
5 import sys
|
|
6
|
|
7 from filters import TabularReader
|
|
8
|
|
9
|
|
10 def getValueType(val):
|
|
11 if val or 0. == val:
|
|
12 try:
|
|
13 int(val)
|
|
14 return 'INTEGER'
|
|
15 except:
|
|
16 try:
|
|
17 float(val)
|
|
18 return 'REAL'
|
|
19 except:
|
|
20 return 'TEXT'
|
|
21 return None
|
|
22
|
|
23
|
|
24 def get_column_def(file_path, table_name, skip=0, comment_char='#',
|
|
25 column_names=None, max_lines=100, load_named_columns=False,
|
|
26 filters=None):
|
|
27 col_pref = ['TEXT', 'REAL', 'INTEGER', None]
|
|
28 col_types = []
|
|
29 col_idx = None
|
|
30 try:
|
|
31 tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
|
|
32 col_idx=None, filters=filters)
|
|
33 for linenum, fields in enumerate(tr):
|
|
34 if linenum > max_lines:
|
|
35 break
|
|
36 try:
|
|
37 while len(col_types) < len(fields):
|
|
38 col_types.append(None)
|
|
39 for i, val in enumerate(fields):
|
|
40 colType = getValueType(val)
|
|
41 if col_pref.index(colType) < col_pref.index(col_types[i]):
|
|
42 col_types[i] = colType
|
|
43 except Exception as e:
|
|
44 print('Failed at line: %d err: %s' % (linenum, e),
|
|
45 file=sys.stderr)
|
|
46 except Exception as e:
|
|
47 print('Failed: %s' % (e), file=sys.stderr)
|
|
48 for i, col_type in enumerate(col_types):
|
|
49 if not col_type:
|
|
50 col_types[i] = 'TEXT'
|
|
51 if column_names:
|
|
52 col_names = []
|
|
53 if load_named_columns:
|
|
54 col_idx = []
|
|
55 for i, cname in enumerate(
|
|
56 [cn.strip() for cn in column_names.split(',')]):
|
|
57 if cname != '':
|
|
58 col_idx.append(i)
|
|
59 col_names.append(cname)
|
|
60 col_types = [col_types[i] for i in col_idx]
|
|
61 else:
|
|
62 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
|
|
63 for i, cname in enumerate(
|
|
64 [cn.strip() for cn in column_names.split(',')]):
|
|
65 if cname and i < len(col_names):
|
|
66 col_names[i] = cname
|
|
67 else:
|
|
68 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
|
|
69 col_def = []
|
|
70 for i, col_name in enumerate(col_names):
|
|
71 col_def.append('%s %s' % (col_names[i], col_types[i]))
|
|
72 return col_names, col_types, col_def, col_idx
|
|
73
|
|
74
|
|
75 def create_table(conn, file_path, table_name, skip=0, comment_char='#',
|
|
76 pkey_autoincr=None, column_names=None,
|
|
77 load_named_columns=False, filters=None,
|
|
78 unique_indexes=[], indexes=[]):
|
|
79 col_names, col_types, col_def, col_idx = \
|
|
80 get_column_def(file_path, table_name, skip=skip,
|
|
81 comment_char=comment_char, column_names=column_names,
|
|
82 load_named_columns=load_named_columns, filters=filters)
|
|
83 col_func = [float if t == 'REAL' else int
|
|
84 if t == 'INTEGER' else str for t in col_types]
|
|
85 table_def = 'CREATE TABLE %s (\n %s%s\n);' % (
|
|
86 table_name,
|
|
87 '%s INTEGER PRIMARY KEY AUTOINCREMENT,' %
|
|
88 pkey_autoincr if pkey_autoincr else '',
|
|
89 ', \n '.join(col_def))
|
|
90 # print >> sys.stdout, table_def
|
|
91 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (
|
|
92 table_name, ','.join(col_names),
|
|
93 ','.join(["?" for x in col_names]))
|
|
94 # print >> sys.stdout, insert_stmt
|
|
95 data_lines = 0
|
|
96 try:
|
|
97 c = conn.cursor()
|
|
98 c.execute(table_def)
|
|
99 conn.commit()
|
|
100 c.close()
|
|
101 for i, index in enumerate(unique_indexes):
|
|
102 index_name = 'idx_uniq_%s_%d' % (table_name, i)
|
|
103 index_columns = index.split(',')
|
|
104 create_index(conn, table_name, index_name, index_columns,
|
|
105 unique=True)
|
|
106 for i, index in enumerate(indexes):
|
|
107 index_name = 'idx_%s_%d' % (table_name, i)
|
|
108 index_columns = index.split(',')
|
|
109 create_index(conn, table_name, index_name, index_columns)
|
|
110 c = conn.cursor()
|
|
111 tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
|
|
112 col_idx=col_idx, filters=filters)
|
|
113 for linenum, fields in enumerate(tr):
|
|
114 data_lines += 1
|
|
115 try:
|
|
116 vals = [col_func[i](x)
|
|
117 if x else None for i, x in enumerate(fields)]
|
|
118 c.execute(insert_stmt, vals)
|
|
119 except Exception as e:
|
|
120 print('Failed at line: %d err: %s' % (linenum, e),
|
|
121 file=sys.stderr)
|
|
122 conn.commit()
|
|
123 c.close()
|
|
124 except Exception as e:
|
|
125 print('Failed: %s' % (e), file=sys.stderr)
|
|
126 exit(1)
|
|
127
|
|
128
|
|
129 def create_index(conn, table_name, index_name, index_columns, unique=False):
|
|
130 index_def = "CREATE %s INDEX %s on %s(%s)" % (
|
|
131 'UNIQUE' if unique else '', index_name,
|
|
132 table_name, ','.join(index_columns))
|
|
133 c = conn.cursor()
|
|
134 c.execute(index_def)
|
|
135 conn.commit()
|
|
136 c.close()
|