comparison load_db.py @ 1:cd2a99849f8b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 81f69ad5f39223059c40501e55ac777d3feca845
author iuc
date Fri, 18 Aug 2017 16:47:57 -0400
parents 6fbd9d25ceef
children 4a9b1cb3639b
comparison
equal deleted inserted replaced
0:6fbd9d25ceef 1:cd2a99849f8b
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 from __future__ import print_function 3 from __future__ import print_function
4 4
5 import re
5 import sys 6 import sys
6 7
7 from filters import TabularReader 8 from filters import TabularReader
9
10
11 SQLITE_KEYWORDS = [
12 'ABORT',
13 'ACTION',
14 'ADD',
15 'AFTER',
16 'ALL',
17 'ALTER',
18 'ANALYZE',
19 'AND',
20 'AS',
21 'ASC',
22 'ATTACH',
23 'AUTOINCREMENT',
24 'BEFORE',
25 'BEGIN',
26 'BETWEEN',
27 'BY',
28 'CASCADE',
29 'CASE',
30 'CAST',
31 'CHECK',
32 'COLLATE',
33 'COLUMN',
34 'COMMIT',
35 'CONFLICT',
36 'CONSTRAINT',
37 'CREATE',
38 'CROSS',
39 'CURRENT_DATE',
40 'CURRENT_TIME',
41 'CURRENT_TIMESTAMP',
42 'DATABASE',
43 'DEFAULT',
44 'DEFERRABLE',
45 'DEFERRED',
46 'DELETE',
47 'DESC',
48 'DETACH',
49 'DISTINCT',
50 'DROP',
51 'EACH',
52 'ELSE',
53 'END',
54 'ESCAPE',
55 'EXCEPT',
56 'EXCLUSIVE',
57 'EXISTS',
58 'EXPLAIN',
59 'FAIL',
60 'FOR',
61 'FOREIGN',
62 'FROM',
63 'FULL',
64 'GLOB',
65 'GROUP',
66 'HAVING',
67 'IF',
68 'IGNORE',
69 'IMMEDIATE',
70 'IN',
71 'INDEX',
72 'INDEXED',
73 'INITIALLY',
74 'INNER',
75 'INSERT',
76 'INSTEAD',
77 'INTERSECT',
78 'INTO',
79 'IS',
80 'ISNULL',
81 'JOIN',
82 'KEY',
83 'LEFT',
84 'LIKE',
85 'LIMIT',
86 'MATCH',
87 'NATURAL',
88 'NO',
89 'NOT',
90 'NOTNULL',
91 'NULL',
92 'OF',
93 'OFFSET',
94 'ON',
95 'OR',
96 'ORDER',
97 'OUTER',
98 'PLAN',
99 'PRAGMA',
100 'PRIMARY',
101 'QUERY',
102 'RAISE',
103 'RECURSIVE',
104 'REFERENCES',
105 'REGEXP',
106 'REINDEX',
107 'RELEASE',
108 'RENAME',
109 'REPLACE',
110 'RESTRICT',
111 'RIGHT',
112 'ROLLBACK',
113 'ROW',
114 'SAVEPOINT',
115 'SELECT',
116 'SET',
117 'TABLE',
118 'TEMP',
119 'TEMPORARY',
120 'THEN',
121 'TO',
122 'TRANSACTION',
123 'TRIGGER',
124 'UNION',
125 'UNIQUE',
126 'UPDATE',
127 'USING',
128 'VACUUM',
129 'VALUES',
130 'VIEW',
131 'VIRTUAL',
132 'WHEN',
133 'WHERE',
134 'WITH',
135 'WITHOUT'
136 ]
137
138
139 def get_valid_column_name(name):
140 valid_name = name
141 if not name or not name.strip():
142 return None
143 elif name.upper() in SQLITE_KEYWORDS:
144 valid_name = '"%s"' % name
145 elif re.match('^[a-zA-Z]\w*$', name):
146 pass
147 elif re.match('^"[^"]+"$', name):
148 pass
149 elif re.match('^\[[^\[\]]*\]$', name):
150 pass
151 elif re.match("^`[^`]+`$", name):
152 pass
153 elif name.find('"') < 0:
154 valid_name = '"%s"' % name
155 elif name.find('[') < 0 and name.find(']') < 0:
156 valid_name = '[%s]' % name
157 elif name.find('`') < 0:
158 valid_name = '`%s`' % name
159 elif name.find("'") < 0:
160 valid_name = "'%s'" % name
161 return valid_name
8 162
9 163
10 def getValueType(val): 164 def getValueType(val):
11 if val or 0. == val: 165 if val or 0. == val:
12 try: 166 try:
21 return None 175 return None
22 176
23 177
24 def get_column_def(file_path, table_name, skip=0, comment_char='#', 178 def get_column_def(file_path, table_name, skip=0, comment_char='#',
25 column_names=None, max_lines=100, load_named_columns=False, 179 column_names=None, max_lines=100, load_named_columns=False,
26 filters=None): 180 firstlinenames=False, filters=None):
27 col_pref = ['TEXT', 'REAL', 'INTEGER', None] 181 col_pref = ['TEXT', 'REAL', 'INTEGER', None]
28 col_types = [] 182 col_types = []
29 col_idx = None 183 col_idx = None
184 col_names = []
30 try: 185 try:
31 tr = TabularReader(file_path, skip=skip, comment_char=comment_char, 186 tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
32 col_idx=None, filters=filters) 187 col_idx=None, filters=filters)
33 for linenum, fields in enumerate(tr): 188 for linenum, fields in enumerate(tr):
189 if linenum == 0 and firstlinenames:
190 col_names = [get_valid_column_name(name) or 'c%d' % (i + 1)
191 for i, name in enumerate(fields)]
192 continue
34 if linenum > max_lines: 193 if linenum > max_lines:
35 break 194 break
36 try: 195 try:
37 while len(col_types) < len(fields): 196 while len(col_types) < len(fields):
38 col_types.append(None) 197 col_types.append(None)
46 except Exception as e: 205 except Exception as e:
47 print('Failed: %s' % (e), file=sys.stderr) 206 print('Failed: %s' % (e), file=sys.stderr)
48 for i, col_type in enumerate(col_types): 207 for i, col_type in enumerate(col_types):
49 if not col_type: 208 if not col_type:
50 col_types[i] = 'TEXT' 209 col_types[i] = 'TEXT'
210 if not col_names:
211 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
51 if column_names: 212 if column_names:
52 col_names = []
53 if load_named_columns: 213 if load_named_columns:
54 col_idx = [] 214 col_idx = []
215 cnames = []
55 for i, cname in enumerate( 216 for i, cname in enumerate(
56 [cn.strip() for cn in column_names.split(',')]): 217 [cn.strip() for cn in column_names.split(',')]):
57 if cname != '': 218 if cname != '':
58 col_idx.append(i) 219 col_idx.append(i)
59 col_names.append(cname) 220 cnames.append(cname)
60 col_types = [col_types[i] for i in col_idx] 221 col_types = [col_types[i] for i in col_idx]
222 col_names = cnames
61 else: 223 else:
62 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
63 for i, cname in enumerate( 224 for i, cname in enumerate(
64 [cn.strip() for cn in column_names.split(',')]): 225 [cn.strip() for cn in column_names.split(',')]):
65 if cname and i < len(col_names): 226 if cname and i < len(col_names):
66 col_names[i] = cname 227 col_names[i] = cname
67 else:
68 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
69 col_def = [] 228 col_def = []
70 for i, col_name in enumerate(col_names): 229 for i, col_name in enumerate(col_names):
71 col_def.append('%s %s' % (col_names[i], col_types[i])) 230 col_def.append('%s %s' % (col_names[i], col_types[i]))
72 return col_names, col_types, col_def, col_idx 231 return col_names, col_types, col_def, col_idx
73 232
74 233
75 def create_table(conn, file_path, table_name, skip=0, comment_char='#', 234 def create_table(conn, file_path, table_name, skip=0, comment_char='#',
76 pkey_autoincr=None, column_names=None, 235 pkey_autoincr=None, column_names=None,
77 load_named_columns=False, filters=None, 236 load_named_columns=False, firstlinenames=False,
78 unique_indexes=[], indexes=[]): 237 filters=None, unique_indexes=[], indexes=[]):
79 col_names, col_types, col_def, col_idx = \ 238 col_names, col_types, col_def, col_idx = \
80 get_column_def(file_path, table_name, skip=skip, 239 get_column_def(file_path, table_name, skip=skip,
81 comment_char=comment_char, column_names=column_names, 240 comment_char=comment_char, column_names=column_names,
82 load_named_columns=load_named_columns, filters=filters) 241 load_named_columns=load_named_columns,
242 firstlinenames=firstlinenames,
243 filters=filters)
83 col_func = [float if t == 'REAL' else int 244 col_func = [float if t == 'REAL' else int
84 if t == 'INTEGER' else str for t in col_types] 245 if t == 'INTEGER' else str for t in col_types]
85 table_def = 'CREATE TABLE %s (\n %s%s\n);' % ( 246 table_def = 'CREATE TABLE %s (\n %s%s\n);' % (
86 table_name, 247 table_name,
87 '%s INTEGER PRIMARY KEY AUTOINCREMENT,' % 248 '%s INTEGER PRIMARY KEY AUTOINCREMENT,' %
96 try: 257 try:
97 c = conn.cursor() 258 c = conn.cursor()
98 c.execute(table_def) 259 c.execute(table_def)
99 conn.commit() 260 conn.commit()
100 c.close() 261 c.close()
101 for i, index in enumerate(unique_indexes): 262
102 index_name = 'idx_uniq_%s_%d' % (table_name, i)
103 index_columns = index.split(',')
104 create_index(conn, table_name, index_name, index_columns,
105 unique=True)
106 for i, index in enumerate(indexes):
107 index_name = 'idx_%s_%d' % (table_name, i)
108 index_columns = index.split(',')
109 create_index(conn, table_name, index_name, index_columns)
110 c = conn.cursor() 263 c = conn.cursor()
111 tr = TabularReader(file_path, skip=skip, comment_char=comment_char, 264 tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
112 col_idx=col_idx, filters=filters) 265 col_idx=col_idx, filters=filters)
113 for linenum, fields in enumerate(tr): 266 for linenum, fields in enumerate(tr):
267 if linenum == 0 and firstlinenames:
268 continue
114 data_lines += 1 269 data_lines += 1
115 try: 270 try:
116 vals = [col_func[i](x) 271 vals = [col_func[i](x)
117 if x else None for i, x in enumerate(fields)] 272 if x else None for i, x in enumerate(fields)]
118 c.execute(insert_stmt, vals) 273 c.execute(insert_stmt, vals)
119 except Exception as e: 274 except Exception as e:
120 print('Failed at line: %d err: %s' % (linenum, e), 275 print('Failed at line: %d err: %s' % (linenum, e),
121 file=sys.stderr) 276 file=sys.stderr)
122 conn.commit() 277 conn.commit()
123 c.close() 278 c.close()
279 for i, index in enumerate(unique_indexes):
280 index_name = 'idx_uniq_%s_%d' % (table_name, i)
281 index_columns = index.split(',')
282 create_index(conn, table_name, index_name, index_columns,
283 unique=True)
284 for i, index in enumerate(indexes):
285 index_name = 'idx_%s_%d' % (table_name, i)
286 index_columns = index.split(',')
287 create_index(conn, table_name, index_name, index_columns)
124 except Exception as e: 288 except Exception as e:
125 exit('Error: %s' % (e)) 289 exit('Error: %s' % (e))
126 290
127 291
128 def create_index(conn, table_name, index_name, index_columns, unique=False): 292 def create_index(conn, table_name, index_name, index_columns, unique=False):
129 index_def = "CREATE %s INDEX %s on %s(%s)" % ( 293 index_def = 'CREATE %s INDEX %s on %s(%s)' % (
130 'UNIQUE' if unique else '', index_name, 294 'UNIQUE' if unique else '', index_name,
131 table_name, ','.join(index_columns)) 295 table_name, ','.join(index_columns))
132 c = conn.cursor() 296 try:
133 c.execute(index_def) 297 c = conn.cursor()
134 conn.commit() 298 c.execute(index_def)
135 c.close() 299 conn.commit()
300 c.close()
301 except Exception as e:
302 print('Failed: %s err: %s' % (index_def, e), file=sys.stderr)
303 raise(e)