Mercurial > repos > sem4j > sql_tools
comparison sqlite.py @ 1:86c92a1d55fa draft
Uploaded
| author | sem4j |
|---|---|
| date | Wed, 25 Sep 2013 01:29:30 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:9e9da6cf492b | 1:86c92a1d55fa |
|---|---|
| 1 # sqlite.py version:0.3 | |
| 2 # USAGE: python sqlite.py <output_file> <explain_plan> <header> <str_select> <input_file1> <index1> <header1> <input_file2> <index2> <header2> .. | |
| 3 | |
| 4 import sys, csv, sqlite3, time | |
| 5 | |
| 6 argvs = sys.argv | |
| 7 | |
| 8 if ((len(argvs) % 3) != 2): | |
| 9 print 'Number of argv is incorrect' | |
| 10 quit() | |
| 11 | |
| 12 num_table = (len(argvs) - 4) / 3 | |
| 13 print('Number of Tables: ' + str(num_table) + '\n') | |
| 14 | |
| 15 output_file = argvs[1] | |
| 16 explain_plan = argvs[2] | |
| 17 output_header = argvs[3] | |
| 18 str_select = argvs[4] | |
| 19 | |
| 20 conn = sqlite3.connect(':memory:') | |
| 21 conn.row_factory = sqlite3.Row | |
| 22 conn.text_factory = lambda x: unicode(x, "utf-8", "ignore") | |
| 23 cur = conn.cursor() | |
| 24 | |
| 25 for i in range(1, num_table + 1): | |
| 26 input_file = argvs[3 * i + 2] | |
| 27 input_header = argvs[3 * i + 3] | |
| 28 str_index = argvs[3 * i + 4] | |
| 29 | |
| 30 with open(input_file,'rb') as infile: | |
| 31 dr = csv.reader(infile, delimiter='\t') | |
| 32 to_db = [] | |
| 33 row_count = 0 | |
| 34 for row in dr: | |
| 35 row_count += 1 | |
| 36 values =[] | |
| 37 col_count = 0 | |
| 38 for col in row: | |
| 39 col_count += 1 | |
| 40 values.append(col) | |
| 41 if input_header == '1' and row_count == 1: | |
| 42 header = values | |
| 43 print('Header:') | |
| 44 else: | |
| 45 to_db.append(values) | |
| 46 | |
| 47 # PREPARE DDL&DML | |
| 48 str_table1 = 't' + str(i) + '(' | |
| 49 str_table2 = 't' + str(i) + '(' | |
| 50 str_value = 'VALUES (' | |
| 51 for j in range(col_count): | |
| 52 if input_header == '1': | |
| 53 print(' ' + str(j+1) + ' ' + header[j]); | |
| 54 col_name = header[j] | |
| 55 else: | |
| 56 col_name = 'c' + str(j + 1) | |
| 57 str_table1 = str_table1 + col_name + ' NUMERIC' | |
| 58 str_table2 = str_table2 + col_name + ' ' | |
| 59 str_value = str_value + '?' | |
| 60 if j != col_count - 1: | |
| 61 str_table1 = str_table1 + ',' | |
| 62 str_table2 = str_table2 + ',' | |
| 63 str_value = str_value + ',' | |
| 64 str_table1 = str_table1 + ')' | |
| 65 str_table2 = str_table2 + ')' | |
| 66 str_value = str_value + ')' | |
| 67 print('') | |
| 68 | |
| 69 # CREATE TABLE | |
| 70 str_create = 'CREATE TABLE ' + str_table1 + ';' | |
| 71 print(str_create) | |
| 72 time_start = time.time() | |
| 73 cur.execute(str_create) | |
| 74 print('Elapsed Time: ' + str(time.time() - time_start) + '\n') | |
| 75 | |
| 76 # LOAD DATA | |
| 77 str_insert = 'INSERT INTO ' + str_table2 + ' ' + str_value + ';' | |
| 78 print(str_insert) | |
| 79 time_start = time.time() | |
| 80 cur.executemany(str_insert, to_db) | |
| 81 conn.commit() | |
| 82 print('Elapsed Time: ' + str(time.time() - time_start) + '\n') | |
| 83 | |
| 84 # CREATE INDEX | |
| 85 array_idx = str_index.split(',') | |
| 86 for col_idx in array_idx: | |
| 87 if col_idx != '': | |
| 88 time_start = time.time() | |
| 89 if input_header == '1': | |
| 90 sql_index = 'CREATE INDEX idx_t' + str(i) +'c' + col_idx + ' on t' + str(i) + '(' + header[int(col_idx) - 1] + ');' | |
| 91 else: | |
| 92 sql_index = 'CREATE INDEX idx_t' + str(i) +'c' + col_idx + ' on t' + str(i) + '(c' + col_idx + ');' | |
| 93 cur.execute(sql_index) | |
| 94 print(sql_index) | |
| 95 print('Elapsed Time: ' + str(time.time() - time_start) + '\n') | |
| 96 | |
| 97 # SELECT | |
| 98 time_start = time.time() | |
| 99 print(str_select) | |
| 100 cur.execute(str_select) | |
| 101 print('Elapsed Time: ' + str(time.time() - time_start) + '\n') | |
| 102 | |
| 103 # OUTPUT | |
| 104 out = open(output_file, 'w') | |
| 105 | |
| 106 # HEADER | |
| 107 if output_header == '1': | |
| 108 col_count = 0 | |
| 109 for col in cur.description: | |
| 110 col_count += 1 | |
| 111 if col_count != len(cur.description): | |
| 112 out.write(str(col[0]) + '\t') | |
| 113 else: | |
| 114 out.write(str(col[0]) + '\n') | |
| 115 | |
| 116 # CONTENT | |
| 117 for row in cur: | |
| 118 col_count = 0 | |
| 119 for col in row: | |
| 120 col_count += 1 | |
| 121 if col_count != len(row): | |
| 122 out.write(str(col) + '\t') | |
| 123 else: | |
| 124 out.write(str(col) + '\n') | |
| 125 | |
| 126 out.close() | |
| 127 | |
| 128 # EXPLAIN PLAN | |
| 129 if explain_plan == '1': | |
| 130 #conn.executescript('.explain on') | |
| 131 cur.execute('explain query plan ' + str_select) | |
| 132 for row in cur: | |
| 133 col_count = 0 | |
| 134 for col in row: | |
| 135 col_count += 1 | |
| 136 if col_count != len(row): | |
| 137 print(str(col) + '\t') | |
| 138 else: | |
| 139 print(str(col) + '\n') | |
| 140 | |
| 141 cur.close() | |
| 142 conn.close() | |
| 143 |
