# HG changeset patch # User jjohnson # Date 1461098778 14400 # Node ID 9d73dca4817801779fed5e882361f2ad8240eded # Parent 3e3b3c883bec893874af2760375a2882d60a58eb planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 9893037a046e4f4c1e7ba859e05f49378c398cb1-dirty diff -r 3e3b3c883bec -r 9d73dca48178 query_tabular.py --- a/query_tabular.py Tue Feb 16 09:34:30 2016 -0500 +++ b/query_tabular.py Tue Apr 19 16:46:18 2016 -0400 @@ -14,8 +14,16 @@ - could read column names from comment lines, but issues with legal names - could add some transformations on tabular columns, e.g. a regex to format date/time strings - c2 : re.sub('pat', 'sub', c2) - c3 : + index: ['c2','c4,c5'] + unique: ['c1'] + format: { + c2 : re.sub('pat', 'sub', c2) + c3 : len(c3) + } + def format(colname,val, expr): + +- allow optional autoincrement id column - user supplied name? + autoincrement : 'id' - column_defs dict of columns to create from tabular input column_defs : { 'name1' : 'expr', 'name2' : 'expr'} - allow multiple queries and outputs @@ -111,7 +119,7 @@ return col_names, col_types, col_def, col_idx -def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False): +def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]): col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) @@ -122,6 +130,17 @@ try: c = conn.cursor() c.execute(table_def) + conn.commit() + c.close() + for i,index in enumerate(unique_indexes): + index_name='idx_uniq_%s_%d' % (table_name,i) + index_columns = index.split(',') + create_index(conn, table_name, index_name, index_columns,unique=True) + for i,index in enumerate(indexes): + index_name='idx_%s_%d' % (table_name,i) + index_columns = index.split(',') + create_index(conn, table_name, index_name, index_columns) + c = conn.cursor() with open(file_path, "r") as fh: for linenum, line in enumerate(fh): if linenum < skip or line.startswith(comment_char): @@ -141,6 +160,12 @@ print >> sys.stderr, 'Failed: %s' % (e) exit(1) +def create_index(conn, table_name, index_name, index_columns, unique=False): + index_def = "CREATE %s INDEX %s on %s(%s)" % ('UNIQUE' if unique else '', index_name, table_name, ','.join(index_columns)) + c = conn.cursor() + c.execute(index_def) + conn.commit() + c.close() def regex_match(expr, item): return re.match(expr, item) is not None @@ -216,7 +241,11 @@ load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False else: load_named_columns = False - create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns) + unique_indexes = table['unique'] if 'unique' in table else [] + indexes = table['index'] if 'index' in table else [] + create_table(conn, path, table_name, column_names=column_names, + skip=comment_lines, load_named_columns=load_named_columns, + unique_indexes=unique_indexes, indexes=indexes) except Exception, exc: print >> sys.stderr, "Error: %s" % exc conn.close() diff -r 3e3b3c883bec -r 9d73dca48178 query_tabular.xml --- a/query_tabular.xml Tue Feb 16 09:34:30 2016 -0500 +++ b/query_tabular.xml Tue Apr 19 16:46:18 2016 -0400 @@ -1,4 +1,4 @@ - + using sqlite sql @@ -32,26 +32,44 @@ #for $i,$tbl in enumerate($tables): #set $jtbl = dict() #set $jtbl['file_path'] = str($tbl.table) - #if $tbl.table_name - #set $tname = str($tbl.table_name) + #if $tbl.tbl_opts.table_name: + #set $tname = str($tbl.tbl_opts.table_name) #else #set $tname = 't' + str($i + 1) #end if #set $jtbl['table_name'] = $tname - #if $tbl.col_names: - #set $col_names = str($tbl.col_names) - #if $tbl.load_named_columns: + ## #if $tbl.tbl_opts.sel_cols: + ## #set $jtbl['sel_cols'] = $tbl.tbl_opts.sel_cols el_cols + ## #end if + #if $tbl.tbl_opts.col_names: + #set $col_names = str($tbl.tbl_opts.col_names) + #if $tbl.tbl_opts.load_named_columns: #set $jtbl['load_named_columns'] = True #end if #else #set $col_names = '' #end if #set $jtbl['column_names'] = $col_names - #if str($tbl.skip_lines) != '': - #set $jtbl['comment_lines'] = int($tbl.skip_lines) - #elif $tbl.table.metadata.comment_lines > 0: + #if str($tbl.tbl_opts.skip_lines) != '': + #set $jtbl['comment_lines'] = int($tbl.tbl_opts.skip_lines) + #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0: #set $jtbl['comment_lines'] = int($tbl.table.metadata.comment_lines) #end if + #set $idx_unique = [] + #set $idx_non = [] + #for $idx in $tbl.tbl_opts.indexes: + #if $idx.unique: + #silent $idx_unique.append(str($idx.index_columns)) + #else: + #silent $idx_non.append(str($idx.index_columns)) + #end if + #end for + #if len($idx_unique) > 0: + #set $jtbl['unique'] = $idx_unique + #end if + #if len($idx_non) > 0: + #set $jtbl['index'] = $idx_non + #end if #set $jtbls += [$jtbl] #end for #echo $json.dumps($jtbldef) @@ -61,22 +79,31 @@ - - By default, tables will be named: t1,t2,...,tn - ^[A-Za-z]\w*$ - - - By default, table columns will be named: c1,c2,c3,...,cn - - ^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$ - - - +
+ + By default, tables will be named: t1,t2,...,tn + ^[A-Za-z]\w*$ + + + By default, table columns will be named: c1,c2,c3,...,cn + + ^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$ + + + + + + + Create an index on the column names: e,g, c1 or c2,c4 + ^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$ + + +
By default, tables will be named: t1,t2,...,tn - ^(?im)\s*select\s+.*\s+from\s+.*$ + ^(?ims)\s*select\s+.*\s+from\s+.*$