Mercurial > repos > jjohnson > query_tabular

--- a/query_tabular.py	Tue Feb 16 09:34:30 2016 -0500
+++ b/query_tabular.py	Tue Apr 19 16:46:18 2016 -0400
@@ -14,8 +14,16 @@
 - could read column names from comment lines, but issues with legal names
 - could add some transformations on tabular columns,
   e.g. a regex to format date/time strings
-    c2 : re.sub('pat', 'sub', c2)
-    c3 :
+    index: ['c2','c4,c5']
+    unique: ['c1']
+    format: {
+      c2 : re.sub('pat', 'sub', c2)
+      c3 : len(c3)
+   }
+   def format(colname,val, expr):
+
+- allow optional autoincrement id column - user supplied name?
+    autoincrement : 'id'
 - column_defs dict of columns to create from tabular input
     column_defs : { 'name1' : 'expr', 'name2' : 'expr'}
 - allow multiple queries and outputs
@@ -111,7 +119,7 @@
     return col_names, col_types, col_def, col_idx


-def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False):
+def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]):
     col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns)
     col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types]
     table_def = 'CREATE TABLE %s (\n    %s\n);' % (table_name, ', \n    '.join(col_def))
@@ -122,6 +130,17 @@
     try:
         c = conn.cursor()
         c.execute(table_def)
+        conn.commit()
+        c.close()
+        for i,index in enumerate(unique_indexes):
+            index_name='idx_uniq_%s_%d' % (table_name,i)
+            index_columns = index.split(',')
+            create_index(conn, table_name, index_name, index_columns,unique=True)
+        for i,index in enumerate(indexes):
+            index_name='idx_%s_%d' % (table_name,i)
+            index_columns = index.split(',')
+            create_index(conn, table_name, index_name, index_columns)
+        c = conn.cursor()
         with open(file_path, "r") as fh:
             for linenum, line in enumerate(fh):
                 if linenum < skip or line.startswith(comment_char):
@@ -141,6 +160,12 @@
         print >> sys.stderr, 'Failed: %s' % (e)
         exit(1)

+def create_index(conn, table_name, index_name, index_columns, unique=False):
+    index_def = "CREATE %s INDEX %s on %s(%s)" % ('UNIQUE' if unique else '', index_name, table_name, ','.join(index_columns))
+    c = conn.cursor()
+    c.execute(index_def)
+    conn.commit()
+    c.close()

 def regex_match(expr, item):
     return re.match(expr, item) is not None
@@ -216,7 +241,11 @@
                         load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False
                     else:
                         load_named_columns = False
-                    create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns)
+                    unique_indexes = table['unique'] if 'unique' in table else []
+                    indexes = table['index'] if 'index' in table else []
+                    create_table(conn, path, table_name, column_names=column_names,
+                                 skip=comment_lines, load_named_columns=load_named_columns,
+                                 unique_indexes=unique_indexes, indexes=indexes)
         except Exception, exc:
             print >> sys.stderr, "Error: %s" % exc
     conn.close()
--- a/query_tabular.xml	Tue Feb 16 09:34:30 2016 -0500
+++ b/query_tabular.xml	Tue Apr 19 16:46:18 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="query_tabular" name="Query Tabular" version="0.1.1">
+<tool id="query_tabular" name="Query Tabular" version="0.1.2">
     <description>using sqlite sql</description>

     <requirements>
@@ -32,26 +32,44 @@
 #for $i,$tbl in enumerate($tables):
   #set $jtbl = dict()
   #set $jtbl['file_path'] = str($tbl.table)
-  #if $tbl.table_name
-  #set $tname = str($tbl.table_name)
+  #if $tbl.tbl_opts.table_name:
+  #set $tname = str($tbl.tbl_opts.table_name)
   #else
   #set $tname = 't' + str($i + 1)
   #end if
   #set $jtbl['table_name'] = $tname
-  #if $tbl.col_names:
-  #set $col_names = str($tbl.col_names)
-    #if $tbl.load_named_columns:
+  ## #if $tbl.tbl_opts.sel_cols:
+  ##   #set $jtbl['sel_cols'] = $tbl.tbl_opts.sel_cols el_cols
+  ## #end if
+  #if $tbl.tbl_opts.col_names:
+  #set $col_names = str($tbl.tbl_opts.col_names)
+    #if $tbl.tbl_opts.load_named_columns:
       #set $jtbl['load_named_columns'] = True
     #end if
   #else
   #set $col_names = ''
   #end if
   #set $jtbl['column_names'] = $col_names
-  #if str($tbl.skip_lines) != '':
-    #set $jtbl['comment_lines'] = int($tbl.skip_lines)
-  #elif $tbl.table.metadata.comment_lines > 0:
+  #if str($tbl.tbl_opts.skip_lines) != '':
+    #set $jtbl['comment_lines'] = int($tbl.tbl_opts.skip_lines)
+  #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
     #set $jtbl['comment_lines'] = int($tbl.table.metadata.comment_lines)
   #end if
+  #set $idx_unique = []
+  #set $idx_non = []
+  #for $idx in $tbl.tbl_opts.indexes:
+    #if $idx.unique:
+      #silent $idx_unique.append(str($idx.index_columns))
+    #else:
+      #silent $idx_non.append(str($idx.index_columns))
+    #end if
+  #end for
+  #if len($idx_unique) > 0:
+    #set $jtbl['unique'] = $idx_unique
+  #end if
+  #if len($idx_non) > 0:
+    #set $jtbl['index'] = $idx_non
+  #end if
   #set $jtbls += [$jtbl]
 #end for
 #echo $json.dumps($jtbldef)
@@ -61,22 +79,31 @@
         <param name="workdb" type="hidden" value="workdb.sqlite" label=""/>
         <repeat name="tables" title="Add tables" min="1">
             <param name="table" type="data" format="tabular" label="Dataset"/>
-            <param name="table_name" type="text" value="" optional="true" label="Table name">
-                <help>By default, tables will be named: t1,t2,...,tn</help>
-                <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator>
-            </param>
-            <param name="col_names" type="text" value="" optional="true" label="Column names">
-                <help>By default, table columns will be named: c1,c2,c3,...,cn</help>
-                <sanitizer sanitize="False"/>
-                <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be eclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator>
-            </param>
-            <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load named columns into database"/>
-            <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the datatype comment lines metadata" />
+            <section name="tbl_opts" expanded="false" title="Table Options">
+                <param name="table_name" type="text" value="" optional="true" label="Table name">
+                    <help>By default, tables will be named: t1,t2,...,tn</help>
+                    <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator>
+                </param>
+                <param name="col_names" type="text" value="" optional="true" label="Column names">
+                    <help>By default, table columns will be named: c1,c2,c3,...,cn</help>
+                    <sanitizer sanitize="False"/>
+                    <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be eclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator>
+                </param>
+                <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load named columns into database"/>
+                <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the datatype comment lines metadata" />
+                <repeat name="indexes">
+                    <param name="unique" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="This is a unique index"/>
+                    <param name="index_columns" type="text" value="" label="Index on Columns">
+                        <help>Create an index on the column names: e,g, c1  or c2,c4</help>
+                        <validator type="regex" message="Column name, separated by commes if more than one">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator>
+                    </param>
+                </repeat>
+            </section>
         </repeat>
         <param name="sqlquery" type="text" area="true" size="10x80" value="" optional="true" label="SQL Query">
                 <help>By default, tables will be named: t1,t2,...,tn</help>
                 <sanitizer sanitize="False"/>
-                <validator type="regex" message="">^(?im)\s*select\s+.*\s+from\s+.*$</validator>
+                <validator type="regex" message="">^(?ims)\s*select\s+.*\s+from\s+.*$</validator>
         </param>
         <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers"/>