changeset 3:3e3b3c883bec draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 60d1a49c09f87c1c1ec6fecbe54aa226bdc695a7-dirty
author jjohnson
date Tue, 16 Feb 2016 09:34:30 -0500
parents f5df461a6b90
children 9d73dca48178
files query_tabular.py query_tabular.xml
diffstat 2 files changed, 42 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/query_tabular.py	Fri Feb 12 08:37:07 2016 -0500
+++ b/query_tabular.py	Tue Feb 16 09:34:30 2016 -0500
@@ -28,8 +28,10 @@
             comment_lines : 1
     },
     { file_path : '/home/galaxy/dataset_102.dat',
-            table_name : 't2',
-            column_names : ['c1', 'c2', 'c3']
+            table_name : 'gff',
+            column_names : ['seqname',,,'start','end']
+            comment_lines : 1
+            load_named_columns : True
     },
     { file_path : '/home/galaxy/dataset_103.dat',
             table_name : 'test',
@@ -58,10 +60,12 @@
 
 
 def get_column_def(file_path, table_name, skip=0, comment_char='#',
-                   column_names=None, max_lines=100):
+                   column_names=None, max_lines=100,load_named_columns=False):
     col_pref = ['TEXT', 'REAL', 'INTEGER', None]
     col_types = []
+    col_idx = None
     data_lines = 0
+
     try:
         with open(file_path, "r") as fh:
             for linenum, line in enumerate(fh):
@@ -82,22 +86,33 @@
                     print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e)
     except Exception, e:
         print >> sys.stderr, 'Failed: %s' % (e)
-    for i, col_type in enumerate(col_types):
+    for i,col_type in enumerate(col_types):
         if not col_type:
             col_types[i] = 'TEXT'
-    col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
-    if column_names:
-        for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]):
-            if cname and i < len(col_names):
-                col_names[i] = cname
+    if column_names: 
+        col_names = []
+        if load_named_columns:
+            col_idx = []
+            for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]):
+                if cname != '':
+                    col_idx.append(i)
+                    col_names.append(cname)                
+            col_types = [col_types[i] for i in col_idx]
+        else:
+            col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
+            for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]):
+                if cname and i < len(col_names):
+                    col_names[i] = cname
+    else:
+        col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
     col_def = []
     for i, col_name in enumerate(col_names):
         col_def.append('%s %s' % (col_names[i], col_types[i]))
-    return col_names, col_types, col_def
+    return col_names, col_types, col_def, col_idx
 
 
-def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None):
-    col_names, col_types, col_def = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names)
+def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False):
+    col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns)
     col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types]
     table_def = 'CREATE TABLE %s (\n    %s\n);' % (table_name, ', \n    '.join(col_def))
     # print >> sys.stdout, table_def
@@ -114,6 +129,8 @@
                 data_lines += 1
                 try:
                     fields = line.rstrip('\r\n').split('\t')
+                    if col_idx:
+                        fields = [fields[i] for i in col_idx]
                     vals = [col_func[i](x) if x else None for i, x in enumerate(fields)]
                     c.execute(insert_stmt, vals)
                 except Exception, e:
@@ -193,9 +210,13 @@
                 for ti, table in enumerate(tdef['tables']):
                     path = table['file_path']
                     table_name = table['table_name'] if 'table_name' in table else 't%d' % (ti + 1)
+                    comment_lines = table['comment_lines'] if 'comment_lines' in table else 0
                     column_names = table['column_names'] if 'column_names' in table else None
-                    comment_lines = table['comment_lines'] if 'comment_lines' in table else 0
-                    create_table(conn, path, table_name, column_names=column_names, skip=comment_lines)
+                    if column_names:
+                        load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False
+                    else:   
+                        load_named_columns = False
+                    create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns)
         except Exception, exc:
             print >> sys.stderr, "Error: %s" % exc
     conn.close()
--- a/query_tabular.xml	Fri Feb 12 08:37:07 2016 -0500
+++ b/query_tabular.xml	Tue Feb 16 09:34:30 2016 -0500
@@ -1,4 +1,4 @@
-<tool id="query_tabular" name="Query Tabular" version="0.1.0">
+<tool id="query_tabular" name="Query Tabular" version="0.1.1">
     <description>using sqlite sql</description>
 
     <requirements>
@@ -14,21 +14,6 @@
         -s $workdb
         #end if
         -j $table_json
-        #*
-        ##          #for $i,$tbl in enumerate($tables):
-        ##            #if $tbl.table_name
-        ##            #set $tname = $tbl.table_name
-        ##            #else
-        ##            #set $tname = 't' + str($i + 1) 
-        ##            #end if
-        ##            #if $tbl.col_names:
-        ##            #set $col_names = ':' + str($tbl.col_names)
-        ##            #else 
-        ##            #set $col_names = ''
-        ##            #end if
-        ##            -t ${tbl.table}=${tname}${$col_names}
-        ##          #end for
-        *#
         #if $sqlquery:
           -Q "$query_file" 
           $no_header
@@ -55,6 +40,9 @@
   #set $jtbl['table_name'] = $tname
   #if $tbl.col_names:
   #set $col_names = str($tbl.col_names)
+    #if $tbl.load_named_columns:
+      #set $jtbl['load_named_columns'] = True
+    #end if
   #else 
   #set $col_names = ''
   #end if
@@ -77,13 +65,12 @@
                 <help>By default, tables will be named: t1,t2,...,tn</help>
                 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator>
             </param>
-            <!--
-            <param name="sel_cols" label="Include columns" type="data_column" multiple="true" data_ref="table" />
-            -->
             <param name="col_names" type="text" value="" optional="true" label="Column names">
                 <help>By default, table columns will be named: c1,c2,c3,...,cn</help>
-                <validator type="regex" message="A List of separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores">^([A-Za-z]\w*)?(,([A-Za-z]\w*)?)*$</validator>
+                <sanitizer sanitize="False"/>
+                <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be eclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator>
             </param>
+            <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load named columns into database"/>
             <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the datatype comment lines metadata" />
         </repeat>
         <param name="sqlquery" type="text" area="true" size="10x80" value="" optional="true" label="SQL Query">