comparison query_tabular.xml @ 11:fd16243931d6 draft

Uploaded
author jjohnson
date Fri, 12 May 2017 10:18:42 -0400
parents ae27dab228b8
children 3003fe70f297
comparison
equal deleted inserted replaced
10:e84d1c3bf4fe 11:fd16243931d6
1 <tool id="query_tabular" name="Query Tabular" version="2.0.0"> 1 <tool id="query_tabular" name="Query Tabular" version="3.0.0">
2 <description>using sqlite sql</description> 2 <description>using sqlite sql</description>
3 3
4 <requirements> 4 <requirements>
5 </requirements> 5 </requirements>
6 <stdio> 6 <stdio>
77 #set $jtbl['index'] = $idx_non 77 #set $jtbl['index'] = $idx_non
78 #end if 78 #end if
79 #set $input_filters = [] 79 #set $input_filters = []
80 #for $fi in $tbl.input_opts.linefilters: 80 #for $fi in $tbl.input_opts.linefilters:
81 #if $fi.filter.filter_type == 'skip': 81 #if $fi.filter.filter_type == 'skip':
82 #set $skip_lines = None
82 #if str($fi.filter.skip_lines) != '': 83 #if str($fi.filter.skip_lines) != '':
83 #set $jtbl['comment_lines'] = int($fi.filter.skip_lines) 84 #set $skip_lines = int($fi.filter.skip_lines)
84 #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0: 85 #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
85 #set $jtbl['comment_lines'] = int($tbl.table.metadata.comment_lines) 86 #set $skip_lines = int($tbl.table.metadata.comment_lines)
87 #end if
88 #if $skip_lines is not None:
89 #set $filter_dict = dict()
90 #set $filter_dict['filter'] = str($fi.filter.filter_type)
91 #set $filter_dict['count'] = $skip_lines
92 #silent $input_filters.append($filter_dict)
86 #end if 93 #end if
87 #elif $fi.filter.filter_type == 'comment': 94 #elif $fi.filter.filter_type == 'comment':
88 #set $jtbl['comment_char'] = str($fi.filter.comment_char) 95 #set $filter_dict = dict()
96 #set $filter_dict['filter'] = 'regex'
97 #set $filter_dict['pattern'] = '^' + str($fi.filter.comment_char)
98 #set $filter_dict['action'] = 'exclude'
99 #silent $input_filters.append($filter_dict)
89 #elif $fi.filter.filter_type == 'regex': 100 #elif $fi.filter.filter_type == 'regex':
90 #set $filter_dict = dict() 101 #set $filter_dict = dict()
91 #set $filter_dict['filter'] = str($fi.filter.filter_type) 102 #set $filter_dict['filter'] = str($fi.filter.filter_type)
92 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) 103 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
93 #set $filter_dict['action'] = str($fi.filter.regex_action) 104 #set $filter_dict['action'] = str($fi.filter.regex_action)
97 #set $filter_dict['filter'] = str($fi.filter.filter_type) 108 #set $filter_dict['filter'] = str($fi.filter.filter_type)
98 #set $filter_dict['column'] = int(str($fi.filter.column)) 109 #set $filter_dict['column'] = int(str($fi.filter.column))
99 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) 110 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
100 #set $filter_dict['replace'] = str($fi.filter.regex_replace) 111 #set $filter_dict['replace'] = str($fi.filter.regex_replace)
101 #silent $input_filters.append($filter_dict) 112 #silent $input_filters.append($filter_dict)
102 ## #elif $fi.filter.filter_type == 'normalize': 113 #elif str($fi.filter.filter_type).endswith('pend_line_num'):
103 ## #set $filter_dict = dict() 114 #set $filter_dict = dict()
104 ## #set $filter_dict['filter'] = str($fi.filter.filter_type) 115 #set $filter_dict['filter'] = str($fi.filter.filter_type)
105 ## #set $filter_dict['columns'] = [int(str($ci)) for $ci in str($fi.filter.columns).split(',')] 116 #silent $input_filters.append($filter_dict)
106 ## #set $filter_dict['separator'] = str($fi.filter.separator) 117 #elif $fi.filter.filter_type == 'normalize':
107 ## #silent $input_filters.append($filter_dict) 118 #set $filter_dict = dict()
119 #set $filter_dict['filter'] = str($fi.filter.filter_type)
120 #set $filter_dict['columns'] = [int(str($ci)) for $ci in str($fi.filter.columns).split(',')]
121 #set $filter_dict['separator'] = str($fi.filter.separator)
122 #silent $input_filters.append($filter_dict)
108 #end if 123 #end if
109 #end for 124 #end for
110 #if $input_filters: 125 #if $input_filters:
111 #set $jtbl['filters'] = $input_filters 126 #set $jtbl['filters'] = $input_filters
112 #end if 127 #end if
129 <param name="filter_type" type="select" label="Filter By"> 144 <param name="filter_type" type="select" label="Filter By">
130 <option value="skip">skip leading lines</option> 145 <option value="skip">skip leading lines</option>
131 <option value="comment">comment char</option> 146 <option value="comment">comment char</option>
132 <option value="regex">by regex expression matching</option> 147 <option value="regex">by regex expression matching</option>
133 <option value="replace">regex replace value in column</option> 148 <option value="replace">regex replace value in column</option>
134 <!-- 149 <option value="prepend_line_num">prepend a line number column</option>
150 <option value="append_line_num">append a line number column</option>
135 <option value="normalize">normalize list columns, replicates row for each item in list</option> 151 <option value="normalize">normalize list columns, replicates row for each item in list</option>
136 -->
137 </param> 152 </param>
138 <when value="skip"> 153 <when value="skip">
139 <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" 154 <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines"
140 help="Leave blank to use the comment lines metadata for this dataset" /> 155 help="Leave blank to use the comment lines metadata for this dataset" />
141 </when> 156 </when>
142 <when value="comment"> 157 <when value="comment">
143 <param name="comment_char" type="text" value="#" label="Comment line starting text"> 158 <param name="comment_char" type="text" value="#" label="Comment line starting text">
144 <sanitizer sanitize="False"/> 159 <sanitizer sanitize="False"/>
145 </param> 160 </param>
146 </when> 161 </when>
162 <when value="prepend_line_num"/>
163 <when value="append_line_num"/>
147 <when value="regex"> 164 <when value="regex">
148 <param name="regex_pattern" type="text" value="" label="regex pattern"> 165 <param name="regex_pattern" type="text" value="" label="regex pattern">
149 <sanitizer sanitize="False"/> 166 <sanitizer sanitize="False"/>
150 </param> 167 </param>
151 <param name="regex_action" type="select" label="action for regex match"> 168 <param name="regex_action" type="select" label="action for regex match">
163 </param> 180 </param>
164 <param name="regex_replace" type="text" value="" label="replacement expression"> 181 <param name="regex_replace" type="text" value="" label="replacement expression">
165 <sanitizer sanitize="False"/> 182 <sanitizer sanitize="False"/>
166 </param> 183 </param>
167 </when> 184 </when>
168 <!--
169 <when value="normalize"> 185 <when value="normalize">
170 <param name="columns" type="data_column" data_ref="table" multiple="True" label="Columns to split" 186 <param name="columns" type="data_column" data_ref="table" multiple="True" label="Columns to split"
171 help=""/> 187 help="If multiple columns are selected, they should have the same length and separator on each line"/>
172 <param name="separator" type="text" value="," label="List item delimiter in column"> 188 <param name="separator" type="text" value="," label="List item delimiter in column">
173 <sanitizer sanitize="False"/> 189 <sanitizer sanitize="False"/>
174 <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator> 190 <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator>
175 </param> 191 </param>
176 </when> 192 </when>
177 -->
178 </conditional> 193 </conditional>
179 </repeat> 194 </repeat>
180 </section> 195 </section>
181 <section name="tbl_opts" expanded="false" title="Table Options"> 196 <section name="tbl_opts" expanded="false" title="Table Options">
182 <param name="table_name" type="text" value="" optional="true" label="Specify Name for Table"> 197 <param name="table_name" type="text" value="" optional="true" label="Specify Name for Table">
183 <help>By default, tables will be named: t1,t2,...,tn (table names must be unique)</help> 198 <help>By default, tables will be named: t1,t2,...,tn (table names must be unique)</help>
184 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator> 199 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator>
185 </param> 200 </param>
186 <param name="col_names" type="text" value="" optional="true" label="Specify Column Names"> 201 <param name="col_names" type="text" value="" optional="true" label="Specify Column Names (comma-separated list)">
187 <help>By default, table columns will be named: c1,c2,c3,...,cn (column names for a table must be unique)</help> 202 <help>By default, table columns will be named: c1,c2,c3,...,cn (column names for a table must be unique)
203 You can override the default names by entering a comma-separated list of names, e.g. ',name1,,,name2' would rename the second and fifth columns.
204 </help>
188 <sanitizer sanitize="False"/> 205 <sanitizer sanitize="False"/>
189 <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be enclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator> 206 <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be enclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator>
190 </param> 207 </param>
191 <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load the columns you have named into database"/> 208 <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load the columns you have named into database"/>
192 <param name="pkey_autoincr" type="text" value="" optional="true" label="Add an auto increment primary key column with this name" 209 <param name="pkey_autoincr" type="text" value="" optional="true" label="Add an auto increment primary key column with this name"
282 Loads tabular datasets into a SQLite_ data base. 299 Loads tabular datasets into a SQLite_ data base.
283 300
284 An existing SQLite_ data base can be used as input, and any selected tabular datasets will be added as new tables in that data base. 301 An existing SQLite_ data base can be used as input, and any selected tabular datasets will be added as new tables in that data base.
285 302
286 303
304 **Input Line Filters**
305
306 As a tabular file is being read, line filters may be applied.
307
308 ::
309
310 - skip leading lines skip the first *number* of lines
311 - comment char omit any lines that start with the specified comment character
312 - by regex expression matching *include/exclude* lines that match the regex expression
313 - regex replace value in column replace a field in a column using a regex substitution (good for date reformatting)
314 - prepend a line number column each line has the ordinal value of the line read by this filter as the first column
315 - append a line number column each line has the ordinal value of the line read by this filter as the last column
316 - normalize list columns replicates the line for each item in the specified list *columns*
317
318
287 **Outputs** 319 **Outputs**
288 320
289 The results of a SQL query are output to the history as a tabular file. 321 The results of a SQL query are output to the history as a tabular file.
290 322
291 The SQLite_ data base can also be saved and output as a dataset in the history. 323 The SQLite_ data base can also be saved and output as a dataset in the history.
375 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545 407 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545
376 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232 408 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232
377 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888 409 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888
378 =========== ========== ========== ===================== ========== ============ 410 =========== ========== ========== ===================== ========== ============
379 411
412
380 Regular_expression_ functions are included for: 413 Regular_expression_ functions are included for:
381 414
382 :: 415 ::
383 416
384 matching: re_match('pattern',column) 417 matching: re_match('pattern',column)
405 SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB" 438 SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB"
406 FROM t1 439 FROM t1
407 WHERE re_search('[hp]er',c4) 440 WHERE re_search('[hp]er',c4)
408 441
409 Results: 442 Results:
443
410 444
411 =========== ========== ========== 445 =========== ========== ==========
412 #FirstName LastName DOB 446 #FirstName LastName DOB
413 =========== ========== ========== 447 =========== ========== ==========
414 Steven Goldfish 04/04/74 448 Steven Goldfish 04/04/74
415 Paula Brown 24/05/78 449 Paula Brown 24/05/78
416 James Smith 20/10/80 450 James Smith 20/10/80
417 =========== ========== ========== 451 =========== ========== ==========
418 452
453
454 **Line Filtering Example**
455 *(Six filters are applied as the following file is read)*
456
457 ::
458
459 Input Tabular File:
460
461 #People with pets
462 Pets FirstName LastName DOB PetNames PetType
463 2 Paula Brown 24/05/78 Rex,Fluff dog,cat
464 1 Steven Jones 04/04/74 Allie cat
465 0 Jane Doe 24/05/78
466 1 James Smith 20/10/80 Spot
467
468
469 Filter 1 - append a line number column:
470
471 #People with pets 1
472 Pets FirstName LastName DOB PetNames PetType 2
473 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3
474 1 Steven Jones 04/04/74 Allie cat 4
475 0 Jane Doe 24/05/78 5
476 1 James Smith 20/10/80 Spot 6
477
478 Filter 2 - by regex expression matching [include]: '^\d+' (include lines that start with a number)
479
480 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3
481 1 Steven Jones 04/04/74 Allie cat 4
482 0 Jane Doe 24/05/78 5
483 1 James Smith 20/10/80 Spot 6
484
485 Filter 3 - append a line number column:
486
487 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 1
488 1 Steven Jones 04/04/74 Allie cat 4 2
489 0 Jane Doe 24/05/78 5 3
490 1 James Smith 20/10/80 Spot 6 4
491
492 Filter 4 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format)
493
494 2 Paula Brown 1978-05-24 Rex,Fluff dog,cat 3 1
495 1 Steven Jones 1974-04-04 Allie cat 4 2
496 0 Jane Doe 1978-05-24 5 3
497 1 James Smith 1980-10-20 Spot 6 4
498
499 Filter 5 - normalize list columns[5,6]:
500
501 2 Paula Brown 1978-05-24 Rex dog 3 1
502 2 Paula Brown 1978-05-24 Fluff cat 3 1
503 1 Steven Jones 1974-04-04 Allie cat 4 2
504 0 Jane Doe 1978-05-24 5 3
505 1 James Smith 1980-10-20 Spot 6 4
506
507 Filter 6 - append a line number column:
508
509 2 Paula Brown 1978-05-24 Rex dog 3 1 1
510 2 Paula Brown 1978-05-24 Fluff cat 3 1 2
511 1 Steven Jones 1974-04-04 Allie cat 4 2 3
512 0 Jane Doe 1978-05-24 5 3 4
513 1 James Smith 1980-10-20 Spot 6 4 5
514
515
516 Table name: pets
517
518 Table columns: Pets,FirstName,LastName,Birthdate,PetNames,PetType,line_num,entry_num,row_num
519
520 Query: SELECT * FROM pets
521
522 Result:
523
524 ===== ========= ======== ========== ======== ======= ======== ========= =======
525 #Pets FirstName LastName Birthdate PetNames PetType line_num entry_num row_num
526 ===== ========= ======== ========== ======== ======= ======== ========= =======
527 2 Paula Brown 1978-05-24 Rex dog 3 1 1
528 2 Paula Brown 1978-05-24 Fluff cat 3 1 2
529 1 Steven Jones 1974-04-04 Allie cat 4 2 3
530 0 Jane Doe 1978-05-24 5 3 4
531 1 James Smith 1980-10-20 Spot 6 4 5
532 ===== ========= ======== ========== ======== ======= ======== ========= =======
533
534
419 .. _Regular_expression: https://docs.python.org/release/2.7/library/re.html 535 .. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
420 .. _SQLite: http://www.sqlite.org/index.html 536 .. _SQLite: http://www.sqlite.org/index.html
421 537
422 ]]></help> 538 ]]></help>
423 </tool> 539 </tool>