Mercurial > repos > jjohnson > query_tabular
comparison query_tabular.xml @ 11:fd16243931d6 draft
Uploaded
author | jjohnson |
---|---|
date | Fri, 12 May 2017 10:18:42 -0400 |
parents | ae27dab228b8 |
children | 3003fe70f297 |
comparison
equal
deleted
inserted
replaced
10:e84d1c3bf4fe | 11:fd16243931d6 |
---|---|
1 <tool id="query_tabular" name="Query Tabular" version="2.0.0"> | 1 <tool id="query_tabular" name="Query Tabular" version="3.0.0"> |
2 <description>using sqlite sql</description> | 2 <description>using sqlite sql</description> |
3 | 3 |
4 <requirements> | 4 <requirements> |
5 </requirements> | 5 </requirements> |
6 <stdio> | 6 <stdio> |
77 #set $jtbl['index'] = $idx_non | 77 #set $jtbl['index'] = $idx_non |
78 #end if | 78 #end if |
79 #set $input_filters = [] | 79 #set $input_filters = [] |
80 #for $fi in $tbl.input_opts.linefilters: | 80 #for $fi in $tbl.input_opts.linefilters: |
81 #if $fi.filter.filter_type == 'skip': | 81 #if $fi.filter.filter_type == 'skip': |
82 #set $skip_lines = None | |
82 #if str($fi.filter.skip_lines) != '': | 83 #if str($fi.filter.skip_lines) != '': |
83 #set $jtbl['comment_lines'] = int($fi.filter.skip_lines) | 84 #set $skip_lines = int($fi.filter.skip_lines) |
84 #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0: | 85 #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0: |
85 #set $jtbl['comment_lines'] = int($tbl.table.metadata.comment_lines) | 86 #set $skip_lines = int($tbl.table.metadata.comment_lines) |
87 #end if | |
88 #if $skip_lines is not None: | |
89 #set $filter_dict = dict() | |
90 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
91 #set $filter_dict['count'] = $skip_lines | |
92 #silent $input_filters.append($filter_dict) | |
86 #end if | 93 #end if |
87 #elif $fi.filter.filter_type == 'comment': | 94 #elif $fi.filter.filter_type == 'comment': |
88 #set $jtbl['comment_char'] = str($fi.filter.comment_char) | 95 #set $filter_dict = dict() |
96 #set $filter_dict['filter'] = 'regex' | |
97 #set $filter_dict['pattern'] = '^' + str($fi.filter.comment_char) | |
98 #set $filter_dict['action'] = 'exclude' | |
99 #silent $input_filters.append($filter_dict) | |
89 #elif $fi.filter.filter_type == 'regex': | 100 #elif $fi.filter.filter_type == 'regex': |
90 #set $filter_dict = dict() | 101 #set $filter_dict = dict() |
91 #set $filter_dict['filter'] = str($fi.filter.filter_type) | 102 #set $filter_dict['filter'] = str($fi.filter.filter_type) |
92 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) | 103 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) |
93 #set $filter_dict['action'] = str($fi.filter.regex_action) | 104 #set $filter_dict['action'] = str($fi.filter.regex_action) |
97 #set $filter_dict['filter'] = str($fi.filter.filter_type) | 108 #set $filter_dict['filter'] = str($fi.filter.filter_type) |
98 #set $filter_dict['column'] = int(str($fi.filter.column)) | 109 #set $filter_dict['column'] = int(str($fi.filter.column)) |
99 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) | 110 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) |
100 #set $filter_dict['replace'] = str($fi.filter.regex_replace) | 111 #set $filter_dict['replace'] = str($fi.filter.regex_replace) |
101 #silent $input_filters.append($filter_dict) | 112 #silent $input_filters.append($filter_dict) |
102 ## #elif $fi.filter.filter_type == 'normalize': | 113 #elif str($fi.filter.filter_type).endswith('pend_line_num'): |
103 ## #set $filter_dict = dict() | 114 #set $filter_dict = dict() |
104 ## #set $filter_dict['filter'] = str($fi.filter.filter_type) | 115 #set $filter_dict['filter'] = str($fi.filter.filter_type) |
105 ## #set $filter_dict['columns'] = [int(str($ci)) for $ci in str($fi.filter.columns).split(',')] | 116 #silent $input_filters.append($filter_dict) |
106 ## #set $filter_dict['separator'] = str($fi.filter.separator) | 117 #elif $fi.filter.filter_type == 'normalize': |
107 ## #silent $input_filters.append($filter_dict) | 118 #set $filter_dict = dict() |
119 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
120 #set $filter_dict['columns'] = [int(str($ci)) for $ci in str($fi.filter.columns).split(',')] | |
121 #set $filter_dict['separator'] = str($fi.filter.separator) | |
122 #silent $input_filters.append($filter_dict) | |
108 #end if | 123 #end if |
109 #end for | 124 #end for |
110 #if $input_filters: | 125 #if $input_filters: |
111 #set $jtbl['filters'] = $input_filters | 126 #set $jtbl['filters'] = $input_filters |
112 #end if | 127 #end if |
129 <param name="filter_type" type="select" label="Filter By"> | 144 <param name="filter_type" type="select" label="Filter By"> |
130 <option value="skip">skip leading lines</option> | 145 <option value="skip">skip leading lines</option> |
131 <option value="comment">comment char</option> | 146 <option value="comment">comment char</option> |
132 <option value="regex">by regex expression matching</option> | 147 <option value="regex">by regex expression matching</option> |
133 <option value="replace">regex replace value in column</option> | 148 <option value="replace">regex replace value in column</option> |
134 <!-- | 149 <option value="prepend_line_num">prepend a line number column</option> |
150 <option value="append_line_num">append a line number column</option> | |
135 <option value="normalize">normalize list columns, replicates row for each item in list</option> | 151 <option value="normalize">normalize list columns, replicates row for each item in list</option> |
136 --> | |
137 </param> | 152 </param> |
138 <when value="skip"> | 153 <when value="skip"> |
139 <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" | 154 <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" |
140 help="Leave blank to use the comment lines metadata for this dataset" /> | 155 help="Leave blank to use the comment lines metadata for this dataset" /> |
141 </when> | 156 </when> |
142 <when value="comment"> | 157 <when value="comment"> |
143 <param name="comment_char" type="text" value="#" label="Comment line starting text"> | 158 <param name="comment_char" type="text" value="#" label="Comment line starting text"> |
144 <sanitizer sanitize="False"/> | 159 <sanitizer sanitize="False"/> |
145 </param> | 160 </param> |
146 </when> | 161 </when> |
162 <when value="prepend_line_num"/> | |
163 <when value="append_line_num"/> | |
147 <when value="regex"> | 164 <when value="regex"> |
148 <param name="regex_pattern" type="text" value="" label="regex pattern"> | 165 <param name="regex_pattern" type="text" value="" label="regex pattern"> |
149 <sanitizer sanitize="False"/> | 166 <sanitizer sanitize="False"/> |
150 </param> | 167 </param> |
151 <param name="regex_action" type="select" label="action for regex match"> | 168 <param name="regex_action" type="select" label="action for regex match"> |
163 </param> | 180 </param> |
164 <param name="regex_replace" type="text" value="" label="replacement expression"> | 181 <param name="regex_replace" type="text" value="" label="replacement expression"> |
165 <sanitizer sanitize="False"/> | 182 <sanitizer sanitize="False"/> |
166 </param> | 183 </param> |
167 </when> | 184 </when> |
168 <!-- | |
169 <when value="normalize"> | 185 <when value="normalize"> |
170 <param name="columns" type="data_column" data_ref="table" multiple="True" label="Columns to split" | 186 <param name="columns" type="data_column" data_ref="table" multiple="True" label="Columns to split" |
171 help=""/> | 187 help="If multiple columns are selected, they should have the same length and separator on each line"/> |
172 <param name="separator" type="text" value="," label="List item delimiter in column"> | 188 <param name="separator" type="text" value="," label="List item delimiter in column"> |
173 <sanitizer sanitize="False"/> | 189 <sanitizer sanitize="False"/> |
174 <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator> | 190 <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator> |
175 </param> | 191 </param> |
176 </when> | 192 </when> |
177 --> | |
178 </conditional> | 193 </conditional> |
179 </repeat> | 194 </repeat> |
180 </section> | 195 </section> |
181 <section name="tbl_opts" expanded="false" title="Table Options"> | 196 <section name="tbl_opts" expanded="false" title="Table Options"> |
182 <param name="table_name" type="text" value="" optional="true" label="Specify Name for Table"> | 197 <param name="table_name" type="text" value="" optional="true" label="Specify Name for Table"> |
183 <help>By default, tables will be named: t1,t2,...,tn (table names must be unique)</help> | 198 <help>By default, tables will be named: t1,t2,...,tn (table names must be unique)</help> |
184 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator> | 199 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator> |
185 </param> | 200 </param> |
186 <param name="col_names" type="text" value="" optional="true" label="Specify Column Names"> | 201 <param name="col_names" type="text" value="" optional="true" label="Specify Column Names (comma-separated list)"> |
187 <help>By default, table columns will be named: c1,c2,c3,...,cn (column names for a table must be unique)</help> | 202 <help>By default, table columns will be named: c1,c2,c3,...,cn (column names for a table must be unique) |
203 You can override the default names by entering a comma-separated list of names, e.g. ',name1,,,name2' would rename the second and fifth columns. | |
204 </help> | |
188 <sanitizer sanitize="False"/> | 205 <sanitizer sanitize="False"/> |
189 <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be eclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator> | 206 <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be eclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator> |
190 </param> | 207 </param> |
191 <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load the columns you have named into database"/> | 208 <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load the columns you have named into database"/> |
192 <param name="pkey_autoincr" type="text" value="" optional="true" label="Add an auto increment primary key column with this name" | 209 <param name="pkey_autoincr" type="text" value="" optional="true" label="Add an auto increment primary key column with this name" |
282 Loads tabular datasets into a SQLite_ data base. | 299 Loads tabular datasets into a SQLite_ data base. |
283 | 300 |
284 An existing SQLite_ data base can be used as input, and any selected tabular datasets will be added as new tables in that data base. | 301 An existing SQLite_ data base can be used as input, and any selected tabular datasets will be added as new tables in that data base. |
285 | 302 |
286 | 303 |
304 **Input Line Filters** | |
305 | |
306 As a tabular file is being read, line filters may be applied. | |
307 | |
308 :: | |
309 | |
310 - skip leading lines skip the first *number* of lines | |
311 - comment char omit any lines that start with the specified comment character | |
312 - by regex expression matching *include/exclude* lines that match the regex expression | |
313 - regex replace value in column replace a field in a column using a regex substitution (good for date reformatting) | |
314 - prepend a line number column each line has the ordinal value of the line read by this filter as the first column | |
315 - append a line number column each line has the ordinal value of the line read by this filter as the last column | |
316 - normalize list columns replicates the line for each item in the specified list *columns* | |
317 | |
318 | |
287 **Outputs** | 319 **Outputs** |
288 | 320 |
289 The results of a SQL query are output to the history as a tabular file. | 321 The results of a SQL query are output to the history as a tabular file. |
290 | 322 |
291 The SQLite_ data base can also be saved and output as a dataset in the history. | 323 The SQLite_ data base can also be saved and output as a dataset in the history. |
375 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545 | 407 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545 |
376 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232 | 408 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232 |
377 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888 | 409 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888 |
378 =========== ========== ========== ===================== ========== ============ | 410 =========== ========== ========== ===================== ========== ============ |
379 | 411 |
412 | |
380 Regular_expression_ functions are included for: | 413 Regular_expression_ functions are included for: |
381 | 414 |
382 :: | 415 :: |
383 | 416 |
384 matching: re_match('pattern',column) | 417 matching: re_match('pattern',column) |
405 SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB" | 438 SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB" |
406 FROM t1 | 439 FROM t1 |
407 WHERE re_search('[hp]er',c4) | 440 WHERE re_search('[hp]er',c4) |
408 | 441 |
409 Results: | 442 Results: |
443 | |
410 | 444 |
411 =========== ========== ========== | 445 =========== ========== ========== |
412 #FirstName LastName DOB | 446 #FirstName LastName DOB |
413 =========== ========== ========== | 447 =========== ========== ========== |
414 Steven Goldfish 04/04/74 | 448 Steven Goldfish 04/04/74 |
415 Paula Brown 24/05/78 | 449 Paula Brown 24/05/78 |
416 James Smith 20/10/80 | 450 James Smith 20/10/80 |
417 =========== ========== ========== | 451 =========== ========== ========== |
418 | 452 |
453 | |
454 **Line Filtering Example** | |
455 *(Six filters are applied as the following file is read)* | |
456 | |
457 :: | |
458 | |
459 Input Tabular File: | |
460 | |
461 #People with pets | |
462 Pets FirstName LastName DOB PetNames PetType | |
463 2 Paula Brown 24/05/78 Rex,Fluff dog,cat | |
464 1 Steven Jones 04/04/74 Allie cat | |
465 0 Jane Doe 24/05/78 | |
466 1 James Smith 20/10/80 Spot | |
467 | |
468 | |
469 Filter 1 - append a line number column: | |
470 | |
471 #People with pets 1 | |
472 Pets FirstName LastName DOB PetNames PetType 2 | |
473 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 | |
474 1 Steven Jones 04/04/74 Allie cat 4 | |
475 0 Jane Doe 24/05/78 5 | |
476 1 James Smith 20/10/80 Spot 6 | |
477 | |
478 Filter 2 - by regex expression matching [include]: '^\d+' (include lines that start with a number) | |
479 | |
480 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 | |
481 1 Steven Jones 04/04/74 Allie cat 4 | |
482 0 Jane Doe 24/05/78 5 | |
483 1 James Smith 20/10/80 Spot 6 | |
484 | |
485 Filter 3 - append a line number column: | |
486 | |
487 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 1 | |
488 1 Steven Jones 04/04/74 Allie cat 4 2 | |
489 0 Jane Doe 24/05/78 5 3 | |
490 1 James Smith 20/10/80 Spot 6 4 | |
491 | |
492 Filter 4 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format) | |
493 | |
494 2 Paula Brown 1978-05-24 Rex,Fluff dog,cat 3 1 | |
495 1 Steven Jones 1974-04-04 Allie cat 4 2 | |
496 0 Jane Doe 1978-05-24 5 3 | |
497 1 James Smith 1980-10-20 Spot 6 4 | |
498 | |
499 Filter 5 - normalize list columns[5,6]: | |
500 | |
501 2 Paula Brown 1978-05-24 Rex dog 3 1 | |
502 2 Paula Brown 1978-05-24 Fluff cat 3 1 | |
503 1 Steven Jones 1974-04-04 Allie cat 4 2 | |
504 0 Jane Doe 1978-05-24 5 3 | |
505 1 James Smith 1980-10-20 Spot 6 4 | |
506 | |
507 Filter 6 - append a line number column: | |
508 | |
509 2 Paula Brown 1978-05-24 Rex dog 3 1 1 | |
510 2 Paula Brown 1978-05-24 Fluff cat 3 1 2 | |
511 1 Steven Jones 1974-04-04 Allie cat 4 2 3 | |
512 0 Jane Doe 1978-05-24 5 3 4 | |
513 1 James Smith 1980-10-20 Spot 6 4 5 | |
514 | |
515 | |
516 Table name: pets | |
517 | |
518 Table columns: Pets,FirstName,LastName,Birthdate,PetNames,PetType,line_num,entry_num,row_num | |
519 | |
520 Query: SELECT * FROM pets | |
521 | |
522 Result: | |
523 | |
524 ===== ========= ======== ========== ======== ======= ======== ========= ======= | |
525 #Pets FirstName LastName Birthdate PetNames PetType line_num entry_num row_num | |
526 ===== ========= ======== ========== ======== ======= ======== ========= ======= | |
527 2 Paula Brown 1978-05-24 Rex dog 3 1 1 | |
528 2 Paula Brown 1978-05-24 Fluff cat 3 1 2 | |
529 1 Steven Jones 1974-04-04 Allie cat 4 2 3 | |
530 0 Jane Doe 1978-05-24 5 3 4 | |
531 1 James Smith 1980-10-20 Spot 6 4 5 | |
532 ===== ========= ======== ========== ======== ======= ======== ========= ======= | |
533 | |
534 | |
419 .. _Regular_expression: https://docs.python.org/release/2.7/library/re.html | 535 .. _Regular_expression: https://docs.python.org/release/2.7/library/re.html |
420 .. _SQLite: http://www.sqlite.org/index.html | 536 .. _SQLite: http://www.sqlite.org/index.html |
421 | 537 |
422 ]]></help> | 538 ]]></help> |
423 </tool> | 539 </tool> |