Mercurial > repos > jjohnson > query_tabular
changeset 18:b9f797bf4f38 draft
Uploaded
author | jjohnson |
---|---|
date | Wed, 05 Jul 2017 11:51:15 -0400 |
parents | c6e6e224509c |
children | 9d9ab2c69014 |
files | ._query_tabular.py query_tabular.py query_tabular.xml test-data/._pet_normalized_query_results.tsv test-data/pet_normalized_query_results.tsv |
diffstat | 5 files changed, 110 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/query_tabular.py Thu Jun 22 09:15:36 2017 -0400 +++ b/query_tabular.py Wed Jul 05 11:51:15 2017 -0400 @@ -91,6 +91,12 @@ self.func = lambda i,l: '%d\t%s' % (i,l) elif filter_dict['filter'] == 'append_line_num': self.func = lambda i,l: '%s\t%d' % (l.rstrip('\r\n'),i) + elif filter_dict['filter'] == 'prepend_text': + s = filter_dict['column_text'] + self.func = lambda i,l: '%s\t%s' % (s,l) + elif filter_dict['filter'] == 'append_text': + s = filter_dict['column_text'] + self.func = lambda i,l: '%s\t%s' % (l.rstrip('\r\n'),s) elif filter_dict['filter'] == 'skip': cnt = filter_dict['count'] self.func = lambda i,l: l if i > cnt else None
--- a/query_tabular.xml Thu Jun 22 09:15:36 2017 -0400 +++ b/query_tabular.xml Wed Jul 05 11:51:15 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="query_tabular" name="Query Tabular" version="3.1.0"> +<tool id="query_tabular" name="Query Tabular" version="4.0.0"> <description>using sqlite sql</description> <requirements> @@ -94,8 +94,8 @@ #elif $fi.filter.filter_type == 'comment': #set $filter_dict = dict() #set $filter_dict['filter'] = 'regex' - #set $filter_dict['pattern'] = '^' + str($fi.filter.comment_char) - #set $filter_dict['action'] = 'exclude' + #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join([chr(int(x)).replace('|','[|]') for x in (str($fi.filter.comment_char)).split(',')]) + #set $filter_dict['action'] = 'exclude_match' #silent $input_filters.append($filter_dict) #elif $fi.filter.filter_type == 'regex': #set $filter_dict = dict() @@ -106,12 +106,12 @@ #elif $fi.filter.filter_type == 'select_columns': #set $filter_dict = dict() #set $filter_dict['filter'] = str($fi.filter.filter_type) - #set $filter_dict['columns'] = [int(str($ci)) for $ci in str($fi.filter.columns).split(',')] + #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')] #silent $input_filters.append($filter_dict) #elif $fi.filter.filter_type == 'replace': #set $filter_dict = dict() #set $filter_dict['filter'] = str($fi.filter.filter_type) - #set $filter_dict['column'] = int(str($fi.filter.column)) + #set $filter_dict['column'] = int(str($fi.filter.column).replace('c','')) #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) #set $filter_dict['replace'] = str($fi.filter.regex_replace) #silent $input_filters.append($filter_dict) @@ -119,10 +119,15 @@ #set $filter_dict = dict() #set $filter_dict['filter'] = str($fi.filter.filter_type) #silent $input_filters.append($filter_dict) + #elif str($fi.filter.filter_type).endswith('pend_text'): + #set $filter_dict = dict() + #set $filter_dict['filter'] = str($fi.filter.filter_type) + #set $filter_dict['column_text'] = str($fi.filter.column_text) + #silent $input_filters.append($filter_dict) #elif $fi.filter.filter_type == 'normalize': #set $filter_dict = dict() #set $filter_dict['filter'] = str($fi.filter.filter_type) - #set $filter_dict['columns'] = [int(str($ci)) for $ci in str($fi.filter.columns).split(',')] + #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')] #set $filter_dict['separator'] = str($fi.filter.separator) #silent $input_filters.append($filter_dict) #end if @@ -154,6 +159,8 @@ <option value="replace">regex replace value in column</option> <option value="prepend_line_num">prepend a line number column</option> <option value="append_line_num">append a line number column</option> + <option value="prepend_text">prepend a column with the given text</option> + <option value="append_text">append a column with the given text</option> <option value="normalize">normalize list columns, replicates row for each item in list</option> </param> <when value="skip"> @@ -161,12 +168,36 @@ help="Leave blank to use the comment lines metadata for this dataset" /> </when> <when value="comment"> - <param name="comment_char" type="text" value="#" label="Comment line starting text"> - <sanitizer sanitize="False"/> + <param name="comment_char" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are skipped"> + <option value="62">></option> + <option value="64">@</option> + <option value="43">+</option> + <option value="60"><</option> + <option value="42">*</option> + <option value="45">-</option> + <option value="61">=</option> + <option value="124">|</option> + <option value="63">?</option> + <option value="36">$</option> + <option value="46">.</option> + <option value="58">:</option> + <option value="38">&</option> + <option value="37">%</option> + <option value="94">^</option> + <option value="35">#</option> + <option value="33">!</option> </param> </when> <when value="prepend_line_num"/> <when value="append_line_num"/> + <when value="prepend_text"> + <param name="column_text" type="text" value="" label="text for column"> + </param> + </when> + <when value="append_text"> + <param name="column_text" type="text" value="" label="text for column"> + </param> + </when> <when value="regex"> <param name="regex_pattern" type="text" value="" label="regex pattern"> <sanitizer sanitize="False"/> @@ -180,13 +211,15 @@ </when> <when value="select_columns"> <param name="columns" type="text" value="" label="enter column numbers to keep" - help="example: 1,4,2 (selects the first,fourth, and second columns)"> - <validator type="regex" message="Column ordinal positions separated by commas">^([1-9]\d*)(,[1-9]\d*)*$</validator> + help="example: 1,4,2 or c1,c4,c2(selects the first,fourth, and second columns)"> + <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator> </param> </when> <when value="replace"> - <param name="column" type="data_column" data_ref="table" label="Column to replace text" - help=""/> + <param name="column" type="text" value="" label="enter column number to replace" + help="example: 1 or c1 (selects the first column)"> + <validator type="regex" message="Column ordinal position separated by commas">^(c?[1-9]\d*)$</validator> + </param> <param name="regex_pattern" type="text" value="" label="regex pattern"> <sanitizer sanitize="False"/> </param> @@ -195,8 +228,13 @@ </param> </when> <when value="normalize"> - <param name="columns" type="data_column" data_ref="table" multiple="True" label="Columns to split" - help="If multiple columns are selected, they should have the same length and separator on each line"/> + <param name="columns" type="text" value="" label="enter column numbers to normalize"> + <help><![CDATA[ + example: 2,4 or c2,c4 (selects the second, and fourth columns) + If multiple columns are selected, they should have the same length and separator on each line + ]]></help> + <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator> + </param> <param name="separator" type="text" value="," label="List item delimiter in column"> <sanitizer sanitize="False"/> <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator> @@ -300,6 +338,55 @@ <output name="output" file="query_results.tsv"/> </test> + <test> + <repeat name="tables"> + <param name="table" ftype="tabular" value="pets.tsv"/> + <repeat name="linefilters"> + <param name="filter_type" value="comment"/> + <param name="comment_char" value="35"/> + </repeat> + <repeat name="linefilters"> + <param name="filter_type" value="append_line_num"/> + </repeat> + <repeat name="linefilters"> + <param name="filter_type" value="select_columns"/> + <param name="columns" value="7,2,3,4,1"/> + </repeat> + <repeat name="linefilters"> + <param name="filter_type" value="replace"/> + <param name="column" value="c4"/> + <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/> + <param name="regex_replace" value="19\3-\2-\1"/> + </repeat> + <param name="table_name" value="people"/> + <param name="col_names" value="id,first,last,dob,pets"/> + + </repeat> + <repeat name="tables"> + <param name="table" ftype="tabular" value="pets.tsv"/> + <repeat name="linefilters"> + <param name="filter_type" value="comment"/> + <param name="comment_char" value="35"/> + </repeat> + <repeat name="linefilters"> + <param name="filter_type" value="append_line_num"/> + </repeat> + <repeat name="linefilters"> + <param name="filter_type" value="select_columns"/> + <param name="columns" value="c7,c5,c6"/> + </repeat> + <repeat name="linefilters"> + <param name="filter_type" value="normalize"/> + <param name="columns" value="c2,c3"/> + <param name="separator" value=","/> + </repeat> + <param name="table_name" value="pet"/> + <param name="col_names" value="id,name,animal"/> + </repeat> + <param name="sqlquery" value="SELECT people.id,first,last,dob,name,animal,pets FROM people JOIN pet ON people.id = pet.id WHERE animal = 'cat'"/> + <output name="output" file="pet_normalized_query_results.tsv"/> + </test> + </tests> <help><![CDATA[ =============