Previous changeset 10:e84d1c3bf4fe (2017-03-02) Next changeset 12:a6aec10e5dbf (2017-05-12) |
Commit message:
Uploaded |
modified:
query_tabular.py query_tabular.xml |
added:
._query_tabular.py ._query_tabular.xml test-data/pets.tsv |
b |
diff -r e84d1c3bf4fe -r fd16243931d6 ._query_tabular.py |
b |
Binary file ._query_tabular.py has changed |
b |
diff -r e84d1c3bf4fe -r fd16243931d6 ._query_tabular.xml |
b |
Binary file ._query_tabular.xml has changed |
b |
diff -r e84d1c3bf4fe -r fd16243931d6 query_tabular.py --- a/query_tabular.py Thu Mar 02 13:43:25 2017 -0500 +++ b/query_tabular.py Fri May 12 10:18:42 2017 -0400 |
[ |
@@ -64,31 +64,71 @@ self.source = source self.filter_dict = filter_dict # print >> sys.stderr, 'LineFilter %s' % filter_dict if filter_dict else 'NONE' - self.func = lambda l: l.rstrip('\r\n') if l else None + self.func = lambda i,l: l.rstrip('\r\n') if l else None + self.src_lines = [] + self.src_line_cnt = 0 if not filter_dict: return if filter_dict['filter'] == 'regex': rgx = re.compile(filter_dict['pattern']) if filter_dict['action'] == 'exclude_match': - self.func = lambda l: l if not rgx.match(l) else None + self.func = lambda i,l: l if not rgx.match(l) else None elif filter_dict['action'] == 'include_match': - self.func = lambda l: l if rgx.match(l) else None + self.func = lambda i,l: l if rgx.match(l) else None elif filter_dict['action'] == 'exclude_find': - self.func = lambda l: l if not rgx.search(l) else None + self.func = lambda i,l: l if not rgx.search(l) else None elif filter_dict['action'] == 'include_find': - self.func = lambda l: l if rgx.search(l) else None + self.func = lambda i,l: l if rgx.search(l) else None elif filter_dict['filter'] == 'replace': p = filter_dict['pattern'] r = filter_dict['replace'] c = int(filter_dict['column']) - 1 - self.func = lambda l: '\t'.join([x if i != c else re.sub(p,r,x) for i,x in enumerate(l.split('\t'))]) + self.func = lambda i,l: '\t'.join([x if i != c else re.sub(p,r,x) for i,x in enumerate(l.split('\t'))]) + elif filter_dict['filter'] == 'prepend_line_num': + self.func = lambda i,l: '%d\t%s' % (i,l) + elif filter_dict['filter'] == 'append_line_num': + self.func = lambda i,l: '%s\t%d' % (l.rstrip('\r\n'),i) + elif filter_dict['filter'] == 'skip': + cnt = filter_dict['count'] + self.func = lambda i,l: l if i > cnt else None + elif filter_dict['filter'] == 'normalize': + cols = [int(c) - 1 for c in filter_dict['columns']] + sep = filter_dict['separator'] + self.func = lambda i,l: self.normalize(l,cols,sep) def __iter__(self): return self - def next(self): + def normalize(self,line,split_cols,sep): + lines = [] + fields = line.rstrip('\r\n').split('\t') + split_fields = dict() + cnt = 0 + for c in split_cols: + if c < len(fields): + split_fields[c] = fields[c].split(sep) + cnt = max(cnt, len(split_fields[c])) + if cnt == 0: + lines.append('\t'.join(fields)) + else: + for n in range(0, cnt): + flds = [x if c not in split_cols else split_fields[c][n] if n < len(split_fields[c]) else '' for (c, x) in enumerate(fields)] + lines.append('\t'.join(flds)) + return lines + def get_lines(self): for i,next_line in enumerate(self.source): - line = self.func(next_line) + self.src_line_cnt += 1 + line = self.func(self.src_line_cnt,next_line) + # print >> sys.stderr, 'LineFilter %s: %d %s' % (str(self.filter_dict),self.src_line_cnt,line) if line: - return line + if isinstance(line,list): + self.src_lines.extend(line) + else: + self.src_lines.append(line) + return + def next(self): + if not self.src_lines: + self.get_lines() + if self.src_lines: + return self.src_lines.pop(0) raise StopIteration |
b |
diff -r e84d1c3bf4fe -r fd16243931d6 query_tabular.xml --- a/query_tabular.xml Thu Mar 02 13:43:25 2017 -0500 +++ b/query_tabular.xml Fri May 12 10:18:42 2017 -0400 |
[ |
b'@@ -1,4 +1,4 @@\n-<tool id="query_tabular" name="Query Tabular" version="2.0.0">\n+<tool id="query_tabular" name="Query Tabular" version="3.0.0">\n <description>using sqlite sql</description>\n \n <requirements>\n@@ -79,13 +79,24 @@\n #set $input_filters = []\n #for $fi in $tbl.input_opts.linefilters:\n #if $fi.filter.filter_type == \'skip\':\n+ #set $skip_lines = None\n #if str($fi.filter.skip_lines) != \'\':\n- #set $jtbl[\'comment_lines\'] = int($fi.filter.skip_lines)\n+ #set $skip_lines = int($fi.filter.skip_lines)\n #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:\n- #set $jtbl[\'comment_lines\'] = int($tbl.table.metadata.comment_lines)\n+ #set $skip_lines = int($tbl.table.metadata.comment_lines)\n+ #end if\n+ #if $skip_lines is not None:\n+ #set $filter_dict = dict()\n+ #set $filter_dict[\'filter\'] = str($fi.filter.filter_type)\n+ #set $filter_dict[\'count\'] = $skip_lines\n+ #silent $input_filters.append($filter_dict)\n #end if\n #elif $fi.filter.filter_type == \'comment\':\n- #set $jtbl[\'comment_char\'] = str($fi.filter.comment_char)\n+ #set $filter_dict = dict()\n+ #set $filter_dict[\'filter\'] = \'regex\'\n+ #set $filter_dict[\'pattern\'] = \'^\' + str($fi.filter.comment_char)\n+ #set $filter_dict[\'action\'] = \'exclude\'\n+ #silent $input_filters.append($filter_dict)\n #elif $fi.filter.filter_type == \'regex\':\n #set $filter_dict = dict()\n #set $filter_dict[\'filter\'] = str($fi.filter.filter_type)\n@@ -99,12 +110,16 @@\n #set $filter_dict[\'pattern\'] = str($fi.filter.regex_pattern)\n #set $filter_dict[\'replace\'] = str($fi.filter.regex_replace)\n #silent $input_filters.append($filter_dict)\n- ## #elif $fi.filter.filter_type == \'normalize\':\n- ## #set $filter_dict = dict()\n- ## #set $filter_dict[\'filter\'] = str($fi.filter.filter_type)\n- ## #set $filter_dict[\'columns\'] = [int(str($ci)) for $ci in str($fi.filter.columns).split(\',\')]\n- ## #set $filter_dict[\'separator\'] = str($fi.filter.separator)\n- ## #silent $input_filters.append($filter_dict)\n+ #elif str($fi.filter.filter_type).endswith(\'pend_line_num\'):\n+ #set $filter_dict = dict()\n+ #set $filter_dict[\'filter\'] = str($fi.filter.filter_type)\n+ #silent $input_filters.append($filter_dict)\n+ #elif $fi.filter.filter_type == \'normalize\':\n+ #set $filter_dict = dict()\n+ #set $filter_dict[\'filter\'] = str($fi.filter.filter_type)\n+ #set $filter_dict[\'columns\'] = [int(str($ci)) for $ci in str($fi.filter.columns).split(\',\')]\n+ #set $filter_dict[\'separator\'] = str($fi.filter.separator)\n+ #silent $input_filters.append($filter_dict)\n #end if\n #end for\n #if $input_filters:\n@@ -131,9 +146,9 @@\n <option value="comment">comment char</option>\n <option value="regex">by regex expression matching</option>\n <option value="replace">regex replace value in column</option>\n- <!--\n+ <option value="prepend_line_num">prepend a line number column</option>\n+ <option value="append_line_num">append a line number column</option>\n <option value="normalize">normalize list columns, replicates row for each item in list</option>\n- -->\n </param>\n <when value="skip">\n <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" \n@@ -144,6 +159,8 @@\n <sanitizer sanitize="False"/>\n </param>\n </when>\n+ <when value="prepend_line_num"/>\n+ <when value="append_line_num"/>\n <when value="regex">\n <pa'..b"rstName LastName DOB PetNames PetType\n+ 2 Paula Brown 24/05/78 Rex,Fluff dog,cat\n+ 1 Steven Jones 04/04/74 Allie cat\n+ 0 Jane Doe 24/05/78 \n+ 1 James Smith 20/10/80 Spot\n+\n+\n+ Filter 1 - append a line number column:\n+\n+ #People with pets 1\n+ Pets FirstName LastName DOB PetNames PetType 2\n+ 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3\n+ 1 Steven Jones 04/04/74 Allie cat 4\n+ 0 Jane Doe 24/05/78 5\n+ 1 James Smith 20/10/80 Spot 6\n+\n+ Filter 2 - by regex expression matching [include]: '^\\d+' (include lines that start with a number) \n+\n+ 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3\n+ 1 Steven Jones 04/04/74 Allie cat 4\n+ 0 Jane Doe 24/05/78 5\n+ 1 James Smith 20/10/80 Spot 6\n+\n+ Filter 3 - append a line number column:\n+\n+ 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 1\n+ 1 Steven Jones 04/04/74 Allie cat 4 2\n+ 0 Jane Doe 24/05/78 5 3\n+ 1 James Smith 20/10/80 Spot 6 4\n+\n+ Filter 4 - regex replace value in column[4]: '(\\d+)/(\\d+)/(\\d+)' '19\\3-\\2-\\1' (convert dates to sqlite format) \n+\n+ 2 Paula Brown 1978-05-24 Rex,Fluff dog,cat 3 1\n+ 1 Steven Jones 1974-04-04 Allie cat 4 2\n+ 0 Jane Doe 1978-05-24 5 3\n+ 1 James Smith 1980-10-20 Spot 6 4\n+\n+ Filter 5 - normalize list columns[5,6]:\n+\n+ 2 Paula Brown 1978-05-24 Rex dog 3 1\n+ 2 Paula Brown 1978-05-24 Fluff cat 3 1\n+ 1 Steven Jones 1974-04-04 Allie cat 4 2\n+ 0 Jane Doe 1978-05-24 5 3\n+ 1 James Smith 1980-10-20 Spot 6 4\n+\n+ Filter 6 - append a line number column:\n+\n+ 2 Paula Brown 1978-05-24 Rex dog 3 1 1\n+ 2 Paula Brown 1978-05-24 Fluff cat 3 1 2\n+ 1 Steven Jones 1974-04-04 Allie cat 4 2 3\n+ 0 Jane Doe 1978-05-24 5 3 4\n+ 1 James Smith 1980-10-20 Spot 6 4 5\n+\n+\n+ Table name: pets\n+\n+ Table columns: Pets,FirstName,LastName,Birthdate,PetNames,PetType,line_num,entry_num,row_num\n+\n+ Query: SELECT * FROM pets \n+\n+ Result:\n+\n+ ===== ========= ======== ========== ======== ======= ======== ========= ======= \n+ #Pets FirstName LastName Brithdate PetNames PetType line_num entry_num row_num\n+ ===== ========= ======== ========== ======== ======= ======== ========= ======= \n+ 2 Paula Brown 1978-05-24 Rex dog 3 1 1\n+ 2 Paula Brown 1978-05-24 Fluff cat 3 1 2\n+ 1 Steven Jones 1974-04-04 Allie cat 4 2 3\n+ 0 Jane Doe 1978-05-24 5 3 4\n+ 1 James Smith 1980-10-20 Spot 6 4 5 \n+ ===== ========= ======== ========== ======== ======= ======== ========= ======= \n+\n+\n .. _Regular_expression: https://docs.python.org/release/2.7/library/re.html\n .. _SQLite: http://www.sqlite.org/index.html\n \n" |
b |
diff -r e84d1c3bf4fe -r fd16243931d6 test-data/pets.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pets.tsv Fri May 12 10:18:42 2017 -0400 |
b |
@@ -0,0 +1,7 @@ +#People with pets +Pets FirstName LastName DOB PetNames PetType +2 Paula Brown 24/05/78 Rex,Fluff dog,cat +1 Steven Jones 04/04/74 Allie cat +0 Jane Doe 24/05/78 +1 James Smith 20/10/80 Spot + |