Mercurial > repos > iuc > sqlite_to_tabular
annotate filters.py @ 11:bce29ec10b78 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit d624cde6382bc326a5ae318482e16e643ef7d7d1"
author | iuc |
---|---|
date | Fri, 12 Feb 2021 21:20:55 +0000 |
parents | 4678715f7147 |
children | c29d2f80a066 |
rev | line source |
---|---|
0
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
2 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
3 from __future__ import print_function |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
4 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
5 import re |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
6 import sys |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
7 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
8 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
class LineFilter(object):
    """
    Iterator that applies one line filter to a source of text lines.

    ``source`` is any iterable of lines: an open file, a list, or another
    LineFilter (which is how filters are chained).  ``filter_dict`` picks
    the transformation through its 'filter' key; a falsy ``filter_dict``
    only strips the trailing line terminator.

    The chosen transformation is stored in ``self.func`` and is called as
    ``func(line_number, line)`` with a 1-based line number.  It returns the
    transformed line, a list of lines (normalize), or a falsy value, in
    which case the line is dropped.
    """

    def __init__(self, source, filter_dict):
        """
        Select the filter function described by ``filter_dict``.

        Recognized 'filter' values: regex, select_columns, replace,
        prepend_line_num, append_line_num, prepend_text, append_text,
        skip and normalize.  Any other value (or an unrecognized regex
        'action') leaves the default terminator-stripping function in
        place.  Column numbers in ``filter_dict`` are 1-based.
        """
        self.source = source
        # Take a single iterator up front: get_lines() is called repeatedly
        # and must resume where it stopped, even for re-iterable sources
        # such as lists (iter() of a file or LineFilter is the object itself).
        self._source_iter = iter(source)
        self.filter_dict = filter_dict
        self.func = lambda i, l: l.rstrip('\r\n') if l else None
        self.src_lines = []
        self.src_line_cnt = 0
        if not filter_dict:
            return
        if filter_dict['filter'] == 'regex':
            rgx = re.compile(filter_dict['pattern'])
            # *_match anchors at the start of the line, *_find searches
            # anywhere in the line.
            if filter_dict['action'] == 'exclude_match':
                self.func = lambda i, l: l if not rgx.match(l) else None
            elif filter_dict['action'] == 'include_match':
                self.func = lambda i, l: l if rgx.match(l) else None
            elif filter_dict['action'] == 'exclude_find':
                self.func = lambda i, l: l if not rgx.search(l) else None
            elif filter_dict['action'] == 'include_find':
                self.func = lambda i, l: l if rgx.search(l) else None
        elif filter_dict['filter'] == 'select_columns':
            cols = [int(c) - 1 for c in filter_dict['columns']]
            self.func = lambda i, l: self.select_columns(l, cols)
        elif filter_dict['filter'] == 'replace':
            p = filter_dict['pattern']
            r = filter_dict['replace']
            c = int(filter_dict['column']) - 1
            add = filter_dict.get('add')
            if add not in ('prepend', 'append', 'before', 'after'):
                # No (valid) 'add' mode: substitute in place in column c.
                self.func = lambda i, l: '\t'.join(
                    [x if j != c else re.sub(p, r, x)
                     for j, x in enumerate(l.split('\t'))])
            else:
                # Index at which replace_add() inserts the new field.
                # Inserting at index c puts it immediately before column c,
                # c + 1 immediately after it; None means "at the end".
                a = {'prepend': 0,
                     'before': c,
                     'after': c + 1,
                     'append': None}[add]
                self.func = lambda i, l: self.replace_add(l, p, r, c, a)
        elif filter_dict['filter'] == 'prepend_line_num':
            self.func = lambda i, l: '%d\t%s' % (i, l)
        elif filter_dict['filter'] == 'append_line_num':
            self.func = lambda i, l: '%s\t%d' % (l.rstrip('\r\n'), i)
        elif filter_dict['filter'] == 'prepend_text':
            s = filter_dict['column_text']
            self.func = lambda i, l: '%s\t%s' % (s, l)
        elif filter_dict['filter'] == 'append_text':
            s = filter_dict['column_text']
            self.func = lambda i, l: '%s\t%s' % (l.rstrip('\r\n'), s)
        elif filter_dict['filter'] == 'skip':
            cnt = filter_dict['count']
            self.func = lambda i, l: l if i > cnt else None
        elif filter_dict['filter'] == 'normalize':
            cols = [int(c) - 1 for c in filter_dict['columns']]
            sep = filter_dict['separator']
            self.func = lambda i, l: self.normalize(l, cols, sep)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next filtered line; raise StopIteration when done."""
        if not self.src_lines:
            self.get_lines()
        if self.src_lines:
            return self.src_lines.pop(0)
        raise StopIteration

    # Python 2 iterator protocol.
    next = __next__

    def select_columns(self, line, cols):
        """Return ``line`` reduced to the 0-based columns in ``cols``."""
        fields = line.split('\t')
        return '\t'.join([fields[x] for x in cols])

    def replace_add(self, line, pat, rep, col, pos):
        """
        Insert a new field derived from column ``col`` into ``line``.

        The new field is ``re.sub(pat, rep, fields[col])`` (tabs replaced
        by spaces) when ``pat`` is found in that column, otherwise an
        empty string.  It is inserted at index ``pos``, or appended when
        ``pos`` is None.
        """
        fields = line.rstrip('\r\n').split('\t')
        i = pos if pos is not None else len(fields)
        val = ''
        if col < len(fields) and re.search(pat, fields[col]):
            val = re.sub(pat, rep, fields[col]).replace('\t', ' ')
        return '\t'.join(fields[:i] + [val] + fields[i:])

    def normalize(self, line, split_cols, sep):
        """
        Expand multi-valued columns of ``line`` into several lines.

        Each 0-based column in ``split_cols`` is split on ``sep``.  One
        output line is produced per value position; columns that have
        fewer values than the longest one are padded with empty strings,
        and all other columns are repeated unchanged.  Returns a list of
        lines.
        """
        lines = []
        fields = line.rstrip('\r\n').split('\t')
        split_fields = dict()
        cnt = 0
        for c in split_cols:
            if c < len(fields):
                split_fields[c] = fields[c].split(sep)
                cnt = max(cnt, len(split_fields[c]))
        if cnt == 0:
            # None of the requested columns exists on this line.
            lines.append('\t'.join(fields))
        else:
            for n in range(0, cnt):
                flds = [x if c not in split_cols else split_fields[c][n]
                        if n < len(split_fields[c])
                        else '' for (c, x) in enumerate(fields)]
                lines.append('\t'.join(flds))
        return lines

    def get_lines(self):
        """
        Read from the source until one input line produces output.

        Every consumed input line advances ``src_line_cnt``.  The first
        truthy result of ``self.func`` is queued in ``src_lines`` (a list
        result is queued element by element) and the method returns; if
        the source is exhausted first, ``src_lines`` is left empty.
        """
        for next_line in self._source_iter:
            self.src_line_cnt += 1
            line = self.func(self.src_line_cnt, next_line)
            if line:
                if isinstance(line, list):
                    self.src_lines.extend(line)
                else:
                    self.src_lines.append(line)
                return
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
120 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
121 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
122 class TabularReader: |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
123 """ |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
124 Tabular file iterator. Returns a list |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
125 """ |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
126 def __init__(self, input_file, skip=0, comment_char=None, col_idx=None, |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
127 filters=None): |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
128 self.skip = skip |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
129 self.comment_char = comment_char |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
130 self.col_idx = col_idx |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
131 self.filters = filters |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
132 self.tsv_file = \ |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
133 input_file if hasattr(input_file, 'readline') else open(input_file) |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
134 if skip and skip > 0: |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
135 for i in range(skip): |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
136 if not self.tsv_file.readline(): |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
137 break |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
138 source = LineFilter(self.tsv_file, None) |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
139 if comment_char: |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
140 source = LineFilter(source, |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
141 {"filter": "regex", "pattern": comment_char, |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
142 "action": "exclude_match"}) |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
143 if filters: |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
144 for f in filters: |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
145 source = LineFilter(source, f) |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
146 self.source = source |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
147 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
def __iter__(self):
    """Return the reader itself; rows are produced by ``__next__``."""
    return self
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
150 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
def __next__(self):
    """Return the next row of ``self.source`` as a list of column values.

    The line has its trailing CR/LF characters stripped and is split on
    tabs.  When ``self.col_idx`` is non-empty, only those column positions
    are returned, in the order listed.

    Raises:
        StopIteration: when ``self.source`` yields no further line.
        IndexError: when a ``col_idx`` position is beyond the row's columns.
    """
    # Looping and returning on the first item pulls exactly one line from
    # the source; falling out of the loop means the source is exhausted.
    for line in self.source:
        fields = line.rstrip('\r\n').split('\t')
        if self.col_idx:
            fields = [fields[idx] for idx in self.col_idx]
        return fields
    raise StopIteration
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
159 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
160 next = __next__ |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
161 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
162 |
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff
changeset
|
def filter_file(input_file, output, skip=0, comment_char='#', filters=None):
    """Copy the filtered rows of a tabular file to ``output``.

    Rows are read through a ``TabularReader`` built from ``input_file``,
    ``skip``, ``comment_char`` and ``filters``; every row it yields is
    written to ``output`` as one tab-separated, newline-terminated line.

    A row that cannot be written is reported on stderr and skipped.  Any
    other failure (for example while constructing or iterating the reader)
    terminates the program via ``sys.exit`` with an ``Error: ...`` message.
    """
    try:
        reader = TabularReader(input_file, skip=skip,
                               comment_char=comment_char, filters=filters)
        for linenum, fields in enumerate(reader):
            try:
                output.write('%s\n' % '\t'.join(fields))
            except Exception as e:
                # Best effort: report the offending row and keep going.
                print('Failed at line: %d err: %s' % (linenum, e),
                      file=sys.stderr)
    except Exception as e:
        # sys.exit instead of the bare exit(): the latter is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit('Error: %s' % (e))