Mercurial > repos > iuc > sqlite_to_tabular
annotate filters.py @ 1:c1b700bc0150 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 81f69ad5f39223059c40501e55ac777d3feca845
| author | iuc | 
|---|---|
| date | Fri, 18 Aug 2017 16:48:20 -0400 | 
| parents | 859064f07be4 | 
| children | 4678715f7147 | 
| rev | line source | 
|---|---|
| 
0
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
2 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
3 from __future__ import print_function | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
4 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
5 import re | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
6 import sys | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
7 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
8 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
class LineFilter(object):
    """Iterator that applies one line filter to an iterable of text lines.

    ``source`` may be any iterable of lines (an open file, another
    ``LineFilter``, a list, ...).  ``filter_dict`` selects the transformation
    through its ``'filter'`` key; when it is empty or ``None`` the only
    processing is removal of the trailing ``\\r``/``\\n`` characters.

    Recognised ``filter_dict['filter']`` values and the extra keys they read:

    * ``regex`` - ``pattern`` and ``action`` (``exclude_match``,
      ``include_match``, ``exclude_find`` or ``include_find``)
    * ``select_columns`` - ``columns`` (1-based column numbers)
    * ``replace`` - ``pattern``, ``replace`` and ``column`` (1-based)
    * ``prepend_line_num`` / ``append_line_num``
    * ``prepend_text`` / ``append_text`` - ``column_text``
    * ``skip`` - ``count`` (number of leading lines to drop)
    * ``normalize`` - ``columns`` (1-based) and ``separator``

    An unrecognised filter name (or regex action) leaves the default
    terminator-stripping behaviour in place.

    Any line for which the filter produces a falsy result (``None`` or an
    empty string) is dropped from the output.
    """

    def __init__(self, source, filter_dict):
        self.source = source
        # Hold a single iterator so that repeated get_lines() calls resume
        # where the previous one stopped, even for re-iterable sources such
        # as lists (a bare ``for`` over a list would restart every time).
        self._source_iter = iter(source)
        self.filter_dict = filter_dict
        # Default filter: strip the line terminator only.
        self.func = lambda i, l: l.rstrip('\r\n') if l else None
        # Lines already filtered but not yet handed out by __next__.
        self.src_lines = []
        # 1-based count of lines read from the source so far.
        self.src_line_cnt = 0
        if not filter_dict:
            return
        filter_name = filter_dict['filter']
        if filter_name == 'regex':
            rgx = re.compile(filter_dict['pattern'])
            action = filter_dict['action']
            if action == 'exclude_match':
                self.func = lambda i, l: l if not rgx.match(l) else None
            elif action == 'include_match':
                self.func = lambda i, l: l if rgx.match(l) else None
            elif action == 'exclude_find':
                self.func = lambda i, l: l if not rgx.search(l) else None
            elif action == 'include_find':
                self.func = lambda i, l: l if rgx.search(l) else None
        elif filter_name == 'select_columns':
            cols = [int(c) - 1 for c in filter_dict['columns']]
            self.func = lambda i, l: self.select_columns(l, cols)
        elif filter_name == 'replace':
            # Compile once instead of handing the raw pattern to re.sub
            # for every field of every line.
            replace_rgx = re.compile(filter_dict['pattern'])
            r = filter_dict['replace']
            c = int(filter_dict['column']) - 1
            self.func = lambda i, l: '\t'.join(
                [x if j != c else replace_rgx.sub(r, x)
                 for j, x in enumerate(l.split('\t'))])
        elif filter_name == 'prepend_line_num':
            self.func = lambda i, l: '%d\t%s' % (i, l)
        elif filter_name == 'append_line_num':
            self.func = lambda i, l: '%s\t%d' % (l.rstrip('\r\n'), i)
        elif filter_name == 'prepend_text':
            s = filter_dict['column_text']
            self.func = lambda i, l: '%s\t%s' % (s, l)
        elif filter_name == 'append_text':
            s = filter_dict['column_text']
            self.func = lambda i, l: '%s\t%s' % (l.rstrip('\r\n'), s)
        elif filter_name == 'skip':
            # Coerce to int: a count supplied as text would otherwise raise
            # TypeError on Python 3 or silently drop every line on Python 2.
            cnt = int(filter_dict['count'])
            self.func = lambda i, l: l if i > cnt else None
        elif filter_name == 'normalize':
            cols = [int(c) - 1 for c in filter_dict['columns']]
            sep = filter_dict['separator']
            self.func = lambda i, l: self.normalize(l, cols, sep)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next filtered line, refilling the buffer if needed."""
        if not self.src_lines:
            self.get_lines()
        if self.src_lines:
            return self.src_lines.pop(0)
        raise StopIteration

    # Python 2 iterator protocol.
    next = __next__

    def select_columns(self, line, cols):
        """Return the fields of ``line`` at 0-based indexes ``cols``,
        tab-joined in the order given.

        The line terminator is removed before splitting so that it cannot
        end up in the middle of the row when the last column is selected
        ahead of another one.  Raises IndexError if the line has fewer
        fields than a requested index.
        """
        fields = line.rstrip('\r\n').split('\t')
        return '\t'.join([fields[x] for x in cols])

    def normalize(self, line, split_cols, sep):
        """Expand ``line`` into one row per value of its multi-valued columns.

        Each field whose 0-based index is in ``split_cols`` is split on
        ``sep``.  The result is a list of tab-joined rows, one per position
        in the longest split; shorter split columns are padded with ''
        and all other fields are repeated unchanged on every row.
        """
        lines = []
        fields = line.rstrip('\r\n').split('\t')
        split_fields = dict()
        cnt = 0
        for c in split_cols:
            if c < len(fields):
                split_fields[c] = fields[c].split(sep)
                cnt = max(cnt, len(split_fields[c]))
        if cnt == 0:
            # None of the requested columns exist on this line.
            lines.append('\t'.join(fields))
        else:
            for n in range(0, cnt):
                flds = [x if c not in split_cols else split_fields[c][n]
                        if n < len(split_fields[c])
                        else '' for (c, x) in enumerate(fields)]
                lines.append('\t'.join(flds))
        return lines

    def get_lines(self):
        """Read source lines until one survives the filter and buffer it.

        A filter may return a single string or a list of strings (as
        ``normalize`` does); either way the result is queued on
        ``self.src_lines``.  Returns with the buffer still empty once the
        source is exhausted.
        """
        for next_line in self._source_iter:
            self.src_line_cnt += 1
            line = self.func(self.src_line_cnt, next_line)
            if not line:
                # Filtered out (None) or empty result: try the next line.
                continue
            if isinstance(line, list):
                self.src_lines.extend(line)
            else:
                self.src_lines.append(line)
            return
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
101 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
102 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
103 class TabularReader: | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
104 """ | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
105 Tabular file iterator. Returns a list | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
106 """ | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
107 def __init__(self, input_file, skip=0, comment_char=None, col_idx=None, | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
108 filters=None): | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
109 self.skip = skip | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
110 self.comment_char = comment_char | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
111 self.col_idx = col_idx | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
112 self.filters = filters | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
113 self.tsv_file = \ | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
114 input_file if hasattr(input_file, 'readline') else open(input_file) | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
115 if skip and skip > 0: | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
116 for i in range(skip): | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
117 if not self.tsv_file.readline(): | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
118 break | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
119 source = LineFilter(self.tsv_file, None) | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
120 if comment_char: | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
121 source = LineFilter(source, | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
122 {"filter": "regex", "pattern": comment_char, | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
123 "action": "exclude_match"}) | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
124 if filters: | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
125 for f in filters: | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
126 source = LineFilter(source, f) | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
127 self.source = source | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
128 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
129 def __iter__(self): | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
130 return self | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
131 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
def __next__(self):
    '''
    Return the next row of self.source as a list of column strings.

    Trailing carriage-return / newline characters are stripped from the
    line, which is then split on tabs. When self.col_idx is non-empty only
    the columns at those indexes are returned, in col_idx order.

    Raises StopIteration once self.source yields no further lines.
    '''
    # The loop body runs at most once: the for statement is only used to
    # pull a single line and to fall through when the source is exhausted.
    for line in self.source:
        fields = line.rstrip('\r\n').split('\t')
        if self.col_idx:
            fields = [fields[col] for col in self.col_idx]
        return fields
    raise StopIteration
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
140 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
141 next = __next__ | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
142 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
143 | 
| 
 
859064f07be4
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
 
iuc 
parents:  
diff
changeset
 | 
def filter_file(input_file, output, skip=0, comment_char='#', filters=None):
    '''
    Write the rows produced by a TabularReader to output as tab-separated
    lines.

    input_file, skip, comment_char and filters are passed unchanged to
    TabularReader. Every row the reader yields is joined with tabs and
    written to output followed by a newline.

    A row that fails to be written is reported on stderr and skipped.
    Any other exception (raised while building or iterating the reader)
    ends the program through SystemExit with an 'Error: ...' message.
    '''
    try:
        tr = TabularReader(input_file, skip=skip, comment_char=comment_char,
                           filters=filters)
        # linenum is the zero-based position of the row as yielded by tr
        for linenum, fields in enumerate(tr):
            try:
                output.write('%s\n' % '\t'.join(fields))
            except Exception as e:
                # Best effort: report the bad row and carry on with the rest
                print('Failed at line: %d err: %s' % (linenum, e),
                      file=sys.stderr)
    except Exception as e:
        # sys.exit instead of the site-provided exit() builtin, which is
        # meant for interactive sessions and is absent when site is not
        # loaded
        sys.exit('Error: %s' % (e))
