annotate filters.py @ 10:dfe38b7c34ed draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 67db58361546009a77b2cbd140967fbc634d425b"
author iuc
date Thu, 24 Sep 2020 11:27:11 +0000
parents 4678715f7147
children bce29ec10b78
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
1 #!/usr/binsenv python
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
2
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
3 from __future__ import print_function
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
4
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
5 import re
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
6 import sys
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
7
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
8
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
9 class LineFilter(object):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
10 def __init__(self, source, filter_dict):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
11 self.source = source
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
12 self.filter_dict = filter_dict
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
13 self.func = lambda i, l: l.rstrip('\r\n') if l else None
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
14 self.src_lines = []
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
15 self.src_line_cnt = 0
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
16 if not filter_dict:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
17 return
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
18 if filter_dict['filter'] == 'regex':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
19 rgx = re.compile(filter_dict['pattern'])
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
20 if filter_dict['action'] == 'exclude_match':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
21 self.func = lambda i, l: l if not rgx.match(l) else None
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
22 elif filter_dict['action'] == 'include_match':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
23 self.func = lambda i, l: l if rgx.match(l) else None
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
24 elif filter_dict['action'] == 'exclude_find':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
25 self.func = lambda i, l: l if not rgx.search(l) else None
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
26 elif filter_dict['action'] == 'include_find':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
27 self.func = lambda i, l: l if rgx.search(l) else None
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
28 elif filter_dict['filter'] == 'select_columns':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
29 cols = [int(c) - 1 for c in filter_dict['columns']]
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
30 self.func = lambda i, l: self.select_columns(l, cols)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
31 elif filter_dict['filter'] == 'replace':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
32 p = filter_dict['pattern']
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
33 r = filter_dict['replace']
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
34 c = int(filter_dict['column']) - 1
9
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
35 if 'add' not in filter_dict\
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
36 or filter_dict['add'] not in ['prepend',
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
37 'append',
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
38 'before',
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
39 'after']:
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
40 self.func = lambda i, l: '\t'.join(
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
41 [x if j != c else re.sub(p, r, x)
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
42 for j, x in enumerate(l.split('\t'))])
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
43 else:
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
44 a = 0 if filter_dict['add'] == 'prepend'\
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
45 else min(0, c - 1) if filter_dict['add'] == 'before'\
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
46 else c + 1 if filter_dict['add'] == 'after'\
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
47 else None
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
48 self.func = lambda i, l: self.replace_add(l, p, r, c, a)
0
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
49 elif filter_dict['filter'] == 'prepend_line_num':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
50 self.func = lambda i, l: '%d\t%s' % (i, l)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
51 elif filter_dict['filter'] == 'append_line_num':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
52 self.func = lambda i, l: '%s\t%d' % (l.rstrip('\r\n'), i)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
53 elif filter_dict['filter'] == 'prepend_text':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
54 s = filter_dict['column_text']
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
55 self.func = lambda i, l: '%s\t%s' % (s, l)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
56 elif filter_dict['filter'] == 'append_text':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
57 s = filter_dict['column_text']
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
58 self.func = lambda i, l: '%s\t%s' % (l.rstrip('\r\n'), s)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
59 elif filter_dict['filter'] == 'skip':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
60 cnt = filter_dict['count']
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
61 self.func = lambda i, l: l if i > cnt else None
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
62 elif filter_dict['filter'] == 'normalize':
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
63 cols = [int(c) - 1 for c in filter_dict['columns']]
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
64 sep = filter_dict['separator']
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
65 self.func = lambda i, l: self.normalize(l, cols, sep)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
66
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
67 def __iter__(self):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
68 return self
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
69
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
70 def __next__(self):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
71 if not self.src_lines:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
72 self.get_lines()
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
73 if self.src_lines:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
74 return self.src_lines.pop(0)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
75 raise StopIteration
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
76
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
77 next = __next__
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
78
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
79 def select_columns(self, line, cols):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
80 fields = line.split('\t')
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
81 return '\t'.join([fields[x] for x in cols])
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
82
9
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
83 def replace_add(self, line, pat, rep, col, pos):
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
84 fields = line.rstrip('\r\n').split('\t')
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
85 i = pos if pos else len(fields)
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
86 val = ''
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
87 if col < len(fields) and re.search(pat, fields[col]):
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
88 val = re.sub(pat, rep, fields[col]).replace('\t', ' ')
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
89 return '\t'.join(fields[:i] + [val] + fields[i:])
4678715f7147 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12"
iuc
parents: 1
diff changeset
90
0
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
91 def normalize(self, line, split_cols, sep):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
92 lines = []
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
93 fields = line.rstrip('\r\n').split('\t')
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
94 split_fields = dict()
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
95 cnt = 0
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
96 for c in split_cols:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
97 if c < len(fields):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
98 split_fields[c] = fields[c].split(sep)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
99 cnt = max(cnt, len(split_fields[c]))
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
100 if cnt == 0:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
101 lines.append('\t'.join(fields))
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
102 else:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
103 for n in range(0, cnt):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
104 flds = [x if c not in split_cols else split_fields[c][n]
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
105 if n < len(split_fields[c])
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
106 else '' for (c, x) in enumerate(fields)]
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
107 lines.append('\t'.join(flds))
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
108 return lines
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
109
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
110 def get_lines(self):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
111 for i, next_line in enumerate(self.source):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
112 self.src_line_cnt += 1
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
113 line = self.func(self.src_line_cnt, next_line)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
114 if line:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
115 if isinstance(line, list):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
116 self.src_lines.extend(line)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
117 else:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
118 self.src_lines.append(line)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
119 return
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
120
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
121
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
122 class TabularReader:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
123 """
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
124 Tabular file iterator. Returns a list
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
125 """
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
126 def __init__(self, input_file, skip=0, comment_char=None, col_idx=None,
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
127 filters=None):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
128 self.skip = skip
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
129 self.comment_char = comment_char
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
130 self.col_idx = col_idx
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
131 self.filters = filters
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
132 self.tsv_file = \
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
133 input_file if hasattr(input_file, 'readline') else open(input_file)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
134 if skip and skip > 0:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
135 for i in range(skip):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
136 if not self.tsv_file.readline():
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
137 break
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
138 source = LineFilter(self.tsv_file, None)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
139 if comment_char:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
140 source = LineFilter(source,
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
141 {"filter": "regex", "pattern": comment_char,
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
142 "action": "exclude_match"})
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
143 if filters:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
144 for f in filters:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
145 source = LineFilter(source, f)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
146 self.source = source
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
147
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
148 def __iter__(self):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
149 return self
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
150
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
151 def __next__(self):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
152 ''' Iteration '''
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
153 for i, line in enumerate(self.source):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
154 fields = line.rstrip('\r\n').split('\t')
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
155 if self.col_idx:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
156 fields = [fields[i] for i in self.col_idx]
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
157 return fields
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
158 raise StopIteration
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
159
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
160 next = __next__
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
161
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
162
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
163 def filter_file(input_file, output, skip=0, comment_char='#', filters=None):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
164 data_lines = 0
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
165 try:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
166 tr = TabularReader(input_file, skip=skip, comment_char=comment_char,
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
167 filters=filters)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
168 for linenum, fields in enumerate(tr):
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
169 data_lines += 1
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
170 try:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
171 output.write('%s\n' % '\t'.join(fields))
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
172 except Exception as e:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
173 print('Failed at line: %d err: %s' % (linenum, e),
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
174 file=sys.stderr)
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
175 except Exception as e:
859064f07be4 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
iuc
parents:
diff changeset
176 exit('Error: %s' % (e))