Mercurial > repos > jjohnson > query_tabular
comparison filters.py @ 22:bed5018e7ae3 draft
Uploaded
author | jjohnson |
---|---|
date | Mon, 17 Jul 2017 15:22:04 -0400 |
parents | ab27c4bd14b9 |
children |
comparison
equal
deleted
inserted
replaced
21:357fe86f245d | 22:bed5018e7ae3 |
---|---|
54 | 54 |
55 def __iter__(self): | 55 def __iter__(self): |
56 return self | 56 return self |
57 | 57 |
58 def __next__(self): | 58 def __next__(self): |
59 return next(self) | |
60 | |
61 def next(self): | |
62 if not self.src_lines: | 59 if not self.src_lines: |
63 self.get_lines() | 60 self.get_lines() |
64 if self.src_lines: | 61 if self.src_lines: |
65 return self.src_lines.pop(0) | 62 return self.src_lines.pop(0) |
66 raise StopIteration | 63 raise StopIteration |
64 | |
65 next = __next__ | |
67 | 66 |
68 def select_columns(self, line, cols): | 67 def select_columns(self, line, cols): |
69 fields = line.split('\t') | 68 fields = line.split('\t') |
70 return '\t'.join([fields[x] for x in cols]) | 69 return '\t'.join([fields[x] for x in cols]) |
71 | 70 |
109 self.skip = skip | 108 self.skip = skip |
110 self.comment_char = comment_char | 109 self.comment_char = comment_char |
111 self.col_idx = col_idx | 110 self.col_idx = col_idx |
112 self.filters = filters | 111 self.filters = filters |
113 self.tsv_file = \ | 112 self.tsv_file = \ |
114 input_file if isinstance(input_file, file) else open(input_file) | 113 input_file if hasattr(input_file, 'readline') else open(input_file) |
115 if skip and skip > 0: | 114 if skip and skip > 0: |
116 for i in range(skip): | 115 for i in range(skip): |
117 if not self.tsv_file.readline(): | 116 if not self.tsv_file.readline(): |
118 break | 117 break |
119 source = LineFilter(self.tsv_file, None) | 118 source = LineFilter(self.tsv_file, None) |
128 | 127 |
129 def __iter__(self): | 128 def __iter__(self): |
130 return self | 129 return self |
131 | 130 |
132 def __next__(self): | 131 def __next__(self): |
133 return next(self) | |
134 | |
135 def next(self): | |
136 ''' Iteration ''' | 132 ''' Iteration ''' |
137 for i, line in enumerate(self.source): | 133 for i, line in enumerate(self.source): |
138 fields = line.rstrip('\r\n').split('\t') | 134 fields = line.rstrip('\r\n').split('\t') |
139 if self.col_idx: | 135 if self.col_idx: |
140 fields = [fields[i] for i in self.col_idx] | 136 fields = [fields[i] for i in self.col_idx] |
141 return fields | 137 return fields |
142 raise StopIteration | 138 raise StopIteration |
139 | |
140 next = __next__ | |
143 | 141 |
144 | 142 |
145 def filter_file(input_file, output, skip=0, comment_char='#', filters=None): | 143 def filter_file(input_file, output, skip=0, comment_char='#', filters=None): |
146 data_lines = 0 | 144 data_lines = 0 |
147 try: | 145 try: |
153 output.write('%s\n' % '\t'.join(fields)) | 151 output.write('%s\n' % '\t'.join(fields)) |
154 except Exception as e: | 152 except Exception as e: |
155 print('Failed at line: %d err: %s' % (linenum, e), | 153 print('Failed at line: %d err: %s' % (linenum, e), |
156 file=sys.stderr) | 154 file=sys.stderr) |
157 except Exception as e: | 155 except Exception as e: |
158 print('Failed: %s' % (e), file=sys.stderr) | 156 exit('Error: %s' % (e)) |
159 exit(1) |