comparison filters.py @ 22:bed5018e7ae3 draft

Uploaded
author jjohnson
date Mon, 17 Jul 2017 15:22:04 -0400
parents ab27c4bd14b9
children
comparison
legend: equal | deleted | inserted | replaced
old (left): 21:357fe86f245d | new (right): 22:bed5018e7ae3
54 54
55 def __iter__(self): 55 def __iter__(self):
56 return self 56 return self
57 57
58 def __next__(self): 58 def __next__(self):
59 return next(self)
60
61 def next(self):
62 if not self.src_lines: 59 if not self.src_lines:
63 self.get_lines() 60 self.get_lines()
64 if self.src_lines: 61 if self.src_lines:
65 return self.src_lines.pop(0) 62 return self.src_lines.pop(0)
66 raise StopIteration 63 raise StopIteration
64
65 next = __next__
67 66
68 def select_columns(self, line, cols): 67 def select_columns(self, line, cols):
69 fields = line.split('\t') 68 fields = line.split('\t')
70 return '\t'.join([fields[x] for x in cols]) 69 return '\t'.join([fields[x] for x in cols])
71 70
109 self.skip = skip 108 self.skip = skip
110 self.comment_char = comment_char 109 self.comment_char = comment_char
111 self.col_idx = col_idx 110 self.col_idx = col_idx
112 self.filters = filters 111 self.filters = filters
113 self.tsv_file = \ 112 self.tsv_file = \
114 input_file if isinstance(input_file, file) else open(input_file) 113 input_file if hasattr(input_file, 'readline') else open(input_file)
115 if skip and skip > 0: 114 if skip and skip > 0:
116 for i in range(skip): 115 for i in range(skip):
117 if not self.tsv_file.readline(): 116 if not self.tsv_file.readline():
118 break 117 break
119 source = LineFilter(self.tsv_file, None) 118 source = LineFilter(self.tsv_file, None)
128 127
129 def __iter__(self): 128 def __iter__(self):
130 return self 129 return self
131 130
132 def __next__(self): 131 def __next__(self):
133 return next(self)
134
135 def next(self):
136 ''' Iteration ''' 132 ''' Iteration '''
137 for i, line in enumerate(self.source): 133 for i, line in enumerate(self.source):
138 fields = line.rstrip('\r\n').split('\t') 134 fields = line.rstrip('\r\n').split('\t')
139 if self.col_idx: 135 if self.col_idx:
140 fields = [fields[i] for i in self.col_idx] 136 fields = [fields[i] for i in self.col_idx]
141 return fields 137 return fields
142 raise StopIteration 138 raise StopIteration
139
140 next = __next__
143 141
144 142
145 def filter_file(input_file, output, skip=0, comment_char='#', filters=None): 143 def filter_file(input_file, output, skip=0, comment_char='#', filters=None):
146 data_lines = 0 144 data_lines = 0
147 try: 145 try:
153 output.write('%s\n' % '\t'.join(fields)) 151 output.write('%s\n' % '\t'.join(fields))
154 except Exception as e: 152 except Exception as e:
155 print('Failed at line: %d err: %s' % (linenum, e), 153 print('Failed at line: %d err: %s' % (linenum, e),
156 file=sys.stderr) 154 file=sys.stderr)
157 except Exception as e: 155 except Exception as e:
158 print('Failed: %s' % (e), file=sys.stderr) 156 exit('Error: %s' % (e))
159 exit(1)