Mercurial > repos > galaxyp > regex_find_replace
comparison regex.py @ 0:60d04307b027 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
author | galaxyp |
---|---|
date | Wed, 18 Jan 2017 17:45:20 -0500 |
parents | |
children | 538933d9fccc |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:60d04307b027 |
---|---|
1 import sys | |
2 import os | |
3 import re | |
4 import string | |
5 import commands | |
6 from optparse import OptionParser | |
7 from tempfile import NamedTemporaryFile | |
8 | |
def _decode_galaxy_tokens(text, mapped_chars):
    """Replace each placeholder token in *text* with the character it stands for.

    *mapped_chars* maps the real character to its placeholder token.
    """
    for char, token in mapped_chars.items():
        text = text.replace(token, char)
    return text


def main():
    """Apply regex find/replace pairs to every line of a text file.

    Options are read from ``sys.argv``:

    --input               path of the file to read (required)
    --output              path of the file to write (required)
    --input_display_name  text substituted for ``#{input_name}`` inside
                          replacements; inserted literally
    --pattern             regex to search for; may be given several times
    --replacement         replacement paired with the --pattern at the same
                          position; may be given several times
    --column              1-based tab-separated column to restrict the
                          substitution to; when omitted the whole line is used

    The tokens ``__sq__`` and ``__backslash__`` in patterns and replacements
    are decoded to ``'`` and ``\\`` before use.  Pairs are applied in order to
    each line.  If the two lists differ in length, the surplus entries are
    ignored (``zip`` semantics).  Lines that have fewer cells than --column
    are written unchanged.

    Patterns are compiled before the output file is opened, so an invalid
    regex raises ``re.error`` without touching the output.
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--input_display_name", dest="input_display_name")
    parser.add_option("--pattern", dest="patterns", action="append",
                      help="regex pattern for replacement")
    parser.add_option("--replacement", dest="replacements", action="append",
                      help="replacement for regex match")
    parser.add_option("--column", dest="column", default=None)
    (options, args) = parser.parse_args()

    if options.input is None or options.output is None:
        parser.error("--input and --output are required")

    mapped_chars = {'\'': '__sq__', '\\': '__backslash__'}

    column = None
    if options.column is not None:
        # galaxy tabular is 1-based, python arrays are zero-based
        column = int(options.column) - 1
        if column < 0:
            # A negative index would silently address cells from the end.
            parser.error("--column must be 1 or greater")

    display_name = options.input_display_name
    if display_name is not None:
        # The name ends up inside a regex replacement template; double the
        # backslashes so it is inserted literally rather than being read as
        # a group reference or escape sequence.
        display_name = display_name.replace("\\", "\\\\")

    # Everything below depends only on the options, so prepare it once
    # instead of once per input line.  "or []" covers options never given.
    substitutions = []
    for pattern, replacement in zip(options.patterns or [],
                                    options.replacements or []):
        pattern = _decode_galaxy_tokens(pattern, mapped_chars)
        replacement = _decode_galaxy_tokens(replacement, mapped_chars)
        if display_name is not None:
            replacement = replacement.replace("#{input_name}", display_name)
        substitutions.append((re.compile(pattern), replacement))

    with open(options.input, 'r') as source:
        with open(options.output, 'w') as output:
            for line in source:
                # NOTE(review): the line terminator stays attached, so "$"
                # and "\n" in a pattern can match around it -- kept as-is
                # because callers may rely on it.
                for regex, replacement in substitutions:
                    if column is None:
                        line = regex.sub(replacement, line)
                        continue
                    cells = line.split("\t")
                    if len(cells) > column:
                        cells[column] = regex.sub(replacement, cells[column])
                        line = "\t".join(cells)
                output.write(line)


if __name__ == "__main__":
    main()