Mercurial > repos > galaxyp > regex_find_replace
diff regex.py @ 0:60d04307b027 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
author | galaxyp |
---|---|
date | Wed, 18 Jan 2017 17:45:20 -0500 |
parents | |
children | 538933d9fccc |
line wrap: on
line diff
# regex.py -- reconstructed from the collapsed diff
# (--- /dev/null, +++ b/regex.py, @@ -0,0 +1,50 @@).
"""Apply regex find-and-replace rules to a text file, line by line.

Every ``--pattern`` is paired, in order, with the ``--replacement`` given at
the same position.  Rules are applied either to each whole line or, when
``--column`` is given, only to that (1-based) tab-separated cell.
"""

# NOTE: the original ``import commands`` was dropped.  That module exists only
# in Python 2, nothing in this script used it, and it made the script fail at
# import time on Python 3.
import sys
import os
import re
import string
from optparse import OptionParser
from tempfile import NamedTemporaryFile

# Placeholder tokens that are translated back to the literal character
# (dictionary key) inside patterns and replacements before they are used.
MAPPED_CHARS = {"'": "__sq__", "\\": "__backslash__"}

# Marker inside a replacement that is substituted with --input_display_name.
INPUT_NAME_PLACEHOLDER = "#{input_name}"


def _unescape(text):
    """Return *text* with every placeholder token replaced by its literal character."""
    for literal, token in MAPPED_CHARS.items():
        text = text.replace(token, literal)
    return text


def _build_rules(patterns, replacements, input_display_name):
    """Return a list of ``(compiled_regex, replacement_template)`` pairs.

    *patterns* and *replacements* may be ``None`` (option never given); they
    are paired positionally with ``zip``.  Raises ``re.error`` if a pattern
    is not a valid regular expression.
    """
    rules = []
    for pattern, replacement in zip(patterns or [], replacements or []):
        replacement = _unescape(replacement)
        if input_display_name is not None:
            # Double the backslashes so the name is inserted literally rather
            # than being parsed as a group reference/escape by re.sub.
            literal_name = input_display_name.replace("\\", "\\\\")
            replacement = replacement.replace(INPUT_NAME_PLACEHOLDER, literal_name)
        rules.append((re.compile(_unescape(pattern)), replacement))
    return rules


def _apply_rules(line, rules, column):
    """Return *line* after applying every rule to the whole line or to one cell.

    *column* is a zero-based cell index, or ``None`` for whole-line mode.
    Lines with too few cells are left untouched by the rule.
    """
    for regex, replacement in rules:
        if column is None:
            line = regex.sub(replacement, line)
            continue
        # Re-split for every rule: an earlier replacement may have inserted
        # or removed tab characters.
        cells = line.split("\t")
        if len(cells) > column:
            cells[column] = regex.sub(replacement, cells[column])
            line = "\t".join(cells)
    return line


def main(argv=None):
    """Run the find-and-replace tool.

    *argv* is the list of command-line arguments; when ``None`` (the default)
    ``sys.argv[1:]`` is used, which is what the script entry point relies on.
    Invalid or missing options terminate via ``parser.error`` (``SystemExit``).
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--input_display_name", dest="input_display_name")
    parser.add_option("--pattern", dest="patterns", action="append",
                      help="regex pattern for replacement")
    parser.add_option("--replacement", dest="replacements", action="append",
                      help="replacement for regex match")
    parser.add_option("--column", dest="column", default=None)
    (options, args) = parser.parse_args(argv)

    if options.input is None or options.output is None:
        parser.error("--input and --output are required")

    column = None
    if options.column is not None:
        try:
            # galaxy tabular is 1-based, python arrays are zero-based
            column = int(options.column) - 1
        except ValueError:
            parser.error("--column must be an integer, got %r" % (options.column,))
        if column < 0:
            # A negative index would silently address cells from the end.
            parser.error("--column must be 1 or greater, got %r" % (options.column,))

    # Prepare the rules once, before any file is opened, instead of per line.
    rules = _build_rules(options.patterns, options.replacements,
                         options.input_display_name)

    with open(options.input, 'r') as source:
        with open(options.output, 'w') as sink:
            for line in source:
                sink.write(_apply_rules(line, rules, column))


if __name__ == "__main__":
    main()