Mercurial > repos > galaxyp > regex_find_replace
annotate regex.py @ 0:60d04307b027 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
author | galaxyp |
---|---|
date | Wed, 18 Jan 2017 17:45:20 -0500 |
parents | |
children | 538933d9fccc |
rev | line source |
---|---|
0
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
1 import sys |
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
2 import os |
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
3 import re |
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
4 import string |
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
5 import commands |
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
6 from optparse import OptionParser |
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
7 from tempfile import NamedTemporaryFile |
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
8 |
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
def _split_line_ending(line):
    """Return ``(body, ending)`` where ending is "\\r\\n", "\\n" or ""."""
    for ending in ("\r\n", "\n"):
        if line.endswith(ending):
            return line[:-len(ending)], ending
    return line, ""


def main():
    """Apply regex find/replace rules to a text or tab-separated file.

    Command-line options:
      --input               path of the file to read (required)
      --output              path of the file to write (required)
      --input_display_name  text substituted literally for "#{input_name}"
                            inside every replacement
      --pattern             regex to search for (repeatable)
      --replacement         replacement template paired, in order, with the
                            --pattern options (repeatable)
      --column              1-based tab-separated column to restrict the
                            replacements to; the whole line when omitted

    The tokens "__sq__" and "__backslash__" inside patterns and replacements
    are turned back into a single quote and a backslash before use.

    Raises:
      SystemExit: via ``parser.error`` when --input or --output is missing.
      re.error: when a pattern or replacement template is invalid.
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--input_display_name", dest="input_display_name")
    parser.add_option("--pattern", dest="patterns", action="append",
                      help="regex pattern for replacement")
    parser.add_option("--replacement", dest="replacements", action="append",
                      help="replacement for regex match")
    parser.add_option("--column", dest="column", default=None)
    (options, args) = parser.parse_args()

    if options.input is None or options.output is None:
        parser.error("--input and --output are required")

    mapped_chars = {'\'': '__sq__', '\\': '__backslash__'}

    column = None
    if options.column is not None:
        column = int(options.column) - 1  # galaxy tabular is 1-based, python array are zero-based

    display_name = options.input_display_name
    if display_name is not None:
        # The name is inserted into a re.sub template, where a backslash
        # starts an escape or group reference; double it so the name is
        # emitted literally.
        display_name = display_name.replace("\\", "\\\\")

    # Prepare every (compiled pattern, replacement) pair once instead of
    # redoing the un-escaping for each input line.  "append" options are None
    # when never given, which means "no rules".
    rules = []
    for pattern, replacement in zip(options.patterns or [],
                                    options.replacements or []):
        for key, value in mapped_chars.items():
            pattern = pattern.replace(value, key)
            replacement = replacement.replace(value, key)
        if display_name is not None:
            replacement = replacement.replace("#{input_name}", display_name)
        rules.append((re.compile(pattern), replacement))

    with open(options.input, 'r') as source:
        with open(options.output, 'w') as sink:
            for line in source:
                if column is None:
                    for regex, replacement in rules:
                        line = regex.sub(replacement, line)
                else:
                    # Keep the line terminator out of the cells so that a
                    # match in the last column cannot swallow or duplicate it.
                    body, ending = _split_line_ending(line)
                    for regex, replacement in rules:
                        # Re-split per rule: an earlier replacement may have
                        # introduced tabs and shifted the columns.
                        cells = body.split("\t")
                        if len(cells) > column:
                            cells[column] = regex.sub(replacement, cells[column])
                            body = "\t".join(cells)
                    line = body + ending
                sink.write(line)
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
48 |
60d04307b027
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()