annotate regex.py @ 4:399da6b5ec21 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit a9b01eafeefb50c416fbfe342b15298fe9321679
author galaxyp
date Thu, 14 Jul 2022 10:49:01 +0000
parents 538933d9fccc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
399da6b5ec21 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit a9b01eafeefb50c416fbfe342b15298fe9321679
galaxyp
parents: 2
diff changeset
1 import re
2
538933d9fccc "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 8871a847daed1f996c0a2069c3e876685bf8d220"
galaxyp
parents: 0
diff changeset
2 from optparse import OptionParser
538933d9fccc "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 8871a847daed1f996c0a2069c3e876685bf8d220"
galaxyp
parents: 0
diff changeset
3
0
60d04307b027 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
4
60d04307b027 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
def main():
    """Apply regex find-and-replace rules to a text file, line by line.

    Options are read from the command line with ``OptionParser``:

    --input               path of the file to read
    --output              path of the file to write
    --input_display_name  text substituted for ``#{input_name}`` inside
                          replacements; when omitted the placeholder is
                          left untouched
    --pattern             regex to search for (may be given several times)
    --replacement         replacement for the corresponding --pattern
                          (may be given several times)
    --column              optional 1-based index of the tab-separated
                          column to restrict substitution to; when omitted
                          the whole line (including its line ending) is
                          searched

    Patterns and replacements are paired in the order given and applied
    one after another, so a later rule sees the result of the earlier
    ones. Surplus patterns or replacements without a partner are ignored.
    When no rules are given the input is copied to the output unchanged.
    Lines that do not have the requested column are written unchanged.
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--input_display_name", dest="input_display_name")
    parser.add_option("--pattern", dest="patterns", action="append",
                      help="regex pattern for replacement")
    parser.add_option("--replacement", dest="replacements", action="append",
                      help="replacement for regex match")
    parser.add_option("--column", dest="column", default=None)
    (options, args) = parser.parse_args()

    # Tokens that may appear in --pattern/--replacement values in place of a
    # literal single quote or backslash (presumably introduced by the caller's
    # parameter sanitizing -- verify against the tool wrapper). They are
    # translated back to the real characters before use.
    mapped_chars = {'\'': '__sq__', '\\': '__backslash__'}

    column = None
    if options.column is not None:
        # galaxy tabular is 1-based, python array are zero-based
        column = int(options.column) - 1

    def unescape(text):
        # Swap every escape token in *text* for the character it stands for.
        for char, token in mapped_chars.items():
            text = text.replace(token, char)
        return text

    # The rules do not depend on the input lines, so prepare them once.
    # "append" options stay None when never given; treat that as "no rules".
    rules = []
    for pattern, replacement in zip(options.patterns or [],
                                    options.replacements or []):
        replacement = unescape(replacement)
        if options.input_display_name is not None:
            replacement = replacement.replace("#{input_name}",
                                              options.input_display_name)
        rules.append((unescape(pattern), replacement))

    with open(options.input, 'r') as source, \
            open(options.output, 'w') as output:
        for line in source:
            for pattern, replacement in rules:
                if column is None:
                    line = re.sub(pattern, replacement, line)
                    continue
                # Re-split for every rule: an earlier replacement may have
                # added or removed tabs and thereby shifted the columns.
                cells = line.split("\t")
                if len(cells) > column:
                    cells[column] = re.sub(pattern, replacement, cells[column])
                    line = "\t".join(cells)
            output.write(line)
538933d9fccc "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 8871a847daed1f996c0a2069c3e876685bf8d220"
galaxyp
parents: 0
diff changeset
44
0
60d04307b027 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
45
60d04307b027 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()