diff regex.py @ 0:60d04307b027 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
author galaxyp
date Wed, 18 Jan 2017 17:45:20 -0500
parents
children 538933d9fccc
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regex.py	Wed Jan 18 17:45:20 2017 -0500
@@ -0,0 +1,99 @@
+import sys
+import os
+import re
+import string
+import commands
+from optparse import OptionParser
+from tempfile import NamedTemporaryFile
+
+# Placeholder -> literal character pairs, applied in this order.
+# NOTE(review): presumably the placeholders come from Galaxy's parameter
+# sanitizing -- confirm against the tool wrapper.
+_ESCAPED_CHARS = (("__sq__", "'"), ("__backslash__", "\\"))
+
+def _unescape(text):
+  """Return text with each placeholder in _ESCAPED_CHARS replaced by its character."""
+  for placeholder, char in _ESCAPED_CHARS:
+    text = text.replace(placeholder, char)
+  return text
+
+def _build_rules(patterns, replacements, input_display_name):
+  """Prepare all substitutions once, before any line is read.
+
+  Each pattern/replacement pair is unescaped, "#{input_name}" in the
+  replacement is swapped for input_display_name when one was given, and
+  the pattern is compiled. Returns a list of (compiled regex, replacement).
+  """
+  rules = []
+  for pattern, replacement in zip(patterns, replacements):
+    replacement = _unescape(replacement)
+    if input_display_name is not None:
+      replacement = replacement.replace("#{input_name}", input_display_name)
+    rules.append((re.compile(_unescape(pattern)), replacement))
+  return rules
+
+def _apply_rules(line, rules, column):
+  """Run every rule, in order, over line or over one tab-separated cell of it.
+
+  column is a zero-based cell index, or None to rewrite the whole line.
+  Lines with too few cells are returned unchanged. The line keeps its
+  trailing newline, so the last cell of a line includes it.
+  """
+  for regex, replacement in rules:
+    if column is None:
+      line = regex.sub(replacement, line)
+      continue
+    # Re-split for every rule: an earlier replacement may have added a tab.
+    cells = line.split("\t")
+    if len(cells) > column:
+      cells[column] = regex.sub(replacement, cells[column])
+      line = "\t".join(cells)
+  return line
+
+def main():
+  """Copy --input to --output, rewriting each line with the given regexes.
+
+  --pattern and --replacement may be repeated and are paired by position;
+  the pairs are applied in order. With --column N (1-based) only that
+  tab-separated column is rewritten, otherwise the whole line is.
+  "#{input_name}" in a replacement stands for --input_display_name.
+
+  Exits through parser.error() when --input or --output is missing, when
+  --column is not a positive integer, or when the numbers of patterns and
+  replacements differ.
+  """
+  parser = OptionParser()
+  parser.add_option("--input", dest="input")
+  parser.add_option("--output", dest="output")
+  parser.add_option("--input_display_name", dest="input_display_name")
+  parser.add_option("--pattern", dest="patterns", action="append",
+                    help="regex pattern for replacement")
+  parser.add_option("--replacement", dest="replacements", action="append",
+                    help="replacement for regex match")
+  parser.add_option("--column", dest="column", type="int", default=None)
+  (options, args) = parser.parse_args()
+
+  if options.input is None or options.output is None:
+    parser.error("--input and --output are required")
+
+  # action="append" leaves the option at None when it was never passed.
+  patterns = options.patterns or []
+  replacements = options.replacements or []
+  if len(patterns) != len(replacements):
+    parser.error("each --pattern needs exactly one --replacement")
+
+  column = None
+  if options.column is not None:
+    if options.column < 1:
+      # 0 would become index -1 and silently rewrite the last column.
+      parser.error("--column must be 1 or greater")
+    column = options.column - 1 # galaxy tabular is 1-based, python array are zero-based
+
+  rules = _build_rules(patterns, replacements, options.input_display_name)
+
+  with open(options.input, 'r') as source, open(options.output, 'w') as sink:
+    for line in source:
+      sink.write(_apply_rules(line, rules, column))
+
+if __name__ == "__main__":
+    main()