diff frameshift_deletions_report_fixer.py @ 1:029d90b0c4f6 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/smallgenomeutilities commit e702dcdbc7c3235ef3c4ee8998c7247d1af49465
author iuc
date Fri, 14 Jul 2023 22:07:22 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/frameshift_deletions_report_fixer.py	Fri Jul 14 22:07:22 2023 +0000
@@ -0,0 +1,67 @@
+"""Polish the output of the frameshift_deletions_check command.
+
+- Drops the first index column, which is rather pointless to include
+- Turns ref bases printed as literal bytes strings into plain output
+- Removes [] around pos lists and spaces after comma separating list elements
+- Turns None and empty list values into . as a cell placeholder
+
+Usage: frameshift_deletions_report_fixer.py INPUT_TSV OUTPUT_TSV
+"""
+
+import re
+import sys
+
+# One alternative per kind of cell content that needs polishing:
+#   group 1 - content of a bytes literal such as b'A'; the character class
+#             keeps a match from running on to a later quote in the same line
+#   group 2 - content of a [...] list (possibly empty)
+#   group 3 - a cell consisting of exactly None; the lookarounds check the
+#             cell borders (tab, line start, line end) without consuming
+#             them, so that adjacent None cells are all matched
+CELL_REGEX = re.compile(
+    r"b'([^']+)'"
+    r"|\[([^\]]*)\]"
+    r"|(?<![^\t])(None)(?![^\t\n])"
+)
+
+
+def matchrepl(matchobj):
+    """Return the replacement text for one match of CELL_REGEX.
+
+    matchobj must be a match object with three groups, ordered like
+    those of CELL_REGEX, of which exactly one took part in the match.
+
+    Raises ValueError if none of the three groups took part in the match.
+    """
+    bytes_string_content = matchobj.group(1)
+    if bytes_string_content is not None:
+        return bytes_string_content
+    list_content = matchobj.group(2)
+    if list_content is not None:
+        if list_content == '':
+            return '.'
+        return list_content.replace(', ', ',')
+    none_cell = matchobj.group(3)
+    if none_cell is not None:
+        # only the cell text is matched, so the surrounding tabs stay
+        return '.'
+
+    raise ValueError('Error in regex parsing code')
+
+
+def polish_line(line):
+    """Return line without its first column and with all cells cleaned up.
+
+    A line without any tab (e.g. a blank line) has no index column to drop
+    and only gets the cell clean-up.
+    """
+    _, tab, remainder = line.partition('\t')
+    if tab:
+        line = remainder
+    return CELL_REGEX.sub(matchrepl, line)
+
+
+if __name__ == '__main__':
+    with open(sys.argv[1]) as i, open(sys.argv[2], 'w') as o:
+        for line in i:
+            o.write(polish_line(line))