comparison frameshift_deletions_report_fixer.py @ 1:029d90b0c4f6 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/smallgenomeutilities commit e702dcdbc7c3235ef3c4ee8998c7247d1af49465
author iuc
date Fri, 14 Jul 2023 22:07:22 +0000
parents
children
comparison
equal deleted inserted replaced
0:f079716f598c 1:029d90b0c4f6
1 """Polish the output of the frameshift_deletions_check command.
2
3 - Drops the first index column, which is rather pointless to include
4 - Turns ref bases printed as literal bytes strings into plain output
5 - Removes [] around pos lists and spaces after comma separating list elements
6 - Turns None and empty list values into . as a cell placeholder
7 """
8
9 import re
10 import sys
11
12
def matchrepl(matchobj):
    """Return the replacement text for one match of the cell-polishing regex.

    Intended as the ``repl`` callable for ``re.sub`` with the three-group
    pattern compiled in this script's ``__main__`` section, where exactly
    one capture group takes part in any given match:

    - group 1: the text between the quotes of a ``b'...'`` literal,
    - group 2: the text between ``[`` and ``]`` of a printed list,
    - group 3: a ``None`` token matched together with its surrounding tabs.

    Args:
        matchobj: Match object exposing capture groups 1 to 3.

    Returns:
        The group 1 text unchanged; or the group 2 text with every
        ``', '`` collapsed to ``','`` (``'.'`` if that text is empty); or
        ``'\\t.\\t'`` when group 3 matched.

    Raises:
        ValueError: If none of the three groups took part in the match.
    """
    bytes_string_content = matchobj.group(1)
    if bytes_string_content is not None:
        # Drop the b'' wrapper, keep the payload as-is.
        return bytes_string_content

    list_content = matchobj.group(2)
    if list_content is not None:
        if list_content == '':
            # "[]" becomes the placeholder cell value.
            return '.'
        return list_content.replace(', ', ',')

    none_cell = matchobj.group(3)
    if none_cell is not None:
        # The pattern consumed the delimiting tabs as part of the match,
        # so they have to be emitted again around the placeholder.
        return '\t.\t'

    # Only reachable if the pattern and this function get out of sync.
    raise ValueError('Error in regex parsing code')
27
28
def polish_line(line, regex):
    """Return one report row with its index column dropped and cells polished.

    The row is processed cell by cell rather than with a single whole-line
    substitution. A whole-line ``\\tNone\\t`` match consumes both delimiting
    tabs, so the second of two adjacent ``None`` cells, a ``None`` in the
    last column, and a ``None`` in the first remaining column were all left
    untouched. Working per cell also keeps the greedy ``b'(.+)'`` pattern
    from spanning several cells when a row holds more than one bytes
    literal.

    Args:
        line: One line of the input table, with or without a trailing
            newline.
        regex: Compiled pattern whose matches are rewritten by ``matchrepl``.

    Returns:
        The polished line, with the trailing newline preserved if there was
        one. A line without any tab (for example a blank line) is returned
        unchanged, as there is no index column to strip from it.
    """
    _index_cell, tab, remainder = line.partition('\t')
    if not tab:
        return line

    if remainder.endswith('\n'):
        body, eol = remainder[:-1], '\n'
    else:
        body, eol = remainder, ''

    cells = [
        '.' if cell == 'None' else regex.sub(matchrepl, cell)
        for cell in body.split('\t')
    ]
    return '\t'.join(cells) + eol


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit('usage: frameshift_deletions_report_fixer.py INPUT OUTPUT')

    # The third alternative cannot match inside a single tab-free cell; it
    # is kept so the pattern still exposes the three groups matchrepl reads.
    regex = re.compile(r"b'(.+)'|\[([^\]]*)\]|\t(None)\t")
    with open(sys.argv[1]) as i, open(sys.argv[2], 'w') as o:
        for line in i:
            o.write(polish_line(line, regex))