Mercurial > repos > iuc > smgu_frameshift_deletions_checks
diff frameshift_deletions_report_fixer.py @ 1:029d90b0c4f6 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/smallgenomeutilities commit e702dcdbc7c3235ef3c4ee8998c7247d1af49465
author | iuc |
---|---|
date | Fri, 14 Jul 2023 22:07:22 +0000 |
parents | |
children |
line wrap: on
line diff
"""Polish the output of the frameshift_deletions_check command.

- Drops the first index column, which is rather pointless to include
- Turns ref bases printed as literal bytes strings into plain output
- Removes [] around pos lists and spaces after comma separating list elements
- Turns None and empty list values into . as a cell placeholder

Usage: frameshift_deletions_report_fixer.py INPUT_TSV OUTPUT_TSV
"""

import re
import sys

# One alternative per kind of cell content that needs rewriting:
#   group 1 - content of a bytes literal such as b'ACGT'
#   group 2 - content of a bracketed list such as [1, 2, 3] (may be empty)
#   group 3 - a cell that consists of exactly the text None
# The bytes alternative stops at the first closing quote so that several
# literals on one line are handled separately instead of being merged.
# The None alternative uses lookarounds for the cell boundaries (line start,
# tab, newline or end of string) instead of consuming the tabs, so adjacent
# None cells and None in the first or last column are all replaced.
CELL_REGEX = re.compile(
    r"b'([^']+)'"
    r"|\[([^\]]*)\]"
    r"|(?<![^\t])(None)(?![^\t\n])"
)


def matchrepl(matchobj):
    """Return the replacement text for one match of the cell pattern.

    Args:
        matchobj: A match whose groups 1, 2 and 3 hold, respectively, the
            content of a bytes literal, the content of a bracketed list,
            and a ``None`` cell. Exactly one group is expected to be set.

    Returns:
        The bytes literal content as is, the list content with ``', '``
        collapsed to ``','`` (``'.'`` for an empty list), or the matched
        text with ``None`` replaced by ``'.'``.

    Raises:
        ValueError: If none of the three groups participated in the match.
    """
    bytes_string_content = matchobj.group(1)
    if bytes_string_content is not None:
        return bytes_string_content
    list_content = matchobj.group(2)
    if list_content is not None:
        if list_content == '':
            return '.'
        return list_content.replace(', ', ',')
    none_cell = matchobj.group(3)
    if none_cell is not None:
        # Substitute inside the full match so that any delimiters the
        # pattern consumed around the cell (e.g. surrounding tabs) survive.
        return matchobj.group(0).replace(none_cell, '.', 1)

    raise ValueError('Error in regex parsing code')


def fix_line(line):
    """Return *line* without its first column and with all cells polished.

    A line that contains no tab has no index column to drop and is returned
    unchanged rather than raising.
    """
    _, tab, remainder = line.partition('\t')
    if not tab:
        return line
    return CELL_REGEX.sub(matchrepl, remainder)


def main(argv):
    """Read the report at ``argv[1]`` and write the fixed one to ``argv[2]``."""
    if len(argv) != 3:
        sys.exit('Usage: {} INPUT_TSV OUTPUT_TSV'.format(argv[0]))
    with open(argv[1]) as i, open(argv[2], 'w') as o:
        for line in i:
            o.write(fix_line(line))


if __name__ == '__main__':
    main(sys.argv)