Mercurial > repos > iuc > cat_contigs
diff tabpad.py @ 0:11a61934bfb3 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
author | iuc |
---|---|
date | Tue, 10 Dec 2019 16:05:34 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Pad the lines of tab-separated text files so every row has the same
number of fields."""

import argparse
import re


def padfile(infile, outfile, fieldcnt=None):
    """Copy *infile* to *outfile*, right-padding rows with TAB characters.

    Lines whose first non-blank character is ``#`` are treated as comments.
    In a run of consecutive comment lines that is followed by a data line,
    every comment but the last is copied verbatim; the last one is padded
    like a data line.  Comment lines at the end of the file (with no data
    line after them) are copied verbatim.

    Padded lines have their trailing ``\\r``/``\\n`` characters replaced by a
    single ``\\n``.

    :param infile: path of the file to read.
    :param outfile: path of the file to write (truncated if it exists).
    :param fieldcnt: number of fields every padded line should have; lines
        that already have at least that many fields are left unpadded.
        ``None`` (or ``0``) disables padding.
    """
    tabs = '\t' * fieldcnt if fieldcnt is not None else None

    def pad_line(txtline):
        # Strip the line ending, then append the TABs missing to reach
        # the requested field count (slicing past the end yields '').
        line = txtline.rstrip('\r\n')
        nfields = len(line.split('\t'))
        padding = tabs[nfields:] if tabs else ''
        return '%s%s\n' % (line, padding)

    # Both handles are managed by ``with`` so the output file is closed
    # (and flushed) even if reading or writing raises.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        pending = []  # comment lines seen since the last data line
        for txtline in fh:
            if txtline.lstrip().startswith('#'):
                pending.append(txtline)
                continue
            if pending:
                out.writelines(pending[:-1])
                out.write(pad_line(pending[-1]))
                pending = []
            out.write(pad_line(txtline))
        # Comments after the last data line used to be dropped silently;
        # emit them unchanged so no input is lost.
        out.writelines(pending)


def fieldcount(infile):
    """Return the largest number of TAB-separated fields on any line.

    Every line is counted, comment lines included.  An empty file
    yields ``0``.

    :param infile: path of the file to scan.
    """
    fieldcnt = 0
    with open(infile, 'r') as fh:
        for line in fh:
            fieldcnt = max(fieldcnt, len(line.rstrip('\r\n').split('\t')))
    return fieldcnt


def tsvname(infile):
    """Return the output name for *infile*: a trailing ``.txt`` extension
    is removed, then ``.tsv`` is appended.

    :param infile: input file name or path.
    """
    # The dot is escaped so only a literal ".txt" suffix is removed
    # (an unescaped '.' would also strip e.g. the "ntxt" of "contxt").
    return re.sub(r'\.txt$', '', infile) + '.tsv'


def __main__():
    """Command-line entry point.

    Pads the file given with ``-i`` (written to ``-o`` if given, otherwise
    to the name derived by :func:`tsvname`) and then every positional file
    (each written to its derived ``.tsv`` name).  Each file is padded to its
    own maximum field count.
    """
    parser = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    parser.add_argument(
        '-i', '--input', help='input file')
    parser.add_argument(
        '-o', '--output', help='output file')
    parser.add_argument(
        'files', nargs='*', help='.txt files')
    args = parser.parse_args()

    # Collect (input, output) pairs first, then process them uniformly.
    jobs = []
    if args.input:
        jobs.append((args.input, args.output or tsvname(args.input)))
    jobs.extend((path, tsvname(path)) for path in args.files)

    for src, dest in jobs:
        padfile(src, dest, fieldcnt=fieldcount(src))


if __name__ == "__main__":
    __main__()