comparison tabpad.py @ 0:13192095fd5a draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
author iuc
date Tue, 10 Dec 2019 16:04:22 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:13192095fd5a
1 #!/usr/bin/env python
2
3 import argparse
4 import re
5
6
def padfile(infile, outfile, fieldcnt=None):
    """Copy *infile* to *outfile*, padding rows with trailing TABs.

    Each data line is stripped of its line ending, extended with trailing
    tab characters until it holds ``fieldcnt`` tab-separated fields, and
    written terminated by a single newline. Lines that already have at
    least ``fieldcnt`` fields are written without extra tabs.

    Lines whose first non-whitespace character is ``#`` are treated as
    comments. When a run of comment lines is followed by a data line, the
    last comment of the run is padded like a data line and the earlier
    ones are copied unchanged. A run of comment lines at the very end of
    the file (no data line after it) is copied unchanged.

    Args:
        infile: Path of the text file to read.
        outfile: Path of the file to write; it is overwritten.
        fieldcnt: Target number of fields per line. With ``None`` (or a
            value below 1) no padding is added.
    """
    # A missing or zero field count means "never pad": the repeat count
    # computed below is then never positive.
    width = fieldcnt if fieldcnt else 0

    # Both handles are managed by ``with`` so they are closed even when
    # reading or writing fails part-way through.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:

        def pad_line(txtline):
            """Write *txtline* padded with tabs up to ``width`` fields."""
            line = txtline.rstrip('\r\n')
            nfields = len(line.split('\t'))
            # A negative repeat count yields '', so over-long lines are
            # written as they are.
            out.write('%s%s\n' % (line, '\t' * (width - nfields)))

        commentlines = []
        for txtline in fh:
            if txtline.lstrip().startswith('#'):
                # Hold comments back until we know what follows them.
                commentlines.append(txtline)
                continue
            if commentlines:
                out.writelines(commentlines[:-1])
                pad_line(commentlines[-1])
                commentlines = []
            pad_line(txtline)

        # Comments still buffered here had no data line after them; emit
        # them verbatim instead of silently dropping them.
        out.writelines(commentlines)
31
32
def fieldcount(infile):
    """Return the largest number of tab-separated fields on any line.

    Line endings are removed before splitting. An empty file gives 0.
    """
    widest = 0
    with open(infile, 'r') as handle:
        for row in handle:
            columns = row.rstrip('\r\n').split('\t')
            if len(columns) > widest:
                widest = len(columns)
    return widest
39
40
def tsvname(infile):
    """Return the output name for *infile*: a trailing ``.txt`` becomes ``.tsv``.

    Names that do not end in ``.txt`` simply get ``.tsv`` appended.
    """
    # The dot is escaped so only a literal ".txt" extension is removed;
    # unescaped it matched any character followed by "txt".
    return re.sub(r'\.txt$', '', infile) + '.tsv'
43
44
def __main__():
    """Command-line entry point: pad the given files with TABs.

    The file given with ``-i/--input`` is written to ``-o/--output`` when
    that is supplied, otherwise to the name derived by ``tsvname``. Each
    positional file is written to its ``tsvname``-derived name. Every file
    is padded to its own maximum field count.
    """
    cli = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    cli.add_argument('-i', '--input', help='input file')
    cli.add_argument('-o', '--output', help='output file')
    cli.add_argument('files', nargs='*', help='.txt files')
    opts = cli.parse_args()

    # Collect (source, destination) pairs; the -i file is handled first,
    # then the positional files in the order given.
    jobs = []
    if opts.input:
        jobs.append((opts.input, opts.output or tsvname(opts.input)))
    jobs.extend((path, tsvname(path)) for path in opts.files)

    for source, destination in jobs:
        padfile(source, destination, fieldcnt=fieldcount(source))


if __name__ == "__main__":
    __main__()