annotate tabpad.py @ 2:9a01840eac52 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 6b978e61a7f66160f2577d907f52dd641f103986-dirty
author jjohnson
date Mon, 25 Nov 2019 15:09:24 -0500
parents ad7507073c3f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
1 #!/usr/bin/env python
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
2
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
3 import argparse
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
4 import re
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
5
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
6
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
def padfile(infile, outfile, fieldcnt=None):
    """Copy infile to outfile, padding lines with trailing TAB characters.

    Lines whose first non-whitespace character is '#' are treated as
    comments.  Within a run of comment lines that is followed by a data
    line, every comment except the last is copied verbatim; the last one
    is padded like a data line.  Comment lines at the end of the file
    (with no data line after them) are copied verbatim.

    A padded line has its line ending replaced by a single newline and
    gets enough trailing TABs appended to reach ``fieldcnt`` TAB-separated
    fields.  Lines that already have ``fieldcnt`` or more fields are not
    truncated.

    Args:
        infile: path of the text file to read.
        outfile: path of the file to write (created or overwritten).
        fieldcnt: target number of fields per line.  When None (or 0) no
            TABs are added; line endings are still normalized.
    """
    tabs = '\t' * fieldcnt if fieldcnt is not None else None

    def pad_line(txtline, out):
        """Write txtline to out, padded to the target field count."""
        line = txtline.rstrip('\r\n')
        nfields = len(line.split('\t'))
        # Slicing past the end of ``tabs`` yields '', so a line that
        # already has enough fields is written unchanged.
        padding = tabs[nfields:] if tabs else ''
        out.write('%s%s\n' % (line, padding))

    # Both files are managed by the with-statement so the output handle
    # is closed even when reading or writing raises.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        pending = []  # comment lines seen since the last data line
        for txtline in fh:
            if txtline.lstrip().startswith('#'):
                pending.append(txtline)
                continue
            if pending:
                out.writelines(pending[:-1])
                pad_line(pending[-1], out)
                pending = []
            pad_line(txtline, out)
        # Comments after the last data line (or a comment-only file) would
        # otherwise be lost; emit them unchanged.
        out.writelines(pending)
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
29
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
30
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
def fieldcount(infile):
    """Return the largest number of TAB-separated fields on any line.

    Every line of ``infile`` is considered, including lines starting
    with '#'.  Trailing carriage-return and newline characters are
    ignored.  An empty file yields 0.

    Args:
        infile: path of the text file to scan.

    Returns:
        The maximum field count found, as an int.
    """
    fieldcnt = 0
    with open(infile, 'r') as fh:
        for line in fh:
            # N TAB characters separate N + 1 fields; counting them avoids
            # building a list of fields just to take its length.
            fieldcnt = max(fieldcnt, line.rstrip('\r\n').count('\t') + 1)
    return fieldcnt
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
37
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
38
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
def tsvname(infile):
    """Return the output name for infile: a trailing '.txt' becomes '.tsv'.

    When ``infile`` does not end in '.txt', '.tsv' is simply appended.
    """
    # Raw string: '\.' in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    return re.sub(r'\.txt$', '', infile) + '.tsv'
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
41
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
42
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
def __main__():
    """Command-line entry point.

    Pads the file given with -i/--input (written to -o/--output, or to
    the tsvname() of the input when no output is given), then pads each
    positional file into its tsvname() counterpart.  Each file is padded
    to its own maximum field count.
    """
    parser = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    parser.add_argument('-i', '--input', help='input file')
    parser.add_argument('-o', '--output', help='output file')
    parser.add_argument('files', nargs='*', help='.txt files')
    args = parser.parse_args()

    # Collect (source, destination) pairs; the -i file goes first.
    jobs = []
    if args.input:
        jobs.append((args.input, args.output or tsvname(args.input)))
    jobs.extend((path, tsvname(path)) for path in args.files)

    for src, dst in jobs:
        padfile(src, dst, fieldcnt=fieldcount(src))
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
62
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
63
ad7507073c3f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    __main__()