Mercurial > repos > jjohnson > contig_annotation_tool
annotate tabpad.py @ 0:ad7507073c3f draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
author | jjohnson |
---|---|
date | Sun, 24 Nov 2019 21:56:00 -0500 |
parents | |
children | 9a01840eac52 |
rev | line source |
---|---|
0
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
1 #!/usr/bin/env python |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
2 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
3 import argparse |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
4 import re |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
5 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
6 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
def padfile(infile, outfile, fieldcnt=None):
    """Copy infile to outfile, padding each line with trailing TABs.

    Each line has its trailing CR/LF characters removed, is extended with
    enough TAB characters to reach the target number of TAB-separated
    fields, and is written out terminated by a single newline.  Lines that
    already have at least that many fields are written without padding.

    Args:
        infile: path of the text file to read.
        outfile: path of the file to write; opened in 'w' mode.
        fieldcnt: target number of fields per line.  When None or 0, the
            field count of the first line of infile is used instead.
    """
    # Both handles are owned by the with statement, so the output file is
    # closed even when reading or writing raises part-way through.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        tabs = '\t' * fieldcnt if fieldcnt is not None else None
        for txtline in fh:
            line = txtline.rstrip('\r\n')
            fields = line.split('\t')
            if not tabs:
                # No usable target width yet: adopt this line's width.
                tabs = '\t' * len(fields)
            # A line with k fields needs (target - k) extra TABs; slicing
            # past the end of tabs yields '' for lines that are wide enough.
            out.write('%s%s\n' % (line, tabs[len(fields):]))
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
18 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
19 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
def fieldcount(infile):
    """Return the largest number of TAB-separated fields on any line.

    Trailing CR/LF characters are ignored when counting.  A file with no
    lines yields 0.
    """
    widest = 0
    with open(infile, 'r') as handle:
        for raw in handle:
            # n TAB separators delimit n + 1 fields.
            ncols = raw.rstrip('\r\n').count('\t') + 1
            if ncols > widest:
                widest = ncols
    return widest
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
26 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
27 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
def tsvname(infile):
    """Return the output name for infile: a trailing '.txt' becomes '.tsv'.

    Names that do not end in '.txt' simply get '.tsv' appended.
    """
    # Raw string: '\.' in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    return re.sub(r'\.txt$', '', infile) + '.tsv'
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
30 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
31 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
def __main__():
    """Command-line entry point: pad the requested files with TABs.

    The file given with -i/--input is written to the -o/--output path, or
    to the name produced by tsvname() when no output is given.  Every
    positional file is written to its tsvname() path.  Each file is padded
    to the maximum field count found in that file.
    """
    parser = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    parser.add_argument('-i', '--input', help='input file')
    parser.add_argument('-o', '--output', help='output file')
    parser.add_argument('files', nargs='*', help='.txt files')
    args = parser.parse_args()

    # Collect (source, destination) pairs: the -i file first, then the
    # positional files in the order given.
    jobs = []
    if args.input:
        jobs.append((args.input, args.output or tsvname(args.input)))
    jobs.extend((path, tsvname(path)) for path in args.files)

    for source, destination in jobs:
        padfile(source, destination, fieldcnt=fieldcount(source))
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
51 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
52 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    __main__()