diff tabpad.py @ 0:13192095fd5a draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
author iuc
date Tue, 10 Dec 2019 16:04:22 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tabpad.py	Tue Dec 10 16:04:22 2019 -0500
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+import argparse
+import re
+
+
+def padfile(infile, outfile, fieldcnt=None):
+    """Copy ``infile`` to ``outfile``, right-padding lines with TAB characters.
+
+    Each non-comment line is written with enough trailing TABs appended to
+    give it ``fieldcnt`` tab-separated fields; lines that already have that
+    many fields (or more) are written without extra TABs.  Padded lines
+    always end with a single newline, whatever their original line ending.
+
+    A line whose first non-blank character is ``#`` is a comment.  In a run
+    of consecutive comment lines that is followed by a non-comment line,
+    only the last comment line is padded; the earlier ones are copied
+    verbatim.  Comment lines at the end of the file, with no non-comment
+    line after them, are copied verbatim as well.
+
+    Args:
+        infile: path of the text file to read.
+        outfile: path of the file to write; it is opened in ``'w'`` mode.
+        fieldcnt: target number of fields per line.  With ``None`` (or 0)
+            no TABs are added.
+    """
+    tabs = '\t' * fieldcnt if fieldcnt is not None else None
+
+    def pad_line(txtline, out):
+        # Strip the line ending, then append the TABs that are still missing.
+        line = txtline.rstrip('\r\n')
+        nfields = line.count('\t') + 1
+        # Slicing past the end yields '', so over-long lines get no padding.
+        padding = tabs[nfields:] if tabs else ''
+        out.write('%s%s\n' % (line, padding))
+
+    # Both handles are managed by ``with`` so they are closed on any error.
+    with open(infile, 'r') as fh, open(outfile, 'w') as out:
+        commentlines = []
+        for txtline in fh:
+            if txtline.lstrip().startswith('#'):
+                # Defer comments until we know whether a data line follows.
+                commentlines.append(txtline)
+                continue
+            if commentlines:
+                out.writelines(commentlines[:-1])
+                pad_line(commentlines[-1], out)
+                commentlines = []
+            pad_line(txtline, out)
+        # Comments still buffered at EOF have no data line after them;
+        # write them unchanged instead of dropping them.
+        out.writelines(commentlines)
+
+
+def fieldcount(infile):
+    """Return the largest number of tab-separated fields on any line.
+
+    Every line of ``infile`` is considered (comment lines included), with
+    trailing CR/LF characters removed before counting.  An empty file
+    yields 0.
+    """
+    widest = 0
+    with open(infile, 'r') as handle:
+        for row in handle:
+            # N tab characters separate N + 1 fields.
+            width = row.rstrip('\r\n').count('\t') + 1
+            if width > widest:
+                widest = width
+    return widest
+
+
+def tsvname(infile):
+    """Return the output name for ``infile``: a trailing ``.txt`` becomes ``.tsv``.
+
+    Names that do not end in ``.txt`` simply get ``.tsv`` appended.
+    """
+    # The dot is escaped so that only a literal ".txt" suffix is removed;
+    # an unescaped '.' would also strip e.g. the "atxt" of "datatxt".
+    return re.sub(r'\.txt$', '', infile) + '.tsv'
+
+
+def __main__():
+    """Command-line entry point: pad each requested file with TABs.
+
+    The file given with ``-i/--input`` is written to ``-o/--output`` when
+    that option is set, otherwise to the name derived by ``tsvname``.  Each
+    positional file is written to its ``tsvname`` counterpart.  Every file
+    is padded to its own maximum field count.
+    """
+    cli = argparse.ArgumentParser(
+        description='Pad a file with TABS for equal field size across lines')
+    cli.add_argument('-i', '--input', help='input file')
+    cli.add_argument('-o', '--output', help='output file')
+    cli.add_argument('files', nargs='*', help='.txt files')
+    opts = cli.parse_args()
+
+    # Collect (source, destination) pairs; the --input file goes first.
+    jobs = []
+    if opts.input:
+        jobs.append((opts.input, opts.output or tsvname(opts.input)))
+    jobs.extend((path, tsvname(path)) for path in opts.files)
+
+    for source, target in jobs:
+        padfile(source, target, fieldcnt=fieldcount(source))
+
+
+# Run the command-line interface only when this file is executed as a
+# script, not when it is imported as a module.
+if __name__ == "__main__":
+    __main__()