diff test-data/util/shrink_simple_tab.py @ 4:cdd90678004a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
author iuc
date Fri, 14 Dec 2018 12:47:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/shrink_simple_tab.py	Fri Dec 14 12:47:19 2018 -0500
@@ -0,0 +1,61 @@
+from __future__ import print_function
+
+import argparse
+from functools import partial
+
+
+def keep_line(line, pos_cols, region):
+    """Return True if a tab-separated record has an endpoint inside region.
+
+    Args:
+        line: one raw record as bytes; trailing whitespace is stripped
+            before the record is split on tab characters.
+        pos_cols: three zero-based column indices giving, in order, the
+            chromosome, start and stop columns of the record.
+        region: sequence of (chromosome as bytes, lower bound, upper bound).
+
+    Returns:
+        True when the record's chromosome equals region[0] and its start
+        or its stop position lies strictly between the two bounds;
+        False otherwise. Note that a record spanning the whole region
+        without either endpoint inside it is therefore not kept.
+
+    Raises:
+        IndexError: if the record has fewer columns than pos_cols needs.
+        ValueError: if an inspected position column is not an integer.
+    """
+    fields = line.rstrip().split(b'\t')
+    if fields[pos_cols[0]] != region[0]:
+        # different chromosome: position columns are not even inspected
+        return False
+    lower, upper = region[1], region[2]
+    if lower < int(fields[pos_cols[1]]) < upper:
+        return True
+    # the stop column is only parsed when the start is outside the region
+    return lower < int(fields[pos_cols[2]]) < upper
+
+
+def main(infile, ofile, num_header_lines, line_filter=None):
+    """Copy the header lines and the selected records of infile to ofile.
+
+    Args:
+        infile: path of the file to read; it is opened in binary mode.
+        ofile: path of the file to write; it is opened in binary mode
+            and overwritten.
+        num_header_lines: number of leading lines that are copied
+            unconditionally. Copying stops early if the input has fewer
+            lines than this.
+        line_filter: optional callable taking one raw line (bytes) and
+            returning a truthy value for lines to keep. Defaults to the
+            module-level keep_line.
+    """
+    if line_filter is None:
+        # Resolved at call time: when this file runs as a script, the
+        # module-level name keep_line is rebound to a partial carrying the
+        # columns and region before main() is called.
+        line_filter = keep_line
+    print(infile, '->', ofile)
+    with open(infile, 'rb') as i, open(ofile, 'wb') as o:
+        # copy header lines
+        for _ in range(num_header_lines):
+            header = next(i, None)
+            if header is None:
+                # input ended before all expected header lines were read
+                break
+            o.write(header)
+        for line in i:
+            if line_filter(line):
+                o.write(line)
+
+
+if __name__ == '__main__':
+    # Command-line entry point: parse the options, turn the region string
+    # into [chromosome (bytes), lower bound, upper bound] and run main().
+    p = argparse.ArgumentParser()
+    p.add_argument('infile')
+    p.add_argument(
+        '-r', '--region',
+        required=True,
+        help='the region of the input file to rewrite'
+    )
+    p.add_argument(
+        '-o', '--ofile',
+        required=True,
+        help="the name of the output file"
+    )
+    p.add_argument(
+        '-c', '--cols',
+        nargs=3, type=int, required=True,
+        help="the columns of the input file specifying chrom, start and stop, "
+             "respectively"
+    )
+    p.add_argument(
+        '-n', '--num-header-lines',
+        type=int, default=0,
+        help='the number of header lines present in the input; These will '
+             'always be copied over to the new file.'
+    )
+    args = vars(p.parse_args())
+
+    # Split on the last colon only, so that a chromosome/contig name which
+    # itself contains colons stays intact.
+    chrom, sep, reg = args['region'].rpartition(':')
+    bounds = reg.split('-')
+    if not sep or len(bounds) < 2:
+        p.error('--region must have the form CHROM:START-STOP')
+    try:
+        bounds = [int(x) for x in bounds]
+    except ValueError:
+        p.error('--region START and STOP must be integers')
+    # The input is processed as bytes, so the chromosome name is encoded
+    # for comparison against the raw fields.
+    region = [chrom.encode()] + bounds
+    # Rebind the module-level name: main() calls keep_line with the line
+    # only and relies on columns and region being pre-bound here.
+    keep_line = partial(keep_line, pos_cols=args['cols'], region=region)
+
+    main(args['infile'], args['ofile'], args['num_header_lines'])