diff find_intervals.py @ 17:a3af29edcce2

Uploaded Miller Lab Devshed version a51c894f5bed
author miller-lab
date Fri, 28 Sep 2012 11:57:18 -0400
parents 2c498d40ecde
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/find_intervals.py	Fri Sep 28 11:57:18 2012 -0400
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+
+import errno
+import os
+import subprocess
+import sys
+
+################################################################################
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+def run_program(prog, args, stdout_file=None):
+    #print "args:", ' '.join(args)
+    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdoutdata, stderrdata) = p.communicate()
+    rc = p.returncode
+
+    if stdout_file is not None:
+        with open(stdout_file, 'w') as ofh:
+            print >> ofh, stdoutdata.rstrip('\r\n')
+
+    if rc != 0:
+        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
+        print >> sys.stderr, stderrdata
+        sys.exit(1)
+
+################################################################################
+
+if len(sys.argv) != 11:
+    print "usage"
+    sys.exit(1)
+
+input, dbkey, output, output_files_path, chrom_col, pos_col, score_col, shuffles, cutoff, report_snps = sys.argv[1:11]
+
+prog = 'sweep'
+
+args = [ prog ]
+args.append(input)
+args.append(chrom_col)
+args.append(pos_col)
+args.append(score_col)
+args.append(cutoff)
+args.append(shuffles)
+args.append(report_snps)
+
+run_program(None, args, stdout_file=output)
+
+if report_snps == "0":
+    sys.exit(0)
+
+################################################################################
+
+mkdir_p(output_files_path)
+
+bedgraph_filename = 'bedgraph.txt'
+links_filename = os.path.join(output_files_path, 'links.txt')
+
+data = []
+links_data = []
+    
+with open(output) as fh:
+    chrom = None
+    for line in fh:
+        line = line.rstrip('\r\n')
+        if not line:
+            continue
+        if line[0] != ' ':
+            # chrom line, add a link
+            chrom, interval_begin, interval_end, interval_value = line.split('\t')
+            links_data.append((chrom, int(interval_begin), int(interval_end)))
+        else:
+            # data line, add a bedgraph line
+            begin, value = line.split()
+            data.append((chrom, int(begin), value))
+
+with open(bedgraph_filename, 'w') as ofh:
+    print >> ofh, 'track type=bedGraph'
+    for chrom, begin, value in sorted(data):
+        print >> ofh, chrom, begin, begin+1, value
+
+with open(links_filename, 'w') as ofh:
+    for chrom, begin, end in sorted(links_data):
+        print >> ofh, chrom, begin, end
+
+################################################################################
+
+chrom_sizes_filename = '{0}.chrom.sizes'.format(dbkey)
+
+prog = 'fetchChromSizes'
+
+args = [ prog ]
+args.append(dbkey)
+
+run_program(None, args, stdout_file=chrom_sizes_filename)
+
+################################################################################
+
+prog = 'bedGraphToBigWig'
+
+args = [ prog ]
+args.append(bedgraph_filename)
+args.append(chrom_sizes_filename)
+args.append(output)
+
+run_program(None, args)
+
+################################################################################
+
+sys.exit(0)
+