annotate find_intervals.py @ 17:a3af29edcce2

Uploaded Miller Lab Devshed version a51c894f5bed
author miller-lab
date Fri, 28 Sep 2012 11:57:18 -0400
parents 2c498d40ecde
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
3 import errno
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4 import os
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
5 import subprocess
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
6 import sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
7
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
9
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def mkdir_p(path):
    """Create directory ``path`` and any missing parents, like ``mkdir -p``.

    An already-existing directory is not an error.  Every other failure,
    including ``path`` existing as something other than a directory, is
    re-raised as the original ``OSError``.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # EEXIST is also reported when ``path`` is, say, a regular file;
        # only swallow the error when the directory really is there.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
16
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def run_program(prog, args, stdout_file=None):
    """Run an external command to completion; exit the script if it fails.

    prog        -- passed to ``subprocess.Popen`` as ``executable``.
    args        -- argument vector for the command, program name first.
    stdout_file -- optional path.  When given, the command's captured
                   standard output is written there with any trailing
                   CR/LF characters replaced by a single newline.

    Standard output and standard error are both captured in memory.  The
    output file is written before the return code is examined, so it is
    produced even when the command fails.  On a non-zero return code a
    "FAILED" line and the captured standard error are written to
    ``sys.stderr`` and the script terminates via ``sys.exit(1)``.
    """
    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdoutdata, stderrdata) = p.communicate()
    rc = p.returncode

    if stdout_file is not None:
        # communicate() hands back byte strings, so write them in binary
        # mode; this behaves the same on Python 2 and Python 3.
        with open(stdout_file, 'wb') as ofh:
            ofh.write(stdoutdata.rstrip(b'\r\n') + b'\n')

    if rc != 0:
        # On Python 3 the captured stderr is bytes and must be decoded
        # before it can be written to the text-mode sys.stderr.
        if not isinstance(stderrdata, str):
            stderrdata = stderrdata.decode('utf-8', 'replace')
        sys.stderr.write("FAILED: rc={0}: {1}\n".format(rc, ' '.join(args)))
        sys.stderr.write(stderrdata + '\n')
        sys.exit(1)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
31
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
32 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
33
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Exactly ten positional arguments are required, in the order unpacked below.
if len(sys.argv) != 11:
    sys.stderr.write(
        "usage: {0} input dbkey output output_files_path chrom_col pos_col "
        "score_col shuffles cutoff report_snps\n".format(sys.argv[0]))
    sys.exit(1)

(input_filename, dbkey, output, output_files_path, chrom_col, pos_col,
 score_col, shuffles, cutoff, report_snps) = sys.argv[1:11]

prog = 'sweep'

# NOTE: sweep receives cutoff before shuffles, the reverse of the order in
# which this script takes them on its own command line.
args = [prog, input_filename, chrom_col, pos_col, score_col, cutoff, shuffles,
        report_snps]

# The command's standard output is captured into the `output` file.
run_program(None, args, stdout_file=output)

# When report_snps is "0" the captured sweep output is left in place and the
# remaining steps of the script are skipped.
if report_snps == "0":
    sys.exit(0)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
55
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
56 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
57
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Split the sweep output into two files: a bedGraph file in the current
# directory and a links file under output_files_path.
mkdir_p(output_files_path)

bedgraph_filename = 'bedgraph.txt'
links_filename = os.path.join(output_files_path, 'links.txt')

data = []
links_data = []

with open(output) as fh:
    chrom = None
    for line in fh:
        line = line.rstrip('\r\n')
        if not line:
            continue
        if line[0] != ' ':
            # Interval line: exactly four tab-separated fields.  The value
            # field is unpacked only to enforce the field count.
            chrom, interval_begin, interval_end, interval_value = line.split('\t')
            links_data.append((chrom, int(interval_begin), int(interval_end)))
        else:
            # Data line (starts with a space): position and value, attached
            # to the chromosome of the most recent interval line.
            if chrom is None:
                # Without this check the literal text "None" would be
                # written as the chromosome name.
                sys.stderr.write(
                    "ERROR: data line found before any interval line in "
                    "{0}\n".format(output))
                sys.exit(1)
            begin, value = line.split()
            data.append((chrom, int(begin), value))

with open(bedgraph_filename, 'w') as ofh:
    ofh.write('track type=bedGraph\n')
    # Each position becomes a one-base interval [begin, begin + 1).
    for chrom, begin, value in sorted(data):
        ofh.write('{0} {1} {2} {3}\n'.format(chrom, begin, begin + 1, value))

with open(links_filename, 'w') as ofh:
    for chrom, begin, end in sorted(links_data):
        ofh.write('{0} {1} {2}\n'.format(chrom, begin, end))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
89
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
90 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
91
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Run fetchChromSizes for this dbkey and capture its standard output in
# "<dbkey>.chrom.sizes" (a relative path, so it lands in the current directory).
chrom_sizes_filename = '{0}.chrom.sizes'.format(dbkey)

prog = 'fetchChromSizes'
args = [prog, dbkey]

run_program(None, args, stdout_file=chrom_sizes_filename)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
100
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
101 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
102
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
prog = 'bedGraphToBigWig'

# Arguments: the bedGraph file, the chrom sizes file, then `output`.
# NOTE(review): `output` is the same path that earlier received sweep's text
# output; presumably bedGraphToBigWig replaces it with the bigWig result --
# confirm against the tool's usage.
args = [prog, bedgraph_filename, chrom_sizes_filename, output]

run_program(None, args)

################################################################################

sys.exit(0)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
115