annotate tools/regVariation/getIndels.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Estimate INDELs for pair-wise alignments.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 usage: %prog maf_input out_file1 out_file2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 from __future__ import division
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 import pkg_resources
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 pkg_resources.require("numpy")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 import psyco_full
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 import sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 from bx.cookbook import doc_optparse
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 from galaxy.tools.exception_handling import *
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 import bx.align.maf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 def main():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 # Parsing Command Line here
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 options, args = doc_optparse.parse( __doc__ )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 inp_file, out_file1 = args
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 print >> sys.stderr, "Tool initialization error."
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 sys.exit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 fin = open(inp_file,'r')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 print >> sys.stderr, "Unable to open input file"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 sys.exit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 fout1 = open(out_file1,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 #fout2 = open(out_file2,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 print >> sys.stderr, "Unable to open output file"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 sys.exit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 print >>sys.stderr, "Your MAF file appears to be malformed."
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 sys.exit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 maf_count = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 print >>fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 for block_ind, block in enumerate(maf_reader):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 if len(block.components) < 2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 seq1 = block.components[0].text
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 src1 = block.components[0].src
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 start1 = block.components[0].start
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 if len(block.components) == 2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 seq2 = block.components[1].text
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 src2 = block.components[1].src
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 start2 = block.components[1].start
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 #for pos in range(len(seq1)):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 nt_pos1 = start1-1 #position of the nucleotide (without counting gaps)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 nt_pos2 = start2-1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 pos = 0 #character column position
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 gaplen1 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 gaplen2 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 prev_pos_gap1 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 prev_pos_gap2 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 while pos < len(seq1):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 if prev_pos_gap1 == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 gaplen1 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 if prev_pos_gap2 == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 gaplen2 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 if seq1[pos] == '-':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 if seq2[pos] != '-':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 nt_pos2 += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 gaplen1 += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 prev_pos_gap1 = 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 #write 2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 if prev_pos_gap2 == 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 prev_pos_gap2 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1,nt_pos1+1,nt_pos2-1,nt_pos2-1+gaplen2,gaplen2)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 if pos == len(seq1)-1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1,nt_pos1+1,nt_pos2+1-gaplen1,nt_pos2+1,gaplen1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 prev_pos_gap1 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 prev_pos_gap2 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 if prev_pos_gap1 == 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 prev_pos_gap1 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,gaplen1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 elif prev_pos_gap2 == 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 prev_pos_gap2 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 print >>fout1,"%d\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos2-1,nt_pos2,gaplen2)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 nt_pos1 += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 if seq2[pos] != '-':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 nt_pos2 += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 #write both
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 if prev_pos_gap1 == 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 prev_pos_gap1 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2-gaplen1,nt_pos2,gaplen1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 elif prev_pos_gap2 == 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 prev_pos_gap2 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1-gaplen2,nt_pos1,nt_pos2-1,nt_pos2,gaplen2)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 gaplen2 += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 prev_pos_gap2 = 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 #write 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 if prev_pos_gap1 == 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 prev_pos_gap1 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src1,nt_pos1-1,nt_pos1,nt_pos2,nt_pos2+gaplen1,gaplen1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 if pos == len(seq1)-1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 print >>fout1,"%d\t%s\t%s\t%s\t%s\t%s\t%s" %(block_ind+1,src2,nt_pos1+1-gaplen2,nt_pos1+1,nt_pos2,nt_pos2+1,gaplen2)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 pos += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 if __name__ == "__main__":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 main()