annotate tools/new_operations/get_flanks.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 #Done by: Guru
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 Get Flanking regions.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 usage: %prog input out_file size direction region
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 -o, --off=N: Offset
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 import sys, re, os
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 import pkg_resources; pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 from bx.cookbook import doc_optparse
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 from galaxy.tools.util.galaxyops import *
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes ``msg`` verbatim to standard error (no newline is appended) and
    then raises ``SystemExit`` with status 1.

    The exit status is deliberately non-zero: a bare ``sys.exit()`` exits
    with status 0, which tells the calling process that the run succeeded
    even though an error message was emitted.
    """
    sys.stderr.write(msg)
    sys.exit(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def _flank_intervals(start, end, strand, direction, region, size, offset):
    """Return the flank(s) of one feature as a list of (start, end) tuples.

    ``start``/``end`` are the feature's integer coordinates, ``strand`` is
    '+' or '-', ``size`` is the flank length and ``offset`` shifts the
    anchor point the flank is measured from.

    ``direction`` selects 'Upstream', 'Downstream' or 'Both'; any other
    value yields an empty list.  ``region`` is compared against 'start' and
    'end' only; every other value takes the fall-through branch (presumably
    the "whole feature" mode -- confirm against the tool wrapper).

    For 'Both' two intervals are returned, in the order in which they are
    written to the output.
    """
    if direction == 'Upstream':
        if strand == '+':
            # Flank lies to the left of the anchor.
            anchor = end if region == 'end' else start
            flank_end = anchor + offset
            return [(flank_end - size, flank_end)]
        # Minus strand: flank lies to the right of the anchor.
        anchor = start if region == 'end' else end
        flank_start = anchor - offset
        return [(flank_start, flank_start + size)]

    if direction == 'Downstream':
        if strand == '-':
            anchor = end if region == 'start' else start
            flank_end = anchor - offset
            return [(flank_end - size, flank_end)]
        anchor = start if region == 'start' else end
        flank_start = anchor + offset
        return [(flank_start, flank_start + size)]

    if direction == 'Both':
        if strand == '-':
            if region in ('start', 'end'):
                # Both flanks share a single anchor point.
                anchor = (end if region == 'start' else start) - offset
                return [(anchor, anchor + size), (anchor - size, anchor)]
            upper = end - offset
            lower = start - offset
            return [(upper, upper + size), (lower - size, lower)]
        if region in ('start', 'end'):
            anchor = (start if region == 'start' else end) + offset
            return [(anchor - size, anchor), (anchor, anchor + size)]
        lower = start + offset
        upper = end + offset
        return [(lower - size, lower), (upper, upper + size)]

    # Unrecognised direction: nothing to emit for this feature.
    return []


def main():
    """Write the flanking regions of every feature in a tabular file.

    Positional arguments (see the module usage string): input file, output
    file, flank size, direction and region.  The ``--cols`` option supplies
    the chrom/start/end/strand columns and ``--off`` the offset.

    Each non-blank, non-comment input line is copied to the output with its
    start and end columns replaced by the flank coordinates; 'Both' emits
    two lines per feature.  Lines that cannot be processed (missing or
    non-numeric coordinates, a strand other than '+'/'-', or a flank with a
    non-positive coordinate) are counted and reported on stdout.  The script
    aborts through ``stop_err`` on invalid arguments or when no line could
    be processed.

    Changes relative to the previous implementation:
    * a start/end value of 0 is a number and is no longer rejected by the
      "is it numeric" check (``assert int(x)`` failed on 0);
    * validation no longer relies on ``assert``, which is removed when
      Python runs with ``-O``;
    * only parsing/validation errors mark a line as skipped -- I/O errors
      now propagate instead of being silently counted as bad lines;
    * both files are closed even when an error occurs.
    """
    try:
        if int(sys.argv[3]) < 0:
            raise ValueError
    except (IndexError, ValueError):
        stop_err("Length of flanking region(s) must be a non-negative integer.")

    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)
    try:
        _chr_col, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols)
        inp_file, out_file, size, direction, region = args
    except Exception:
        # parse_cols_arg is an external helper; any failure here means the
        # column metadata or argument list is unusable.
        stop_err("Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item.")
    try:
        offset = int(options.off)
        size = int(size)
    except (TypeError, ValueError):
        stop_err("Invalid offset or length entered. Try again by entering valid integer values.")

    # If no strand column is defined, every feature is treated as '+'.
    # When a strand column exists this default is overwritten for each line.
    strand = "+"

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None
    data_lines = 0

    fo = open(out_file, 'w')
    try:
        fi = open(inp_file)
        try:
            for i, line in enumerate(fi):
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                data_lines += 1
                try:
                    elems = line.split('\t')
                    # If the start and/or end columns are not numbers,
                    # int() raises ValueError and the line is skipped.
                    start = int(elems[start_col_1])
                    end = int(elems[end_col_1])
                    if strand_col_1 != -1:
                        strand = elems[strand_col_1]
                    # If the strand value is not + or -, skip that line.
                    if strand not in ('+', '-'):
                        raise ValueError("invalid strand")
                    flanks = _flank_intervals(start, end, strand, direction,
                                              region, size, offset)
                    # Each flank is validated immediately before it is
                    # written, so in 'Both' mode the first flank may already
                    # be in the output when the second one is rejected.
                    for flank_start, flank_end in flanks:
                        if flank_start <= 0 or flank_end <= 0:
                            raise ValueError("flank outside valid coordinates")
                        elems[start_col_1] = str(flank_start)
                        elems[end_col_1] = str(flank_end)
                        fo.write("%s\n" % '\t'.join(elems))
                except (ValueError, IndexError):
                    skipped_lines += 1
                    if invalid_line is None:
                        first_invalid_line = i + 1
                        invalid_line = line
        finally:
            fi.close()
    finally:
        fo.close()

    if skipped_lines == data_lines:
        stop_err("Data issue: click the pencil icon in the history item to correct the metadata attributes.")
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%dL "%s"' % (skipped_lines, first_invalid_line, invalid_line))
    print('Location: %s, Region: %s, Flank-length: %d, Offset: %d ' % (direction, region, size, offset))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()