comparison tools/new_operations/get_flanks.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 #!/usr/bin/env python
2 #Done by: Guru
3
4 """
5 Get Flanking regions.
6
7 usage: %prog input out_file size direction region
8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
9 -o, --off=N: Offset
10 """
11
12 import sys, re, os
13 from galaxy import eggs
14 import pkg_resources; pkg_resources.require( "bx-python" )
15 from bx.cookbook import doc_optparse
16 from galaxy.tools.util.galaxyops import *
17
18 def stop_err( msg ):
19 sys.stderr.write( msg )
20 sys.exit()
21
22 def main():
23 try:
24 if int( sys.argv[3] ) < 0:
25 raise Exception
26 except:
27 stop_err( "Length of flanking region(s) must be a non-negative integer." )
28
29 # Parsing Command Line here
30 options, args = doc_optparse.parse( __doc__ )
31 try:
32 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
33 inp_file, out_file, size, direction, region = args
34 if strand_col_1 <= 0:
35 strand = "+" #if strand is not defined, default it to +
36 except:
37 stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." )
38 try:
39 offset = int(options.off)
40 size = int(size)
41 except:
42 stop_err( "Invalid offset or length entered. Try again by entering valid integer values." )
43
44 fo = open(out_file,'w')
45
46 skipped_lines = 0
47 first_invalid_line = 0
48 invalid_line = None
49 elems = []
50 j=0
51 for i, line in enumerate( file( inp_file ) ):
52 line = line.strip()
53 if line and (not line.startswith( '#' )) and line != '':
54 j+=1
55 try:
56 elems = line.split('\t')
57 #if the start and/or end columns are not numbers, skip that line.
58 assert int(elems[start_col_1])
59 assert int(elems[end_col_1])
60 if strand_col_1 != -1:
61 strand = elems[strand_col_1]
62 #if the stand value is not + or -, skip that line.
63 assert strand in ['+', '-']
64 if direction == 'Upstream':
65 if strand == '+':
66 if region == 'end':
67 elems[end_col_1] = str(int(elems[end_col_1]) + offset)
68 elems[start_col_1] = str( int(elems[end_col_1]) - size )
69 else:
70 elems[end_col_1] = str(int(elems[start_col_1]) + offset)
71 elems[start_col_1] = str( int(elems[end_col_1]) - size )
72 elif strand == '-':
73 if region == 'end':
74 elems[start_col_1] = str(int(elems[start_col_1]) - offset)
75 elems[end_col_1] = str(int(elems[start_col_1]) + size)
76 else:
77 elems[start_col_1] = str(int(elems[end_col_1]) - offset)
78 elems[end_col_1] = str(int(elems[start_col_1]) + size)
79 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
80 fo.write( "%s\n" % '\t'.join( elems ) )
81
82 elif direction == 'Downstream':
83 if strand == '-':
84 if region == 'start':
85 elems[end_col_1] = str(int(elems[end_col_1]) - offset)
86 elems[start_col_1] = str( int(elems[end_col_1]) - size )
87 else:
88 elems[end_col_1] = str(int(elems[start_col_1]) - offset)
89 elems[start_col_1] = str( int(elems[end_col_1]) - size )
90 elif strand == '+':
91 if region == 'start':
92 elems[start_col_1] = str(int(elems[start_col_1]) + offset)
93 elems[end_col_1] = str(int(elems[start_col_1]) + size)
94 else:
95 elems[start_col_1] = str(int(elems[end_col_1]) + offset)
96 elems[end_col_1] = str(int(elems[start_col_1]) + size)
97 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
98 fo.write( "%s\n" % '\t'.join( elems ) )
99
100 elif direction == 'Both':
101 if strand == '-':
102 if region == 'start':
103 start = str(int(elems[end_col_1]) - offset)
104 end1 = str(int(start) + size)
105 end2 = str(int(start) - size)
106 elems[start_col_1]=start
107 elems[end_col_1]=end1
108 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
109 fo.write( "%s\n" % '\t'.join( elems ) )
110 elems[start_col_1]=end2
111 elems[end_col_1]=start
112 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
113 fo.write( "%s\n" % '\t'.join( elems ) )
114 elif region == 'end':
115 start = str(int(elems[start_col_1]) - offset)
116 end1 = str(int(start) + size)
117 end2 = str(int(start) - size)
118 elems[start_col_1]=start
119 elems[end_col_1]=end1
120 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
121 fo.write( "%s\n" % '\t'.join( elems ) )
122 elems[start_col_1]=end2
123 elems[end_col_1]=start
124 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
125 fo.write( "%s\n" % '\t'.join( elems ) )
126 else:
127 start1 = str(int(elems[end_col_1]) - offset)
128 end1 = str(int(start1) + size)
129 start2 = str(int(elems[start_col_1]) - offset)
130 end2 = str(int(start2) - size)
131 elems[start_col_1]=start1
132 elems[end_col_1]=end1
133 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
134 fo.write( "%s\n" % '\t'.join( elems ) )
135 elems[start_col_1]=end2
136 elems[end_col_1]=start2
137 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
138 fo.write( "%s\n" % '\t'.join( elems ) )
139 elif strand == '+':
140 if region == 'start':
141 start = str(int(elems[start_col_1]) + offset)
142 end1 = str(int(start) - size)
143 end2 = str(int(start) + size)
144 elems[start_col_1]=end1
145 elems[end_col_1]=start
146 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
147 fo.write( "%s\n" % '\t'.join( elems ) )
148 elems[start_col_1]=start
149 elems[end_col_1]=end2
150 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
151 fo.write( "%s\n" % '\t'.join( elems ) )
152 elif region == 'end':
153 start = str(int(elems[end_col_1]) + offset)
154 end1 = str(int(start) - size)
155 end2 = str(int(start) + size)
156 elems[start_col_1]=end1
157 elems[end_col_1]=start
158 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
159 fo.write( "%s\n" % '\t'.join( elems ) )
160 elems[start_col_1]=start
161 elems[end_col_1]=end2
162 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
163 fo.write( "%s\n" % '\t'.join( elems ) )
164 else:
165 start1 = str(int(elems[start_col_1]) + offset)
166 end1 = str(int(start1) - size)
167 start2 = str(int(elems[end_col_1]) + offset)
168 end2 = str(int(start2) + size)
169 elems[start_col_1]=end1
170 elems[end_col_1]=start1
171 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
172 fo.write( "%s\n" % '\t'.join( elems ) )
173 elems[start_col_1]=start2
174 elems[end_col_1]=end2
175 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
176 fo.write( "%s\n" % '\t'.join( elems ) )
177 except:
178 skipped_lines += 1
179 if not invalid_line:
180 first_invalid_line = i + 1
181 invalid_line = line
182 fo.close()
183
184 if skipped_lines == j:
185 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
186 if skipped_lines > 0:
187 print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
188 print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' %( direction, region, size, offset )
189
190 if __name__ == "__main__":
191 main()