0
|
1 #!/usr/bin/env python
|
|
2 #Done by: Guru
|
|
3
|
|
4 """
|
|
5 Get Flanking regions.
|
|
6
|
|
7 usage: %prog input out_file size direction region
|
|
8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
|
|
9 -o, --off=N: Offset
|
|
10 """
|
|
11
|
|
12 import sys, re, os
|
|
13 from galaxy import eggs
|
|
14 import pkg_resources; pkg_resources.require( "bx-python" )
|
|
15 from bx.cookbook import doc_optparse
|
|
16 from galaxy.tools.util.galaxyops import *
|
|
17
|
|
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    msg is written verbatim to standard error (no newline is appended).
    Raises SystemExit with exit status 1 so that the calling process can
    tell the run failed from the exit code as well as from stderr.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would report success (status 0) despite the error.
    sys.exit( 1 )
|
|
21
|
|
def _flank_intervals( start, end, strand, direction, region, size, offset ):
    """
    Compute the flank(s) of one feature as a list of (start, end) integer pairs.

    start, end -- integer coordinates of the feature
    strand     -- '+' or '-' (anything other than '+' is handled as '-')
    direction  -- 'Upstream', 'Downstream' or 'Both'; any other value
                  yields an empty list
    region     -- 'start', 'end' or anything else (treated as the whole feature)
    size       -- flank length
    offset     -- shift applied to the anchor coordinate, in the direction
                  of the strand

    For 'Both' the upstream flank comes first, then the downstream one.
    """
    # On the minus strand the feature begins at its higher coordinate and
    # offsets are subtracted instead of added.
    if strand == '+':
        feature_start, feature_end, sign = start, end, 1
    else:
        feature_start, feature_end, sign = end, start, -1

    # Each entry is ( is_upstream, anchor coordinate the flank is measured from ).
    if direction == 'Upstream':
        if region == 'end':
            picks = [ ( True, feature_end ) ]
        else:
            picks = [ ( True, feature_start ) ]
    elif direction == 'Downstream':
        if region == 'start':
            picks = [ ( False, feature_start ) ]
        else:
            picks = [ ( False, feature_end ) ]
    elif direction == 'Both':
        if region == 'start':
            picks = [ ( True, feature_start ), ( False, feature_start ) ]
        elif region == 'end':
            picks = [ ( True, feature_end ), ( False, feature_end ) ]
        else:
            picks = [ ( True, feature_start ), ( False, feature_end ) ]
    else:
        picks = []

    flanks = []
    for is_upstream, anchor in picks:
        edge = anchor + sign * offset
        # Upstream on '+' and downstream on '-' both extend towards lower
        # coordinates; the two remaining combinations extend towards higher ones.
        if is_upstream == ( strand == '+' ):
            flanks.append( ( edge - size, edge ) )
        else:
            flanks.append( ( edge, edge + size ) )
    return flanks

def main():
    """
    Command line entry point: write the flanking regions of every interval
    in the input file to the output file.

    Positional arguments (see the module docstring): input file, output
    file, flank size, direction and region.  The --cols option gives the
    chrom/start/end/strand columns and --off the offset.

    Blank lines and lines starting with '#' are ignored.  A data line is
    skipped (and counted) when its start or end column is missing, is not
    an integer or is 0, when its strand is not '+' or '-', or when a
    resulting flank coordinate is not positive.  A summary is printed to
    standard output at the end.

    Terminates through stop_err() when the arguments are invalid or when no
    data line could be processed; the latter also happens for an input
    without any data line, because both counters are then 0.
    """
    try:
        if int( sys.argv[3] ) < 0:
            raise ValueError
    except ( ValueError, IndexError ):
        stop_err( "Length of flanking region(s) must be a non-negative integer." )

    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )
    try:
        # NOTE(review): parse_cols_arg presumably returns zero-based column
        # indexes, with -1 standing for "no strand column" -- confirm.
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, out_file, size, direction, region = args
    except Exception:
        stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." )
    try:
        offset = int( options.off )
        size = int( size )
    except ( TypeError, ValueError ):
        stop_err( "Invalid offset or length entered. Try again by entering valid integer values." )

    # Default strand; it is replaced per line whenever a strand column is defined.
    strand = "+"

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None
    data_lines = 0

    fo = open( out_file, 'w' )
    try:
        fi = open( inp_file )
        try:
            for i, line in enumerate( fi ):
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                data_lines += 1
                try:
                    elems = line.split( '\t' )
                    # Non-numeric start/end raise ValueError, a missing column
                    # raises IndexError; a value of 0 is rejected as well.
                    start = int( elems[start_col_1] )
                    end = int( elems[end_col_1] )
                    if start == 0 or end == 0:
                        raise ValueError( "start/end must be non-zero" )
                    if strand_col_1 != -1:
                        strand = elems[strand_col_1]
                    # If the strand value is not + or -, skip that line.
                    if strand not in ( '+', '-' ):
                        raise ValueError( "invalid strand" )
                    for flank_start, flank_end in _flank_intervals( start, end, strand, direction, region, size, offset ):
                        # Each flank is checked right before it is written, so
                        # with 'Both' the first flank may already be in the
                        # output when the second one causes the line to be
                        # counted as skipped.
                        if flank_start <= 0 or flank_end <= 0:
                            raise ValueError( "flank outside of valid coordinates" )
                        elems[start_col_1] = str( flank_start )
                        elems[end_col_1] = str( flank_end )
                        fo.write( "%s\n" % '\t'.join( elems ) )
                except ( ValueError, IndexError ):
                    skipped_lines += 1
                    if not invalid_line:
                        first_invalid_line = i + 1
                        invalid_line = line
        finally:
            fi.close()
    finally:
        fo.close()

    if skipped_lines == data_lines:
        stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
    print( 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' % ( direction, region, size, offset ) )
|
|
189
|
|
# Run the tool only when this file is executed as a script, not when imported.
if "__main__" == __name__:
    main()
|