Mercurial > repos > xuebing > sharplabtool
comparison tools/new_operations/get_flanks.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 #Done by: Guru | |
3 | |
4 """ | |
5 Get Flanking regions. | |
6 | |
7 usage: %prog input out_file size direction region | |
8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | |
9 -o, --off=N: Offset | |
10 """ | |
11 | |
12 import sys, re, os | |
13 from galaxy import eggs | |
14 import pkg_resources; pkg_resources.require( "bx-python" ) | |
15 from bx.cookbook import doc_optparse | |
16 from galaxy.tools.util.galaxyops import * | |
17 | |
18 def stop_err( msg ): | |
19 sys.stderr.write( msg ) | |
20 sys.exit() | |
21 | |
22 def main(): | |
23 try: | |
24 if int( sys.argv[3] ) < 0: | |
25 raise Exception | |
26 except: | |
27 stop_err( "Length of flanking region(s) must be a non-negative integer." ) | |
28 | |
29 # Parsing Command Line here | |
30 options, args = doc_optparse.parse( __doc__ ) | |
31 try: | |
32 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols ) | |
33 inp_file, out_file, size, direction, region = args | |
34 if strand_col_1 <= 0: | |
35 strand = "+" #if strand is not defined, default it to + | |
36 except: | |
37 stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." ) | |
38 try: | |
39 offset = int(options.off) | |
40 size = int(size) | |
41 except: | |
42 stop_err( "Invalid offset or length entered. Try again by entering valid integer values." ) | |
43 | |
44 fo = open(out_file,'w') | |
45 | |
46 skipped_lines = 0 | |
47 first_invalid_line = 0 | |
48 invalid_line = None | |
49 elems = [] | |
50 j=0 | |
51 for i, line in enumerate( file( inp_file ) ): | |
52 line = line.strip() | |
53 if line and (not line.startswith( '#' )) and line != '': | |
54 j+=1 | |
55 try: | |
56 elems = line.split('\t') | |
57 #if the start and/or end columns are not numbers, skip that line. | |
58 assert int(elems[start_col_1]) | |
59 assert int(elems[end_col_1]) | |
60 if strand_col_1 != -1: | |
61 strand = elems[strand_col_1] | |
62 #if the stand value is not + or -, skip that line. | |
63 assert strand in ['+', '-'] | |
64 if direction == 'Upstream': | |
65 if strand == '+': | |
66 if region == 'end': | |
67 elems[end_col_1] = str(int(elems[end_col_1]) + offset) | |
68 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
69 else: | |
70 elems[end_col_1] = str(int(elems[start_col_1]) + offset) | |
71 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
72 elif strand == '-': | |
73 if region == 'end': | |
74 elems[start_col_1] = str(int(elems[start_col_1]) - offset) | |
75 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
76 else: | |
77 elems[start_col_1] = str(int(elems[end_col_1]) - offset) | |
78 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
79 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
80 fo.write( "%s\n" % '\t'.join( elems ) ) | |
81 | |
82 elif direction == 'Downstream': | |
83 if strand == '-': | |
84 if region == 'start': | |
85 elems[end_col_1] = str(int(elems[end_col_1]) - offset) | |
86 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
87 else: | |
88 elems[end_col_1] = str(int(elems[start_col_1]) - offset) | |
89 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
90 elif strand == '+': | |
91 if region == 'start': | |
92 elems[start_col_1] = str(int(elems[start_col_1]) + offset) | |
93 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
94 else: | |
95 elems[start_col_1] = str(int(elems[end_col_1]) + offset) | |
96 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
97 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
98 fo.write( "%s\n" % '\t'.join( elems ) ) | |
99 | |
100 elif direction == 'Both': | |
101 if strand == '-': | |
102 if region == 'start': | |
103 start = str(int(elems[end_col_1]) - offset) | |
104 end1 = str(int(start) + size) | |
105 end2 = str(int(start) - size) | |
106 elems[start_col_1]=start | |
107 elems[end_col_1]=end1 | |
108 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
109 fo.write( "%s\n" % '\t'.join( elems ) ) | |
110 elems[start_col_1]=end2 | |
111 elems[end_col_1]=start | |
112 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
113 fo.write( "%s\n" % '\t'.join( elems ) ) | |
114 elif region == 'end': | |
115 start = str(int(elems[start_col_1]) - offset) | |
116 end1 = str(int(start) + size) | |
117 end2 = str(int(start) - size) | |
118 elems[start_col_1]=start | |
119 elems[end_col_1]=end1 | |
120 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
121 fo.write( "%s\n" % '\t'.join( elems ) ) | |
122 elems[start_col_1]=end2 | |
123 elems[end_col_1]=start | |
124 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
125 fo.write( "%s\n" % '\t'.join( elems ) ) | |
126 else: | |
127 start1 = str(int(elems[end_col_1]) - offset) | |
128 end1 = str(int(start1) + size) | |
129 start2 = str(int(elems[start_col_1]) - offset) | |
130 end2 = str(int(start2) - size) | |
131 elems[start_col_1]=start1 | |
132 elems[end_col_1]=end1 | |
133 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
134 fo.write( "%s\n" % '\t'.join( elems ) ) | |
135 elems[start_col_1]=end2 | |
136 elems[end_col_1]=start2 | |
137 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
138 fo.write( "%s\n" % '\t'.join( elems ) ) | |
139 elif strand == '+': | |
140 if region == 'start': | |
141 start = str(int(elems[start_col_1]) + offset) | |
142 end1 = str(int(start) - size) | |
143 end2 = str(int(start) + size) | |
144 elems[start_col_1]=end1 | |
145 elems[end_col_1]=start | |
146 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
147 fo.write( "%s\n" % '\t'.join( elems ) ) | |
148 elems[start_col_1]=start | |
149 elems[end_col_1]=end2 | |
150 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
151 fo.write( "%s\n" % '\t'.join( elems ) ) | |
152 elif region == 'end': | |
153 start = str(int(elems[end_col_1]) + offset) | |
154 end1 = str(int(start) - size) | |
155 end2 = str(int(start) + size) | |
156 elems[start_col_1]=end1 | |
157 elems[end_col_1]=start | |
158 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
159 fo.write( "%s\n" % '\t'.join( elems ) ) | |
160 elems[start_col_1]=start | |
161 elems[end_col_1]=end2 | |
162 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
163 fo.write( "%s\n" % '\t'.join( elems ) ) | |
164 else: | |
165 start1 = str(int(elems[start_col_1]) + offset) | |
166 end1 = str(int(start1) - size) | |
167 start2 = str(int(elems[end_col_1]) + offset) | |
168 end2 = str(int(start2) + size) | |
169 elems[start_col_1]=end1 | |
170 elems[end_col_1]=start1 | |
171 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
172 fo.write( "%s\n" % '\t'.join( elems ) ) | |
173 elems[start_col_1]=start2 | |
174 elems[end_col_1]=end2 | |
175 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
176 fo.write( "%s\n" % '\t'.join( elems ) ) | |
177 except: | |
178 skipped_lines += 1 | |
179 if not invalid_line: | |
180 first_invalid_line = i + 1 | |
181 invalid_line = line | |
182 fo.close() | |
183 | |
184 if skipped_lines == j: | |
185 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) | |
186 if skipped_lines > 0: | |
187 print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) | |
188 print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' %( direction, region, size, offset ) | |
189 | |
190 if __name__ == "__main__": | |
191 main() |