annotate tools/fasta_tools/fasta_filter_by_length.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 Input: fasta, minimal length, maximal length
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Output: fasta
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 Return sequences whose lengths are within the range.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 import sys, os
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the script.

    ``msg`` is written to standard error exactly as given (no newline is
    appended), then ``SystemExit`` is raised with status 1 so the calling
    process can detect the failure from the exit code as well as from the
    stderr output.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) on an error path.
    sys.exit(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def __main__():
    """Filter a FASTA file by sequence length.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input FASTA file
        2. minimal sequence length (integer)
        3. maximal sequence length (integer); 0 means "no upper limit"
        4. path of the output FASTA file

    A record is a '>' header line plus the lines that follow it up to the
    next header.  Records whose total sequence length (line terminators
    excluded) lies within the inclusive range are copied to the output
    file; the trailing line terminator of the last written record is
    stripped when that record is also the last one in the input.  Lines
    starting with '#' are skipped.  A notice is printed to standard output
    when no record qualifies.
    """
    input_filename = sys.argv[1]
    # Only a missing or non-integer argument is reported as a bad value; a
    # bare except would also swallow KeyboardInterrupt and SystemExit.
    try:
        min_length = int(sys.argv[2])
    except (IndexError, ValueError):
        stop_err("Minimal length of the return sequence requires a numerical value.")
    try:
        max_length = int(sys.argv[3])
    except (IndexError, ValueError):
        stop_err("Maximum length of the return sequence requires a numerical value.")
    output_filename = sys.argv[4]

    def in_range(size):
        # max_length == 0 disables the upper bound.
        return min_length <= size and (max_length == 0 or size <= max_length)

    record = []        # lines of the record being read, header included
    record_size = 0    # sequence characters seen so far in that record
    at_least_one = False

    # try/finally rather than "with" keeps the code within the Python 2.4
    # floor asserted at the top of the file.
    output_handle = open(output_filename, 'w')
    try:
        input_handle = open(input_filename)
        try:
            for line in input_handle:
                if line.startswith('#'):
                    continue
                if line.startswith('>'):
                    # A new header closes the previous record.  "record" is
                    # empty before the first line, so nothing is flushed (or
                    # counted as a match) at that point.
                    if record and in_range(record_size):
                        output_handle.write(''.join(record))
                        at_least_one = True
                    record = [line]
                    record_size = 0
                else:
                    # Always count the full length: stopping once the size
                    # reaches max_length would let longer sequences pass.
                    record_size += len(line.rstrip('\r\n'))
                    record.append(line)
            # Final flush: the last record has no following header.
            if record and in_range(record_size):
                output_handle.write(''.join(record).rstrip('\r\n'))
                at_least_one = True
        finally:
            input_handle.close()
    finally:
        output_handle.close()

    if not at_least_one:
        # Single-argument call form prints identically on Python 2 and 3.
        print("There is no sequence that falls within your range.")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()