0
|
1 #!/usr/bin/env python
|
|
2 # Refactored on 11/13/2010 by Kanwei Li
|
|
3
|
|
4 import sys
|
|
5 import optparse
|
|
6
|
|
7 def stop_err( msg ):
|
|
8 sys.stderr.write( msg )
|
|
9 sys.exit()
|
|
10
|
|
11 def main():
|
|
12 usage = """%prog [options]
|
|
13
|
|
14 options (listed below) default to 'None' if omitted
|
|
15 """
|
|
16 parser = optparse.OptionParser(usage=usage)
|
|
17
|
|
18 parser.add_option(
|
|
19 '--0x0001','--is_paired',
|
|
20 choices = ( '0','1' ),
|
|
21 dest='is_paired',
|
|
22 metavar="<0|1>",
|
|
23 help='The read is paired in sequencing')
|
|
24
|
|
25 parser.add_option(
|
|
26 '--0x0002','--is_proper_pair',
|
|
27 choices = ( '0','1' ),
|
|
28 metavar="<0|1>",
|
|
29 dest='is_proper_pair',
|
|
30 help='The read is mapped in a proper pair')
|
|
31
|
|
32 parser.add_option(
|
|
33 '--0x0004','--is_unmapped',
|
|
34 choices = ( '0','1' ),
|
|
35 metavar="<0|1>",
|
|
36 dest='is_unmapped',
|
|
37 help='The query sequence itself is unmapped')
|
|
38
|
|
39 parser.add_option(
|
|
40 '--0x0008','--mate_is_unmapped',
|
|
41 choices = ( '0','1' ),
|
|
42 metavar="<0|1>",
|
|
43 dest='mate_is_unmapped',
|
|
44 help='The mate is unmapped')
|
|
45
|
|
46 parser.add_option(
|
|
47 '--0x0010','--query_strand',
|
|
48 dest='query_strand',
|
|
49 metavar="<0|1>",
|
|
50 choices = ( '0','1' ),
|
|
51 help='Strand of the query: 0 = forward, 1 = reverse.')
|
|
52
|
|
53 parser.add_option(
|
|
54 '--0x0020','--mate_strand',
|
|
55 dest='mate_strand',
|
|
56 metavar="<0|1>",
|
|
57 choices = ('0','1'),
|
|
58 help='Strand of the mate: 0 = forward, 1 = reverse.')
|
|
59
|
|
60 parser.add_option(
|
|
61 '--0x0040','--is_first',
|
|
62 choices = ( '0','1' ),
|
|
63 metavar="<0|1>",
|
|
64 dest='is_first',
|
|
65 help='The read is the first read in a pair')
|
|
66
|
|
67 parser.add_option(
|
|
68 '--0x0080','--is_second',
|
|
69 choices = ( '0','1' ),
|
|
70 metavar="<0|1>",
|
|
71 dest='is_second',
|
|
72 help='The read is the second read in a pair')
|
|
73
|
|
74 parser.add_option(
|
|
75 '--0x0100','--is_not_primary',
|
|
76 choices = ( '0','1' ),
|
|
77 metavar="<0|1>",
|
|
78 dest='is_not_primary',
|
|
79 help='The alignment for the given read is not primary')
|
|
80
|
|
81 parser.add_option(
|
|
82 '--0x0200','--is_bad_quality',
|
|
83 choices = ( '0','1' ),
|
|
84 metavar="<0|1>",
|
|
85 dest='is_bad_quality',
|
|
86 help='The read fails platform/vendor quality checks')
|
|
87
|
|
88 parser.add_option(
|
|
89 '--0x0400','--is_duplicate',
|
|
90 choices = ( '0','1' ),
|
|
91 metavar="<0|1>",
|
|
92 dest='is_duplicate',
|
|
93 help='The read is either a PCR or an optical duplicate')
|
|
94
|
|
95 parser.add_option(
|
|
96 '-f','--input_sam_file',
|
|
97 metavar="INPUT_SAM_FILE",
|
|
98 dest='input_sam',
|
|
99 default = False,
|
|
100 help='Name of the SAM file to be filtered. STDIN is default')
|
|
101
|
|
102 parser.add_option(
|
|
103 '-c','--flag_column',
|
|
104 dest='flag_col',
|
|
105 default = '2',
|
|
106 help='Column containing SAM bitwise flag. 1-based')
|
|
107
|
|
108 options, args = parser.parse_args()
|
|
109
|
|
110 if options.input_sam:
|
|
111 infile = open ( options.input_sam, 'r')
|
|
112 else:
|
|
113 infile = sys.stdin
|
|
114
|
|
115 opt_ary = [
|
|
116 options.is_paired,
|
|
117 options.is_proper_pair,
|
|
118 options.is_unmapped,
|
|
119 options.mate_is_unmapped,
|
|
120 options.query_strand,
|
|
121 options.mate_strand,
|
|
122 options.is_first,
|
|
123 options.is_second,
|
|
124 options.is_not_primary,
|
|
125 options.is_bad_quality,
|
|
126 options.is_duplicate
|
|
127 ]
|
|
128
|
|
129 opt_map = { '0': False, '1': True }
|
|
130 used_indices = [(index, opt_map[opt]) for index, opt in enumerate(opt_ary) if opt is not None]
|
|
131 flag_col = int( options.flag_col ) - 1
|
|
132
|
|
133 for line in infile:
|
|
134 line = line.rstrip( '\r\n' )
|
|
135 if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
|
|
136 fields = line.split( '\t' )
|
|
137 flags = int( fields[flag_col] )
|
|
138
|
|
139 valid_line = True
|
|
140 for index, opt_bool in used_indices:
|
|
141 if bool(flags & 0x0001 << index) != opt_bool:
|
|
142 valid_line = False
|
|
143 break
|
|
144
|
|
145 if valid_line:
|
|
146 print line
|
|
147
|
|
148 if __name__ == "__main__": main()
|
|
149
|