Mercurial > repos > devteam > sam_bitwise_flag_filter
comparison sam_bitwise_flag_filter.py @ 0:0b2424a404d9 draft default tip
Uploaded tool tarball.
author | devteam |
---|---|
date | Mon, 26 Aug 2013 15:11:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0b2424a404d9 |
---|---|
1 #!/usr/bin/env python | |
2 # Refactored on 11/13/2010 by Kanwei Li | |
3 | |
4 import sys | |
5 import optparse | |
6 | |
def stop_err(msg):
    """Write *msg* to standard error and terminate the program.

    The message is written exactly as given (no newline is appended).
    The process exits with status 1 so that callers relying on the exit
    code see the failure; a bare ``sys.exit()`` would report success.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    sys.exit(1)
10 | |
def main():
    """Filter SAM records by bitwise FLAG and print the matching lines.

    Command-line options select, for each of the eleven FLAG bits
    0x0001..0x0400, whether the bit must be set ('1') or clear ('0').
    Bits whose option is omitted are not tested.  Input is read from the
    file given with -f/--input_sam_file, or from standard input when that
    option is absent.  Empty lines and lines starting with '#' or '@' are
    skipped and not echoed.  Every remaining line whose FLAG column
    (-c/--flag_column, 1-based, default 2) satisfies all requested bit
    conditions is written to standard output followed by a newline.

    Exits with status 2 (via the option parser) on an invalid
    --flag_column, and with status 1 if a record has no usable integer in
    the FLAG column.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
"""
    parser = optparse.OptionParser(usage=usage)

    # One entry per FLAG bit, ordered by bit position: the entry at index i
    # controls bit (1 << i).  The name doubles as the long switch and as the
    # attribute on the parsed options object.
    flag_options = (
        ('--0x0001', 'is_paired',
         'The read is paired in sequencing'),
        ('--0x0002', 'is_proper_pair',
         'The read is mapped in a proper pair'),
        ('--0x0004', 'is_unmapped',
         'The query sequence itself is unmapped'),
        ('--0x0008', 'mate_is_unmapped',
         'The mate is unmapped'),
        ('--0x0010', 'query_strand',
         'Strand of the query: 0 = forward, 1 = reverse.'),
        ('--0x0020', 'mate_strand',
         'Strand of the mate: 0 = forward, 1 = reverse.'),
        ('--0x0040', 'is_first',
         'The read is the first read in a pair'),
        ('--0x0080', 'is_second',
         'The read is the second read in a pair'),
        ('--0x0100', 'is_not_primary',
         'The alignment for the given read is not primary'),
        ('--0x0200', 'is_bad_quality',
         'The read fails platform/vendor quality checks'),
        ('--0x0400', 'is_duplicate',
         'The read is either a PCR or an optical duplicate'),
    )
    for hex_switch, name, help_text in flag_options:
        parser.add_option(
            hex_switch, '--' + name,
            choices=('0', '1'),
            dest=name,
            metavar="<0|1>",
            help=help_text)

    parser.add_option(
        '-f', '--input_sam_file',
        metavar="INPUT_SAM_FILE",
        dest='input_sam',
        default=False,
        help='Name of the SAM file to be filtered. STDIN is default')

    # type='int' lets the parser reject non-numeric values with a usage
    # error instead of a traceback later on.
    parser.add_option(
        '-c', '--flag_column',
        dest='flag_col',
        type='int',
        default=2,
        help='Column containing SAM bitwise flag. 1-based')

    options, args = parser.parse_args()

    # A value below 1 would become a negative list index and silently pick
    # a column counted from the end of the record.
    if options.flag_col < 1:
        parser.error('--flag_column must be a 1-based column number (>= 1)')
    flag_col = options.flag_col - 1

    # Fold the requested conditions into two integers so each record needs
    # a single comparison: care_mask holds the bits that were asked about,
    # want_bits holds the subset of those that must be set.
    care_mask = 0
    want_bits = 0
    for bit, spec in enumerate(flag_options):
        choice = getattr(options, spec[1])
        if choice is None:
            continue
        care_mask |= 1 << bit
        if choice == '1':
            want_bits |= 1 << bit

    if options.input_sam:
        infile = open(options.input_sam, 'r')
    else:
        infile = sys.stdin

    try:
        for line_no, line in enumerate(infile, 1):
            line = line.rstrip('\r\n')
            if not line or line.startswith(('#', '@')):
                continue

            fields = line.split('\t')
            try:
                flags = int(fields[flag_col])
            except (IndexError, ValueError):
                sys.stderr.write(
                    'Line %d: no integer flag in column %d\n'
                    % (line_no, options.flag_col))
                sys.exit(1)

            if (flags & care_mask) == want_bits:
                # sys.stdout.write works on both Python 2 and Python 3,
                # unlike the print statement.
                sys.stdout.write(line + '\n')
    finally:
        # Only close what this function opened; leave stdin alone.
        if infile is not sys.stdin:
            infile.close()
147 | |
# Run the filter only when this file is executed as a script.
if __name__ == "__main__":
    main()
149 |