comparison sam_bitwise_flag_filter.py @ 0:0b2424a404d9 draft default tip

Uploaded tool tarball.
author devteam
date Mon, 26 Aug 2013 15:11:25 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0b2424a404d9
1 #!/usr/bin/env python
2 # Refactored on 11/13/2010 by Kanwei Li
3
4 import sys
5 import optparse
6
def stop_err(msg):
    """Write *msg* to standard error and terminate the process.

    The message is written verbatim (no newline is appended).  The process
    exits with status 1 so that calling shells and workflow managers can
    detect the failure; a bare ``sys.exit()`` would report success (0).

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    sys.exit(1)
10
# One row per SAM FLAG bit, ordered by bit position: row i tests (1 << i).
# Each row is (hex alias, long option name and optparse dest, help text).
_FLAG_OPTIONS = (
    ('0x0001', 'is_paired', 'The read is paired in sequencing'),
    ('0x0002', 'is_proper_pair', 'The read is mapped in a proper pair'),
    ('0x0004', 'is_unmapped', 'The query sequence itself is unmapped'),
    ('0x0008', 'mate_is_unmapped', 'The mate is unmapped'),
    ('0x0010', 'query_strand',
     'Strand of the query: 0 = forward, 1 = reverse.'),
    ('0x0020', 'mate_strand',
     'Strand of the mate: 0 = forward, 1 = reverse.'),
    ('0x0040', 'is_first', 'The read is the first read in a pair'),
    ('0x0080', 'is_second', 'The read is the second read in a pair'),
    ('0x0100', 'is_not_primary',
     'The alignment for the given read is not primary'),
    ('0x0200', 'is_bad_quality',
     'The read fails platform/vendor quality checks'),
    ('0x0400', 'is_duplicate',
     'The read is either a PCR or an optical duplicate'),
)


def _build_parser():
    """Return the OptionParser describing every command-line switch."""
    usage = ("%prog [options]\n"
             "\n"
             "options (listed below) default to 'None' if omitted\n")
    parser = optparse.OptionParser(usage=usage)

    for hex_alias, name, help_text in _FLAG_OPTIONS:
        parser.add_option(
            '--' + hex_alias, '--' + name,
            choices=('0', '1'),
            dest=name,
            metavar="<0|1>",
            help=help_text)

    parser.add_option(
        '-f', '--input_sam_file',
        metavar="INPUT_SAM_FILE",
        dest='input_sam',
        default=False,
        help='Name of the SAM file to be filtered. STDIN is default')

    # type='int' lets optparse reject non-numeric values with a usage error
    # instead of an uncaught ValueError later on.
    parser.add_option(
        '-c', '--flag_column',
        dest='flag_col',
        type='int',
        default=2,
        help='Column containing SAM bitwise flag. 1-based')

    return parser


def _flag_criteria(options):
    """Fold the supplied flag switches into a ``(mask, expected)`` int pair.

    ``mask`` has a bit set for every switch the user supplied; ``expected``
    has that bit set only where the user asked for '1'.  A flag value passes
    the filter exactly when ``flags & mask == expected``.
    """
    mask = 0
    expected = 0
    for bit, (_alias, name, _help) in enumerate(_FLAG_OPTIONS):
        choice = getattr(options, name)
        if choice is None:
            # Switch omitted: this bit is not tested at all.
            continue
        mask |= 1 << bit
        if choice == '1':
            expected |= 1 << bit
    return mask, expected


def main():
    """Filter tab-separated SAM lines by their bitwise flag.

    Options are read from ``sys.argv``.  Input comes from the file named by
    ``-f``/``--input_sam_file`` or, when that is omitted, from standard
    input.  Blank lines and lines starting with '#' or '@' are dropped.
    Every other line is written to standard output (line ending normalised
    to a newline) when each flag bit selected on the command line has the
    requested value in the column chosen by ``-c``/``--flag_column``.

    Raises:
        SystemExit: with status 2 for invalid options, or status 1 when a
            data line has no integer in the flag column.
    """
    parser = _build_parser()
    options, _args = parser.parse_args()

    # A column of 0 would become index -1 and silently read the last field.
    if options.flag_col < 1:
        parser.error('--flag_column is 1-based and must be at least 1')
    flag_col = options.flag_col - 1

    mask, expected = _flag_criteria(options)

    if options.input_sam:
        infile = open(options.input_sam, 'r')
    else:
        infile = sys.stdin

    try:
        for line_no, line in enumerate(infile, 1):
            line = line.rstrip('\r\n')
            if not line or line.startswith(('#', '@')):
                continue

            fields = line.split('\t')
            try:
                flags = int(fields[flag_col])
            except (IndexError, ValueError):
                sys.stderr.write(
                    'Line %d: no integer bitwise flag found in column %d\n'
                    % (line_no, flag_col + 1))
                sys.exit(1)

            if (flags & mask) == expected:
                # Equivalent to the Python 2 ``print line`` statement, but
                # valid on Python 3 as well.
                sys.stdout.write(line + '\n')
    finally:
        # Only close what this function opened; leave stdin to the caller.
        if infile is not sys.stdin:
            infile.close()
147
# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
149