annotate sam_bitwise_flag_filter.py @ 0:0b2424a404d9 draft default tip

Uploaded tool tarball.
author devteam
date Mon, 26 Aug 2013 15:11:25 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
2 # Refactored on 11/13/2010 by Kanwei Li
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
3
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
4 import sys
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
5 import optparse
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
6
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
def stop_err( msg ):
    """Write *msg* to standard error and terminate the program.

    The message is written exactly as given (no newline is appended).
    The process then exits with status 1 so that shells and workflow
    engines see the failure; a bare ``sys.exit()`` would report success.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
10
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
def main():
    """Filter tab-separated SAM records by bits of their bitwise flag.

    Command-line options select, for each of the eleven flag bits
    0x0001 .. 0x0400, whether that bit must be clear ('0') or set ('1').
    Bits whose option is omitted are not checked.  Input is read from the
    file given with -f/--input_sam_file, or from standard input when that
    option is absent.  Blank lines and lines starting with '#' or '@' are
    dropped; every other line whose flag satisfies all requested bit
    states is written to standard output followed by a newline.

    Raises:
        SystemExit: via ``parser.error`` (status 2) when --flag_column is
            not an integer >= 1, and with status 1 when a record has no
            integer value in the flag column.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    # One entry per flag bit, ordered from least to most significant bit:
    # the position of an entry in this tuple is the bit index it controls.
    flag_options = (
        ('--0x0001', 'is_paired',
         'The read is paired in sequencing'),
        ('--0x0002', 'is_proper_pair',
         'The read is mapped in a proper pair'),
        ('--0x0004', 'is_unmapped',
         'The query sequence itself is unmapped'),
        ('--0x0008', 'mate_is_unmapped',
         'The mate is unmapped'),
        ('--0x0010', 'query_strand',
         'Strand of the query: 0 = forward, 1 = reverse.'),
        ('--0x0020', 'mate_strand',
         'Strand of the mate: 0 = forward, 1 = reverse.'),
        ('--0x0040', 'is_first',
         'The read is the first read in a pair'),
        ('--0x0080', 'is_second',
         'The read is the second read in a pair'),
        ('--0x0100', 'is_not_primary',
         'The alignment for the given read is not primary'),
        ('--0x0200', 'is_bad_quality',
         'The read fails platform/vendor quality checks'),
        ('--0x0400', 'is_duplicate',
         'The read is either a PCR or an optical duplicate'),
    )
    for hex_name, dest_name, help_text in flag_options:
        parser.add_option(
            hex_name, '--' + dest_name,
            choices=('0', '1'),
            dest=dest_name,
            metavar="<0|1>",
            help=help_text)

    parser.add_option(
        '-f', '--input_sam_file',
        metavar="INPUT_SAM_FILE",
        dest='input_sam',
        default=False,
        help='Name of the SAM file to be filtered. STDIN is default')

    parser.add_option(
        '-c', '--flag_column',
        dest='flag_col',
        default='2',
        help='Column containing SAM bitwise flag. 1-based')

    options, _ = parser.parse_args()

    # Validate the column before touching any input.  Without this check
    # a value of 0 would become index -1 and silently read the LAST column.
    try:
        flag_col = int(options.flag_col) - 1
    except ValueError:
        parser.error("--flag_column must be an integer, got %r"
                     % (options.flag_col,))
    if flag_col < 0:
        parser.error("--flag_column is 1-based and must be >= 1")

    # Fold the requested bit states into a mask of the bits to inspect and
    # the value those bits must have, so each record needs one comparison.
    mask = 0
    wanted = 0
    for bit_index, (_hex_name, dest_name, _help_text) in enumerate(flag_options):
        choice = getattr(options, dest_name)
        if choice is None:
            continue
        mask |= 1 << bit_index
        if choice == '1':
            wanted |= 1 << bit_index

    if options.input_sam:
        infile = open(options.input_sam, 'r')
    else:
        infile = sys.stdin

    try:
        for line_number, line in enumerate(infile, 1):
            line = line.rstrip('\r\n')
            # Skip blank lines, '#' comment lines and '@' header lines.
            if not line or line.startswith(('#', '@')):
                continue
            fields = line.split('\t')
            try:
                flags = int(fields[flag_col])
            except (IndexError, ValueError):
                sys.exit("Line %d: no integer bitwise flag in column %d"
                         % (line_number, flag_col + 1))
            if (flags & mask) == wanted:
                # sys.stdout.write works on both Python 2 and Python 3,
                # unlike the Python-2-only print statement.
                sys.stdout.write(line + '\n')
    finally:
        # Only close what this function opened; leave stdin alone.
        if infile is not sys.stdin:
            infile.close()
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
# Run the filter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
0b2424a404d9 Uploaded tool tarball.
devteam
parents:
diff changeset
149