Mercurial > repos > devteam > sam_bitwise_flag_filter
diff sam_bitwise_flag_filter.py @ 0:0b2424a404d9 draft default tip
Uploaded tool tarball.
author | devteam |
---|---|
date | Mon, 26 Aug 2013 15:11:25 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sam_bitwise_flag_filter.py Mon Aug 26 15:11:25 2013 -0400 @@ -0,0 +1,149 @@ +#!/usr/bin/env python +# Refactored on 11/13/2010 by Kanwei Li + +import sys +import optparse + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def main(): + usage = """%prog [options] + +options (listed below) default to 'None' if omitted + """ + parser = optparse.OptionParser(usage=usage) + + parser.add_option( + '--0x0001','--is_paired', + choices = ( '0','1' ), + dest='is_paired', + metavar="<0|1>", + help='The read is paired in sequencing') + + parser.add_option( + '--0x0002','--is_proper_pair', + choices = ( '0','1' ), + metavar="<0|1>", + dest='is_proper_pair', + help='The read is mapped in a proper pair') + + parser.add_option( + '--0x0004','--is_unmapped', + choices = ( '0','1' ), + metavar="<0|1>", + dest='is_unmapped', + help='The query sequence itself is unmapped') + + parser.add_option( + '--0x0008','--mate_is_unmapped', + choices = ( '0','1' ), + metavar="<0|1>", + dest='mate_is_unmapped', + help='The mate is unmapped') + + parser.add_option( + '--0x0010','--query_strand', + dest='query_strand', + metavar="<0|1>", + choices = ( '0','1' ), + help='Strand of the query: 0 = forward, 1 = reverse.') + + parser.add_option( + '--0x0020','--mate_strand', + dest='mate_strand', + metavar="<0|1>", + choices = ('0','1'), + help='Strand of the mate: 0 = forward, 1 = reverse.') + + parser.add_option( + '--0x0040','--is_first', + choices = ( '0','1' ), + metavar="<0|1>", + dest='is_first', + help='The read is the first read in a pair') + + parser.add_option( + '--0x0080','--is_second', + choices = ( '0','1' ), + metavar="<0|1>", + dest='is_second', + help='The read is the second read in a pair') + + parser.add_option( + '--0x0100','--is_not_primary', + choices = ( '0','1' ), + metavar="<0|1>", + dest='is_not_primary', + help='The alignment for the given read is not primary') + + parser.add_option( + '--0x0200','--is_bad_quality', + choices = ( '0','1' ), + metavar="<0|1>", + dest='is_bad_quality', + help='The read fails platform/vendor quality checks') + + parser.add_option( + '--0x0400','--is_duplicate', + choices = ( '0','1' ), + metavar="<0|1>", + dest='is_duplicate', + help='The read is either a PCR or an optical duplicate') + + parser.add_option( + '-f','--input_sam_file', + metavar="INPUT_SAM_FILE", + dest='input_sam', + default = False, + help='Name of the SAM file to be filtered. STDIN is default') + + parser.add_option( + '-c','--flag_column', + dest='flag_col', + default = '2', + help='Column containing SAM bitwise flag. 1-based') + + options, args = parser.parse_args() + + if options.input_sam: + infile = open ( options.input_sam, 'r') + else: + infile = sys.stdin + + opt_ary = [ + options.is_paired, + options.is_proper_pair, + options.is_unmapped, + options.mate_is_unmapped, + options.query_strand, + options.mate_strand, + options.is_first, + options.is_second, + options.is_not_primary, + options.is_bad_quality, + options.is_duplicate + ] + + opt_map = { '0': False, '1': True } + used_indices = [(index, opt_map[opt]) for index, opt in enumerate(opt_ary) if opt is not None] + flag_col = int( options.flag_col ) - 1 + + for line in infile: + line = line.rstrip( '\r\n' ) + if line and not line.startswith( '#' ) and not line.startswith( '@' ) : + fields = line.split( '\t' ) + flags = int( fields[flag_col] ) + + valid_line = True + for index, opt_bool in used_indices: + if bool(flags & 0x0001 << index) != opt_bool: + valid_line = False + break + + if valid_line: + print line + +if __name__ == "__main__": main() +