Mercurial > repos > iuc > ngsutils_bam_filter
comparison filter.py @ 2:7a68005de299 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
author | iuc |
---|---|
date | Sun, 27 Nov 2016 15:01:21 -0500 |
parents | 4e4e4093d65d |
children | 9b9ae5963d3c |
comparison
equal
deleted
inserted
replaced
1:8187a729d9f4 | 2:7a68005de299 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 ## category General | 2 # category General |
3 ## desc Removes reads from a BAM file based on criteria | 3 # desc Removes reads from a BAM file based on criteria |
4 """ | 4 """ |
5 Removes reads from a BAM file based on criteria | 5 Removes reads from a BAM file based on criteria |
6 | 6 |
7 Given a BAM file, this script will only allow reads that meet filtering | 7 Given a BAM file, this script will only allow reads that meet filtering |
8 criteria to be written to output. The output is another BAM file with the | 8 criteria to be written to output. The output is another BAM file with the |
15 Currently, the available filters are: | 15 Currently, the available filters are: |
16 -minlen val Remove reads that are smaller than {val} | 16 -minlen val Remove reads that are smaller than {val} |
17 -maxlen val Remove reads that are larger than {val} | 17 -maxlen val Remove reads that are larger than {val} |
18 -mapped Keep only mapped reads | 18 -mapped Keep only mapped reads |
19 -unmapped Keep only unmapped reads | 19 -unmapped Keep only unmapped reads |
20 -properpair Keep only properly paired reads (both mapped, | 20 -properpair Keep only properly paired reads (both mapped, |
21 correct orientation, flag set in BAM) | 21 correct orientation, flag set in BAM) |
22 -noproperpair Keep only not-properly paired reads | 22 -noproperpair Keep only not-properly paired reads |
23 | 23 |
24 -mask bitmask Remove reads that match the mask (base 10/hex) | 24 -mask bitmask Remove reads that match the mask (base 10/hex) |
25 -uniq {length} Remove reads that are have the same sequence | 25 -uniq {length} Remove reads that are have the same sequence |
108 | 108 |
109 """ | 109 """ |
110 | 110 |
111 import os | 111 import os |
112 import sys | 112 import sys |
113 | |
113 import pysam | 114 import pysam |
114 from ngsutils.bam import bam_iter | 115 from ngsutils.bam import bam_iter, read_calc_mismatches, read_calc_mismatches_gen, read_calc_mismatches_ref, read_calc_variations |
116 from ngsutils.bed import BedFile | |
115 from ngsutils.support.dbsnp import DBSNP | 117 from ngsutils.support.dbsnp import DBSNP |
116 from ngsutils.bam import read_calc_mismatches, read_calc_mismatches_ref, read_calc_mismatches_gen, read_calc_variations | |
117 from ngsutils.bed import BedFile | |
118 | 118 |
119 | 119 |
120 def usage(): | 120 def usage(): |
121 print __doc__ | 121 print __doc__ |
122 print """ | 122 print """ |
205 del_list.append(k) | 205 del_list.append(k) |
206 | 206 |
207 for k in del_list: | 207 for k in del_list: |
208 self.rev_pos.remove(k) | 208 self.rev_pos.remove(k) |
209 | 209 |
210 if not start_pos in self.rev_pos: | 210 if start_pos not in self.rev_pos: |
211 self.rev_pos.add(start_pos) | 211 self.rev_pos.add(start_pos) |
212 return True | 212 return True |
213 return False | 213 return False |
214 else: | 214 else: |
215 if read.pos != self.last_fwd_pos: | 215 if read.pos != self.last_fwd_pos: |
341 def __repr__(self): | 341 def __repr__(self): |
342 return 'Excluding: %s' % (self.ref) | 342 return 'Excluding: %s' % (self.ref) |
343 | 343 |
344 def close(self): | 344 def close(self): |
345 pass | 345 pass |
346 | |
346 | 347 |
347 class IncludeRef(object): | 348 class IncludeRef(object): |
348 def __init__(self, ref): | 349 def __init__(self, ref): |
349 self.ref = ref | 350 self.ref = ref |
350 | 351 |
643 pass | 644 pass |
644 | 645 |
645 | 646 |
646 class MaskFlag(object): | 647 class MaskFlag(object): |
647 def __init__(self, value): | 648 def __init__(self, value): |
648 if type(value) == type(1): | 649 if isinstance(value, int): |
649 self.flag = value | 650 self.flag = value |
650 else: | 651 else: |
651 if value[0:2] == '0x': | 652 if value[0:2] == '0x': |
652 self.flag = int(value, 16) | 653 self.flag = int(value, 16) |
653 else: | 654 else: |
708 | 709 |
709 def __repr__(self): | 710 def __repr__(self): |
710 return "maximum mismatch ratio: %s" % self.val | 711 return "maximum mismatch ratio: %s" % self.val |
711 | 712 |
712 def filter(self, bam, read): | 713 def filter(self, bam, read): |
713 return read_calc_mismatches(read) <= self.ratio*len(read.seq) | 714 return read_calc_mismatches(read) <= self.ratio * len(read.seq) |
714 | 715 |
715 def close(self): | 716 def close(self): |
716 pass | 717 pass |
717 | 718 |
718 | 719 |
823 | 824 |
824 def filter(self, bam, read): | 825 def filter(self, bam, read): |
825 if self.get_value(read) == self.value: | 826 if self.get_value(read) == self.value: |
826 return True | 827 return True |
827 return False | 828 return False |
829 | |
828 | 830 |
829 _criteria = { | 831 _criteria = { |
830 'mapped': Mapped, | 832 'mapped': Mapped, |
831 'unmapped': Unmapped, | 833 'unmapped': Unmapped, |
832 'properpair': ProperPair, | 834 'properpair': ProperPair, |
893 if not criterion.filter(bamfile, read): | 895 if not criterion.filter(bamfile, read): |
894 p = False | 896 p = False |
895 failed += 1 | 897 failed += 1 |
896 if failed_out: | 898 if failed_out: |
897 failed_out.write('%s\t%s\n' % (read.qname, criterion)) | 899 failed_out.write('%s\t%s\n' % (read.qname, criterion)) |
898 #outfile.write(read_to_unmapped(read)) | 900 # outfile.write(read_to_unmapped(read)) |
899 break | 901 break |
900 if p: | 902 if p: |
901 passed += 1 | 903 passed += 1 |
902 outfile.write(read) | 904 outfile.write(read) |
903 | 905 |
927 read.is_unmapped = True | 929 read.is_unmapped = True |
928 read.tid = None | 930 read.tid = None |
929 read.pos = 0 | 931 read.pos = 0 |
930 read.mapq = 0 | 932 read.mapq = 0 |
931 return read | 933 return read |
934 | |
932 | 935 |
933 if __name__ == '__main__': | 936 if __name__ == '__main__': |
934 infile = None | 937 infile = None |
935 outfile = None | 938 outfile = None |
936 failed = None | 939 failed = None |