Mercurial > repos > iuc > ngsutils_bam_filter

--- a/filter.py	Sun Dec 06 05:03:12 2015 -0500
+++ b/filter.py	Sun Nov 27 15:01:21 2016 -0500
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-## category General
-## desc Removes reads from a BAM file based on criteria
+# category General
+# desc Removes reads from a BAM file based on criteria
 """
 Removes reads from a BAM file based on criteria

@@ -17,7 +17,7 @@
     -maxlen val                Remove reads that are larger than {val}
     -mapped                    Keep only mapped reads
     -unmapped                  Keep only unmapped reads
-    -properpair                Keep only properly paired reads (both mapped,
+    -properpair                Keep only properly paired reads (both mapped,
                                correct orientation, flag set in BAM)
     -noproperpair              Keep only not-properly paired reads

@@ -110,11 +110,11 @@

 import os
 import sys
+
 import pysam
-from ngsutils.bam import bam_iter
+from ngsutils.bam import bam_iter, read_calc_mismatches, read_calc_mismatches_gen, read_calc_mismatches_ref, read_calc_variations
+from ngsutils.bed import BedFile
 from ngsutils.support.dbsnp import DBSNP
-from ngsutils.bam import read_calc_mismatches, read_calc_mismatches_ref, read_calc_mismatches_gen, read_calc_variations
-from ngsutils.bed import BedFile


 def usage():
@@ -207,7 +207,7 @@
                 for k in del_list:
                     self.rev_pos.remove(k)

-            if not start_pos in self.rev_pos:
+            if start_pos not in self.rev_pos:
                 self.rev_pos.add(start_pos)
                 return True
             return False
@@ -344,6 +344,7 @@
     def close(self):
         pass

+
 class IncludeRef(object):
     def __init__(self, ref):
         self.ref = ref
@@ -645,7 +646,7 @@

 class MaskFlag(object):
     def __init__(self, value):
-        if type(value) == type(1):
+        if isinstance(value, int):
             self.flag = value
         else:
             if value[0:2] == '0x':
@@ -710,7 +711,7 @@
         return "maximum mismatch ratio: %s" % self.val

     def filter(self, bam, read):
-        return read_calc_mismatches(read) <= self.ratio*len(read.seq)
+        return read_calc_mismatches(read) <= self.ratio * len(read.seq)

     def close(self):
         pass
@@ -826,6 +827,7 @@
             return True
         return False

+
 _criteria = {
     'mapped': Mapped,
     'unmapped': Unmapped,
@@ -895,7 +897,7 @@
                 failed += 1
                 if failed_out:
                     failed_out.write('%s\t%s\n' % (read.qname, criterion))
-                #outfile.write(read_to_unmapped(read))
+                # outfile.write(read_to_unmapped(read))
                 break
         if p:
             passed += 1
@@ -930,6 +932,7 @@
     read.mapq = 0
     return read

+
 if __name__ == '__main__':
     infile = None
     outfile = None
--- a/macros.xml	Sun Dec 06 05:03:12 2015 -0500
+++ b/macros.xml	Sun Nov 27 15:01:21 2016 -0500
@@ -1,8 +1,8 @@
 <macros>
-    <token name="@WRAPPER_VERSION@">0.5.7</token>
+    <token name="@WRAPPER_VERSION@">0.5.8</token>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="0.7.7">pysam</requirement>
+            <requirement type="package" version="0.9.1.4">pysam</requirement>
         </requirements>
     </xml>
     <xml name="version">
Binary file ngsutils/__init__.pyc has changed
--- a/ngsutils/bam/__init__.py	Sun Dec 06 05:03:12 2015 -0500
+++ b/ngsutils/bam/__init__.py	Sun Nov 27 15:01:21 2016 -0500
@@ -1,12 +1,13 @@
-import sys
 import os
 import re
+import sys
+
+import ngsutils.support
 import pysam
 try:
     from eta import ETA
 except:
     pass
-import ngsutils.support


 def bam_open(fname, mode='r', *args, **kwargs):
@@ -159,7 +160,7 @@
     >>> cigar_tostr(((0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 1)))
     '1M1I1D1N1S1H1P'
     '''
-
+
     s = ''

     for op, size in cigar:
@@ -230,8 +231,9 @@
     md_pos = 0

     while md and md_pos < maxlength:
-        tmp = '0'  # preload a zero so that immediate mismatches will be caught
-                   # the zero will have no affect otherwise...
+        # preload a zero so that immediate mismatches will be caught
+        # the zero will have no effect otherwise...
+        tmp = '0'

         # look for matches
         while md and md[0] in '0123456789':
@@ -625,7 +627,7 @@
                     cur_pos = frag_end
                     frag_idx += 1
                     if len(fragments) <= frag_idx:
-                        print 'ERROR converting: ', name, fragments
+                        print 'ERROR converting: ', name, fragments
                         return (chrom, 0, chr_cigar)
                     frag_start, frag_end = fragments[frag_idx]
                     chr_cigar.append((3, frag_start - cur_pos))
@@ -864,7 +866,7 @@
     if not read.is_unmapped and read.is_reverse:
         newread.seq = ngsutils.support.revcomp(read.seq)
         newread.qual = read.qual[::-1]
-    else:
+    else:
         newread.seq = read.seq
         newread.qual = read.qual

@@ -873,7 +875,6 @@
     return newread


-
 if __name__ == '__main__':
     import doctest
     doctest.testmod()
Binary file ngsutils/bam/__init__.pyc has changed
--- a/ngsutils/bed/__init__.py	Sun Dec 06 05:03:12 2015 -0500
+++ b/ngsutils/bed/__init__.py	Sun Nov 27 15:01:21 2016 -0500
@@ -1,4 +1,5 @@
 import os
+
 import ngsutils.support.ngs_utils
 import pysam

@@ -33,7 +34,6 @@
             raise StopIteration


-
 class BedFile(object):
     '''
     BED files are read in their entirety memory, in a series of bins. Each bin
@@ -135,11 +135,11 @@
                         if strand and strand != region.strand:
                             continue
                         if start <= region.start <= end or start <= region.end <= end:
-                            if not region in buf:
+                            if region not in buf:
                                 yield region
                                 buf.add(region)
                         elif region.start < start and region.end > end:
-                            if not region in buf:
+                            if region not in buf:
                                 yield region
                                 buf.add(region)
Binary file ngsutils/bed/__init__.pyc has changed
--- a/ngsutils/support/__init__.py	Sun Dec 06 05:03:12 2015 -0500
+++ b/ngsutils/support/__init__.py	Sun Nov 27 15:01:21 2016 -0500
@@ -1,13 +1,14 @@
 import collections
 import gzip
 import os
+import re
 import sys
-import re
 try:
     from eta import ETA
 except:
     pass

+
 class FASTARead(collections.namedtuple('FASTARecord', 'name comment seq')):
     def __repr__(self):
         if self.comment:
@@ -142,28 +143,30 @@

 class Symbolize(object):
     'Converts strings to symbols - basically a cache of strings'
+
     def __init__(self):
         self.__cache = {}

     def __getitem__(self, k):
-        if not k in self.__cache:
+        if k not in self.__cache:
             self.__cache[k] = k

         return self.__cache[k]

+
 symbols = Symbolize()

 _compliments = {
-'a': 't',
-'A': 'T',
-'c': 'g',
-'C': 'G',
-'g': 'c',
-'G': 'C',
-'t': 'a',
-'T': 'A',
-'n': 'n',
-'N': 'N'
+    'a': 't',
+    'A': 'T',
+    'c': 'g',
+    'C': 'G',
+    'g': 'c',
+    'G': 'C',
+    't': 'a',
+    'T': 'A',
+    'n': 'n',
+    'N': 'N'
 }


@@ -186,6 +189,7 @@
     Setup simple binning.  Bins are continuous 0->max.  Values are added to
     bins and then means / distributions can be calculated.
     '''
+
     def __init__(self):
         self.bins = []

@@ -214,9 +218,10 @@
         return func

     __cache = {}
+
     def inner(*args, **kwargs):
         k = (args, tuple(kwargs.iteritems()))
-        if  k not in __cache:
+        if k not in __cache:
             __cache[k] = func(*args, **kwargs)
         return __cache[k]
Binary file ngsutils/support/__init__.pyc has changed
--- a/ngsutils/support/bgzip.py	Sun Dec 06 05:03:12 2015 -0500
+++ b/ngsutils/support/bgzip.py	Sun Nov 27 15:01:21 2016 -0500
@@ -6,9 +6,9 @@
 will load the bgzip archive and output the block information.
 '''

-import sys
 import os
 import struct
+import sys


 class BGZip(object):
@@ -41,7 +41,7 @@
         if whence == 0:
             self.seek(0, 0)

-        ### read into chunk, if not enough data in chunk, read next chunk
+        # read into chunk, if not enough data in chunk, read next chunk
         ret = ''
         while amount and self.pos < self.fsize:
             if len(self.cdata) - self.cpos < amount:
@@ -133,5 +133,6 @@
         self.pos += size
         return struct.unpack(field_types, self.fileobj.read(size))

+
 if __name__ == '__main__':
     print BGZip(sys.argv[1]).dump()
--- a/ngsutils/support/dbsnp.py	Sun Dec 06 05:03:12 2015 -0500
+++ b/ngsutils/support/dbsnp.py	Sun Nov 27 15:01:21 2016 -0500
@@ -2,9 +2,10 @@
 Support package for processing a dbSNP tabix dump from UCSC.
 '''

-import pysam
 import collections
 import sys
+
+import pysam
 from ngsutils.support import revcomp


@@ -104,7 +105,7 @@

     def is_valid_variation(self, chrom, op, pos, seq, verbose=False):
         for snp in self.fetch(chrom, pos):
-            if not '/' in snp.observed or snp.clazz not in ['single', 'mixed', 'in-del', 'insertion', 'deletion']:
+            if '/' not in snp.observed or snp.clazz not in ['single', 'mixed', 'in-del', 'insertion', 'deletion']:
                 # these are odd variations that we can't deal with... (microsatellites, tooLongToDisplay members, etc)
                 continue
Binary file ngsutils/support/dbsnp.pyc has changed
--- a/ngsutils/support/llh.py	Sun Dec 06 05:03:12 2015 -0500
+++ b/ngsutils/support/llh.py	Sun Nov 27 15:01:21 2016 -0500
@@ -1,14 +1,16 @@
 '''
 Methods for calculating log-likelihoods for nucleotide frequencies
 '''
+import collections
 import math
-import collections
+
 from ngsutils.support import memoize

 _default_background = {'A': 0.3, 'T': 0.3, 'C': 0.2, 'G': 0.2}

 NucleotideLogLikelihood = collections.namedtuple('NucleotideLogLikelihood', 'A C G T pseudo')

+
 @memoize
 def pseudo_count(N, bg):
     '''
@@ -49,7 +51,6 @@
     return NucleotideLogLikelihood(math.log(freqA / bg['A']), math.log(freqC / bg['C']), math.log(freqG / bg['G']), math.log(freqT / bg['T']), pseudo)


-
 if __name__ == '__main__':
     import doctest
     doctest.testmod()
--- a/ngsutils/support/ngs_utils.py	Sun Dec 06 05:03:12 2015 -0500
+++ b/ngsutils/support/ngs_utils.py	Sun Nov 27 15:01:21 2016 -0500
@@ -1,14 +1,12 @@
 #!/usr/bin/env python
 """
-
 Common util classes / functions for the NGS project
-
 """
+import collections
+import gzip
+import os
+import re
 import sys
-import os
-import gzip
-import re
-import collections


 def format_number(n):
@@ -106,6 +104,7 @@
     A Python 2.6 class to handle 'with' opening of text files that may
     or may not be gzip compressed.
     '''
+
     def __init__(self, fname):
         self.fname = fname

@@ -207,6 +206,7 @@

 class memoize(object):
     'Simple memoizing decorator to cache results'
+
     def __init__(self, func):
         self.func = func
         self.cache = {}
Binary file ngsutils/support/ngs_utils.pyc has changed
--- a/ngsutils/support/regions.py	Sun Dec 06 05:03:12 2015 -0500
+++ b/ngsutils/support/regions.py	Sun Nov 27 15:01:21 2016 -0500
@@ -4,22 +4,23 @@
     particular genomic coordinate maps to any of those ranges.  This is less-
     efficient than an R-Tree, but easier to code.
     '''
+
     def __init__(self, name):
         self.ranges = {}
         self.name = name

     def add_range(self, chrom, strand, start, end):
-        if not chrom in self.ranges:
+        if chrom not in self.ranges:
             self.ranges[chrom] = {}

         bin = start / 100000
-        if not bin in self.ranges[chrom]:
+        if bin not in self.ranges[chrom]:
             self.ranges[chrom][bin] = []
         self.ranges[chrom][bin].insert(0, (start, end, strand))

         if (end / 100000) != bin:
             for bin in xrange(bin + 1, (end / 100000) + 1):
-                if not bin in self.ranges[chrom]:
+                if bin not in self.ranges[chrom]:
                     self.ranges[chrom][bin] = []
                 self.ranges[chrom][bin].insert(0, (start, end, strand))

@@ -27,10 +28,10 @@
         '''
         returns (region, is_reverse_orientation)
         '''
-        if not chrom in self.ranges:
+        if chrom not in self.ranges:
             return None, False
         bin = pos / 100000
-        if not bin in self.ranges[chrom]:
+        if bin not in self.ranges[chrom]:
             return None, False
         for start, end, r_strand in self.ranges[chrom][bin]:
             if pos >= start and pos <= end:
@@ -54,7 +55,7 @@
         promoters = RangeMatch('promoter')

         for gene in gtf.genes:
-            if valid_chroms and not gene.chrom in valid_chroms:
+            if valid_chroms and gene.chrom not in valid_chroms:
                 continue
             if gene.strand == '+':
                 promoters.add_range(gene.chrom, gene.strand, gene.start - 2000, gene.start)
@@ -79,7 +80,6 @@
                     exons.add_range(gene.chrom, gene.strand, start, end)
                     last_end = end

-
         self.regions.append(coding)
         self.regions.append(utr_5)
         self.regions.append(utr_3)
@@ -106,7 +106,7 @@
     def add_read(self, read, chrom):
         if read.is_unmapped:
             return
-
+
         if self.only_first_fragment and read.is_paired and not read.is_read1:
             return

@@ -163,4 +163,4 @@
                     tag = '%s/%s' % (tag, endtag)

         if not tag:
-            tag = 'intergenic'
\ No newline at end of file
+            tag = 'intergenic'
--- a/ngsutils/support/stats.py	Sun Dec 06 05:03:12 2015 -0500
+++ b/ngsutils/support/stats.py	Sun Nov 27 15:01:21 2016 -0500
@@ -2,8 +2,10 @@
 various statistical tests and methods...
 '''
 import math
+
 from ngsutils.support import memoize

+
 def median(vals):
     '''
     >>> median([1,2,3])
@@ -106,6 +108,7 @@

     return (mean, stdev)

+
 @memoize
 def poisson_prob(x, mean):
     '''
@@ -120,15 +123,16 @@
         0.33277427882095645
     '''
     acc = 0.0
-    for i in xrange(1, x+1):
+    for i in xrange(1, x + 1):
         acc += poisson_func(i, mean)
     return acc

+
 @memoize
 def poisson_func(mu, lambd):
     '''
         This is the Poisson distribution function
-
+
         p(mu) = (lambda^mu * e^(-lambda)) / (mu!)

         mu is a count
@@ -156,6 +160,7 @@
     '''
     return math.factorial(x)

+
 if __name__ == '__main__':
     import doctest
     doctest.testmod()
--- a/tool_dependencies.xml	Sun Dec 06 05:03:12 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="pysam" version="0.7.7">
-        <repository changeset_revision="0a5141bdf9d0" name="package_pysam_0_7_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>