Mercurial > repos > iuc > ngsutils_bam_filter
changeset 2:7a68005de299 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
author | iuc |
---|---|
date | Sun, 27 Nov 2016 15:01:21 -0500 |
parents | 8187a729d9f4 |
children | 9b9ae5963d3c |
files | filter.py macros.xml ngsutils/__init__.pyc ngsutils/bam/__init__.py ngsutils/bam/__init__.pyc ngsutils/bed/__init__.py ngsutils/bed/__init__.pyc ngsutils/support/__init__.py ngsutils/support/__init__.pyc ngsutils/support/bgzip.py ngsutils/support/dbsnp.py ngsutils/support/dbsnp.pyc ngsutils/support/llh.py ngsutils/support/ngs_utils.py ngsutils/support/ngs_utils.pyc ngsutils/support/regions.py ngsutils/support/stats.py tool_dependencies.xml |
diffstat | 18 files changed, 76 insertions(+), 65 deletions(-) [+] |
line wrap: on
line diff
--- a/filter.py Sun Dec 06 05:03:12 2015 -0500 +++ b/filter.py Sun Nov 27 15:01:21 2016 -0500 @@ -1,6 +1,6 @@ #!/usr/bin/env python -## category General -## desc Removes reads from a BAM file based on criteria +# category General +# desc Removes reads from a BAM file based on criteria """ Removes reads from a BAM file based on criteria @@ -17,7 +17,7 @@ -maxlen val Remove reads that are larger than {val} -mapped Keep only mapped reads -unmapped Keep only unmapped reads - -properpair Keep only properly paired reads (both mapped, + -properpair Keep only properly paired reads (both mapped, correct orientation, flag set in BAM) -noproperpair Keep only not-properly paired reads @@ -110,11 +110,11 @@ import os import sys + import pysam -from ngsutils.bam import bam_iter +from ngsutils.bam import bam_iter, read_calc_mismatches, read_calc_mismatches_gen, read_calc_mismatches_ref, read_calc_variations +from ngsutils.bed import BedFile from ngsutils.support.dbsnp import DBSNP -from ngsutils.bam import read_calc_mismatches, read_calc_mismatches_ref, read_calc_mismatches_gen, read_calc_variations -from ngsutils.bed import BedFile def usage(): @@ -207,7 +207,7 @@ for k in del_list: self.rev_pos.remove(k) - if not start_pos in self.rev_pos: + if start_pos not in self.rev_pos: self.rev_pos.add(start_pos) return True return False @@ -344,6 +344,7 @@ def close(self): pass + class IncludeRef(object): def __init__(self, ref): self.ref = ref @@ -645,7 +646,7 @@ class MaskFlag(object): def __init__(self, value): - if type(value) == type(1): + if isinstance(value, int): self.flag = value else: if value[0:2] == '0x': @@ -710,7 +711,7 @@ return "maximum mismatch ratio: %s" % self.val def filter(self, bam, read): - return read_calc_mismatches(read) <= self.ratio*len(read.seq) + return read_calc_mismatches(read) <= self.ratio * len(read.seq) def close(self): pass @@ -826,6 +827,7 @@ return True return False + _criteria = { 'mapped': Mapped, 'unmapped': Unmapped, @@ -895,7 +897,7 @@ failed += 1 if failed_out: failed_out.write('%s\t%s\n' % (read.qname, criterion)) - #outfile.write(read_to_unmapped(read)) + # outfile.write(read_to_unmapped(read)) break if p: passed += 1 @@ -930,6 +932,7 @@ read.mapq = 0 return read + if __name__ == '__main__': infile = None outfile = None
--- a/macros.xml Sun Dec 06 05:03:12 2015 -0500 +++ b/macros.xml Sun Nov 27 15:01:21 2016 -0500 @@ -1,8 +1,8 @@ <macros> - <token name="@WRAPPER_VERSION@">0.5.7</token> + <token name="@WRAPPER_VERSION@">0.5.8</token> <xml name="requirements"> <requirements> - <requirement type="package" version="0.7.7">pysam</requirement> + <requirement type="package" version="0.9.1.4">pysam</requirement> </requirements> </xml> <xml name="version">
--- a/ngsutils/bam/__init__.py Sun Dec 06 05:03:12 2015 -0500 +++ b/ngsutils/bam/__init__.py Sun Nov 27 15:01:21 2016 -0500 @@ -1,12 +1,13 @@ -import sys import os import re +import sys + +import ngsutils.support import pysam try: from eta import ETA except: pass -import ngsutils.support def bam_open(fname, mode='r', *args, **kwargs): @@ -159,7 +160,7 @@ >>> cigar_tostr(((0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 1))) '1M1I1D1N1S1H1P' ''' - + s = '' for op, size in cigar: @@ -230,8 +231,9 @@ md_pos = 0 while md and md_pos < maxlength: - tmp = '0' # preload a zero so that immediate mismatches will be caught - # the zero will have no affect otherwise... + # preload a zero so that immediate mismatches will be caught + # the zero will have no affect otherwise... + tmp = '0' # look for matches while md and md[0] in '0123456789': @@ -625,7 +627,7 @@ cur_pos = frag_end frag_idx += 1 if len(fragments) <= frag_idx: - print 'ERROR converting: ', name, fragments + print 'ERROR converting: ', name, fragments return (chrom, 0, chr_cigar) frag_start, frag_end = fragments[frag_idx] chr_cigar.append((3, frag_start - cur_pos)) @@ -864,7 +866,7 @@ if not read.is_unmapped and read.is_reverse: newread.seq = ngsutils.support.revcomp(read.seq) newread.qual = read.qual[::-1] - else: + else: newread.seq = read.seq newread.qual = read.qual @@ -873,7 +875,6 @@ return newread - if __name__ == '__main__': import doctest doctest.testmod()
--- a/ngsutils/bed/__init__.py Sun Dec 06 05:03:12 2015 -0500 +++ b/ngsutils/bed/__init__.py Sun Nov 27 15:01:21 2016 -0500 @@ -1,4 +1,5 @@ import os + import ngsutils.support.ngs_utils import pysam @@ -33,7 +34,6 @@ raise StopIteration - class BedFile(object): ''' BED files are read in their entirety memory, in a series of bins. Each bin @@ -135,11 +135,11 @@ if strand and strand != region.strand: continue if start <= region.start <= end or start <= region.end <= end: - if not region in buf: + if region not in buf: yield region buf.add(region) elif region.start < start and region.end > end: - if not region in buf: + if region not in buf: yield region buf.add(region)
--- a/ngsutils/support/__init__.py Sun Dec 06 05:03:12 2015 -0500 +++ b/ngsutils/support/__init__.py Sun Nov 27 15:01:21 2016 -0500 @@ -1,13 +1,14 @@ import collections import gzip import os +import re import sys -import re try: from eta import ETA except: pass + class FASTARead(collections.namedtuple('FASTARecord', 'name comment seq')): def __repr__(self): if self.comment: @@ -142,28 +143,30 @@ class Symbolize(object): 'Converts strings to symbols - basically a cache of strings' + def __init__(self): self.__cache = {} def __getitem__(self, k): - if not k in self.__cache: + if k not in self.__cache: self.__cache[k] = k return self.__cache[k] + symbols = Symbolize() _compliments = { -'a': 't', -'A': 'T', -'c': 'g', -'C': 'G', -'g': 'c', -'G': 'C', -'t': 'a', -'T': 'A', -'n': 'n', -'N': 'N' + 'a': 't', + 'A': 'T', + 'c': 'g', + 'C': 'G', + 'g': 'c', + 'G': 'C', + 't': 'a', + 'T': 'A', + 'n': 'n', + 'N': 'N' } @@ -186,6 +189,7 @@ Setup simple binning. Bins are continuous 0->max. Values are added to bins and then means / distributions can be calculated. ''' + def __init__(self): self.bins = [] @@ -214,9 +218,10 @@ return func __cache = {} + def inner(*args, **kwargs): k = (args, tuple(kwargs.iteritems())) - if k not in __cache: + if k not in __cache: __cache[k] = func(*args, **kwargs) return __cache[k]
--- a/ngsutils/support/bgzip.py Sun Dec 06 05:03:12 2015 -0500 +++ b/ngsutils/support/bgzip.py Sun Nov 27 15:01:21 2016 -0500 @@ -6,9 +6,9 @@ will load the bgzip archive and output the block information. ''' -import sys import os import struct +import sys class BGZip(object): @@ -41,7 +41,7 @@ if whence == 0: self.seek(0, 0) - ### read into chunk, if not enough data in chunk, read next chunk + # read into chunk, if not enough data in chunk, read next chunk ret = '' while amount and self.pos < self.fsize: if len(self.cdata) - self.cpos < amount: @@ -133,5 +133,6 @@ self.pos += size return struct.unpack(field_types, self.fileobj.read(size)) + if __name__ == '__main__': print BGZip(sys.argv[1]).dump()
--- a/ngsutils/support/dbsnp.py Sun Dec 06 05:03:12 2015 -0500 +++ b/ngsutils/support/dbsnp.py Sun Nov 27 15:01:21 2016 -0500 @@ -2,9 +2,10 @@ Support package for processing a dbSNP tabix dump from UCSC. ''' -import pysam import collections import sys + +import pysam from ngsutils.support import revcomp @@ -104,7 +105,7 @@ def is_valid_variation(self, chrom, op, pos, seq, verbose=False): for snp in self.fetch(chrom, pos): - if not '/' in snp.observed or snp.clazz not in ['single', 'mixed', 'in-del', 'insertion', 'deletion']: + if '/' not in snp.observed or snp.clazz not in ['single', 'mixed', 'in-del', 'insertion', 'deletion']: # these are odd variations that we can't deal with... (microsatellites, tooLongToDisplay members, etc) continue
--- a/ngsutils/support/llh.py Sun Dec 06 05:03:12 2015 -0500 +++ b/ngsutils/support/llh.py Sun Nov 27 15:01:21 2016 -0500 @@ -1,14 +1,16 @@ ''' Methods for calculating log-likelihoods for nucleotide frequencies ''' +import collections import math -import collections + from ngsutils.support import memoize _default_background = {'A': 0.3, 'T': 0.3, 'C': 0.2, 'G': 0.2} NucleotideLogLikelihood = collections.namedtuple('NucleotideLogLikelihood', 'A C G T pseudo') + @memoize def pseudo_count(N, bg): ''' @@ -49,7 +51,6 @@ return NucleotideLogLikelihood(math.log(freqA / bg['A']), math.log(freqC / bg['C']), math.log(freqG / bg['G']), math.log(freqT / bg['T']), pseudo) - if __name__ == '__main__': import doctest doctest.testmod()
--- a/ngsutils/support/ngs_utils.py Sun Dec 06 05:03:12 2015 -0500 +++ b/ngsutils/support/ngs_utils.py Sun Nov 27 15:01:21 2016 -0500 @@ -1,14 +1,12 @@ #!/usr/bin/env python """ - Common util classes / functions for the NGS project - """ +import collections +import gzip +import os +import re import sys -import os -import gzip -import re -import collections def format_number(n): @@ -106,6 +104,7 @@ A Python 2.6 class to handle 'with' opening of text files that may or may not be gzip compressed. ''' + def __init__(self, fname): self.fname = fname @@ -207,6 +206,7 @@ class memoize(object): 'Simple memoizing decorator to cache results' + def __init__(self, func): self.func = func self.cache = {}
--- a/ngsutils/support/regions.py Sun Dec 06 05:03:12 2015 -0500 +++ b/ngsutils/support/regions.py Sun Nov 27 15:01:21 2016 -0500 @@ -4,22 +4,23 @@ particular genomic coordinate maps to any of those ranges. This is less- efficient than an R-Tree, but easier to code. ''' + def __init__(self, name): self.ranges = {} self.name = name def add_range(self, chrom, strand, start, end): - if not chrom in self.ranges: + if chrom not in self.ranges: self.ranges[chrom] = {} bin = start / 100000 - if not bin in self.ranges[chrom]: + if bin not in self.ranges[chrom]: self.ranges[chrom][bin] = [] self.ranges[chrom][bin].insert(0, (start, end, strand)) if (end / 100000) != bin: for bin in xrange(bin + 1, (end / 100000) + 1): - if not bin in self.ranges[chrom]: + if bin not in self.ranges[chrom]: self.ranges[chrom][bin] = [] self.ranges[chrom][bin].insert(0, (start, end, strand)) @@ -27,10 +28,10 @@ ''' returns (region, is_reverse_orientation) ''' - if not chrom in self.ranges: + if chrom not in self.ranges: return None, False bin = pos / 100000 - if not bin in self.ranges[chrom]: + if bin not in self.ranges[chrom]: return None, False for start, end, r_strand in self.ranges[chrom][bin]: if pos >= start and pos <= end: @@ -54,7 +55,7 @@ promoters = RangeMatch('promoter') for gene in gtf.genes: - if valid_chroms and not gene.chrom in valid_chroms: + if valid_chroms and gene.chrom not in valid_chroms: continue if gene.strand == '+': promoters.add_range(gene.chrom, gene.strand, gene.start - 2000, gene.start) @@ -79,7 +80,6 @@ exons.add_range(gene.chrom, gene.strand, start, end) last_end = end - self.regions.append(coding) self.regions.append(utr_5) self.regions.append(utr_3) @@ -106,7 +106,7 @@ def add_read(self, read, chrom): if read.is_unmapped: return - + if self.only_first_fragment and read.is_paired and not read.is_read1: return @@ -163,4 +163,4 @@ tag = '%s/%s' % (tag, endtag) if not tag: - tag = 'intergenic' \ No newline at end of file + tag = 'intergenic'
--- a/ngsutils/support/stats.py Sun Dec 06 05:03:12 2015 -0500 +++ b/ngsutils/support/stats.py Sun Nov 27 15:01:21 2016 -0500 @@ -2,8 +2,10 @@ various statistical tests and methods... ''' import math + from ngsutils.support import memoize + def median(vals): ''' >>> median([1,2,3]) @@ -106,6 +108,7 @@ return (mean, stdev) + @memoize def poisson_prob(x, mean): ''' @@ -120,15 +123,16 @@ 0.33277427882095645 ''' acc = 0.0 - for i in xrange(1, x+1): + for i in xrange(1, x + 1): acc += poisson_func(i, mean) return acc + @memoize def poisson_func(mu, lambd): ''' This is the Poisson distribution function - + p(mu) = (lambda^mu * e^(-lambda)) / (mu!) mu is a count @@ -156,6 +160,7 @@ ''' return math.factorial(x) + if __name__ == '__main__': import doctest doctest.testmod()
--- a/tool_dependencies.xml Sun Dec 06 05:03:12 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="pysam" version="0.7.7"> - <repository changeset_revision="0a5141bdf9d0" name="package_pysam_0_7_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>