comparison filter-below-abund.py @ 0:47cea67b74ea draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
author iuc
date Wed, 11 Nov 2015 09:47:17 -0500
parents
children f00d903600fe
comparison
equal deleted inserted replaced
-1:000000000000 0:47cea67b74ea
1 #! /usr/bin/env python
2 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is
3 # Copyright (C) 2011-2015, Michigan State University.
4 # Copyright (C) 2015, The Regents of the University of California.
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
8 # met:
9 #
10 # * Redistributions of source code must retain the above copyright
11 # notice, this list of conditions and the following disclaimer.
12 #
13 # * Redistributions in binary form must reproduce the above
14 # copyright notice, this list of conditions and the following
15 # disclaimer in the documentation and/or other materials provided
16 # with the distribution.
17 #
18 # * Neither the name of the Michigan State University nor the names
19 # of its contributors may be used to endorse or promote products
20 # derived from this software without specific prior written
21 # permission.
22 #
23 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #
35 # Contact: khmer-project@idyll.org
36 from __future__ import print_function
37 import sys
38 import os
39 import khmer
40 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter
41
# Passed to ThreadedSequenceProcessor as its second argument and reported
# as 'N THREADS' in the settings banner printed by main().
WORKER_THREADS = 8
# Passed to ThreadedSequenceProcessor as its third argument.
# NOTE(review): presumably the number of reads handed to a worker at once;
# confirm against khmer.thread_utils.
GROUPSIZE = 100

# Second argument to ht.trim_below_abundance() for every read.
CUTOFF = 50
46
47 ###
48
49
def main():
    """Filter each input sequence file against a saved countgraph.

    Command line: ``<countgraph file> <sequence file> [<sequence file> ...]``

    ``sys.argv[1]`` is loaded with ``khmer.load_countgraph``; every
    remaining argument is treated as a sequence file.  For each input, a
    file named ``<basename of input>.below`` is written relative to the
    current working directory.

    Per read:
      * reads whose sequence contains 'N' are dropped;
      * the rest are passed to ``ht.trim_below_abundance(seq, CUTOFF)`` and
        kept (as the returned trimmed sequence) only when the returned
        trim position is at least the countgraph's k-mer size.

    Raises:
        IndexError: if no countgraph file is given on the command line.
    """
    counting_ht = sys.argv[1]
    infiles = sys.argv[2:]

    print('file with ht: %s' % counting_ht)
    print('-- settings:')
    print('N THREADS', WORKER_THREADS)
    print('--')

    print('making hashtable')
    ht = khmer.load_countgraph(counting_ht)
    K = ht.ksize()

    # Defined once: the callback depends only on the countgraph, not on the
    # file currently being processed.
    def process_fn(record, ht=ht):
        """Return (name, trimmed sequence) to keep a read, else (None, None)."""
        name = record['name']
        seq = record['sequence']
        if 'N' in seq:
            return None, None

        trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)

        if trim_at >= K:
            return name, trim_seq

        return None, None

    for infile in infiles:
        print('filtering', infile)
        outfile = os.path.basename(infile) + '.below'

        # Close (and flush) each output file as soon as its input is done
        # instead of leaking one open handle per input file.
        # NOTE(review): this relies on tsp.start() returning only after the
        # writer thread has finished with outfp -- confirm against
        # khmer.thread_utils.ThreadedSequenceProcessor.start.
        with open(outfile, 'w') as outfp:
            tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS,
                                            GROUPSIZE)
            tsp.start(verbose_fasta_iter(infile), outfp)
85
# Run the filter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()