Mercurial > repos > iuc > khmer_extract_partitions
comparison filter-below-abund.py @ 0:d5a18dd63529 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
author | iuc |
---|---|
date | Wed, 11 Nov 2015 09:47:38 -0500 |
parents | |
children | 18dc7b2d49d9 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d5a18dd63529 |
---|---|
1 #! /usr/bin/env python | |
2 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is | |
3 # Copyright (C) 2011-2015, Michigan State University. | |
4 # Copyright (C) 2015, The Regents of the University of California. | |
5 # | |
6 # Redistribution and use in source and binary forms, with or without | |
7 # modification, are permitted provided that the following conditions are | |
8 # met: | |
9 # | |
10 # * Redistributions of source code must retain the above copyright | |
11 # notice, this list of conditions and the following disclaimer. | |
12 # | |
13 # * Redistributions in binary form must reproduce the above | |
14 # copyright notice, this list of conditions and the following | |
15 # disclaimer in the documentation and/or other materials provided | |
16 # with the distribution. | |
17 # | |
18 # * Neither the name of the Michigan State University nor the names | |
19 # of its contributors may be used to endorse or promote products | |
20 # derived from this software without specific prior written | |
21 # permission. | |
22 # | |
23 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
24 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
25 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
26 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
27 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
28 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
29 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
30 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
31 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
32 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
33 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
34 # | |
35 # Contact: khmer-project@idyll.org | |
36 from __future__ import print_function | |
37 import sys | |
38 import os | |
39 import khmer | |
40 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter | |
41 | |
# Tuning knobs handed to ThreadedSequenceProcessor in main(): the second and
# third constructor arguments respectively.
# NOTE(review): presumably the worker-thread count and the number of records
# batched per work unit -- confirm against khmer.thread_utils.
WORKER_THREADS = 8
GROUPSIZE = 100

# Abundance threshold passed to ht.trim_below_abundance() for every sequence.
CUTOFF = 50
46 | |
47 ### | |
48 | |
49 | |
def main():
    """Trim each input sequence file against a saved countgraph.

    Command line: ``<countgraph file> [<sequence file> ...]``

    The countgraph named by the first argument is loaded once.  For every
    remaining argument, records are read with ``verbose_fasta_iter`` and
    pushed through ``ThreadedSequenceProcessor`` together with an output
    handle opened on ``<basename of input>.below`` in the current working
    directory.

    Exits with status 1 (after printing a usage line to stderr) when the
    countgraph argument is missing.  Giving no sequence files is allowed
    and simply loads the countgraph and returns.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.stderr.write(
            'usage: %s <countgraph> [<seqfile> ...]\n' % sys.argv[0])
        sys.exit(1)

    counting_ht = sys.argv[1]
    infiles = sys.argv[2:]

    print('file with ht: %s' % counting_ht)
    print('-- settings:')
    print('N THREADS', WORKER_THREADS)
    print('--')

    print('making hashtable')
    ht = khmer.load_countgraph(counting_ht)
    ksize = ht.ksize()

    # The per-record callback does not depend on the file being processed,
    # so it is built once rather than on every loop iteration.
    def process_fn(record, ht=ht):
        """Return ``(name, trimmed_sequence)`` or ``(None, None)``.

        ``(None, None)`` is returned for sequences containing an
        upper-case 'N' and for sequences whose trim position falls short
        of the k-mer size.
        NOTE(review): presumably the processor drops ``(None, None)``
        results -- confirm against khmer.thread_utils.
        """
        name = record['name']
        seq = record['sequence']
        if 'N' in seq:
            return None, None

        trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)

        if trim_at >= ksize:
            return name, trim_seq

        return None, None

    for infile in infiles:
        print('filtering', infile)
        outfile = os.path.basename(infile) + '.below'

        # Close (and flush) the output even if processing raises; the
        # original left one handle open per input file.
        # NOTE(review): relies on tsp.start() returning only after its
        # writer has finished with outfp -- confirm against
        # khmer.thread_utils.ThreadedSequenceProcessor.start.
        with open(outfile, 'w') as outfp:
            tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS,
                                            GROUPSIZE)
            tsp.start(verbose_fasta_iter(infile), outfp)
85 | |
# Run the filter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()