Mercurial > repos > iuc > khmer_partition
annotate filter-below-abund.py @ 2:09779cc94f94 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit bfa8bda732de882f6fa5f5375f8468ad229cceea
author | iuc |
---|---|
date | Wed, 09 Nov 2016 05:58:08 -0500 |
parents | 1413823dce99 |
children | eacbaa76d6fc |
rev | line source |
---|---|
0
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
1 #! /usr/bin/env python |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
2 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
3 # Copyright (C) 2011-2015, Michigan State University. |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
4 # Copyright (C) 2015, The Regents of the University of California. |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
5 # |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
6 # Redistribution and use in source and binary forms, with or without |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
7 # modification, are permitted provided that the following conditions are |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
8 # met: |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
9 # |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
10 # * Redistributions of source code must retain the above copyright |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
11 # notice, this list of conditions and the following disclaimer. |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
12 # |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
13 # * Redistributions in binary form must reproduce the above |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
14 # copyright notice, this list of conditions and the following |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
15 # disclaimer in the documentation and/or other materials provided |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
16 # with the distribution. |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
17 # |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
18 # * Neither the name of the Michigan State University nor the names |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
19 # of its contributors may be used to endorse or promote products |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
20 # derived from this software without specific prior written |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
21 # permission. |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
22 # |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
23 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
24 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
25 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
26 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
27 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
28 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
29 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
30 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
31 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
32 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
33 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
34 # |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
35 # Contact: khmer-project@idyll.org |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
36 from __future__ import print_function |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
37 import sys |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
38 import os |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
39 import khmer |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
40 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
41 |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
# Number of worker threads passed to ThreadedSequenceProcessor in main().
WORKER_THREADS = 8
# Third positional argument passed to ThreadedSequenceProcessor in main()
# (presumably the number of records handed to a worker at once -- confirm
# against khmer.thread_utils).
GROUPSIZE = 100

# Abundance cutoff passed to ht.trim_below_abundance() for every sequence.
CUTOFF = 50
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
46 |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
47 ### |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
48 |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
49 |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
def main():
    """Filter sequence files against a saved khmer countgraph.

    Command line: ``filter-below-abund.py <countgraph> [<infile> ...]``

    ``sys.argv[1]`` is loaded with ``khmer.load_countgraph``; every
    following argument is treated as an input sequence file.  For each
    input file the records are run through ``process_fn`` by a
    ``ThreadedSequenceProcessor`` and the results are written to
    ``<basename of infile>.below`` in the current working directory.

    A record is dropped when its sequence contains ``'N'`` or when the
    trim position returned by ``ht.trim_below_abundance(seq, CUTOFF)`` is
    smaller than the countgraph's k-mer size; otherwise the trimmed
    sequence is kept under the original record name.

    Exits with a usage message (non-zero status) when the countgraph
    argument is missing.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit('usage: filter-below-abund.py <countgraph> [<infile> ...]')

    counting_ht = sys.argv[1]
    infiles = sys.argv[2:]

    print('file with ht: %s' % counting_ht)
    print('-- settings:')
    print('N THREADS', WORKER_THREADS)
    print('--')

    print('making hashtable')
    ht = khmer.load_countgraph(counting_ht)
    K = ht.ksize()

    # The per-record callback does not depend on the input file, so it is
    # defined once rather than re-created on every loop iteration.
    def process_fn(record, ht=ht):
        """Return (name, trimmed sequence), or (None, None) to drop it."""
        name = record['name']
        seq = record['sequence']
        if 'N' in seq:
            return None, None

        trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)

        # Keep the read only if at least one full k-mer survives trimming.
        if trim_at >= K:
            return name, trim_seq

        return None, None

    for infile in infiles:
        print('filtering', infile)
        outfile = os.path.basename(infile) + '.below'

        # The context manager guarantees the output is flushed and closed,
        # even if processing raises.
        # NOTE(review): relies on tsp.start() returning only after all
        # records have been written -- confirm against khmer.thread_utils.
        with open(outfile, 'w') as outfp:
            tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS,
                                            GROUPSIZE)
            tsp.start(verbose_fasta_iter(infile), outfp)
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
85 |
1413823dce99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
# Run the filter only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()