Mercurial > repos > iuc > khmer_extract_partitions
diff filter-below-abund.py @ 5:518ba4a77274 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/khmer commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
author | iuc |
---|---|
date | Fri, 07 Sep 2018 10:59:19 -0400 |
parents | 7d8138b4a593 |
children |
line wrap: on
line diff
--- a/filter-below-abund.py Sat Jan 21 14:43:04 2017 -0500 +++ b/filter-below-abund.py Fri Sep 07 10:59:19 2018 -0400 @@ -38,11 +38,10 @@ import os import sys -import khmer -from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter +import screed +from khmer import Countgraph, ReadParser +from khmer.utils import (broken_paired_reader, write_record) -WORKER_THREADS = 8 -GROUPSIZE = 100 CUTOFF = 50 @@ -51,12 +50,9 @@ infiles = sys.argv[2:] print('file with ht: %s' % counting_ht) - print('-- settings:') - print('N THREADS', WORKER_THREADS) - print('--') print('making hashtable') - ht = khmer.load_countgraph(counting_ht) + ht = Countgraph.load(counting_ht) K = ht.ksize() for infile in infiles: @@ -65,22 +61,18 @@ outfp = open(outfile, 'w') - def process_fn(record, ht=ht): - name = record['name'] - seq = record['sequence'] + paired_iter = broken_paired_reader(ReadParser(infile), min_length=K, + force_single=True) + for n, is_pair, read1, read2 in paired_iter: + name = read1.name + seq = read1.sequence if 'N' in seq: return None, None trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF) if trim_at >= K: - return name, trim_seq - - return None, None - - tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS, GROUPSIZE) - - tsp.start(verbose_fasta_iter(infile), outfp) + write_record(screed.Record(name=name, sequence=trim_seq), outfp) if __name__ == '__main__':