Mercurial > repos > peterjc > sample_seqs
diff tools/sample_seqs/sample_seqs.py @ 6:31f5701cd2e9 draft
v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
author | peterjc |
---|---|
date | Thu, 11 May 2017 07:24:38 -0400 |
parents | 6b71ad5d43fb |
children | 5f505ed46e16 |
line wrap: on
line diff
(Unified diff restored to one line per row; the original indentation and blank lines were lost in extraction and are reconstructed here from the hunk line counts and Python syntax.)

```diff
--- a/tools/sample_seqs/sample_seqs.py	Wed Feb 01 09:39:36 2017 -0500
+++ b/tools/sample_seqs/sample_seqs.py	Thu May 11 07:24:38 2017 -0400
@@ -63,7 +63,7 @@
 options, args = parser.parse_args()
 
 if options.version:
-    print("v0.2.3")
+    print("v0.2.4")
     sys.exit(0)
 
 try:
@@ -146,6 +146,7 @@
     sys.stderr.write("Sampling every %ith sequence\n" % N)
 
     def sampler(iterator):
+        """Sample every Nth sequence."""
         global N
         count = 0
         for record in iterator:
@@ -157,11 +158,12 @@
         percent = float(options.percent) / 100.0
     except ValueError:
         sys.exit("Bad -p percent argument %r" % options.percent)
-    if percent <= 0.0 or 1.0 <= percent:
+    if not(0.0 <= percent <= 1.0):
         sys.exit("Bad -p percent argument %r" % options.percent)
     sys.stderr.write("Sampling %0.3f%% of sequences\n" % (100.0 * percent))
 
     def sampler(iterator):
+        """Sample given percentage of sequences."""
         global percent
         count = 0
         taken = 0
@@ -215,6 +217,7 @@
             assert taken == N, "Picked %i, wanted %i" % (taken, N)
     else:
         def sampler(iterator):
+            """Sample given number of sequences."""
             # Mimic the percentage sampler, with double check on final count
             global N, total
             # Do we need a floating point fudge factor epsilon?
@@ -268,12 +271,11 @@
             raise ValueError(
                 "Records in Fasta files should start with '>' character")
         try:
-            id = line[1:].split(None, 1)[0]
+            line[1:].split(None, 1)[0]
         except IndexError:
             if not no_id_warned:
                 sys.stderr.write("WARNING - Malformed FASTA entry with no identifier\n")
-            no_id_warned = True
-            id = None
+                no_id_warned = True
         lines = [line]
         line = handle.readline()
         while True:
@@ -346,6 +348,7 @@
             count = writer.write_file(iterator_filter(SffIterator(in_handle)))
     return count
 
+
 if seq_format == "sff":
     count = sff_filter(in_file, out_file, sampler, interleaved)
 elif seq_format == "fasta":
```