sample_seqs: tools/sample_seqs/sample

comparison tools/sample_seqs/sample_seqs.py @ 6:31f5701cd2e9 draft

v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.

author	peterjc
date	Thu, 11 May 2017 07:24:38 -0400
parents	6b71ad5d43fb
children	5f505ed46e16

comparison

equal deleted inserted replaced

-:6b71ad5d43fb
+:31f5701cd2e9
 default=False, action="store_true",
 help="Show version and quit")
 options, args = parser.parse_args()
 if options.version:
-print("v0.2.3")
+print("v0.2.4")
 sys.exit(0)
 try:
 from Bio import SeqIO
 from Bio.SeqIO.QualityIO import FastqGeneralIterator
 sys.stderr.write("Sampling every %ird sequence\n" % N)
 else:
 sys.stderr.write("Sampling every %ith sequence\n" % N)
 def sampler(iterator):
+"""Sample every Nth sequence."""
 global N
 count = 0
 for record in iterator:
 count += 1
 if count % N == 1:
 elif options.percent:
 try:
 percent = float(options.percent) / 100.0
 except ValueError:
 sys.exit("Bad -p percent argument %r" % options.percent)
-if percent <= 0.0 or 1.0 <= percent:
+if not(0.0 <= percent <= 1.0):
 sys.exit("Bad -p percent argument %r" % options.percent)
 sys.stderr.write("Sampling %0.3f%% of sequences\n" % (100.0 * percent))
 def sampler(iterator):
+"""Sample given percentage of sequences."""
 global percent
 count = 0
 taken = 0
 for record in iterator:
 count += 1
 taken += 1
 yield record
 assert taken == N, "Picked %i, wanted %i" % (taken, N)
 else:
 def sampler(iterator):
+"""Sample given number of sequences."""
 # Mimic the percentage sampler, with double check on final count
 global N, total
 # Do we need a floating point fudge factor epsilon?
 # i.e. What if percentage comes out slightly too low, and
 # we could end up missing last few desired sequences?
 while True:
 if line[0] != ">":
 raise ValueError(
 "Records in Fasta files should start with '>' character")
 try:
-id = line[1:].split(None, 1)[0]
+line[1:].split(None, 1)[0]
 except IndexError:
 if not no_id_warned:
 sys.stderr.write("WARNING - Malformed FASTA entry with no identifier\n")
 no_id_warned = True
-id = None
 lines = [line]
 line = handle.readline()
 while True:
 if not line:
 break
 count /= 2
 else:
 count = writer.write_file(iterator_filter(SffIterator(in_handle)))
 return count
 if seq_format == "sff":
 count = sff_filter(in_file, out_file, sampler, interleaved)
 elif seq_format == "fasta":
 count = fasta_filter(in_file, out_file, sampler, interleaved)
 elif seq_format.startswith("fastq"):

Mercurial > repos > peterjc > sample_seqs

comparison tools/sample_seqs/sample_seqs.py @ 6:31f5701cd2e9 draft