Mercurial > repos > peterjc > sample_seqs
annotate tools/sample_seqs/sample_seqs.py @ 10:bdaefd241921 draft default tip
Remove legacy tool_dependencies.xml
| author | peterjc |
|---|---|
| date | Thu, 30 Nov 2023 09:59:08 +0000 |
| parents | 5f505ed46e16 |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 """Sub-sample sequence from a FASTA, FASTQ or SFF file. | |
| 3 | |
| 4 This tool is a short Python script which requires Biopython 1.62 or later | |
| 2 | 5 for sequence parsing. If you use this tool in scientific work leading to a |
| 0 | 6 publication, please cite the Biopython application note: |
| 7 | |
| 8 Cock et al 2009. Biopython: freely available Python tools for computational | |
| 9 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3. | |
| 9 | 10 https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878. |
| 0 | 11 |
| 9 | 12 This script is copyright 2014-2021 by Peter Cock, The James Hutton Institute |
| 0 | 13 (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved. |
| 14 See accompanying text file for licence details (MIT license). | |
| 15 | |
| 2 | 16 Use -v or --version to get the version, -h or --help for help. |
| 0 | 17 """ |
| 18 import os | |
| 19 import sys | |
| 2 | 20 from optparse import OptionParser |
| 0 | 21 |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
22 # Parse Command Line |
| 2 | 23 usage = """Use as follows: |
| 24 | |
| 25 $ python sample_seqs.py [options] | |
| 26 | |
| 27 e.g. Sample 20% of the reads: | |
| 28 | |
| 29 $ python sample_seqs.py -i my_seq.fastq -f fastq -p 20.0 -o sample.fastq | |
| 30 | |
| 31 This samples uniformly though the file, rather than at random, and therefore | |
| 32 should be reproducible. | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
33 |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
34 If you have interleaved paired reads, use the --interleaved switch. If |
| 9 | 35 instead you have two matched files (one for each pair), run this tool |
| 36 on each with the same sampling options to make two matched smaller files. | |
| 2 | 37 """ |
| 38 parser = OptionParser(usage=usage) | |
| 9 | 39 parser.add_option( |
| 40 "-i", | |
| 41 "--input", | |
| 42 dest="input", | |
| 43 default=None, | |
| 44 help="Input sequences filename", | |
| 45 metavar="FILE", | |
| 46 ) | |
| 47 parser.add_option( | |
| 48 "-f", | |
| 49 "--format", | |
| 50 dest="format", | |
| 51 default=None, | |
| 52 help="Input sequence format (e.g. fasta, fastq, sff)", | |
| 53 ) | |
| 54 parser.add_option( | |
| 55 "-o", | |
| 56 "--output", | |
| 57 dest="output", | |
| 58 default=None, | |
| 59 help="Output sampled sequenced filename", | |
| 60 metavar="FILE", | |
| 61 ) | |
| 62 parser.add_option( | |
| 63 "-p", | |
| 64 "--percent", | |
| 65 dest="percent", | |
| 66 default=None, | |
| 67 help="Take this percent of the reads", | |
| 68 ) | |
| 69 parser.add_option( | |
| 70 "-n", "--everyn", dest="everyn", default=None, help="Take every N-th read" | |
| 71 ) | |
| 72 parser.add_option( | |
| 73 "-c", "--count", dest="count", default=None, help="Take exactly N reads" | |
| 74 ) | |
| 75 parser.add_option( | |
| 76 "--interleaved", | |
| 77 dest="interleaved", | |
| 78 default=False, | |
| 79 action="store_true", | |
| 80 help="Input is interleaved reads, preserve the pairings", | |
| 81 ) | |
| 82 parser.add_option( | |
| 83 "-v", | |
| 84 "--version", | |
| 85 dest="version", | |
| 86 default=False, | |
| 87 action="store_true", | |
| 88 help="Show version and quit", | |
| 89 ) | |
| 2 | 90 options, args = parser.parse_args() |
| 91 | |
| 92 if options.version: | |
|
6
31f5701cd2e9
v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
peterjc
parents:
5
diff
changeset
|
93 print("v0.2.4") |
| 0 | 94 sys.exit(0) |
| 95 | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
96 try: |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
97 from Bio import SeqIO |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
98 from Bio.SeqIO.QualityIO import FastqGeneralIterator |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
99 from Bio.SeqIO.FastaIO import SimpleFastaParser |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
100 from Bio.SeqIO.SffIO import SffIterator, SffWriter |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
101 except ImportError: |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
102 sys.exit("This script requires Biopython.") |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
103 |
| 2 | 104 in_file = options.input |
| 105 out_file = options.output | |
| 106 interleaved = options.interleaved | |
| 107 | |
| 108 if not in_file: | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
109 sys.exit("Require an input filename") |
| 0 | 110 if in_file != "/dev/stdin" and not os.path.isfile(in_file): |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
111 sys.exit("Missing input file %r" % in_file) |
| 2 | 112 if not out_file: |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
113 sys.exit("Require an output filename") |
| 2 | 114 if not options.format: |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
115 sys.exit("Require the sequence format") |
| 2 | 116 seq_format = options.format.lower() |
| 117 | |
| 118 | |
| 119 def count_fasta(filename): | |
| 120 count = 0 | |
| 121 with open(filename) as handle: | |
| 122 for title, seq in SimpleFastaParser(handle): | |
| 123 count += 1 | |
| 124 return count | |
| 125 | |
| 126 | |
| 127 def count_fastq(filename): | |
| 128 count = 0 | |
| 129 with open(filename) as handle: | |
| 130 for title, seq, qual in FastqGeneralIterator(handle): | |
| 131 count += 1 | |
| 132 return count | |
| 133 | |
| 0 | 134 |
| 2 | 135 def count_sff(filename): |
| 136 # If the SFF file has a built in index (which is normal), | |
| 137 # this will be parsed and is the quicker than scanning | |
| 138 # the whole file. | |
| 139 return len(SeqIO.index(filename, "sff")) | |
| 140 | |
| 141 | |
| 142 def count_sequences(filename, format): | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
143 if format == "sff": |
| 2 | 144 return count_sff(filename) |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
145 elif format == "fasta": |
| 2 | 146 return count_fasta(filename) |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
147 elif format.startswith("fastq"): |
| 2 | 148 return count_fastq(filename) |
| 149 else: | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
150 sys.exit("Unsupported file type %r" % format) |
| 2 | 151 |
| 152 | |
| 153 if options.percent and options.everyn: | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
154 sys.exit("Cannot combine -p and -n options") |
| 2 | 155 elif options.everyn and options.count: |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
156 sys.exit("Cannot combine -p and -c options") |
| 2 | 157 elif options.percent and options.count: |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
158 sys.exit("Cannot combine -n and -c options") |
| 2 | 159 elif options.everyn: |
| 0 | 160 try: |
| 2 | 161 N = int(options.everyn) |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
162 except ValueError: |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
163 sys.exit("Bad -n argument %r" % options.everyn) |
| 0 | 164 if N < 2: |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
165 sys.exit("Bad -n argument %r" % options.everyn) |
| 0 | 166 if (N % 10) == 1: |
| 167 sys.stderr.write("Sampling every %ist sequence\n" % N) | |
| 168 elif (N % 10) == 2: | |
| 169 sys.stderr.write("Sampling every %ind sequence\n" % N) | |
| 170 elif (N % 10) == 3: | |
| 171 sys.stderr.write("Sampling every %ird sequence\n" % N) | |
| 172 else: | |
| 173 sys.stderr.write("Sampling every %ith sequence\n" % N) | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
174 |
| 0 | 175 def sampler(iterator): |
|
6
31f5701cd2e9
v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
peterjc
parents:
5
diff
changeset
|
176 """Sample every Nth sequence.""" |
| 0 | 177 global N |
| 178 count = 0 | |
| 179 for record in iterator: | |
| 180 count += 1 | |
| 181 if count % N == 1: | |
| 182 yield record | |
| 9 | 183 |
| 2 | 184 elif options.percent: |
| 0 | 185 try: |
| 2 | 186 percent = float(options.percent) / 100.0 |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
187 except ValueError: |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
188 sys.exit("Bad -p percent argument %r" % options.percent) |
| 9 | 189 if not (0.0 <= percent <= 1.0): |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
190 sys.exit("Bad -p percent argument %r" % options.percent) |
| 0 | 191 sys.stderr.write("Sampling %0.3f%% of sequences\n" % (100.0 * percent)) |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
192 |
| 0 | 193 def sampler(iterator): |
|
6
31f5701cd2e9
v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
peterjc
parents:
5
diff
changeset
|
194 """Sample given percentage of sequences.""" |
| 0 | 195 global percent |
| 196 count = 0 | |
| 197 taken = 0 | |
| 198 for record in iterator: | |
| 199 count += 1 | |
| 200 if percent * count > taken: | |
| 201 taken += 1 | |
| 202 yield record | |
| 9 | 203 |
| 2 | 204 elif options.count: |
| 205 try: | |
| 206 N = int(options.count) | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
207 except ValueError: |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
208 sys.exit("Bad -c count argument %r" % options.count) |
| 2 | 209 if N < 1: |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
210 sys.exit("Bad -c count argument %r" % options.count) |
| 2 | 211 total = count_sequences(in_file, seq_format) |
|
3
02c13ef1a669
Uploaded v0.2.1, fixed missing test file, more tests.
peterjc
parents:
2
diff
changeset
|
212 sys.stderr.write("Input file has %i sequences\n" % total) |
| 2 | 213 if interleaved: |
| 214 # Paired | |
| 215 if total % 2: | |
| 9 | 216 sys.exit( |
| 217 "Paired mode, but input file has an odd number of sequences: %i" % total | |
| 218 ) | |
| 2 | 219 elif N > total // 2: |
| 9 | 220 sys.exit( |
| 221 "Requested %i sequence pairs, " | |
| 222 "but file only has %i pairs (%i sequences)." % (N, total // 2, total) | |
| 223 ) | |
| 2 | 224 total = total // 2 |
| 225 if N == 1: | |
| 226 sys.stderr.write("Sampling just first sequence pair!\n") | |
| 227 elif N == total: | |
| 228 sys.stderr.write("Taking all the sequence pairs\n") | |
| 229 else: | |
| 230 sys.stderr.write("Sampling %i sequence pairs\n" % N) | |
| 231 else: | |
| 232 # Not paired | |
| 233 if total < N: | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
234 sys.exit("Requested %i sequences, but file only has %i." % (N, total)) |
| 2 | 235 if N == 1: |
| 236 sys.stderr.write("Sampling just first sequence!\n") | |
| 237 elif N == total: | |
| 238 sys.stderr.write("Taking all the sequences\n") | |
| 239 else: | |
| 240 sys.stderr.write("Sampling %i sequences\n" % N) | |
| 241 if N == total: | |
| 9 | 242 |
| 2 | 243 def sampler(iterator): |
| 9 | 244 """No-operation dummy filter, taking everything.""" |
| 2 | 245 global N |
| 246 taken = 0 | |
| 247 for record in iterator: | |
| 248 taken += 1 | |
| 249 yield record | |
| 250 assert taken == N, "Picked %i, wanted %i" % (taken, N) | |
| 9 | 251 |
| 2 | 252 else: |
| 9 | 253 |
| 2 | 254 def sampler(iterator): |
|
6
31f5701cd2e9
v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
peterjc
parents:
5
diff
changeset
|
255 """Sample given number of sequences.""" |
| 2 | 256 # Mimic the percentage sampler, with double check on final count |
| 257 global N, total | |
| 258 # Do we need a floating point fudge factor epsilon? | |
| 259 # i.e. What if percentage comes out slighty too low, and | |
| 260 # we could end up missing last few desired sequences? | |
| 261 percentage = float(N) / float(total) | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
262 # print("DEBUG: Want %i out of %i sequences/pairs, as a percentage %0.2f" |
| 2 | 263 # % (N, total, percentage * 100.0)) |
| 264 count = 0 | |
| 265 taken = 0 | |
| 266 for record in iterator: | |
| 267 count += 1 | |
| 268 # Do we need the extra upper bound? | |
| 269 if percentage * count > taken and taken < N: | |
| 270 taken += 1 | |
| 271 yield record | |
| 272 elif total - count + 1 <= N - taken: | |
| 273 # remaining records (incuding this one) <= what we still need. | |
| 274 # This is a safey check for floating point edge cases where | |
| 275 # we need to take all remaining sequences to meet target | |
| 276 taken += 1 | |
| 277 yield record | |
| 278 assert taken == N, "Picked %i, wanted %i" % (taken, N) | |
| 9 | 279 |
| 0 | 280 else: |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
281 sys.exit("Must use either -n, -p or -c") |
| 2 | 282 |
| 283 | |
| 284 def pair(iterator): | |
| 285 """Quick and dirty pair batched iterator.""" | |
| 286 while True: | |
| 287 a = next(iterator) | |
| 288 b = next(iterator) | |
| 289 if not b: | |
| 290 assert not a, "Odd number of records?" | |
| 291 break | |
| 292 yield (a, b) | |
| 293 | |
| 0 | 294 |
| 295 def raw_fasta_iterator(handle): | |
| 9 | 296 """Yield raw FASTA records as multi-line strings.""" |
| 0 | 297 while True: |
| 298 line = handle.readline() | |
| 299 if line == "": | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
300 return # Premature end of file, or just empty? |
| 0 | 301 if line[0] == ">": |
| 302 break | |
| 303 | |
| 304 no_id_warned = False | |
| 305 while True: | |
| 306 if line[0] != ">": | |
| 9 | 307 raise ValueError("Records in Fasta files should start with '>' character") |
| 0 | 308 try: |
|
6
31f5701cd2e9
v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
peterjc
parents:
5
diff
changeset
|
309 line[1:].split(None, 1)[0] |
| 0 | 310 except IndexError: |
| 311 if not no_id_warned: | |
| 312 sys.stderr.write("WARNING - Malformed FASTA entry with no identifier\n") | |
|
6
31f5701cd2e9
v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
peterjc
parents:
5
diff
changeset
|
313 no_id_warned = True |
| 0 | 314 lines = [line] |
| 315 line = handle.readline() | |
| 316 while True: | |
| 317 if not line: | |
| 318 break | |
| 319 if line[0] == ">": | |
| 320 break | |
| 321 lines.append(line) | |
| 322 line = handle.readline() | |
| 323 yield "".join(lines) | |
| 324 if not line: | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
325 return # StopIteration |
|
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
326 |
| 0 | 327 |
| 2 | 328 def fasta_filter(in_file, out_file, iterator_filter, inter): |
| 0 | 329 count = 0 |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
330 # Galaxy now requires Python 2.5+ so can use with statements, |
| 0 | 331 with open(in_file) as in_handle: |
| 332 with open(out_file, "w") as pos_handle: | |
| 2 | 333 if inter: |
| 334 for r1, r2 in iterator_filter(pair(raw_fasta_iterator(in_handle))): | |
| 335 count += 1 | |
| 336 pos_handle.write(r1) | |
| 337 pos_handle.write(r2) | |
| 338 else: | |
| 339 for record in iterator_filter(raw_fasta_iterator(in_handle)): | |
| 340 count += 1 | |
| 341 pos_handle.write(record) | |
| 0 | 342 return count |
| 343 | |
| 2 | 344 |
| 345 def fastq_filter(in_file, out_file, iterator_filter, inter): | |
| 346 count = 0 | |
| 347 with open(in_file) as in_handle: | |
| 348 with open(out_file, "w") as pos_handle: | |
| 349 if inter: | |
| 350 for r1, r2 in iterator_filter(pair(FastqGeneralIterator(in_handle))): | |
| 351 count += 1 | |
| 352 pos_handle.write("@%s\n%s\n+\n%s\n" % r1) | |
| 353 pos_handle.write("@%s\n%s\n+\n%s\n" % r2) | |
| 354 else: | |
| 9 | 355 for title, seq, qual in iterator_filter( |
| 356 FastqGeneralIterator(in_handle) | |
| 357 ): | |
| 0 | 358 count += 1 |
| 359 pos_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) | |
| 2 | 360 return count |
| 0 | 361 |
| 2 | 362 |
| 363 def sff_filter(in_file, out_file, iterator_filter, inter): | |
| 0 | 364 count = 0 |
| 365 try: | |
| 366 from Bio.SeqIO.SffIO import ReadRocheXmlManifest | |
| 367 except ImportError: | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
368 # Prior to Biopython 1.56 this was a private function |
| 0 | 369 from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest |
| 370 with open(in_file, "rb") as in_handle: | |
| 371 try: | |
| 372 manifest = ReadRocheXmlManifest(in_handle) | |
| 373 except ValueError: | |
| 374 manifest = None | |
| 375 in_handle.seek(0) | |
| 376 with open(out_file, "wb") as out_handle: | |
| 377 writer = SffWriter(out_handle, xml=manifest) | |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
378 in_handle.seek(0) # start again after getting manifest |
| 2 | 379 if inter: |
| 380 from itertools import chain | |
| 9 | 381 |
| 382 count = writer.write_file( | |
| 383 chain.from_iterable(iterator_filter(pair(SffIterator(in_handle)))) | |
| 384 ) | |
| 2 | 385 assert count % 2 == 0, "Odd number of records? %i" % count |
| 386 count /= 2 | |
| 387 else: | |
| 388 count = writer.write_file(iterator_filter(SffIterator(in_handle))) | |
| 0 | 389 return count |
| 390 | |
|
6
31f5701cd2e9
v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
peterjc
parents:
5
diff
changeset
|
391 |
| 2 | 392 if seq_format == "sff": |
| 393 count = sff_filter(in_file, out_file, sampler, interleaved) | |
| 394 elif seq_format == "fasta": | |
| 395 count = fasta_filter(in_file, out_file, sampler, interleaved) | |
| 396 elif seq_format.startswith("fastq"): | |
| 397 count = fastq_filter(in_file, out_file, sampler, interleaved) | |
| 0 | 398 else: |
|
5
6b71ad5d43fb
v0.2.3 clarified help, internal cleanup of Python script
peterjc
parents:
3
diff
changeset
|
399 sys.exit("Unsupported file type %r" % seq_format) |
| 0 | 400 |
| 2 | 401 if interleaved: |
| 402 sys.stderr.write("Selected %i pairs\n" % count) | |
| 403 else: | |
| 404 sys.stderr.write("Selected %i records\n" % count) |
