Mercurial > repos > peterjc > sample_seqs

diff tools/sample_seqs/sample_seqs.py @ 6:31f5701cd2e9 draft
v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
author: peterjc
date: Thu, 11 May 2017 07:24:38 -0400
parents: 6b71ad5d43fb
children: 5f505ed46e16
--- a/tools/sample_seqs/sample_seqs.py	Wed Feb 01 09:39:36 2017 -0500
+++ b/tools/sample_seqs/sample_seqs.py	Thu May 11 07:24:38 2017 -0400
@@ -63,7 +63,7 @@
 options, args = parser.parse_args()
 
 if options.version:
-    print("v0.2.3")
+    print("v0.2.4")
     sys.exit(0)
 
 try:
@@ -146,6 +146,7 @@
         sys.stderr.write("Sampling every %ith sequence\n" % N)
 
     def sampler(iterator):
+        """Sample every Nth sequence."""
         global N
         count = 0
         for record in iterator:
@@ -157,11 +158,12 @@
         percent = float(options.percent) / 100.0
     except ValueError:
         sys.exit("Bad -p percent argument %r" % options.percent)
-    if percent <= 0.0 or 1.0 <= percent:
+    if not(0.0 <= percent <= 1.0):
         sys.exit("Bad -p percent argument %r" % options.percent)
     sys.stderr.write("Sampling %0.3f%% of sequences\n" % (100.0 * percent))
 
     def sampler(iterator):
+        """Sample given percentage of sequences."""
         global percent
         count = 0
         taken = 0
@@ -215,6 +217,7 @@
             assert taken == N, "Picked %i, wanted %i" % (taken, N)
     else:
         def sampler(iterator):
+            """Sample given number of sequences."""
             # Mimic the percentage sampler, with double check on final count
             global N, total
             # Do we need a floating point fudge factor epsilon?
@@ -268,12 +271,11 @@
             raise ValueError(
                 "Records in Fasta files should start with '>' character")
         try:
-            id = line[1:].split(None, 1)[0]
+            line[1:].split(None, 1)[0]
         except IndexError:
             if not no_id_warned:
                 sys.stderr.write("WARNING - Malformed FASTA entry with no identifier\n")
-        no_id_warned = True
-        id = None
+                no_id_warned = True
         lines = [line]
         line = handle.readline()
         while True:
@@ -346,6 +348,7 @@
                 count = writer.write_file(iterator_filter(SffIterator(in_handle)))
     return count
 
+
 if seq_format == "sff":
     count = sff_filter(in_file, out_file, sampler, interleaved)
 elif seq_format == "fasta":
author	peterjc
date	Thu, 11 May 2017 07:24:38 -0400
parents	6b71ad5d43fb
children	5f505ed46e16