diff tools/seq_filter_by_id/seq_filter_by_id.py @ 9:141612f8c3e3 draft

v0.2.7 Python 3 compatible print etc
author peterjc
date Thu, 11 May 2017 12:18:52 -0400
parents 2d4537dbf0bc
children 4a7d8ad2a983
line wrap: on
line diff
--- a/tools/seq_filter_by_id/seq_filter_by_id.py	Wed May 10 13:18:01 2017 -0400
+++ b/tools/seq_filter_by_id/seq_filter_by_id.py	Thu May 11 12:18:52 2017 -0400
@@ -21,13 +21,15 @@
 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
 
-This script is copyright 2010-2013 by Peter Cock, The James Hutton Institute
+This script is copyright 2010-2017 by Peter Cock, The James Hutton Institute
 (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved.
 See accompanying text file for licence details (MIT license).
 
 Use -v or --version to get the version, -h or --help for help.
 """
 
+from __future__ import print_function
+
 import os
 import re
 import sys
@@ -76,7 +78,7 @@
 options, args = parser.parse_args()
 
 if options.version:
-    print "v0.2.5"
+    print("v0.2.7")
     sys.exit(0)
 
 in_file = options.input
@@ -139,6 +141,7 @@
 
 
 def check_white_space(name):
+    """Check identifier for white space, take first word only."""
     parts = name.split(None, 1)
     global name_warn
     if not name_warn and len(parts) > 1:
@@ -222,7 +225,7 @@
                 name = clean_name(line.rstrip("\n").split("\t")[col])
                 if name:
                     file_ids.add(name)
-    print "Using %i IDs from column %s in tabular file" % (len(file_ids), ", ".join(str(col + 1) for col in columns))
+    print("Using %i IDs from column %s in tabular file" % (len(file_ids), ", ".join(str(col + 1) for col in columns)))
     if ids is None:
         ids = file_ids
     if logic == "UNION":
@@ -232,9 +235,9 @@
     handle.close()
 if len(identifiers) > 1:
     if logic == "UNION":
-        print "Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers))
+        print("Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers)))
     else:
-        print "Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers))
+        print("Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers)))
 if name_warn:
     sys.stderr.write(name_warn)
 
@@ -282,7 +285,7 @@
         # Doing the if statement outside the loop for speed
         # (with the downside of three very similar loops).
         if pos_file is not None and neg_file is not None:
-            print "Generating two FASTA files"
+            print("Generating two FASTA files")
             with open(pos_file, "w") as pos_handle:
                 with open(neg_file, "w") as neg_handle:
                     for identifier, record in crude_fasta_iterator(in_handle):
@@ -293,7 +296,7 @@
                             neg_handle.write(record)
                             neg_count += 1
         elif pos_file is not None:
-            print "Generating matching FASTA file"
+            print("Generating matching FASTA file")
             with open(pos_file, "w") as pos_handle:
                 for identifier, record in crude_fasta_iterator(in_handle):
                     if clean_name(identifier) in wanted:
@@ -302,7 +305,7 @@
                     else:
                         neg_count += 1
         else:
-            print "Generating non-matching FASTA file"
+            print("Generating non-matching FASTA file")
             assert neg_file is not None
             with open(neg_file, "w") as neg_handle:
                 for identifier, record in crude_fasta_iterator(in_handle):
@@ -319,10 +322,10 @@
     from Bio.SeqIO.QualityIO import FastqGeneralIterator
     handle = open(in_file, "r")
     if pos_file is not None and neg_file is not None:
-        print "Generating two FASTQ files"
+        print("Generating two FASTQ files")
         positive_handle = open(pos_file, "w")
         negative_handle = open(neg_file, "w")
-        print in_file
+        print(in_file)
         for title, seq, qual in FastqGeneralIterator(handle):
             print("%s --> %s" % (title, clean_name(title.split(None, 1)[0])))
             if clean_name(title.split(None, 1)[0]) in wanted:
@@ -332,14 +335,14 @@
         positive_handle.close()
         negative_handle.close()
     elif pos_file is not None:
-        print "Generating matching FASTQ file"
+        print("Generating matching FASTQ file")
         positive_handle = open(pos_file, "w")
         for title, seq, qual in FastqGeneralIterator(handle):
             if clean_name(title.split(None, 1)[0]) in wanted:
                 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
         positive_handle.close()
     elif neg_file is not None:
-        print "Generating non-matching FASTQ file"
+        print("Generating non-matching FASTQ file")
         negative_handle = open(neg_file, "w")
         for title, seq, qual in FastqGeneralIterator(handle):
             if clean_name(title.split(None, 1)[0]) not in wanted:
@@ -398,7 +401,7 @@
 elif seq_format.lower() == "fasta":
     # Write filtered FASTA file based on IDs from tabular file
     pos_count, neg_count = fasta_filter(in_file, out_positive_file, out_negative_file, ids)
-    print "%i with and %i without specified IDs" % (pos_count, neg_count)
+    print("%i with and %i without specified IDs" % (pos_count, neg_count))
 elif seq_format.lower().startswith("fastq"):
     # Write filtered FASTQ file based on IDs from tabular file
     fastq_filter(in_file, out_positive_file, out_negative_file, ids)