diff tools/fastq_paired_unpaired/fastq_paired_unpaired.py @ 6:f396701fbf32 draft

v0.1.3 Depends on Biopython 1.67 via Tool Shed package or bioconda.
author peterjc
date Wed, 10 May 2017 13:28:59 -0400
parents 09f9f0e29e47
children 8cbc866b72ce
line wrap: on
line diff
--- a/tools/fastq_paired_unpaired/fastq_paired_unpaired.py	Wed Aug 05 11:17:49 2015 -0400
+++ b/tools/fastq_paired_unpaired/fastq_paired_unpaired.py	Wed May 10 13:28:59 2017 -0400
@@ -14,22 +14,18 @@
 
 See accompanying text file for licence details (MIT license).
 """
-import os
+
+import re
 import sys
-import re
 
 if "-v" in sys.argv or "--version" in sys.argv:
-    print("Version 0.1.0")
+    print("Version 0.1.3")
     sys.exit(0)
 
-def sys_exit(msg, err=1):
-   sys.stderr.write(msg.rstrip() + "\n")
-   sys.exit(err)
-
 try:
     from Bio.SeqIO.QualityIO import FastqGeneralIterator
 except ImportError:
-    sys_exit("Biopython missing")
+    sys.exit("Biopython missing")
 
 msg = """Expect either 3 or 4 arguments, all FASTQ filenames.
 
@@ -58,7 +54,7 @@
 same identifier with the fragment at the start of the description, e.g.
 
 @HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 1:N:0:TGNCCA
-@HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 2:N:0:TGNCCA 
+@HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 2:N:0:TGNCCA
 
 Note that this does support multiple forward and reverse reads per template
 (which is quite common with Sanger sequencing), e.g. this which is sorted
@@ -83,28 +79,28 @@
 """
 
 if len(sys.argv) == 5:
-    format, input_fastq, pairs_fastq, singles_fastq = sys.argv[1:]
+    seq_format, input_fastq, pairs_fastq, singles_fastq = sys.argv[1:]
 elif len(sys.argv) == 6:
     pairs_fastq = None
-    format, input_fastq, pairs_f_fastq, pairs_r_fastq, singles_fastq = sys.argv[1:]
+    seq_format, input_fastq, pairs_f_fastq, pairs_r_fastq, singles_fastq = sys.argv[1:]
 else:
-    sys_exit(msg)
+    sys.exit(msg)
 
-format = format.replace("fastq", "").lower()
-if not format:
-    format="sanger" #safe default
-elif format not in ["sanger","solexa","illumina","cssanger"]:
-    sys_exit("Unrecognised format %s" % format)
+seq_format = seq_format.replace("fastq", "").lower()
+if not seq_format:
+    seq_format = "sanger"  # safe default
+elif seq_format not in ["sanger", "solexa", "illumina", "cssanger"]:
+    sys.exit("Unrecognised format %s" % seq_format)
 
-#Cope with three widely used suffix naming convensions,
-#Illumina: /1 or /2
-#Forward/revered: .f or .r
-#Sanger, e.g. .p1k and .q1k
-#See http://staden.sourceforge.net/manual/pregap4_unix_50.html
+# Cope with three widely used suffix naming conventions,
+# Illumina: /1 or /2
+# Forward/reverse: .f or .r
+# Sanger, e.g. .p1k and .q1k
+# See http://staden.sourceforge.net/manual/pregap4_unix_50.html
 re_f = re.compile(r"(/1|\.f|\.[sfp]\d\w*)$")
 re_r = re.compile(r"(/2|\.r|\.[rq]\d\w*)$")
 
-#assert re_f.match("demo/1")
+# assert re_f.match("demo/1")
 assert re_f.search("demo.f")
 assert re_f.search("demo.s1")
 assert re_f.search("demo.f1k")
@@ -144,7 +140,7 @@
 
 for title, seq, qual in FastqGeneralIterator(in_handle):
     count += 1
-    name = title.split(None,1)[0]
+    name = title.split(None, 1)[0]
     is_forward = False
     suffix = re_f.search(name)
     if suffix:
@@ -220,7 +216,7 @@
     for old in buffered_reads:
         singles_handle.write(FASTQ_TEMPLATE % old)
         singles += 1
-in_handle.close
+in_handle.close()
 singles_handle.close()
 if pairs_fastq:
     pairs_f_handle.close()
@@ -238,4 +234,4 @@
 
 assert count == pairs + singles == forward + reverse + neither, \
     "%i vs %i+%i=%i vs %i+%i+%i=%i" \
-    % (count,pairs,singles,pairs+singles,forward,reverse,neither,forward+reverse+neither)
+    % (count, pairs, singles, pairs + singles, forward, reverse, neither, forward + reverse + neither)