# HG changeset patch
# User pjbriggs
# Date 1526470756 14400
# Node ID 4e625d3672ba9b65adace376c8a2098ad60a1af5
# Parent 5e133b7b79a69fd1854e19086d4fdff54a4c5da5
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
diff -r 5e133b7b79a6 -r 4e625d3672ba README.rst
--- a/README.rst Mon Mar 19 06:33:32 2018 -0400
+++ b/README.rst Wed May 16 07:39:16 2018 -0400
@@ -61,6 +61,11 @@
Version Changes
---------- ----------------------------------------------------------------------
+0.02.04.7 - Trap for errors in ``pal_finder_v0.02.04.pl`` resulting in bad
+ ranges being supplied to ``primer3_core`` for some reads via
+ ``PRIMER_PRODUCT_SIZE_RANGE`` (and enable 'bad' reads to be output
+ to a dataset); add new option to use a random subset of reads for
+ microsatellite detection.
0.02.04.6 - Update to get dependencies using ``conda`` when installed from the
toolshed (this removes the explicit dependency on Perl 5.16
introduced in 0.02.04.2, as a result the outputs from the tool are
diff -r 5e133b7b79a6 -r 4e625d3672ba fastq_subset.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_subset.py Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+import argparse
+import random
+from Bio.SeqIO.QualityIO import FastqGeneralIterator
+
+def count_reads(fastq):
+    """
+    Count number of reads in a Fastq file (assumes four lines per read)
+
+    The file is scanned in 1MB chunks rather than being read in one go.
+    """
+    n = 0
+    with open(fastq,'r') as fq:
+        for buf in iter(lambda: fq.read(1048576),""):
+            n += buf.count('\n')
+    return n//4
+
+def fastq_subset(fastq_in,fastq_out,indices):
+    """
+    Output a subset of reads from a Fastq file
+
+    The reads to output are specified by a collection of
+    integer indices (zero-based); only reads at those
+    positions in the input file will be written to the
+    output, in the order they appear in the input.
+    """
+    # Use a set so the per-read membership test is constant time
+    indices = set(indices)
+    # Context managers close both files even if parsing fails
+    with open(fastq_in,'r') as fq_in:
+        with open(fastq_out,'w') as fq_out:
+            reads = FastqGeneralIterator(fq_in)
+            for i,(title,seq,qual) in enumerate(reads):
+                if i in indices:
+                    fq_out.write("@%s\n%s\n+\n%s\n" %
+                                 (title,seq,qual))
+
+if __name__ == "__main__":
+ # Command line driver: takes an R1/R2 Fastq pair plus optional -n and -s
+ p = argparse.ArgumentParser()
+ p.add_argument("fastq_r1")
+ p.add_argument("fastq_r2")
+ p.add_argument("-n",
+ dest="subset_size",
+ default=None,
+ help="subset size")
+ p.add_argument("-s",
+ dest="seed",
+ type=int,
+ default=None,
+ help="seed for random number generator")
+ args = p.parse_args()
+ # Report the input files
+ print "Processing fastq pair:"
+ print "\t%s" % args.fastq_r1
+ print "\t%s" % args.fastq_r2
+ # Only R1 is counted; NOTE(review): presumably R2 has the same number of reads - confirm
+ nreads = count_reads(args.fastq_r1)
+ print "Counted %d reads in %s" % (nreads,args.fastq_r1)
+ # -n values below 1 are taken as a fraction of the reads, otherwise as a count
+ if args.subset_size is not None:
+ subset_size = float(args.subset_size)
+ if subset_size < 1.0: # fraction of the counted reads
+ subset_size = int(nreads*subset_size)
+ else: # absolute number of reads
+ subset_size = int(subset_size)
+ print "Extracting subset of reads: %s" % subset_size
+ if args.seed is not None:
+ print "Random number generator seed: %d" % args.seed
+ random.seed(args.seed)
+ subset = random.sample(xrange(nreads),subset_size) # NOTE(review): ValueError if subset_size > nreads - confirm callers guard this
+ fastq_subset(args.fastq_r1,"subset_r1.fq",subset) # the same index list is applied to both files
+ fastq_subset(args.fastq_r2,"subset_r2.fq",subset)
diff -r 5e133b7b79a6 -r 4e625d3672ba pal_finder_macros.xml
--- a/pal_finder_macros.xml Mon Mar 19 06:33:32 2018 -0400
+++ b/pal_finder_macros.xml Wed May 16 07:39:16 2018 -0400
@@ -14,6 +14,7 @@
+
@@ -21,6 +22,44 @@
+
+
+
+
+
+