# HG changeset patch
# User peterjc
# Date 1618611767 0
# Node ID f82868a026eac23be8680578df3b5f77a5456959
# Parent 481b0a925e66adc1602fe1a20487d304552a2b91
"v0.0.7 - long overdue upload to main Tool Shed"
diff -r 481b0a925e66 -r f82868a026ea tools/seq_filter_by_mapping/README.rst
--- a/tools/seq_filter_by_mapping/README.rst Wed May 17 09:24:01 2017 -0400
+++ b/tools/seq_filter_by_mapping/README.rst Fri Apr 16 22:22:47 2021 +0000
@@ -71,6 +71,7 @@
- Use ```` (internal change only).
- Single quote command line arguments (internal change only).
v0.0.6 - Python 3 compatible print function.
+v0.0.7 - Script works on Python 2 and 3 (fixed input file mode)
======= ======================================================================
diff -r 481b0a925e66 -r f82868a026ea tools/seq_filter_by_mapping/seq_filter_by_mapping.py
--- a/tools/seq_filter_by_mapping/seq_filter_by_mapping.py Wed May 17 09:24:01 2017 -0400
+++ b/tools/seq_filter_by_mapping/seq_filter_by_mapping.py Fri Apr 16 22:22:47 2021 +0000
@@ -10,7 +10,7 @@
Cock et al 2009. Biopython: freely available Python tools for computational
molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
This script is copyright 2014 by Peter Cock, The James Hutton Institute
(formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved.
@@ -40,31 +40,57 @@
Multiple SAM/BAM mapping files may be given.
"""
parser = OptionParser(usage=usage)
-parser.add_option('-i', '--input', dest='input',
- default=None, help='Input sequences filename',
- metavar="FILE")
-parser.add_option('-f', '--format', dest='format',
- default=None,
- help='Input sequence format (e.g. fasta, fastq, sff)')
-parser.add_option('-p', '--positive', dest='output_positive',
- default=None,
- help='Output filename for mapping reads',
- metavar="FILE")
-parser.add_option('-n', '--negative', dest='output_negative',
- default=None,
- help='Output filename for non-mapping reads',
- metavar="FILE")
-parser.add_option("-m", "--pair-mode", dest="pair_mode",
- default="lax",
- help="How to treat paired reads (lax or strict, default lax)")
-parser.add_option("-v", "--version", dest="version",
- default=False, action="store_true",
- help="Show version and quit")
+parser.add_option(
+ "-i",
+ "--input",
+ dest="input",
+ default=None,
+ help="Input sequences filename",
+ metavar="FILE",
+)
+parser.add_option(
+ "-f",
+ "--format",
+ dest="format",
+ default=None,
+ help="Input sequence format (e.g. fasta, fastq, sff)",
+)
+parser.add_option(
+ "-p",
+ "--positive",
+ dest="output_positive",
+ default=None,
+ help="Output filename for mapping reads",
+ metavar="FILE",
+)
+parser.add_option(
+ "-n",
+ "--negative",
+ dest="output_negative",
+ default=None,
+ help="Output filename for non-mapping reads",
+ metavar="FILE",
+)
+parser.add_option(
+ "-m",
+ "--pair-mode",
+ dest="pair_mode",
+ default="lax",
+ help="How to treat paired reads (lax or strict, default lax)",
+)
+parser.add_option(
+ "-v",
+ "--version",
+ dest="version",
+ default=False,
+ action="store_true",
+ help="Show version and quit",
+)
options, args = parser.parse_args()
if options.version:
- print("v0.0.6")
+ print("v0.0.7")
sys.exit(0)
in_file = options.input
@@ -114,7 +140,7 @@
if match:
# Use the fact this is a suffix, and regular expression will be
# anchored to the end of the name:
- return name[:match.start()]
+ return name[: match.start()]
else:
# Nothing to do
return name
@@ -128,19 +154,19 @@
assert clean_name("baz.q2") == "baz"
mapped_chars = {
- '>': '__gt__',
- '<': '__lt__',
- "'": '__sq__',
- '"': '__dq__',
- '[': '__ob__',
- ']': '__cb__',
- '{': '__oc__',
- '}': '__cc__',
- '@': '__at__',
- '\n': '__cn__',
- '\r': '__cr__',
- '\t': '__tc__',
- '#': '__pd__',
+ ">": "__gt__",
+ "<": "__lt__",
+ "'": "__sq__",
+ '"': "__dq__",
+ "[": "__ob__",
+ "]": "__cb__",
+ "{": "__oc__",
+ "}": "__cc__",
+ "@": "__at__",
+ "\n": "__cn__",
+ "\r": "__cr__",
+ "\t": "__tc__",
+ "#": "__pd__",
}
@@ -149,21 +175,25 @@
Parses BAM files via call out to samtools view command.
"""
- handle = open(filename, "rb")
- magic = handle.read(4)
+ with open(filename, "rb") as handle:
+ magic = handle.read(4)
+
if magic == b"\x1f\x8b\x08\x04":
# Presumably a BAM file then...
- handle.close()
# Call samtools view, don't need header so no -h added:
- child = subprocess.Popen(["samtools", "view", filename],
- stdin=None,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ child = subprocess.Popen(
+ ["samtools", "view", filename],
+ universal_newlines=True,
+ stdin=None,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
handle = child.stdout
else:
# Presumably a SAM file...
child = None
- handle.seek(0)
+ # Open in text mode
+ handle = open(filename)
# Handle should now contain SAM records
for line in handle:
# Ignore header lines
@@ -178,7 +208,7 @@
ids.add(qname)
elif pair_mode == "strict":
# For paired reads, require BOTH be mapped.
- if (flag & 0x4):
+ if flag & 0x4:
# This is unmapped, ignore it
pass
elif not (flag & 0x1):
@@ -192,8 +222,11 @@
stdout, stderr = child.communicate()
assert child.returncode is not None
if child.returncode:
- msg = "Error %i from 'samtools view %s'\n%s" % (child.returncode,
- filename, stderr)
+ msg = "Error %i from 'samtools view %s'\n%s" % (
+ child.returncode,
+ filename,
+ stderr,
+ )
sys.exit(msg.strip(), child.returncode)
else:
handle.close()
@@ -211,7 +244,7 @@
def crude_fasta_iterator(handle):
- """Yields tuples, record ID and the full record as a string."""
+ """Yield tuples, record ID and the full record as a string."""
while True:
line = handle.readline()
if line == "":
@@ -222,8 +255,7 @@
no_id_warned = False
while True:
if line[0] != ">":
- raise ValueError(
- "Records in Fasta files should start with '>' character")
+ raise ValueError("Records in Fasta files should start with '>' character")
try:
id = line[1:].split(None, 1)[0]
except IndexError:
@@ -288,6 +320,7 @@
def fastq_filter(in_file, pos_file, neg_file, wanted):
"""FASTQ filter."""
from Bio.SeqIO.QualityIO import FastqGeneralIterator
+
pos_count = neg_count = 0
handle = open(in_file, "r")
if pos_file is not None and neg_file is not None:
@@ -355,13 +388,17 @@
out_handle = open(pos_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
in_handle.seek(0) # start again after getting manifest
- pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted)
+ pos_count = writer.write_file(
+ rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted
+ )
out_handle.close()
if neg_file is not None:
out_handle = open(neg_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
in_handle.seek(0) # start again
- neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted)
+ neg_count = writer.write_file(
+ rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted
+ )
out_handle.close()
# And we're done
in_handle.close()
@@ -377,7 +414,9 @@
else:
sys.exit("Unsupported file type %r" % seq_format)
-pos_count, neg_count = sequence_filter(in_file, out_positive_file, out_negative_file, ids)
+pos_count, neg_count = sequence_filter(
+ in_file, out_positive_file, out_negative_file, ids
+)
print("%i mapped and %i unmapped reads." % (pos_count, neg_count))
fraction = float(pos_count) * 100.0 / float(pos_count + neg_count)
print("In total %i reads, of which %0.1f%% mapped." % (pos_count + neg_count, fraction))
diff -r 481b0a925e66 -r f82868a026ea tools/seq_filter_by_mapping/seq_filter_by_mapping.xml
--- a/tools/seq_filter_by_mapping/seq_filter_by_mapping.xml Wed May 17 09:24:01 2017 -0400
+++ b/tools/seq_filter_by_mapping/seq_filter_by_mapping.xml Fri Apr 16 22:22:47 2021 +0000
@@ -1,4 +1,4 @@
-
+
from SAM/BAM file
biopython
@@ -21,7 +21,7 @@
-
+
@@ -33,12 +33,12 @@
-
+
-
-
+ -->
@@ -50,19 +50,19 @@
-
+
-
+
-
+
@@ -97,7 +97,7 @@
Cock et al (2009). Biopython: freely available Python tools for computational
molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
This tool is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_mapping
diff -r 481b0a925e66 -r f82868a026ea tools/seq_filter_by_mapping/tool_dependencies.xml
--- a/tools/seq_filter_by_mapping/tool_dependencies.xml Wed May 17 09:24:01 2017 -0400
+++ b/tools/seq_filter_by_mapping/tool_dependencies.xml Fri Apr 16 22:22:47 2021 +0000
@@ -1,9 +1,9 @@
-
+
-
+
-
+
-
+
\ No newline at end of file