Mercurial > repos > peterjc > make_nr
changeset 1:84e483325b04 draft
"make_nr v0.0.2"
author | peterjc |
---|---|
date | Thu, 18 Mar 2021 12:48:57 +0000 |
parents | c84f12187af9 |
children | 0c71cd1cd99a |
files | test-data/empty.fasta tools/make_nr/README.rst tools/make_nr/make_nr.py tools/make_nr/make_nr.xml tools/make_nr/tool_dependencies.xml |
diffstat | 4 files changed, 57 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/make_nr/README.rst Fri Nov 09 11:00:03 2018 -0500 +++ b/tools/make_nr/README.rst Thu Mar 18 12:48:57 2021 +0000 @@ -83,7 +83,9 @@ ======= ====================================================================== Version Changes ------- ---------------------------------------------------------------------- -v0.0.0 - Initial version +v0.0.2 - Fixed bug writing files when there were no duplicates +v0.0.1 - Added option to sort merged IDs, and support for gzipped files +v0.0.0 - Initial version (not published to main Galaxy Tool Shed) ======= ======================================================================
--- a/tools/make_nr/make_nr.py Fri Nov 09 11:00:03 2018 -0500 +++ b/tools/make_nr/make_nr.py Thu Mar 18 12:48:57 2021 +0000 @@ -12,13 +12,14 @@ import gzip import os +import shutil import sys from optparse import OptionParser if "-v" in sys.argv or "--version" in sys.argv: - print("v0.0.1") + print("v0.0.2") sys.exit(0) @@ -45,17 +46,30 @@ """ parser = OptionParser(usage=usage) -parser.add_option("-s", "--sep", dest="sep", - default=";", - help="Separator character for combining identifiers " - "of duplicated records e.g. '|' or ';' (required)") -parser.add_option("-a", "--alphasort", action="store_true", - help="When merging duplicated records sort their " - "identifiers alphabetically before combining them. " - "Default is input file order.") -parser.add_option("-o", "--output", dest="output", - default="/dev/stdout", metavar="FILE", - help="Output filename (defaults to stdout)") +parser.add_option( + "-s", + "--sep", + dest="sep", + default=";", + help="Separator character for combining identifiers " + "of duplicated records e.g. '|' or ';' (required)", +) +parser.add_option( + "-a", + "--alphasort", + action="store_true", + help="When merging duplicated records sort their " + "identifiers alphabetically before combining them. " + "Default is input file order.", +) +parser.add_option( + "-o", + "--output", + dest="output", + default="/dev/stdout", + metavar="FILE", + help="Output filename (defaults to stdout)", +) options, args = parser.parse_args() if not args: @@ -66,7 +80,7 @@ """Open a possibly gzipped text file.""" with open(filename, "rb") as h: magic = h.read(2) - if magic == b'\x1f\x8b': + if magic == b"\x1f\x8b": return gzip.open(filename, "rt") else: return open(filename) @@ -121,13 +135,25 @@ continue # TODO - line wrapping handle.write(">%s\n%s\n" % (title, seq)) - sys.stderr.write("%i unique entries; removed %i duplicates " - "leaving %i representative records\n" - % (unique, len(duplicates), len(representatives))) + sys.stderr.write( + "%i unique entries; removed %i duplicates " + "leaving %i representative records\n" + % (unique, len(duplicates), len(representatives)) + ) + elif len(input_fasta) == 1: + # Single file, no need to even edit titles + shutil.copy(os.path.abspath(input_fasta[0]), output_fasta) + sys.stderr.write("No perfect duplicates in file, %i unique entries\n" % unique) else: - os.symlink(os.path.abspath(input_fasta), output_fasta) - sys.stderr.write("No perfect duplicates in file, %i unique entries\n" - % unique) + with open(output_fasta, "w") as handle: + for f in input_fasta: + with gzip_open(f) as in_handle: + for title, seq in SimpleFastaParser(in_handle): + handle.write(">%s\n%s\n" % (title, seq)) + sys.stderr.write( + "No perfect duplicates in %i files, %i unique entries\n" + % (len(input_fasta), unique) + ) make_nr(args, options.output, options.sep, options.alphasort)
--- a/tools/make_nr/make_nr.xml Fri Nov 09 11:00:03 2018 -0500 +++ b/tools/make_nr/make_nr.xml Thu Mar 18 12:48:57 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="make_nr" name="Make FASTA non-redundant" version="0.0.1"> +<tool id="make_nr" name="Make FASTA non-redundant" version="0.0.2"> <description>by combining duplicated sequences</description> <requirements> <requirement type="package" version="1.67">biopython</requirement> @@ -51,6 +51,14 @@ <param name="alphasort" value="-a"/> <output name="output" file="deduplicate.sortids.fasta" ftype="fasta"/> </test> + <test> + <param name="input" value="empty.fasta" ftype="fasta"/> + <output name="output" file="empty.fasta" ftype="fasta"/> + </test> + <test> + <param name="input" value="empty.fasta,empty.fasta" ftype="fasta"/> + <output name="output" file="empty.fasta" ftype="fasta"/> + </test> </tests> <help> **What it does**
--- a/tools/make_nr/tool_dependencies.xml Fri Nov 09 11:00:03 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0" ?> -<tool_dependency> - <package name="biopython" version="1.67"> - <repository changeset_revision="a12f73c3b116" name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu"/> - </package> -</tool_dependency> \ No newline at end of file