# HG changeset patch # User peterjc # Date 1616071737 0 # Node ID 84e483325b0422012b9311f07571014d7dd1815a # Parent c84f12187af92968a14a317515212b8f4eb451c4 "make_nr v0.0.2" diff -r c84f12187af9 -r 84e483325b04 test-data/empty.fasta diff -r c84f12187af9 -r 84e483325b04 tools/make_nr/README.rst --- a/tools/make_nr/README.rst Fri Nov 09 11:00:03 2018 -0500 +++ b/tools/make_nr/README.rst Thu Mar 18 12:48:57 2021 +0000 @@ -83,7 +83,9 @@ ======= ====================================================================== Version Changes ------- ---------------------------------------------------------------------- -v0.0.0 - Initial version +v0.0.2 - Fixed bug writing files when there were no duplicates +v0.0.1 - Added option to sort merged IDs, and support for gzipped files +v0.0.0 - Initial version (not published to main Galaxy Tool Shed) ======= ====================================================================== diff -r c84f12187af9 -r 84e483325b04 tools/make_nr/make_nr.py --- a/tools/make_nr/make_nr.py Fri Nov 09 11:00:03 2018 -0500 +++ b/tools/make_nr/make_nr.py Thu Mar 18 12:48:57 2021 +0000 @@ -12,13 +12,14 @@ import gzip import os +import shutil import sys from optparse import OptionParser if "-v" in sys.argv or "--version" in sys.argv: - print("v0.0.1") + print("v0.0.2") sys.exit(0) @@ -45,17 +46,30 @@ """ parser = OptionParser(usage=usage) -parser.add_option("-s", "--sep", dest="sep", - default=";", - help="Separator character for combining identifiers " - "of duplicated records e.g. '|' or ';' (required)") -parser.add_option("-a", "--alphasort", action="store_true", - help="When merging duplicated records sort their " - "identifiers alphabetically before combining them. " - "Default is input file order.") -parser.add_option("-o", "--output", dest="output", - default="/dev/stdout", metavar="FILE", - help="Output filename (defaults to stdout)") +parser.add_option( + "-s", + "--sep", + dest="sep", + default=";", + help="Separator character for combining identifiers " + "of duplicated records e.g. '|' or ';' (required)", +) +parser.add_option( + "-a", + "--alphasort", + action="store_true", + help="When merging duplicated records sort their " + "identifiers alphabetically before combining them. " + "Default is input file order.", +) +parser.add_option( + "-o", + "--output", + dest="output", + default="/dev/stdout", + metavar="FILE", + help="Output filename (defaults to stdout)", +) options, args = parser.parse_args() if not args: @@ -66,7 +80,7 @@ """Open a possibly gzipped text file.""" with open(filename, "rb") as h: magic = h.read(2) - if magic == b'\x1f\x8b': + if magic == b"\x1f\x8b": return gzip.open(filename, "rt") else: return open(filename) @@ -121,13 +135,25 @@ continue # TODO - line wrapping handle.write(">%s\n%s\n" % (title, seq)) - sys.stderr.write("%i unique entries; removed %i duplicates " - "leaving %i representative records\n" - % (unique, len(duplicates), len(representatives))) + sys.stderr.write( + "%i unique entries; removed %i duplicates " + "leaving %i representative records\n" + % (unique, len(duplicates), len(representatives)) + ) + elif len(input_fasta) == 1: + # Single file, no need to even edit titles + shutil.copy(os.path.abspath(input_fasta[0]), output_fasta) + sys.stderr.write("No perfect duplicates in file, %i unique entries\n" % unique) else: - os.symlink(os.path.abspath(input_fasta), output_fasta) - sys.stderr.write("No perfect duplicates in file, %i unique entries\n" - % unique) + with open(output_fasta, "w") as handle: + for f in input_fasta: + with gzip_open(f) as in_handle: + for title, seq in SimpleFastaParser(in_handle): + handle.write(">%s\n%s\n" % (title, seq)) + sys.stderr.write( + "No perfect duplicates in %i files, %i unique entries\n" + % (len(input_fasta), unique) + ) make_nr(args, options.output, options.sep, options.alphasort) diff -r c84f12187af9 -r 84e483325b04 tools/make_nr/make_nr.xml --- a/tools/make_nr/make_nr.xml Fri Nov 09 11:00:03 2018 -0500 +++ b/tools/make_nr/make_nr.xml Thu Mar 18 12:48:57 2021 +0000 @@ -1,4 +1,4 @@ - + by combining duplicated sequences biopython @@ -51,6 +51,14 @@ + + + + + + + + **What it does** diff -r c84f12187af9 -r 84e483325b04 tools/make_nr/tool_dependencies.xml --- a/tools/make_nr/tool_dependencies.xml Fri Nov 09 11:00:03 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file