Mercurial > repos > peterjc > seq_filter_by_id
changeset 7:fb1313d79396 draft
Uploaded v0.2.5, ignore blank names in tabular files (based on a contribution from Gildas Le Corguillé)
author | peterjc |
---|---|
date | Fri, 04 Nov 2016 08:11:08 -0400 |
parents | 03e134cae41a |
children | 2d4537dbf0bc |
files | tools/seq_filter_by_id/README.rst tools/seq_filter_by_id/seq_filter_by_id.py tools/seq_filter_by_id/seq_filter_by_id.xml tools/seq_filter_by_id/tool_dependencies.xml |
diffstat | 4 files changed, 14 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/seq_filter_by_id/README.rst Tue May 17 05:59:24 2016 -0400 +++ b/tools/seq_filter_by_id/README.rst Fri Nov 04 08:11:08 2016 -0400 @@ -89,6 +89,8 @@ v0.2.3 - Ignore blank lines in ID file (contributed by Gildas Le Corguillé). - Defensive quoting of filenames etc in the command definition (internal change only). +v0.2.4 - Corrected error message wording. +v0.2.5 - Ignore empty names, common in R output (Gildas Le Corguillé). ======= ======================================================================
--- a/tools/seq_filter_by_id/seq_filter_by_id.py Tue May 17 05:59:24 2016 -0400 +++ b/tools/seq_filter_by_id/seq_filter_by_id.py Fri Nov 04 08:11:08 2016 -0400 @@ -74,7 +74,7 @@ options, args = parser.parse_args() if options.version: - print "v0.2.3" + print "v0.2.5" sys.exit(0) in_file = options.input @@ -93,7 +93,7 @@ if logic not in ["UNION", "INTERSECTION"]: sys.exit("Logic agrument should be 'UNION' or 'INTERSECTION', not %r" % logic) if options.id_list and args: - sys.exit("Cannot accepted IDs via both -t and as tabular files") + sys.exit("Cannot accept IDs via both -t in the command line, and as tabular files") elif not options.id_list and not args: sys.exit("Expected matched pairs of tabular files and columns (or -t given)") if len(args) % 2: @@ -181,7 +181,7 @@ '\r': '__cr__', '\t': '__tc__', '#': '__pd__', - } +} # Read tabular file(s) and record all specified identifiers ids = None # Will be a set @@ -206,15 +206,19 @@ continue parts = line.rstrip("\n").split("\t") for col in columns: - file_ids.add(clean_name(parts[col])) + name = clean_name(parts[col]) + if name: + file_ids.add(name) else: # Single column, special case speed up col = columns[0] for line in handle: - if not line.strip(): #skip empty lines + if not line.strip(): # skip empty lines continue if not line.startswith("#"): - file_ids.add(clean_name(line.rstrip("\n").split("\t")[col])) + name = clean_name(line.rstrip("\n").split("\t")[col]) + if name: + file_ids.add(name) print "Using %i IDs from column %s in tabular file" % (len(file_ids), ", ".join(str(col + 1) for col in columns)) if ids is None: ids = file_ids
--- a/tools/seq_filter_by_id/seq_filter_by_id.xml Tue May 17 05:59:24 2016 -0400 +++ b/tools/seq_filter_by_id/seq_filter_by_id.xml Fri Nov 04 08:11:08 2016 -0400 @@ -1,8 +1,7 @@ -<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.3"> +<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.5"> <description>from a tabular file</description> <requirements> <requirement type="package" version="1.64">biopython</requirement> - <requirement type="python-module">Bio</requirement> </requirements> <stdio> <!-- Anything other than zero is an error -->
--- a/tools/seq_filter_by_id/tool_dependencies.xml Tue May 17 05:59:24 2016 -0400 +++ b/tools/seq_filter_by_id/tool_dependencies.xml Fri Nov 04 08:11:08 2016 -0400 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> <package name="biopython" version="1.64"> - <repository changeset_revision="b64c8edb7e45" name="package_biopython_1_64" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="0c7526e8ea70" name="package_biopython_1_64" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency>