Mercurial > repos > peterjc > seq_filter_by_id
changeset 9:141612f8c3e3 draft
v0.2.7 Python 3 compatible print etc
author | peterjc |
---|---|
date | Thu, 11 May 2017 12:18:52 -0400 |
parents | 2d4537dbf0bc |
children | 4a7d8ad2a983 |
files | tools/seq_filter_by_id/README.rst tools/seq_filter_by_id/seq_filter_by_id.py tools/seq_filter_by_id/seq_filter_by_id.xml |
diffstat | 3 files changed, 30 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/seq_filter_by_id/README.rst Wed May 10 13:18:01 2017 -0400
+++ b/tools/seq_filter_by_id/README.rst Thu May 11 12:18:52 2017 -0400
@@ -92,6 +92,9 @@
 v0.2.4 - Corrected error message wording.
 v0.2.5 - Ignore empty names, common in R output (Gildas Le Corguillé).
 v0.2.6 - Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
+v0.2.7 - Python 3 compatible print function.
+ - Use ``<command detect_errors="aggressive">`` (internal change only).
+ - Single quote command line arguments (internal change only).
 ======= ======================================================================
--- a/tools/seq_filter_by_id/seq_filter_by_id.py Wed May 10 13:18:01 2017 -0400 +++ b/tools/seq_filter_by_id/seq_filter_by_id.py Thu May 11 12:18:52 2017 -0400 @@ -21,13 +21,15 @@ molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3. http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878. -This script is copyright 2010-2013 by Peter Cock, The James Hutton Institute +This script is copyright 2010-2017 by Peter Cock, The James Hutton Institute (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved. See accompanying text file for licence details (MIT license). Use -v or --version to get the version, -h or --help for help. """ +from __future__ import print_function + import os import re import sys @@ -76,7 +78,7 @@ options, args = parser.parse_args() if options.version: - print "v0.2.5" + print("v0.2.7") sys.exit(0) in_file = options.input @@ -139,6 +141,7 @@ def check_white_space(name): + """Check identifier for white space, take first word only.""" parts = name.split(None, 1) global name_warn if not name_warn and len(parts) > 1: @@ -222,7 +225,7 @@ name = clean_name(line.rstrip("\n").split("\t")[col]) if name: file_ids.add(name) - print "Using %i IDs from column %s in tabular file" % (len(file_ids), ", ".join(str(col + 1) for col in columns)) + print("Using %i IDs from column %s in tabular file" % (len(file_ids), ", ".join(str(col + 1) for col in columns))) if ids is None: ids = file_ids if logic == "UNION": @@ -232,9 +235,9 @@ handle.close() if len(identifiers) > 1: if logic == "UNION": - print "Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers)) + print("Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers))) else: - print "Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers)) + print("Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers))) if name_warn: sys.stderr.write(name_warn) @@ -282,7 +285,7 @@ # Doing the if statement outside 
the loop for speed # (with the downside of three very similar loops). if pos_file is not None and neg_file is not None: - print "Generating two FASTA files" + print("Generating two FASTA files") with open(pos_file, "w") as pos_handle: with open(neg_file, "w") as neg_handle: for identifier, record in crude_fasta_iterator(in_handle): @@ -293,7 +296,7 @@ neg_handle.write(record) neg_count += 1 elif pos_file is not None: - print "Generating matching FASTA file" + print("Generating matching FASTA file") with open(pos_file, "w") as pos_handle: for identifier, record in crude_fasta_iterator(in_handle): if clean_name(identifier) in wanted: @@ -302,7 +305,7 @@ else: neg_count += 1 else: - print "Generating non-matching FASTA file" + print("Generating non-matching FASTA file") assert neg_file is not None with open(neg_file, "w") as neg_handle: for identifier, record in crude_fasta_iterator(in_handle): @@ -319,10 +322,10 @@ from Bio.SeqIO.QualityIO import FastqGeneralIterator handle = open(in_file, "r") if pos_file is not None and neg_file is not None: - print "Generating two FASTQ files" + print("Generating two FASTQ files") positive_handle = open(pos_file, "w") negative_handle = open(neg_file, "w") - print in_file + print(in_file) for title, seq, qual in FastqGeneralIterator(handle): print("%s --> %s" % (title, clean_name(title.split(None, 1)[0]))) if clean_name(title.split(None, 1)[0]) in wanted: @@ -332,14 +335,14 @@ positive_handle.close() negative_handle.close() elif pos_file is not None: - print "Generating matching FASTQ file" + print("Generating matching FASTQ file") positive_handle = open(pos_file, "w") for title, seq, qual in FastqGeneralIterator(handle): if clean_name(title.split(None, 1)[0]) in wanted: positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) positive_handle.close() elif neg_file is not None: - print "Generating non-matching FASTQ file" + print("Generating non-matching FASTQ file") negative_handle = open(neg_file, "w") for title, seq, qual 
in FastqGeneralIterator(handle): if clean_name(title.split(None, 1)[0]) not in wanted: @@ -398,7 +401,7 @@ elif seq_format.lower() == "fasta": # Write filtered FASTA file based on IDs from tabular file pos_count, neg_count = fasta_filter(in_file, out_positive_file, out_negative_file, ids) - print "%i with and %i without specified IDs" % (pos_count, neg_count) + print("%i with and %i without specified IDs" % (pos_count, neg_count)) elif seq_format.lower().startswith("fastq"): # Write filtered FASTQ file based on IDs from tabular file fastq_filter(in_file, out_positive_file, out_negative_file, ids)
--- a/tools/seq_filter_by_id/seq_filter_by_id.xml Wed May 10 13:18:01 2017 -0400
+++ b/tools/seq_filter_by_id/seq_filter_by_id.xml Thu May 11 12:18:52 2017 -0400
@@ -1,22 +1,19 @@
-<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.6">
+<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.7">
 <description>from a tabular file</description>
 <requirements>
 <requirement type="package" version="1.67">biopython</requirement>
 </requirements>
- <stdio>
- <!-- Anything other than zero is an error -->
- <exit_code range="1:" />
- <exit_code range=":-1" />
- </stdio>
- <version_command interpreter="python">seq_filter_by_id.py --version</version_command>
- <command interpreter="python">
-seq_filter_by_id.py -i "$input_file" -f "$input_file.ext"
+ <version_command>
+python $__tool_directory__/seq_filter_by_id.py --version
+ </version_command>
+ <command detect_errors="aggressive">
+python $__tool_directory__/seq_filter_by_id.py -i '$input_file' -f '$input_file.ext'
 #if str($output_choice_cond.output_choice)=="both"
- -p "$output_pos" -n "$output_neg"
+ -p '$output_pos' -n '$output_neg'
 #elif str($output_choice_cond.output_choice)=="pos"
- -p "$output_pos"
+ -p '$output_pos'
 #elif str($output_choice_cond.output_choice)=="neg"
- -n "$output_neg"
+ -n '$output_neg'
 #end if
 #if str($adv_opts.adv_opts_selector)=="advanced" and $adv_opts.strip_suffix
 -s
@@ -24,9 +21,9 @@
 #if str($id_opts.id_opts_selector)=="tabular":
 ## TODO - Decide on best way to expose multiple ID files via the XML wrapper.
 ## Single tabular file, can call the Python script with either UNION or INTERSECTION
--l UNION "$id_opts.input_tabular" "$id_opts.columns"
+-l UNION '$id_opts.input_tabular' '$id_opts.columns'
 #else
--t "$id_opts.id_list"
+-t '$id_opts.id_list'
 #end if
 </command>
 <inputs>