Mercurial > repos > peterjc > seq_filter_by_id

--- a/tools/seq_filter_by_id/README.rst	Wed May 10 13:18:01 2017 -0400
+++ b/tools/seq_filter_by_id/README.rst	Thu May 11 12:18:52 2017 -0400
@@ -92,6 +92,9 @@
 v0.2.4  - Corrected error message wording.
 v0.2.5  - Ignore empty names, common in R output (Gildas Le Corguillé).
 v0.2.6  - Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
+v0.2.7  - Python 3 compatible print function.
+        - Use ``<command detect_errors="aggressive">`` (internal change only).
+        - Single quote command line arguments (internal change only).
 ======= ======================================================================
--- a/tools/seq_filter_by_id/seq_filter_by_id.py	Wed May 10 13:18:01 2017 -0400
+++ b/tools/seq_filter_by_id/seq_filter_by_id.py	Thu May 11 12:18:52 2017 -0400
@@ -21,13 +21,15 @@
 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.

-This script is copyright 2010-2013 by Peter Cock, The James Hutton Institute
+This script is copyright 2010-2017 by Peter Cock, The James Hutton Institute
 (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved.
 See accompanying text file for licence details (MIT license).

 Use -v or --version to get the version, -h or --help for help.
 """

+from __future__ import print_function
+
 import os
 import re
 import sys
@@ -76,7 +78,7 @@
 options, args = parser.parse_args()

 if options.version:
-    print "v0.2.5"
+    print("v0.2.7")
     sys.exit(0)

 in_file = options.input
@@ -139,6 +141,7 @@


 def check_white_space(name):
+    """Check identifier for white space, take first word only."""
     parts = name.split(None, 1)
     global name_warn
     if not name_warn and len(parts) > 1:
@@ -222,7 +225,7 @@
                 name = clean_name(line.rstrip("\n").split("\t")[col])
                 if name:
                     file_ids.add(name)
-    print "Using %i IDs from column %s in tabular file" % (len(file_ids), ", ".join(str(col + 1) for col in columns))
+    print("Using %i IDs from column %s in tabular file" % (len(file_ids), ", ".join(str(col + 1) for col in columns)))
     if ids is None:
         ids = file_ids
     if logic == "UNION":
@@ -232,9 +235,9 @@
     handle.close()
 if len(identifiers) > 1:
     if logic == "UNION":
-        print "Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers))
+        print("Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers)))
     else:
-        print "Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers))
+        print("Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers)))
 if name_warn:
     sys.stderr.write(name_warn)

@@ -282,7 +285,7 @@
         # Doing the if statement outside the loop for speed
         # (with the downside of three very similar loops).
         if pos_file is not None and neg_file is not None:
-            print "Generating two FASTA files"
+            print("Generating two FASTA files")
             with open(pos_file, "w") as pos_handle:
                 with open(neg_file, "w") as neg_handle:
                     for identifier, record in crude_fasta_iterator(in_handle):
@@ -293,7 +296,7 @@
                             neg_handle.write(record)
                             neg_count += 1
         elif pos_file is not None:
-            print "Generating matching FASTA file"
+            print("Generating matching FASTA file")
             with open(pos_file, "w") as pos_handle:
                 for identifier, record in crude_fasta_iterator(in_handle):
                     if clean_name(identifier) in wanted:
@@ -302,7 +305,7 @@
                     else:
                         neg_count += 1
         else:
-            print "Generating non-matching FASTA file"
+            print("Generating non-matching FASTA file")
             assert neg_file is not None
             with open(neg_file, "w") as neg_handle:
                 for identifier, record in crude_fasta_iterator(in_handle):
@@ -319,10 +322,10 @@
     from Bio.SeqIO.QualityIO import FastqGeneralIterator
     handle = open(in_file, "r")
     if pos_file is not None and neg_file is not None:
-        print "Generating two FASTQ files"
+        print("Generating two FASTQ files")
         positive_handle = open(pos_file, "w")
         negative_handle = open(neg_file, "w")
-        print in_file
+        print(in_file)
         for title, seq, qual in FastqGeneralIterator(handle):
             print("%s --> %s" % (title, clean_name(title.split(None, 1)[0])))
             if clean_name(title.split(None, 1)[0]) in wanted:
@@ -332,14 +335,14 @@
         positive_handle.close()
         negative_handle.close()
     elif pos_file is not None:
-        print "Generating matching FASTQ file"
+        print("Generating matching FASTQ file")
         positive_handle = open(pos_file, "w")
         for title, seq, qual in FastqGeneralIterator(handle):
             if clean_name(title.split(None, 1)[0]) in wanted:
                 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
         positive_handle.close()
     elif neg_file is not None:
-        print "Generating non-matching FASTQ file"
+        print("Generating non-matching FASTQ file")
         negative_handle = open(neg_file, "w")
         for title, seq, qual in FastqGeneralIterator(handle):
             if clean_name(title.split(None, 1)[0]) not in wanted:
@@ -398,7 +401,7 @@
 elif seq_format.lower() == "fasta":
     # Write filtered FASTA file based on IDs from tabular file
     pos_count, neg_count = fasta_filter(in_file, out_positive_file, out_negative_file, ids)
-    print "%i with and %i without specified IDs" % (pos_count, neg_count)
+    print("%i with and %i without specified IDs" % (pos_count, neg_count))
 elif seq_format.lower().startswith("fastq"):
     # Write filtered FASTQ file based on IDs from tabular file
     fastq_filter(in_file, out_positive_file, out_negative_file, ids)
--- a/tools/seq_filter_by_id/seq_filter_by_id.xml	Wed May 10 13:18:01 2017 -0400
+++ b/tools/seq_filter_by_id/seq_filter_by_id.xml	Thu May 11 12:18:52 2017 -0400
@@ -1,22 +1,19 @@
-<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.6">
+<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.7">
     <description>from a tabular file</description>
     <requirements>
         <requirement type="package" version="1.67">biopython</requirement>
     </requirements>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
-    <version_command interpreter="python">seq_filter_by_id.py --version</version_command>
-    <command interpreter="python">
-seq_filter_by_id.py -i "$input_file" -f "$input_file.ext"
+    <version_command>
+python $__tool_directory__/seq_filter_by_id.py --version
+    </version_command>
+    <command detect_errors="aggressive">
+python $__tool_directory__/seq_filter_by_id.py -i '$input_file' -f '$input_file.ext'
 #if str($output_choice_cond.output_choice)=="both"
- -p "$output_pos" -n "$output_neg"
+ -p '$output_pos' -n '$output_neg'
 #elif str($output_choice_cond.output_choice)=="pos"
- -p "$output_pos"
+ -p '$output_pos'
 #elif str($output_choice_cond.output_choice)=="neg"
- -n "$output_neg"
+ -n '$output_neg'
 #end if
 #if str($adv_opts.adv_opts_selector)=="advanced" and $adv_opts.strip_suffix
  -s
@@ -24,9 +21,9 @@
 #if str($id_opts.id_opts_selector)=="tabular":
 ## TODO - Decide on best way to expose multiple ID files via the XML wrapper.
 ## Single tabular file, can call the Python script with either UNION or INTERSECTION
--l UNION "$id_opts.input_tabular" "$id_opts.columns"
+-l UNION '$id_opts.input_tabular' '$id_opts.columns'
 #else
--t "$id_opts.id_list"
+-t '$id_opts.id_list'
 #end if
     </command>
     <inputs>