Mercurial > repos > peterjc > seq_select_by_id

--- a/tools/seq_select_by_id/README.rst	Wed May 13 10:56:29 2015 -0400
+++ b/tools/seq_select_by_id/README.rst	Thu May 11 06:26:05 2017 -0400
@@ -1,7 +1,7 @@
 Galaxy tool to select FASTA, QUAL, FASTQ or SFF sequences by ID
 ===============================================================

-This tool is copyright 2011-2015 by Peter Cock, The James Hutton Institute
+This tool is copyright 2011-2017 by Peter Cock, The James Hutton Institute
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 See the licence text below.

@@ -85,6 +85,10 @@
         - Reorder XML elements (internal change only).
         - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
         - Quote filenames in case of spaces (internal change only).
+v0.0.12 - Python style changes (internal change only).
+        - Use ``<command detect_errors="aggressive">`` (internal change only).
+        - Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
+        - Python 3 compatible print function.
 ======= ======================================================================


@@ -101,17 +105,17 @@
 Planemo commands (which require you have set your Tool Shed access details in
 ``~/.planemo.yml`` and that you have access rights on the Tool Shed)::

-    $ planemo shed_upload --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/seq_select_by_id/
+    $ planemo shed_update -t testtoolshed --check_diff tools/seq_select_by_id/
     ...

 or::

-    $ planemo shed_upload --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/seq_select_by_id/
+    $ planemo shed_update -t toolshed --check_diff tools/seq_select_by_id/
     ...

 To just build and check the tar ball, use::

-    $ planemo shed_upload --tar_only  ~/repositories/pico_galaxy/tools/seq_select_by_id/
+    $ planemo shed_upload --tar_only tools/seq_select_by_id/
     ...
     $ tar -tzf shed_upload.tar.gz
     test-data/k12_hypothetical.fasta
--- a/tools/seq_select_by_id/seq_select_by_id.py	Wed May 13 10:56:29 2015 -0400
+++ b/tools/seq_select_by_id/seq_select_by_id.py	Thu May 11 06:26:05 2017 -0400
@@ -16,51 +16,50 @@
 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.

-This script is copyright 2011-2013 by Peter Cock, The James Hutton Institute UK.
+This script is copyright 2011-2017 by Peter Cock, The James Hutton Institute UK.
 All rights reserved. See accompanying text file for licence details (MIT
 license).
 """
+
+from __future__ import print_function
+
 import sys

-def sys_exit(msg, err=1):
-    sys.stderr.write(msg.rstrip() + "\n")
-    sys.exit(err)
-
 if "-v" in sys.argv or "--version" in sys.argv:
-    print "v0.0.9"
+    print("v0.0.12")
     sys.exit(0)

-#Parse Command Line
+# Parse Command Line
 try:
     tabular_file, col_arg, in_file, seq_format, out_file = sys.argv[1:]
 except ValueError:
-    sys_exit("Expected five arguments, got %i:\n%s" % (len(sys.argv)-1, " ".join(sys.argv)))
+    sys.exit("Expected five arguments, got %i:\n%s" % (len(sys.argv) - 1, " ".join(sys.argv)))
 try:
     if col_arg.startswith("c"):
-        column = int(col_arg[1:])-1
+        column = int(col_arg[1:]) - 1
     else:
-        column = int(col_arg)-1
+        column = int(col_arg) - 1
 except ValueError:
-    sys_exit("Expected column number, got %s" % col_arg)
+    sys.exit("Expected column number, got %s" % col_arg)

 if seq_format == "fastqcssanger":
-    sys_exit("Colorspace FASTQ not supported.")
+    sys.exit("Colorspace FASTQ not supported.")
 elif seq_format.lower() in ["sff", "fastq", "qual", "fasta"]:
     seq_format = seq_format.lower()
 elif seq_format.lower().startswith("fastq"):
-    #We don't care how the qualities are encoded
+    # We don't care how the qualities are encoded
     seq_format = "fastq"
 elif seq_format.lower().startswith("qual"):
-    #We don't care what the scores are
+    # We don't care what the scores are
     seq_format = "qual"
 else:
-    sys_exit("Unrecognised file format %r" % seq_format)
+    sys.exit("Unrecognised file format %r" % seq_format)


 try:
     from Bio import SeqIO
 except ImportError:
-    sys_exit("Biopython 1.54 or later is required")
+    sys.exit("Biopython 1.54 or later is required")


 def parse_ids(tabular_file, col):
@@ -84,25 +83,26 @@
     if warn:
         sys.stderr.write(warn)

-#Index the sequence file.
-#If very big, could use SeqIO.index_db() to avoid memory bottleneck...
+
+# Index the sequence file.
+# If very big, could use SeqIO.index_db() to avoid memory bottleneck...
 records = SeqIO.index(in_file, seq_format)
-print "Indexed %i sequences" % len(records)
+print("Indexed %i sequences" % len(records))

-if seq_format.lower()=="sff":
-    #Special case to try to preserve the XML manifest
+if seq_format.lower() == "sff":
+    # Special case to try to preserve the XML manifest
     try:
-        from Bio.SeqIO.SffIO import SffIterator, SffWriter
+        from Bio.SeqIO.SffIO import SffWriter
     except ImportError:
-        sys_exit("Requires Biopython 1.54 or later")
+        sys.exit("Requires Biopython 1.54 or later")

     try:
         from Bio.SeqIO.SffIO import ReadRocheXmlManifest
     except ImportError:
-        #Prior to Biopython 1.56 this was a private function
+        # Prior to Biopython 1.56 this was a private function
         from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest

-    in_handle = open(in_file, "rb") #must be binary mode!
+    in_handle = open(in_file, "rb")  # must be binary mode!
     try:
         manifest = ReadRocheXmlManifest(in_handle)
     except ValueError:
@@ -112,21 +112,22 @@
     out_handle = open(out_file, "wb")
     writer = SffWriter(out_handle, xml=manifest)
     count = 0
-    #This does have the overhead of parsing into SeqRecord objects,
-    #but doing the header and index at the low level is too fidly.
+    # This does have the overhead of parsing into SeqRecord objects,
+    # but doing the header and index at the low level is too fiddly.
+    name = None  # We want the variable to leak from the iterator's scope...
     iterator = (records[name] for name in parse_ids(tabular_file, column))
     try:
         count = writer.write_file(iterator)
     except KeyError, err:
         out_handle.close()
         if name not in records:
-            sys_exit("Identifier %r not found in sequence file" % name)
+            sys.exit("Identifier %r not found in sequence file" % name)
         else:
             raise err
     out_handle.close()
 else:
-    #Avoid overhead of parsing into SeqRecord objects,
-    #just re-use the original formatting from the input file.
+    # Avoid overhead of parsing into SeqRecord objects,
+    # just re-use the original formatting from the input file.
     out_handle = open(out_file, "w")
     count = 0
     for name in parse_ids(tabular_file, column):
@@ -134,8 +135,8 @@
             out_handle.write(records.get_raw(name))
         except KeyError:
             out_handle.close()
-            sys_exit("Identifier %r not found in sequence file" % name)
+            sys.exit("Identifier %r not found in sequence file" % name)
         count += 1
     out_handle.close()

-print "Selected %i sequences by ID" % count
+print("Selected %i sequences by ID" % count)
--- a/tools/seq_select_by_id/seq_select_by_id.xml	Wed May 13 10:56:29 2015 -0400
+++ b/tools/seq_select_by_id/seq_select_by_id.xml	Thu May 11 06:26:05 2017 -0400
@@ -1,17 +1,13 @@
-<tool id="seq_select_by_id" name="Select sequences by ID" version="0.0.11">
+<tool id="seq_select_by_id" name="Select sequences by ID" version="0.0.12">
     <description>from a tabular file</description>
     <requirements>
-        <requirement type="package" version="1.62">biopython</requirement>
-        <requirement type="python-module">Bio</requirement>
+        <requirement type="package" version="1.67">biopython</requirement>
     </requirements>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
-    <version_command interpreter="python">seq_select_by_id.py --version</version_command>
-    <command interpreter="python">
-seq_select_by_id.py "$input_tabular" "$column" "$input_file" "$input_file.ext" "$output_file"
+    <version_command>
+python $__tool_directory__/seq_select_by_id.py --version
+    </version_command>
+    <command detect_errors="aggressive">
+python $__tool_directory__/seq_select_by_id.py '$input_tabular' '$column' '$input_file' '$input_file.ext' '$output_file'
     </command>
     <inputs>
         <param name="input_file" type="data" format="fasta,qual,fastq,sff" label="Sequence file to select from" help="FASTA, QUAL, FASTQ, or SFF format." />
--- a/tools/seq_select_by_id/tool_dependencies.xml	Wed May 13 10:56:29 2015 -0400
+++ b/tools/seq_select_by_id/tool_dependencies.xml	Thu May 11 06:26:05 2017 -0400
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="biopython" version="1.62">
-        <repository changeset_revision="3e82cbc44886" name="package_biopython_1_62" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="biopython" version="1.67">
+        <repository changeset_revision="a42f244cce44" name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>