Repository 'seq_filter_by_id'
hg clone https://toolshed.g2.bx.psu.edu/repos/peterjc/seq_filter_by_id

Changeset 10:4a7d8ad2a983 (2023-11-30)
Previous changeset 9:141612f8c3e3 (2017-05-11) Next changeset 11:83a19df00eab (2023-11-30)
Commit message:
Bump Biopython dependency
modified:
tools/seq_filter_by_id/README.rst
tools/seq_filter_by_id/seq_filter_by_id.py
tools/seq_filter_by_id/seq_filter_by_id.xml
tools/seq_filter_by_id/tool_dependencies.xml
b
diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/README.rst
--- a/tools/seq_filter_by_id/README.rst Thu May 11 12:18:52 2017 -0400
+++ b/tools/seq_filter_by_id/README.rst Thu Nov 30 09:50:34 2023 +0000
b
@@ -1,7 +1,7 @@
 Galaxy tool to filter FASTA, FASTQ or SFF sequences by ID
 =========================================================
 
-This tool is copyright 2010-2017 by Peter Cock, The James Hutton Institute
+This tool is copyright 2010-2023 by Peter Cock, The James Hutton Institute
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 See the licence text below.
 
@@ -76,7 +76,7 @@
 v0.0.8  - Simplified XML to apply input format to output data.
 v0.2.0  - Can supply ID list as a text parameter (instead of in a file)
         - Using ``optparse`` for the Python command line API.
-        - Advanced option to ignore paired read suffices.
+        - Advanced option to ignore paired read suffixes.
         - Updated dependencies to use Biopython 1.64.
 v0.2.1  - Use Biopython instead of Galaxy for FASTQ handling.
         - Tool definition now embeds citation information.
@@ -95,6 +95,7 @@
 v0.2.7  - Python 3 compatible print function.
         - Use ``<command detect_errors="aggressive">`` (internal change only).
         - Single quote command line arguments (internal change only).
+v0.2.8  - Bumped Biopython dependency version for Python 3 fixes.
 ======= ======================================================================
 
 
@@ -124,7 +125,7 @@
 
     $ planemo shed_upload --tar_only tools/seq_filter_by_id/
     ...
-    $ tar -tzf shed_upload.tar.gz 
+    $ tar -tzf shed_upload.tar.gz
     test-data/empty_file.dat
     test-data/k12_hypothetical.fasta
     test-data/k12_hypothetical.tabular
b
diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/seq_filter_by_id.py
--- a/tools/seq_filter_by_id/seq_filter_by_id.py Thu May 11 12:18:52 2017 -0400
+++ b/tools/seq_filter_by_id/seq_filter_by_id.py Thu Nov 30 09:50:34 2023 +0000
[
b'@@ -19,7 +19,7 @@\n \n Cock et al 2009. Biopython: freely available Python tools for computational\n molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.\n-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.\n+https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878.\n \n This script is copyright 2010-2017 by Peter Cock, The James Hutton Institute\n (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved.\n@@ -49,31 +49,66 @@\n the -t or --text option.\n """\n parser = OptionParser(usage=usage)\n-parser.add_option(\'-i\', \'--input\', dest=\'input\',\n-                  default=None, help=\'Input sequences filename\',\n-                  metavar="FILE")\n-parser.add_option(\'-f\', \'--format\', dest=\'format\',\n-                  default=None,\n-                  help=\'Input sequence format (e.g. fasta, fastq, sff)\')\n-parser.add_option(\'-t\', \'--text\', dest=\'id_list\',\n-                  default=None, help="Lists of white space separated IDs (instead of a tabular file)")\n-parser.add_option(\'-p\', \'--positive\', dest=\'output_positive\',\n-                  default=None,\n-                  help=\'Output filename for matches\',\n-                  metavar="FILE")\n-parser.add_option(\'-n\', \'--negative\', dest=\'output_negative\',\n-                  default=None,\n-                  help=\'Output filename for non-matches\',\n-                  metavar="FILE")\n-parser.add_option("-l", "--logic", dest="logic",\n-                  default="UNION",\n-                  help="How to combined multiple ID columns (UNION or INTERSECTION)")\n-parser.add_option("-s", "--suffix", dest="suffix",\n-                  action="store_true",\n-                  help="Ignore pair-read suffices for matching names")\n-parser.add_option("-v", "--version", dest="version",\n-                  default=False, action="store_true",\n-                  help="Show version and quit")\n+parser.add_option(\n+    "-i",\n+    "--input",\n+    dest="input",\n+    default=None,\n+    help="Input sequences filename",\n+    metavar="FILE",\n+)\n+parser.add_option(\n+    "-f",\n+    "--format",\n+    dest="format",\n+    default=None,\n+    help="Input sequence format (e.g. fasta, fastq, sff)",\n+)\n+parser.add_option(\n+    "-t",\n+    "--text",\n+    dest="id_list",\n+    default=None,\n+    help="Lists of white space separated IDs (instead of a tabular file)",\n+)\n+parser.add_option(\n+    "-p",\n+    "--positive",\n+    dest="output_positive",\n+    default=None,\n+    help="Output filename for matches",\n+    metavar="FILE",\n+)\n+parser.add_option(\n+    "-n",\n+    "--negative",\n+    dest="output_negative",\n+    default=None,\n+    help="Output filename for non-matches",\n+    metavar="FILE",\n+)\n+parser.add_option(\n+    "-l",\n+    "--logic",\n+    dest="logic",\n+    default="UNION",\n+    help="How to combined multiple ID columns (UNION or INTERSECTION)",\n+)\n+parser.add_option(\n+    "-s",\n+    "--suffix",\n+    dest="suffix",\n+    action="store_true",\n+    help="Ignore pair-read suffixes for matching names",\n+)\n+parser.add_option(\n+    "-v",\n+    "--version",\n+    dest="version",\n+    default=False,\n+    action="store_true",\n+    help="Show version and quit",\n+)\n \n options, args = parser.parse_args()\n \n@@ -86,7 +121,7 @@\n out_positive_file = options.output_positive\n out_negative_file = options.output_negative\n logic = options.logic\n-drop_suffices = bool(options.suffix)\n+drop_suffixes = bool(options.suffix)\n \n if in_file is None or not os.path.isfile(in_file):\n     sys.exit("Missing input file: %r" % in_file)\n@@ -132,9 +167,14 @@\n     try:\n         columns = [int(arg) - 1 for arg in cols_arg.split(",")]\n     except ValueError:\n-        sys.exit("Expected list of columns (comma separated integers), got %r" % cols_arg)\n+        sys.exit(\n+            "Expected list of columns (comma separated integers), got %r" % cols_arg\n+        )\n     if min(columns) < 0:\n-        sys.exit("Expect one-based column numbers (not zero-based counting), got %r" % cols_arg)\n+        sys.exit(\n+            "Expect one-based col'..b'  "\\r": "__cr__",\n+    "\\t": "__tc__",\n+    "#": "__pd__",\n }\n \n # Read tabular file(s) and record all specified identifiers\n@@ -225,7 +269,10 @@\n                 name = clean_name(line.rstrip("\\n").split("\\t")[col])\n                 if name:\n                     file_ids.add(name)\n-    print("Using %i IDs from column %s in tabular file" % (len(file_ids), ", ".join(str(col + 1) for col in columns)))\n+    print(\n+        "Using %i IDs from column %s in tabular file"\n+        % (len(file_ids), ", ".join(str(col + 1) for col in columns))\n+    )\n     if ids is None:\n         ids = file_ids\n     if logic == "UNION":\n@@ -235,15 +282,19 @@\n     handle.close()\n if len(identifiers) > 1:\n     if logic == "UNION":\n-        print("Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers)))\n+        print(\n+            "Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers))\n+        )\n     else:\n-        print("Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers)))\n+        print(\n+            "Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers))\n+        )\n if name_warn:\n     sys.stderr.write(name_warn)\n \n \n def crude_fasta_iterator(handle):\n-    """Yields tuples, record ID and the full record as a string."""\n+    """Parse FASTA file yielding tuples of (name, sequence)."""\n     while True:\n         line = handle.readline()\n         if line == "":\n@@ -254,8 +305,7 @@\n     no_id_warned = False\n     while True:\n         if line[0] != ">":\n-            raise ValueError(\n-                "Records in Fasta files should start with \'>\' character")\n+            raise ValueError("Records in Fasta files should start with \'>\' character")\n         try:\n             id = line[1:].split(None, 1)[0]\n         except IndexError:\n@@ -320,6 +370,7 @@\n def fastq_filter(in_file, pos_file, neg_file, wanted):\n     """FASTQ filter."""\n     from Bio.SeqIO.QualityIO import FastqGeneralIterator\n+\n     handle = open(in_file, "r")\n     if pos_file is not None and neg_file is not None:\n         print("Generating two FASTQ files")\n@@ -378,13 +429,17 @@\n         out_handle = open(pos_file, "wb")\n         writer = SffWriter(out_handle, xml=manifest)\n         in_handle.seek(0)  # start again after getting manifest\n-        pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted)\n+        pos_count = writer.write_file(\n+            rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted\n+        )\n         out_handle.close()\n     if neg_file is not None:\n         out_handle = open(neg_file, "wb")\n         writer = SffWriter(out_handle, xml=manifest)\n         in_handle.seek(0)  # start again\n-        neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted)\n+        neg_count = writer.write_file(\n+            rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted\n+        )\n         out_handle.close()\n     # And we\'re done\n     in_handle.close()\n@@ -395,12 +450,16 @@\n \n if seq_format.lower() == "sff":\n     # Now write filtered SFF file based on IDs wanted\n-    pos_count, neg_count = sff_filter(in_file, out_positive_file, out_negative_file, ids)\n+    pos_count, neg_count = sff_filter(\n+        in_file, out_positive_file, out_negative_file, ids\n+    )\n     # At the time of writing, Galaxy doesn\'t show SFF file read counts,\n     # so it is useful to put them in stdout and thus shown in job info.\n elif seq_format.lower() == "fasta":\n     # Write filtered FASTA file based on IDs from tabular file\n-    pos_count, neg_count = fasta_filter(in_file, out_positive_file, out_negative_file, ids)\n+    pos_count, neg_count = fasta_filter(\n+        in_file, out_positive_file, out_negative_file, ids\n+    )\n     print("%i with and %i without specified IDs" % (pos_count, neg_count))\n elif seq_format.lower().startswith("fastq"):\n     # Write filtered FASTQ file based on IDs from tabular file\n'
b
diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/seq_filter_by_id.xml
--- a/tools/seq_filter_by_id/seq_filter_by_id.xml Thu May 11 12:18:52 2017 -0400
+++ b/tools/seq_filter_by_id/seq_filter_by_id.xml Thu Nov 30 09:50:34 2023 +0000
b
@@ -1,7 +1,7 @@
-<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.7">
+<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.8">
     <description>from a tabular file</description>
     <requirements>
-        <requirement type="package" version="1.67">biopython</requirement>
+        <requirement type="package" version="1.81">biopython</requirement>
     </requirements>
     <version_command>
 python $__tool_directory__/seq_filter_by_id.py --version
@@ -30,20 +30,20 @@
         <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to be filtered" help="FASTA, FASTQ, or SFF format." />
         <conditional name="id_opts">
             <param name="id_opts_selector" type="select" label="Filter using the ID list from">
-                <option value="tabular" selected="True">tabular file</option>
+                <option value="tabular" selected="true">tabular file</option>
                 <option value="list">provided list</option>
                 <!-- add UNION or INTERSECTION of multiple tabular files here? -->
             </param>
             <when value="tabular">
                 <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/>
-                <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False"
+                <param name="columns" type="data_column" data_ref="input_tabular" multiple="true" numerical="false"
                        label="Column(s) containing sequence identifiers"
                        help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
                     <validator type="no_options" message="Pick at least one column"/>
                 </param>
             </when>
             <when value="list">
-                <param name="id_list" type="text" size="20x80" area="True" format="tabular"
+                <param name="id_list" type="text" size="20x80" area="true" format="tabular"
                        label="List of sequence identifiers (white space separated)"
                        help="You can use both spaces and new lines to separate your identifiers.">
                     <sanitizer>
@@ -69,12 +69,12 @@
         </conditional>
         <conditional name="adv_opts">
             <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="basic" selected="true">Hide Advanced Options</option>
               <option value="advanced">Show Advanced Options</option>
             </param>
             <when value="basic" />
             <when value="advanced">
-                <param name="strip_suffix" type="boolean" value="false" label="Remove typical pair read name suffices when matching identifiers?" help="Will remove suffices including Illumina /1 and /2, Roche 454 .f and .r, and assorted Sanger names like .p* and .q*" />
+                <param name="strip_suffix" type="boolean" value="false" label="Remove typical pair read name suffixes when matching identifiers?" help="Will remove suffixes including Illumina /1 and /2, Roche 454 .f and .r, and assorted Sanger names like .p* and .q*" />
             </when>
         </conditional>
     </inputs>
@@ -128,7 +128,7 @@
             <param name="adv_opts_selector" value="advanced" />
             <param name="strip_suffix" value="true" />
             <output name="output_pos" file="sanger-pairs-mixed.fastq" ftype="fastq" />
-     <output name="output_neg" file="empty_file.dat" ftype="fastq" />
+            <output name="output_neg" file="empty_file.dat" ftype="fastq" />
         </test>
         <test>
             <param name="input_file" value="sanger-pairs-mixed.fastq" ftype="fastq" />
@@ -180,14 +180,14 @@
 Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
 Galaxy tools and workflows for sequence analysis with applications
 in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
+https://doi.org/10.7717/peerj.167
 
 This tool uses Biopython to read and write SFF files, so you may also wish to
 cite the Biopython application note (and Galaxy too of course):
 
 Cock et al (2009). Biopython: freely available Python tools for computational
 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
 
 This tool is available to install into other Galaxy Instances via the Galaxy
 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_id
b
diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/tool_dependencies.xml
--- a/tools/seq_filter_by_id/tool_dependencies.xml Thu May 11 12:18:52 2017 -0400
+++ b/tools/seq_filter_by_id/tool_dependencies.xml Thu Nov 30 09:50:34 2023 +0000
b
@@ -1,6 +1,6 @@
-<?xml version="1.0"?>
+<?xml version="1.0" ?>
 <tool_dependency>
     <package name="biopython" version="1.67">
-        <repository changeset_revision="a42f244cce44" name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
+        <repository name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="a12f73c3b116"/>
     </package>
-</tool_dependency>
+</tool_dependency>
\ No newline at end of file