Galaxy |

Changeset 10:4a7d8ad2a983 (2023-11-30)

Previous changeset 9:141612f8c3e3 (2017-05-11) Next changeset 11:83a19df00eab (2023-11-30)

Commit message:
Bump Biopython dependency

modified:
tools/seq_filter_by_id/README.rst
tools/seq_filter_by_id/seq_filter_by_id.py
tools/seq_filter_by_id/seq_filter_by_id.xml
tools/seq_filter_by_id/tool_dependencies.xml

diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/README.rst
--- a/tools/seq_filter_by_id/README.rst Thu May 11 12:18:52 2017 -0400
+++ b/tools/seq_filter_by_id/README.rst Thu Nov 30 09:50:34 2023 +0000

@@ -1,7 +1,7 @@
Galaxy tool to filter FASTA, FASTQ or SFF sequences by ID
=========================================================

-This tool is copyright 2010-2017 by Peter Cock, The James Hutton Institute
+This tool is copyright 2010-2023 by Peter Cock, The James Hutton Institute
(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
See the licence text below.

@@ -76,7 +76,7 @@
v0.0.8  - Simplified XML to apply input format to output data.
v0.2.0  - Can supply ID list as a text parameter (instead of in a file)
         - Using ``optparse`` for the Python command line API.
-        - Advanced option to ignore paired read suffices.
+        - Advanced option to ignore paired read suffixes.
         - Updated dependencies to use Biopython 1.64.
v0.2.1  - Use Biopython instead of Galaxy for FASTQ handling.
         - Tool definition now embeds citation information.
@@ -95,6 +95,7 @@
v0.2.7  - Python 3 compatible print function.
         - Use ``<command detect_errors="aggressive">`` (internal change only).
         - Single quote command line arguments (internal change only).
+v0.2.8  - Bumped Biopython dependency version for Python 3 fixes.
======= ======================================================================

@@ -124,7 +125,7 @@

     $ planemo shed_upload --tar_only tools/seq_filter_by_id/
     ...
-    $ tar -tzf shed_upload.tar.gz
+    $ tar -tzf shed_upload.tar.gz
     test-data/empty_file.dat
     test-data/k12_hypothetical.fasta
     test-data/k12_hypothetical.tabular

diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/seq_filter_by_id.py
--- a/tools/seq_filter_by_id/seq_filter_by_id.py Thu May 11 12:18:52 2017 -0400
+++ b/tools/seq_filter_by_id/seq_filter_by_id.py Thu Nov 30 09:50:34 2023 +0000

@@ -19,7 +19,7 @@
 
 Cock et al 2009. Biopython: freely available Python tools for computational
 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
 
 This script is copyright 2010-2017 by Peter Cock, The James Hutton Institute
 (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved.
@@ -49,31 +49,66 @@
 the -t or --text option.
 """
 parser = OptionParser(usage=usage)
-parser.add_option('-i', '--input', dest='input',
- default=None, help='Input sequences filename',
- metavar="FILE")
-parser.add_option('-f', '--format', dest='format',
- default=None,
- help='Input sequence format (e.g. fasta, fastq, sff)')
-parser.add_option('-t', '--text', dest='id_list',
- default=None, help="Lists of white space separated IDs (instead of a tabular file)")
-parser.add_option('-p', '--positive', dest='output_positive',
- default=None,
- help='Output filename for matches',
- metavar="FILE")
-parser.add_option('-n', '--negative', dest='output_negative',
- default=None,
- help='Output filename for non-matches',
- metavar="FILE")
-parser.add_option("-l", "--logic", dest="logic",
- default="UNION",
- help="How to combined multiple ID columns (UNION or INTERSECTION)")
-parser.add_option("-s", "--suffix", dest="suffix",
- action="store_true",
- help="Ignore pair-read suffices for matching names")
-parser.add_option("-v", "--version", dest="version",
- default=False, action="store_true",
- help="Show version and quit")
+parser.add_option(
+ "-i",
+ "--input",
+ dest="input",
+ default=None,
+ help="Input sequences filename",
+ metavar="FILE",
+)
+parser.add_option(
+ "-f",
+ "--format",
+ dest="format",
+ default=None,
+ help="Input sequence format (e.g. fasta, fastq, sff)",
+)
+parser.add_option(
+ "-t",
+ "--text",
+ dest="id_list",
+ default=None,
+ help="Lists of white space separated IDs (instead of a tabular file)",
+)
+parser.add_option(
+ "-p",
+ "--positive",
+ dest="output_positive",
+ default=None,
+ help="Output filename for matches",
+ metavar="FILE",
+)
+parser.add_option(
+ "-n",
+ "--negative",
+ dest="output_negative",
+ default=None,
+ help="Output filename for non-matches",
+ metavar="FILE",
+)
+parser.add_option(
+ "-l",
+ "--logic",
+ dest="logic",
+ default="UNION",
+ help="How to combined multiple ID columns (UNION or INTERSECTION)",
+)
+parser.add_option(
+ "-s",
+ "--suffix",
+ dest="suffix",
+ action="store_true",
+ help="Ignore pair-read suffixes for matching names",
+)
+parser.add_option(
+ "-v",
+ "--version",
+ dest="version",
+ default=False,
+ action="store_true",
+ help="Show version and quit",
+)
 
 options, args = parser.parse_args()
 
@@ -86,7 +121,7 @@
 out_positive_file = options.output_positive
 out_negative_file = options.output_negative
 logic = options.logic
-drop_suffices = bool(options.suffix)
+drop_suffixes = bool(options.suffix)
 
 if in_file is None or not os.path.isfile(in_file):
 sys.exit("Missing input file: %r" % in_file)
@@ -132,9 +167,14 @@
 try:
 columns = [int(arg) - 1 for arg in cols_arg.split(",")]
 except ValueError:
- sys.exit("Expected list of columns (comma separated integers), got %r" % cols_arg)
+ sys.exit(
+ "Expected list of columns (comma separated integers), got %r" % cols_arg
+ )
 if min(columns) < 0:
- sys.exit("Expect one-based column numbers (not zero-based counting), got %r" % cols_arg)
+ sys.exit(
+ "Expect one-based col
[... diff output elided here in the changeset display ...]
 "\r": "__cr__",
+ "\t": "__tc__",
+ "#": "__pd__",
 }
 
 # Read tabular file(s) and record all specified identifiers
@@ -225,7 +269,10 @@
 name = clean_name(line.rstrip("\n").split("\t")[col])
 if name:
 file_ids.add(name)
- print("Using %i IDs from column %s in tabular file" % (len(file_ids), ", ".join(str(col + 1) for col in columns)))
+ print(
+ "Using %i IDs from column %s in tabular file"
+ % (len(file_ids), ", ".join(str(col + 1) for col in columns))
+ )
 if ids is None:
 ids = file_ids
 if logic == "UNION":
@@ -235,15 +282,19 @@
 handle.close()
 if len(identifiers) > 1:
 if logic == "UNION":
- print("Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers)))
+ print(
+ "Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers))
+ )
 else:
- print("Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers)))
+ print(
+ "Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers))
+ )
 if name_warn:
 sys.stderr.write(name_warn)
 
 
 def crude_fasta_iterator(handle):
- """Yields tuples, record ID and the full record as a string."""
+ """Parse FASTA file yielding tuples of (name, sequence)."""
 while True:
 line = handle.readline()
 if line == "":
@@ -254,8 +305,7 @@
 no_id_warned = False
 while True:
 if line[0] != ">":
- raise ValueError(
- "Records in Fasta files should start with '>' character")
+ raise ValueError("Records in Fasta files should start with '>' character")
 try:
 id = line[1:].split(None, 1)[0]
 except IndexError:
@@ -320,6 +370,7 @@
 def fastq_filter(in_file, pos_file, neg_file, wanted):
 """FASTQ filter."""
 from Bio.SeqIO.QualityIO import FastqGeneralIterator
+
 handle = open(in_file, "r")
 if pos_file is not None and neg_file is not None:
 print("Generating two FASTQ files")
@@ -378,13 +429,17 @@
 out_handle = open(pos_file, "wb")
 writer = SffWriter(out_handle, xml=manifest)
 in_handle.seek(0) # start again after getting manifest
- pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted)
+ pos_count = writer.write_file(
+ rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted
+ )
 out_handle.close()
 if neg_file is not None:
 out_handle = open(neg_file, "wb")
 writer = SffWriter(out_handle, xml=manifest)
 in_handle.seek(0) # start again
- neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted)
+ neg_count = writer.write_file(
+ rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted
+ )
 out_handle.close()
 # And we're done
 in_handle.close()
@@ -395,12 +450,16 @@
 
 if seq_format.lower() == "sff":
 # Now write filtered SFF file based on IDs wanted
- pos_count, neg_count = sff_filter(in_file, out_positive_file, out_negative_file, ids)
+ pos_count, neg_count = sff_filter(
+ in_file, out_positive_file, out_negative_file, ids
+ )
 # At the time of writing, Galaxy doesn't show SFF file read counts,
 # so it is useful to put them in stdout and thus shown in job info.
 elif seq_format.lower() == "fasta":
 # Write filtered FASTA file based on IDs from tabular file
- pos_count, neg_count = fasta_filter(in_file, out_positive_file, out_negative_file, ids)
+ pos_count, neg_count = fasta_filter(
+ in_file, out_positive_file, out_negative_file, ids
+ )
 print("%i with and %i without specified IDs" % (pos_count, neg_count))
 elif seq_format.lower().startswith("fastq"):
 # Write filtered FASTQ file based on IDs from tabular file

diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/seq_filter_by_id.xml
--- a/tools/seq_filter_by_id/seq_filter_by_id.xml Thu May 11 12:18:52 2017 -0400
+++ b/tools/seq_filter_by_id/seq_filter_by_id.xml Thu Nov 30 09:50:34 2023 +0000

@@ -1,7 +1,7 @@
-<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.7">
+<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.8">
     <description>from a tabular file</description>
     <requirements>
-        <requirement type="package" version="1.67">biopython</requirement>
+        <requirement type="package" version="1.81">biopython</requirement>
     </requirements>
     <version_command>
python $__tool_directory__/seq_filter_by_id.py --version
@@ -30,20 +30,20 @@
         <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to be filtered" help="FASTA, FASTQ, or SFF format." />
         <conditional name="id_opts">
             <param name="id_opts_selector" type="select" label="Filter using the ID list from">
-                <option value="tabular" selected="True">tabular file</option>
+                <option value="tabular" selected="true">tabular file</option>
                 <option value="list">provided list</option>
                 
             </param>
             <when value="tabular">
                 <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/>
-                <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False"
+                <param name="columns" type="data_column" data_ref="input_tabular" multiple="true" numerical="false"
                        label="Column(s) containing sequence identifiers"
                        help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
                     <validator type="no_options" message="Pick at least one column"/>
                 </param>
             </when>
             <when value="list">
-                <param name="id_list" type="text" size="20x80" area="True" format="tabular"
+                <param name="id_list" type="text" size="20x80" area="true" format="tabular"
                        label="List of sequence identifiers (white space separated)"
                        help="You can use both spaces and new lines to separate your identifiers.">
                     <sanitizer>
@@ -69,12 +69,12 @@
         </conditional>
         <conditional name="adv_opts">
             <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="basic" selected="true">Hide Advanced Options</option>
               <option value="advanced">Show Advanced Options</option>
             </param>
             <when value="basic" />
             <when value="advanced">
-                <param name="strip_suffix" type="boolean" value="false" label="Remove typical pair read name suffices when matching identifiers?" help="Will remove suffices including Illumina /1 and /2, Roche 454 .f and .r, and assorted Sanger names like .p* and .q*" />
+                <param name="strip_suffix" type="boolean" value="false" label="Remove typical pair read name suffixes when matching identifiers?" help="Will remove suffixes including Illumina /1 and /2, Roche 454 .f and .r, and assorted Sanger names like .p* and .q*" />
             </when>
         </conditional>
     </inputs>
@@ -128,7 +128,7 @@
             <param name="adv_opts_selector" value="advanced" />
             <param name="strip_suffix" value="true" />
             <output name="output_pos" file="sanger-pairs-mixed.fastq" ftype="fastq" />
-     <output name="output_neg" file="empty_file.dat" ftype="fastq" />
+            <output name="output_neg" file="empty_file.dat" ftype="fastq" />
         </test>
         <test>
             <param name="input_file" value="sanger-pairs-mixed.fastq" ftype="fastq" />
@@ -180,14 +180,14 @@
Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
Galaxy tools and workflows for sequence analysis with applications
in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
+https://doi.org/10.7717/peerj.167

This tool uses Biopython to read and write SFF files, so you may also wish to
cite the Biopython application note (and Galaxy too of course):

Cock et al (2009). Biopython: freely available Python tools for computational
molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878.

This tool is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_id

diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/tool_dependencies.xml
--- a/tools/seq_filter_by_id/tool_dependencies.xml Thu May 11 12:18:52 2017 -0400
+++ b/tools/seq_filter_by_id/tool_dependencies.xml Thu Nov 30 09:50:34 2023 +0000

@@ -1,6 +1,6 @@
-<?xml version="1.0"?>
+<?xml version="1.0" ?>
<tool_dependency>
     <package name="biopython" version="1.67">
-        <repository changeset_revision="a42f244cce44" name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
+        <repository name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="a12f73c3b116"/>
     </package>
-</tool_dependency>
+</tool_dependency>
\ No newline at end of file