Previous changeset: 9:141612f8c3e3 (2017-05-11) | Next changeset: 11:83a19df00eab (2023-11-30)
Commit message: Bump Biopython dependency
Modified files:
- tools/seq_filter_by_id/README.rst
- tools/seq_filter_by_id/seq_filter_by_id.py
- tools/seq_filter_by_id/seq_filter_by_id.xml
- tools/seq_filter_by_id/tool_dependencies.xml
b |
diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/README.rst --- a/tools/seq_filter_by_id/README.rst Thu May 11 12:18:52 2017 -0400 +++ b/tools/seq_filter_by_id/README.rst Thu Nov 30 09:50:34 2023 +0000 |
b |
@@ -1,7 +1,7 @@
 Galaxy tool to filter FASTA, FASTQ or SFF sequences by ID
 =========================================================
-This tool is copyright 2010-2017 by Peter Cock, The James Hutton Institute
+This tool is copyright 2010-2023 by Peter Cock, The James Hutton Institute
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 See the licence text below.
@@ -76,7 +76,7 @@
 v0.0.8 - Simplified XML to apply input format to output data.
 v0.2.0 - Can supply ID list as a text parameter (instead of in a file)
 - Using ``optparse`` for the Python command line API.
- - Advanced option to ignore paired read suffices.
+ - Advanced option to ignore paired read suffixes.
 - Updated dependencies to use Biopython 1.64.
 v0.2.1 - Use Biopython instead of Galaxy for FASTQ handling.
 - Tool definition now embeds citation information.
@@ -95,6 +95,7 @@
 v0.2.7 - Python 3 compatible print function.
 - Use ``<command detect_errors="aggressive">`` (internal change only).
 - Single quote command line arguments (internal change only).
+v0.2.8 - Bumped Biopython dependency version for Python 3 fixes.
 ======= ======================================================================
@@ -124,7 +125,7 @@
 $ planemo shed_upload --tar_only tools/seq_filter_by_id/
 ...
- $ tar -tzf shed_upload.tar.gz
+ $ tar -tzf shed_upload.tar.gz
 test-data/empty_file.dat
 test-data/k12_hypothetical.fasta
 test-data/k12_hypothetical.tabular
b |
diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/seq_filter_by_id.py --- a/tools/seq_filter_by_id/seq_filter_by_id.py Thu May 11 12:18:52 2017 -0400 +++ b/tools/seq_filter_by_id/seq_filter_by_id.py Thu Nov 30 09:50:34 2023 +0000 |
[ |
b'@@ -19,7 +19,7 @@\n \n Cock et al 2009. Biopython: freely available Python tools for computational\n molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.\n-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.\n+https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878.\n \n This script is copyright 2010-2017 by Peter Cock, The James Hutton Institute\n (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved.\n@@ -49,31 +49,66 @@\n the -t or --text option.\n """\n parser = OptionParser(usage=usage)\n-parser.add_option(\'-i\', \'--input\', dest=\'input\',\n- default=None, help=\'Input sequences filename\',\n- metavar="FILE")\n-parser.add_option(\'-f\', \'--format\', dest=\'format\',\n- default=None,\n- help=\'Input sequence format (e.g. fasta, fastq, sff)\')\n-parser.add_option(\'-t\', \'--text\', dest=\'id_list\',\n- default=None, help="Lists of white space separated IDs (instead of a tabular file)")\n-parser.add_option(\'-p\', \'--positive\', dest=\'output_positive\',\n- default=None,\n- help=\'Output filename for matches\',\n- metavar="FILE")\n-parser.add_option(\'-n\', \'--negative\', dest=\'output_negative\',\n- default=None,\n- help=\'Output filename for non-matches\',\n- metavar="FILE")\n-parser.add_option("-l", "--logic", dest="logic",\n- default="UNION",\n- help="How to combined multiple ID columns (UNION or INTERSECTION)")\n-parser.add_option("-s", "--suffix", dest="suffix",\n- action="store_true",\n- help="Ignore pair-read suffices for matching names")\n-parser.add_option("-v", "--version", dest="version",\n- default=False, action="store_true",\n- help="Show version and quit")\n+parser.add_option(\n+ "-i",\n+ "--input",\n+ dest="input",\n+ default=None,\n+ help="Input sequences filename",\n+ metavar="FILE",\n+)\n+parser.add_option(\n+ "-f",\n+ "--format",\n+ dest="format",\n+ default=None,\n+ help="Input sequence format (e.g. 
fasta, fastq, sff)",\n+)\n+parser.add_option(\n+ "-t",\n+ "--text",\n+ dest="id_list",\n+ default=None,\n+ help="Lists of white space separated IDs (instead of a tabular file)",\n+)\n+parser.add_option(\n+ "-p",\n+ "--positive",\n+ dest="output_positive",\n+ default=None,\n+ help="Output filename for matches",\n+ metavar="FILE",\n+)\n+parser.add_option(\n+ "-n",\n+ "--negative",\n+ dest="output_negative",\n+ default=None,\n+ help="Output filename for non-matches",\n+ metavar="FILE",\n+)\n+parser.add_option(\n+ "-l",\n+ "--logic",\n+ dest="logic",\n+ default="UNION",\n+ help="How to combined multiple ID columns (UNION or INTERSECTION)",\n+)\n+parser.add_option(\n+ "-s",\n+ "--suffix",\n+ dest="suffix",\n+ action="store_true",\n+ help="Ignore pair-read suffixes for matching names",\n+)\n+parser.add_option(\n+ "-v",\n+ "--version",\n+ dest="version",\n+ default=False,\n+ action="store_true",\n+ help="Show version and quit",\n+)\n \n options, args = parser.parse_args()\n \n@@ -86,7 +121,7 @@\n out_positive_file = options.output_positive\n out_negative_file = options.output_negative\n logic = options.logic\n-drop_suffices = bool(options.suffix)\n+drop_suffixes = bool(options.suffix)\n \n if in_file is None or not os.path.isfile(in_file):\n sys.exit("Missing input file: %r" % in_file)\n@@ -132,9 +167,14 @@\n try:\n columns = [int(arg) - 1 for arg in cols_arg.split(",")]\n except ValueError:\n- sys.exit("Expected list of columns (comma separated integers), got %r" % cols_arg)\n+ sys.exit(\n+ "Expected list of columns (comma separated integers), got %r" % cols_arg\n+ )\n if min(columns) < 0:\n- sys.exit("Expect one-based column numbers (not zero-based counting), got %r" % cols_arg)\n+ sys.exit(\n+ "Expect one-based col'..b' "\\r": "__cr__",\n+ "\\t": "__tc__",\n+ "#": "__pd__",\n }\n \n # Read tabular file(s) and record all specified identifiers\n@@ -225,7 +269,10 @@\n name = clean_name(line.rstrip("\\n").split("\\t")[col])\n if name:\n file_ids.add(name)\n- print("Using 
%i IDs from column %s in tabular file" % (len(file_ids), ", ".join(str(col + 1) for col in columns)))\n+ print(\n+ "Using %i IDs from column %s in tabular file"\n+ % (len(file_ids), ", ".join(str(col + 1) for col in columns))\n+ )\n if ids is None:\n ids = file_ids\n if logic == "UNION":\n@@ -235,15 +282,19 @@\n handle.close()\n if len(identifiers) > 1:\n if logic == "UNION":\n- print("Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers)))\n+ print(\n+ "Have %i IDs combined from %i tabular files" % (len(ids), len(identifiers))\n+ )\n else:\n- print("Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers)))\n+ print(\n+ "Have %i IDs in common from %i tabular files" % (len(ids), len(identifiers))\n+ )\n if name_warn:\n sys.stderr.write(name_warn)\n \n \n def crude_fasta_iterator(handle):\n- """Yields tuples, record ID and the full record as a string."""\n+ """Parse FASTA file yielding tuples of (name, sequence)."""\n while True:\n line = handle.readline()\n if line == "":\n@@ -254,8 +305,7 @@\n no_id_warned = False\n while True:\n if line[0] != ">":\n- raise ValueError(\n- "Records in Fasta files should start with \'>\' character")\n+ raise ValueError("Records in Fasta files should start with \'>\' character")\n try:\n id = line[1:].split(None, 1)[0]\n except IndexError:\n@@ -320,6 +370,7 @@\n def fastq_filter(in_file, pos_file, neg_file, wanted):\n """FASTQ filter."""\n from Bio.SeqIO.QualityIO import FastqGeneralIterator\n+\n handle = open(in_file, "r")\n if pos_file is not None and neg_file is not None:\n print("Generating two FASTQ files")\n@@ -378,13 +429,17 @@\n out_handle = open(pos_file, "wb")\n writer = SffWriter(out_handle, xml=manifest)\n in_handle.seek(0) # start again after getting manifest\n- pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted)\n+ pos_count = writer.write_file(\n+ rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted\n+ )\n 
out_handle.close()\n if neg_file is not None:\n out_handle = open(neg_file, "wb")\n writer = SffWriter(out_handle, xml=manifest)\n in_handle.seek(0) # start again\n- neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted)\n+ neg_count = writer.write_file(\n+ rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted\n+ )\n out_handle.close()\n # And we\'re done\n in_handle.close()\n@@ -395,12 +450,16 @@\n \n if seq_format.lower() == "sff":\n # Now write filtered SFF file based on IDs wanted\n- pos_count, neg_count = sff_filter(in_file, out_positive_file, out_negative_file, ids)\n+ pos_count, neg_count = sff_filter(\n+ in_file, out_positive_file, out_negative_file, ids\n+ )\n # At the time of writing, Galaxy doesn\'t show SFF file read counts,\n # so it is useful to put them in stdout and thus shown in job info.\n elif seq_format.lower() == "fasta":\n # Write filtered FASTA file based on IDs from tabular file\n- pos_count, neg_count = fasta_filter(in_file, out_positive_file, out_negative_file, ids)\n+ pos_count, neg_count = fasta_filter(\n+ in_file, out_positive_file, out_negative_file, ids\n+ )\n print("%i with and %i without specified IDs" % (pos_count, neg_count))\n elif seq_format.lower().startswith("fastq"):\n # Write filtered FASTQ file based on IDs from tabular file\n' |
b |
diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/seq_filter_by_id.xml --- a/tools/seq_filter_by_id/seq_filter_by_id.xml Thu May 11 12:18:52 2017 -0400 +++ b/tools/seq_filter_by_id/seq_filter_by_id.xml Thu Nov 30 09:50:34 2023 +0000 |
b |
@@ -1,7 +1,7 @@ -<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.7"> +<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.8"> <description>from a tabular file</description> <requirements> - <requirement type="package" version="1.67">biopython</requirement> + <requirement type="package" version="1.81">biopython</requirement> </requirements> <version_command> python $__tool_directory__/seq_filter_by_id.py --version @@ -30,20 +30,20 @@ <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to be filtered" help="FASTA, FASTQ, or SFF format." /> <conditional name="id_opts"> <param name="id_opts_selector" type="select" label="Filter using the ID list from"> - <option value="tabular" selected="True">tabular file</option> + <option value="tabular" selected="true">tabular file</option> <option value="list">provided list</option> <!-- add UNION or INTERSECTION of multiple tabular files here? --> </param> <when value="tabular"> <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/> - <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" + <param name="columns" type="data_column" data_ref="input_tabular" multiple="true" numerical="false" label="Column(s) containing sequence identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> <validator type="no_options" message="Pick at least one column"/> </param> </when> <when value="list"> - <param name="id_list" type="text" size="20x80" area="True" format="tabular" + <param name="id_list" type="text" size="20x80" area="true" format="tabular" label="List of sequence identifiers (white space separated)" help="You can use both spaces and new lines to separate your identifiers."> <sanitizer> @@ -69,12 +69,12 @@ </conditional> <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" label="Advanced 
Options"> - <option value="basic" selected="True">Hide Advanced Options</option> + <option value="basic" selected="true">Hide Advanced Options</option> <option value="advanced">Show Advanced Options</option> </param> <when value="basic" /> <when value="advanced"> - <param name="strip_suffix" type="boolean" value="false" label="Remove typical pair read name suffices when matching identifiers?" help="Will remove suffices including Illumina /1 and /2, Roche 454 .f and .r, and assorted Sanger names like .p* and .q*" /> + <param name="strip_suffix" type="boolean" value="false" label="Remove typical pair read name suffixes when matching identifiers?" help="Will remove suffixes including Illumina /1 and /2, Roche 454 .f and .r, and assorted Sanger names like .p* and .q*" /> </when> </conditional> </inputs> @@ -128,7 +128,7 @@ <param name="adv_opts_selector" value="advanced" /> <param name="strip_suffix" value="true" /> <output name="output_pos" file="sanger-pairs-mixed.fastq" ftype="fastq" /> - <output name="output_neg" file="empty_file.dat" ftype="fastq" /> + <output name="output_neg" file="empty_file.dat" ftype="fastq" /> </test> <test> <param name="input_file" value="sanger-pairs-mixed.fastq" ftype="fastq" /> @@ -180,14 +180,14 @@ Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). Galaxy tools and workflows for sequence analysis with applications in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 +https://doi.org/10.7717/peerj.167 This tool uses Biopython to read and write SFF files, so you may also wish to cite the Biopython application note (and Galaxy too of course): Cock et al (2009). Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3. -http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878. +https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878. 
This tool is available to install into other Galaxy Instances via the Galaxy Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_id |
b |
diff -r 141612f8c3e3 -r 4a7d8ad2a983 tools/seq_filter_by_id/tool_dependencies.xml --- a/tools/seq_filter_by_id/tool_dependencies.xml Thu May 11 12:18:52 2017 -0400 +++ b/tools/seq_filter_by_id/tool_dependencies.xml Thu Nov 30 09:50:34 2023 +0000 |
b |
@@ -1,6 +1,6 @@ -<?xml version="1.0"?> +<?xml version="1.0" ?> <tool_dependency> <package name="biopython" version="1.67"> - <repository changeset_revision="a42f244cce44" name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="a12f73c3b116"/> </package> -</tool_dependency> +</tool_dependency> \ No newline at end of file |