Mercurial > repos > peterjc > seq_select_by_id
changeset 1:50a8a6917a9c draft
Uploaded update (v0.0.3) to ignore blank lines in the ID file
author | peterjc |
---|---|
date | Fri, 18 May 2012 12:25:12 -0400 |
parents | 838b9bebfa3c |
children | 28d52478ace9 |
files | tools/filters/seq_select_by_id.py tools/filters/seq_select_by_id.txt tools/filters/seq_select_by_id.xml |
diffstat | 3 files changed, 11 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/filters/seq_select_by_id.py Tue Jun 07 17:43:38 2011 -0400 +++ b/tools/filters/seq_select_by_id.py Fri May 18 12:25:12 2012 -0400 @@ -16,11 +16,11 @@ molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3. http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878. -This script is copyright 2011 by Peter Cock, The James Hutton Institute UK. +This script is copyright 2011-2012 by Peter Cock, The James Hutton Institute UK. All rights reserved. See accompanying text file for licence details (MIT/BSD style). -This is version 0.0.1 of the script. +This is version 0.0.3 of the script. """ import sys @@ -39,7 +39,7 @@ else: column = int(col_arg)-1 except ValueError: - stop_err("Expected column number, got %s" % cols_arg) + stop_err("Expected column number, got %s" % col_arg) if seq_format == "fastqcssanger": stop_err("Colorspace FASTQ not supported.") @@ -65,7 +65,7 @@ """Read tabular file and record all specified identifiers.""" handle = open(tabular_file, "rU") for line in handle: - if not line.startswith("#"): + if line.strip() and not line.startswith("#"): yield line.rstrip("\n").split("\t")[col].strip() handle.close() @@ -105,7 +105,7 @@ except KeyError, err: out_handle.close() if name not in records: - stop_err("Identifier %s not found in sequence file" % name) + stop_err("Identifier %r not found in sequence file" % name) else: raise err out_handle.close() @@ -119,7 +119,7 @@ out_handle.write(records.get_raw(name)) except KeyError: out_handle.close() - stop_err("Identifier %s not found in sequence file" % name) + stop_err("Identifier %r not found in sequence file" % name) count += 1 out_handle.close()
```diff
--- a/tools/filters/seq_select_by_id.txt Tue Jun 07 17:43:38 2011 -0400
+++ b/tools/filters/seq_select_by_id.txt Fri May 18 12:25:12 2012 -0400
@@ -1,5 +1,5 @@
-Galaxy tool to select FASTA, FASTQ or SFF sequences by ID
-=========================================================
+Galaxy tool to select FASTA, QUAL, FASTQ or SFF sequences by ID
+===============================================================
 
 This tool is copyright 2011 by Peter Cock, The James Hutton Institute
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
@@ -26,7 +26,7 @@
 You will also need to modify the tools_conf.xml file to tell Galaxy to offer the
 tool. One suggested location is in the filters section. Simply add the line:
 
-<tool file="filters/sff_select_by_id.xml" />
+<tool file="filters/seq_select_by_id.xml" />
 
 You will also need to install Biopython 1.54 or later. That's it.
 
@@ -35,7 +35,7 @@
 =======
 
 v0.0.1 - Initial version.
-
+v0.0.3 - Ignore blank lines in input
 
 Developers
 ==========
```
```diff
--- a/tools/filters/seq_select_by_id.xml Tue Jun 07 17:43:38 2011 -0400
+++ b/tools/filters/seq_select_by_id.xml Fri May 18 12:25:12 2012 -0400
@@ -1,4 +1,4 @@
-<tool id="seq_select_by_id" name="Select sequences by ID" version="0.0.1">
+<tool id="seq_select_by_id" name="Select sequences by ID" version="0.0.3">
 <description>from a tabular file</description>
 <command interpreter="python">
 seq_select_by_id.py $input_tabular $column $input_file $input_file.ext $output_file
```