Previous changeset 0:c323e29a8248 (2018-05-08) Next changeset 2:6f29bb9960ac (2018-05-14) |
Commit message:
Faster FASTA and FASTQ, v0.0.2 |
modified:
tools/seq_length/README.rst tools/seq_length/seq_length.py tools/seq_length/seq_length.xml |
b |
diff -r c323e29a8248 -r 458f987918a6 tools/seq_length/README.rst --- a/tools/seq_length/README.rst Tue May 08 09:35:45 2018 -0400 +++ b/tools/seq_length/README.rst Tue May 08 11:16:50 2018 -0400 |
b |
@@ -60,6 +60,8 @@ Version Changes ------- ---------------------------------------------------------------------- v0.0.1 - Initial version. +v0.0.2 - Faster for FASTA and FASTQ. + - Fixed typo. ======= ====================================================================== |
b |
diff -r c323e29a8248 -r 458f987918a6 tools/seq_length/seq_length.py --- a/tools/seq_length/seq_length.py Tue May 08 09:35:45 2018 -0400 +++ b/tools/seq_length/seq_length.py Tue May 08 11:16:50 2018 -0400 |
[ |
@@ -22,7 +22,7 @@ import sys if "-v" in sys.argv or "--version" in sys.argv: - print("v0.0.1") + print("v0.0.2") sys.exit(0) try: @@ -30,6 +30,16 @@ except ImportError: sys.exit("Missing required Python library Biopython.") +try: + from Bio.SeqIO.QualityIO import FastqGeneralIterator +except ImportError: + sys.exit("Biopython tool old?, missing Bio.SeqIO.QualityIO.FastqGeneralIterator") + +try: + from Bio.SeqIO.FastaIO import SimpleFastaParser +except ImportError: + sys.exit("Biopython tool old?, missing Bio.SeqIO.FastaIO.SimpleFastaParser") + # Parse Command Line try: @@ -61,9 +71,26 @@ total = 0 with open(out_file, "w") as out_handle: out_handle.write("#Identifier\tLength\n") - for record in SeqIO.parse(in_file, format): - count += 1 - length = len(record) - total += length - out_handle.write("%s\t%i\n" % (record.id, length)) + if format == "fastq": + with open(in_file) as in_handle: + for title, seq, qual in FastqGeneralIterator(in_handle): + count += 1 + length = len(seq) + total += length + identifier = title.split(None, 1)[0] + out_handle.write("%s\t%i\n" % (identifier, length)) + elif format == "fasta": + with open(in_file) as in_handle: + for title, seq in SimpleFastaParser(in_handle): + count += 1 + length = len(seq) + total += length + identifier = title.split(None, 1)[0] + out_handle.write("%s\t%i\n" % (identifier, length)) + else: + for record in SeqIO.parse(in_file, format): + count += 1 + length = len(record) + total += length + out_handle.write("%s\t%i\n" % (record.id, length)) print("%i sequences, total length %i" % (count, total)) |
b |
diff -r c323e29a8248 -r 458f987918a6 tools/seq_length/seq_length.xml --- a/tools/seq_length/seq_length.xml Tue May 08 09:35:45 2018 -0400 +++ b/tools/seq_length/seq_length.xml Tue May 08 11:16:50 2018 -0400 |
b |
@@ -1,5 +1,5 @@ -<tool id="seq_length" name="Sequence lengths" version="0.0.1"> - <description>with ID mapping from a tabular file</description> +<tool id="seq_length" name="Sequence lengths" version="0.0.2"> + <description>from FASTA, QUAL, FASTQ, or SFF file</description> <requirements> <!-- This is currently the last release of Biopython which is available via Galaxy's legacy XML packaging system --> <requirement type="package" version="1.67">biopython</requirement> |