Mercurial > repos > peterjc > seq_length
diff tools/seq_length/seq_length.py @ 2:6f29bb9960ac draft
v0.0.3 - Fixed SFF; more tests
author | peterjc |
---|---|
date | Mon, 14 May 2018 12:09:50 -0400 |
parents | 458f987918a6 |
children | fcdf11fb34de |
line wrap: on
line diff
```diff
--- a/tools/seq_length/seq_length.py	Tue May 08 11:16:50 2018 -0400
+++ b/tools/seq_length/seq_length.py	Mon May 14 12:09:50 2018 -0400
@@ -20,10 +20,36 @@
 from __future__ import print_function
 
 import sys
+from optparse import OptionParser
 
-if "-v" in sys.argv or "--version" in sys.argv:
-    print("v0.0.2")
+usage = r"""Use as follows to compute all the lengths in a sequence file:
+
+$ python seq_length.py -i example.fasta -f fasta -o lengths.tsv
+"""
+
+parser = OptionParser(usage=usage)
+parser.add_option('-i', '--input', dest='input',
+                  default=None, help='Input sequence filename (FASTA, FASTQ, etc)',
+                  metavar="FILE")
+parser.add_option('-f', '--format', dest='format',
+                  default=None, help='Input sequence format (FASTA, QUAL, FASTQ, SFF)')
+parser.add_option('-o', '--output', dest='output',
+                  default=None, help='Output filename (tabular)',
+                  metavar="FILE")
+parser.add_option("-v", "--version", dest="version",
+                  default=False, action="store_true",
+                  help="Show version and quit")
+options, args = parser.parse_args()
+
+if options.version:
+    print("v0.0.3")
     sys.exit(0)
+if not options.input:
+    sys.exit("Require an input filename")
+if not options.format:
+    sys.exit("Require the input format")
+if not options.output:
+    sys.exit("Require an output filename")
 
 try:
     from Bio import SeqIO
@@ -40,31 +66,25 @@
 except ImportError:
     sys.exit("Biopython tool old?, missing Bio.SeqIO.FastaIO.SimpleFastaParser")
 
+in_file = options.input
+out_file = options.output
 
-# Parse Command Line
-try:
-    in_file, seq_format, out_file = sys.argv[1:]
-except ValueError:
-    sys.exit("Expected three arguments (input file, format, output file), "
-             "got %i:\n%s" % (len(sys.argv) - 1, " ".join(sys.argv)))
-
-
-if seq_format.startswith("fastq"):
+if options.format.startswith("fastq"):
     # We don't care about the quality score encoding, just
     # need to translate Galaxy format name into something
     # Biopython will accept:
     format = "fastq"
-elif seq_format.lower() == "csfasta":
+elif options.format.lower() == "csfasta":
     # I have not tested with colour space FASTA
     format = "fasta"
-elif seq_format.lower == "sff":
+elif options.format.lower() == "sff":
     # The masked/trimmed numbers are more interesting
     format = "sff-trim"
-elif seq_format.lower() in ["fasta", "qual"]:
-    format = seq_format.lower()
+elif options.format.lower() in ["fasta", "qual"]:
+    format = options.format.lower()
 else:
     # TODO: Does Galaxy understand GenBank, EMBL, etc yet?
-    sys.exit("Unexpected format argument: %r" % seq_format)
+    sys.exit("Unexpected format argument: %r" % options.format)
 
 
 count = 0
```