Mercurial > repos > peterjc > seq_length
comparison tools/seq_length/seq_length.py @ 2:6f29bb9960ac draft
v0.0.3 - Fixed SFF; more tests
author | peterjc |
---|---|
date | Mon, 14 May 2018 12:09:50 -0400 |
parents | 458f987918a6 |
children | fcdf11fb34de |
comparison
Legend (row highlighting): equal, deleted, inserted, replaced
1:458f987918a6 | 2:6f29bb9960ac |
---|---|
18 """ | 18 """ |
19 | 19 |
20 from __future__ import print_function | 20 from __future__ import print_function |
21 | 21 |
22 import sys | 22 import sys |
23 from optparse import OptionParser | |
23 | 24 |
24 if "-v" in sys.argv or "--version" in sys.argv: | 25 usage = r"""Use as follows to compute all the lengths in a sequence file: |
25 print("v0.0.2") | 26 |
27 $ python seq_length.py -i example.fasta -f fasta -o lengths.tsv | |
28 """ | |
29 | |
30 parser = OptionParser(usage=usage) | |
31 parser.add_option('-i', '--input', dest='input', | |
32 default=None, help='Input sequence filename (FASTA, FASTQ, etc)', | |
33 metavar="FILE") | |
34 parser.add_option('-f', '--format', dest='format', | |
35 default=None, help='Input sequence format (FASTA, QUAL, FASTQ, SFF)') | |
36 parser.add_option('-o', '--output', dest='output', | |
37 default=None, help='Output filename (tabular)', | |
38 metavar="FILE") | |
39 parser.add_option("-v", "--version", dest="version", | |
40 default=False, action="store_true", | |
41 help="Show version and quit") | |
42 options, args = parser.parse_args() | |
43 | |
44 if options.version: | |
45 print("v0.0.3") | |
26 sys.exit(0) | 46 sys.exit(0) |
47 if not options.input: | |
48 sys.exit("Require an input filename") | |
49 if not options.format: | |
50 sys.exit("Require the input format") | |
51 if not options.output: | |
52 sys.exit("Require an output filename") | |
27 | 53 |
28 try: | 54 try: |
29 from Bio import SeqIO | 55 from Bio import SeqIO |
30 except ImportError: | 56 except ImportError: |
31 sys.exit("Missing required Python library Biopython.") | 57 sys.exit("Missing required Python library Biopython.") |
38 try: | 64 try: |
39 from Bio.SeqIO.FastaIO import SimpleFastaParser | 65 from Bio.SeqIO.FastaIO import SimpleFastaParser |
40 except ImportError: | 66 except ImportError: |
41 sys.exit("Biopython tool old?, missing Bio.SeqIO.FastaIO.SimpleFastaParser") | 67 sys.exit("Biopython tool old?, missing Bio.SeqIO.FastaIO.SimpleFastaParser") |
42 | 68 |
69 in_file = options.input | |
70 out_file = options.output | |
43 | 71 |
44 # Parse Command Line | 72 if options.format.startswith("fastq"): |
45 try: | |
46 in_file, seq_format, out_file = sys.argv[1:] | |
47 except ValueError: | |
48 sys.exit("Expected three arguments (input file, format, output file), " | |
49 "got %i:\n%s" % (len(sys.argv) - 1, " ".join(sys.argv))) | |
50 | |
51 | |
52 if seq_format.startswith("fastq"): | |
53 # We don't care about the quality score encoding, just | 73 # We don't care about the quality score encoding, just |
54 # need to translate Galaxy format name into something | 74 # need to translate Galaxy format name into something |
55 # Biopython will accept: | 75 # Biopython will accept: |
56 format = "fastq" | 76 format = "fastq" |
57 elif seq_format.lower() == "csfasta": | 77 elif options.format.lower() == "csfasta": |
58 # I have not tested with colour space FASTA | 78 # I have not tested with colour space FASTA |
59 format = "fasta" | 79 format = "fasta" |
60 elif seq_format.lower == "sff": | 80 elif options.format.lower() == "sff": |
61 # The masked/trimmed numbers are more interesting | 81 # The masked/trimmed numbers are more interesting |
62 format = "sff-trim" | 82 format = "sff-trim" |
63 elif seq_format.lower() in ["fasta", "qual"]: | 83 elif options.format.lower() in ["fasta", "qual"]: |
64 format = seq_format.lower() | 84 format = options.format.lower() |
65 else: | 85 else: |
66 # TODO: Does Galaxy understand GenBank, EMBL, etc yet? | 86 # TODO: Does Galaxy understand GenBank, EMBL, etc yet? |
67 sys.exit("Unexpected format argument: %r" % seq_format) | 87 sys.exit("Unexpected format argument: %r" % options.format) |
68 | 88 |
69 | 89 |
70 count = 0 | 90 count = 0 |
71 total = 0 | 91 total = 0 |
72 with open(out_file, "w") as out_handle: | 92 with open(out_file, "w") as out_handle: |