comparison glimmer_gbk_to_orf.py @ 0:9c195b26a5ac draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author bgruening
date Tue, 28 Nov 2017 10:08:06 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9c195b26a5ac
1 #!/usr/bin/env python
2
3 ###################################################################
4 #
5 # gbk2orf.py by Errol Strain (estrain@gmail.com)
6 #
7 # Read a GenBank file and export fasta formatted amino acid and
8 # CDS files
9 #
10 ###################################################################
11
12 import sys
13 from optparse import OptionParser
14
15 from Bio import SeqIO
16 from Bio.Seq import Seq
17 from Bio.SeqRecord import SeqRecord
18
19
20 # Command line usage
21 usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta"
22 p = OptionParser(usage)
23 p.add_option("-t", "--translate", dest="transtabl", type="int", default=11,
24 help="Translation table used to translate coding regions (default=11)")
25 p.add_option("-g", "--genbank", dest="gb_file", help="GenBank input file")
26 p.add_option("-a", "--amino_acid", dest="aa_file", help="Fasta amino acid output")
27 p.add_option("-n", "--nucleotide", dest="orf_file", help="Fasta nucleotide output")
28 (opts, args) = p.parse_args()
29 # Do I need this next line?
30 if not opts and not args:
31 p.error("Use --help to see usage")
32 if len(sys.argv) == 1:
33 p.error("Use --help to see usage")
34
35 # Lists to hold SeqRecords
36 aalist = []
37 nuclist = []
38
39 # If the CDS does not have a locus tag the name will be assigned using the
40 # order in which it was found
41 feat_count = 0
42
43 # Iterate through genbank records in input file
44 for gb_record in SeqIO.parse(open(opts.gb_file, "r"), "genbank"):
45 for (index, feature) in enumerate(gb_record.features):
46 if feature.type == "CDS":
47 feat_count = feat_count + 1
48 gene = feature.extract(gb_record.seq)
49 if "locus_tag" in feature.qualifiers:
50 value = feature.qualifiers["locus_tag"][0]
51 else:
52 value = "Index_" + str(feat_count)
53 nuclist.append(SeqRecord(Seq(str(gene)), id=value, name=value))
54 pro = Seq(str(gene.translate(table=opts.transtabl, to_stop=True)))
55 aalist.append(SeqRecord(pro, id=value, name=value))
56
57 # Write out lists in fasta format
58 aa_handle = open(opts.aa_file, "w")
59 SeqIO.write(aalist, aa_handle, "fasta")
60 aa_handle.close()
61 orf_handle = open(opts.orf_file, "w")
62 SeqIO.write(nuclist, orf_handle, "fasta")
63 orf_handle.close()