annotate glimmer_gbk_to_orf.py @ 3:21d0af260f11 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit a80e3e4aa3a40970af507bf9119cf7f1c2ffb336
author iuc
date Mon, 16 Dec 2024 14:42:19 +0000
parents 9b2e283dc3b5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
2
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
3 ###################################################################
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
4 #
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
5 # gbk2orf.py by Errol Strain (estrain@gmail.com)
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
6 #
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
7 # Read a GenBank file and export fasta formatted amino acid and
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
8 # CDS files
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
9 #
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
10 ###################################################################
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
11
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
12 import sys
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
13 from optparse import OptionParser
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
14
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
15 from Bio import SeqIO
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
16 from Bio.Seq import Seq
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
17 from Bio.SeqRecord import SeqRecord
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
18
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
19
9b2e283dc3b5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
bgruening
parents:
diff changeset
# Command line usage
usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta"


def _build_parser():
    """Return the OptionParser describing this script's command line."""
    parser = OptionParser(usage)
    parser.add_option("-t", "--translate", dest="transtabl", type="int", default=11,
                      help="Translation table used to translate coding regions (default=11)")
    parser.add_option("-g", "--genbank", dest="gb_file", help="GenBank input file")
    parser.add_option("-a", "--amino_acid", dest="aa_file", help="Fasta amino acid output")
    parser.add_option("-n", "--nucleotide", dest="orf_file", help="Fasta nucleotide output")
    return parser


def _collect_cds_records(gb_handle, table):
    """Build the amino acid and nucleotide SeqRecords for every CDS feature.

    gb_handle is an open, readable GenBank file handle and table is the
    translation table number handed to ``translate``.

    Returns a tuple ``(aa_records, nuc_records)`` of two equally long lists,
    in the order the CDS features were encountered.
    """
    aa_records = []
    nuc_records = []

    # If the CDS does not have a locus tag the name will be assigned using the
    # order in which it was found.  The counter is shared by all records in
    # the file and is incremented for every CDS, tagged or not.
    feat_count = 0

    # Iterate through genbank records in input file
    for gb_record in SeqIO.parse(gb_handle, "genbank"):
        for feature in gb_record.features:
            if feature.type != "CDS":
                continue
            feat_count += 1
            gene = feature.extract(gb_record.seq)
            if "locus_tag" in feature.qualifiers:
                value = feature.qualifiers["locus_tag"][0]
            else:
                value = "Index_" + str(feat_count)
            nuc_records.append(SeqRecord(Seq(str(gene)), id=value, name=value))
            # to_stop=True ends the protein at the first stop codon
            pro = Seq(str(gene.translate(table=table, to_stop=True)))
            aa_records.append(SeqRecord(pro, id=value, name=value))

    return aa_records, nuc_records


def main(argv=None):
    """Read a GenBank file and write its CDS features as two fasta files.

    argv is the list of command line arguments without the program name;
    when it is None, ``sys.argv[1:]`` is used.

    Exits through ``OptionParser.error`` (status 2) when no arguments are
    given or when any of -g, -a or -n is missing.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = _build_parser()
    opts, _args = parser.parse_args(argv)
    if not argv:
        parser.error("Use --help to see usage")

    # The parsed options object is always truthy, so testing it cannot detect
    # missing arguments; check each required option explicitly instead of
    # failing later when a missing file name is passed to open().
    required = (("-g", opts.gb_file), ("-a", opts.aa_file), ("-n", opts.orf_file))
    missing = [flag for flag, given in required if not given]
    if missing:
        parser.error("Missing required option(s): " + ", ".join(missing)
                     + ". Use --help to see usage")

    # The input is parsed completely before any output file is opened, so a
    # parse failure leaves no output files behind.
    with open(opts.gb_file, "r") as gb_handle:
        aalist, nuclist = _collect_cds_records(gb_handle, opts.transtabl)

    # Write out lists in fasta format
    with open(opts.aa_file, "w") as aa_handle:
        SeqIO.write(aalist, aa_handle, "fasta")
    with open(opts.orf_file, "w") as orf_handle:
        SeqIO.write(nuclist, orf_handle, "fasta")


if __name__ == "__main__":
    main()