Mercurial > repos > bgruening > glimmer_knowledge_based
diff glimmer_gbk_to_orf.py @ 0:9b2e283dc3b5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author | bgruening |
---|---|
date | Tue, 28 Nov 2017 10:10:55 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/glimmer_gbk_to_orf.py Tue Nov 28 10:10:55 2017 -0500 @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +################################################################### +# +# gbk2orf.py by Errol Strain (estrain@gmail.com) +# +# Read a GenBank file and export fasta formatted amino acid and +# CDS files +# +################################################################### + +import sys +from optparse import OptionParser + +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + + +# Command line usage +usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta" +p = OptionParser(usage) +p.add_option("-t", "--translate", dest="transtabl", type="int", default=11, + help="Translation table used to translate coding regions (default=11)") +p.add_option("-g", "--genbank", dest="gb_file", help="GenBank input file") +p.add_option("-a", "--amino_acid", dest="aa_file", help="Fasta amino acid output") +p.add_option("-n", "--nucleotide", dest="orf_file", help="Fasta nucleotide output") +(opts, args) = p.parse_args() +# Do I need this next line? +if not opts and not args: + p.error("Use --help to see usage") +if len(sys.argv) == 1: + p.error("Use --help to see usage") + +# Lists to hold SeqRecords +aalist = [] +nuclist = [] + +# If the CDS does not have a locus tag the name will be assigned using the +# order in which it was found +feat_count = 0 + +# Iterate through genbank records in input file +for gb_record in SeqIO.parse(open(opts.gb_file, "r"), "genbank"): + for (index, feature) in enumerate(gb_record.features): + if feature.type == "CDS": + feat_count = feat_count + 1 + gene = feature.extract(gb_record.seq) + if "locus_tag" in feature.qualifiers: + value = feature.qualifiers["locus_tag"][0] + else: + value = "Index_" + str(feat_count) + nuclist.append(SeqRecord(Seq(str(gene)), id=value, name=value)) + pro = Seq(str(gene.translate(table=opts.transtabl, to_stop=True))) + aalist.append(SeqRecord(pro, id=value, name=value)) + +# Write out lists in fasta format +aa_handle = open(opts.aa_file, "w") +SeqIO.write(aalist, aa_handle, "fasta") +aa_handle.close() +orf_handle = open(opts.orf_file, "w") +SeqIO.write(nuclist, orf_handle, "fasta") +orf_handle.close()