| 0 | 1 #!/usr/bin/env python | 
|  | 2 import argparse | 
|  | 3 from CPT_GFFParser import gffParse, gffWrite | 
|  | 4 from Bio import SeqIO | 
|  | 5 from gff3 import feature_lambda, feature_test_type | 
|  | 6 | 
|  | 7 | 
def main(fasta, gff3):
    """Print a tab-separated tally of the leading three bases of each CDS.

    Args:
        fasta: Source handed to ``SeqIO.parse(..., "fasta")``; the parsed
            records are turned into a dict and passed to ``gffParse`` as
            ``base_dict``.
        gff3: Source handed to ``gffParse``.

    Output:
        Writes a ``# Codon<TAB>Count`` header to stdout followed by one line
        per distinct three-character prefix, sorted by that prefix.

    Notes:
        * Prefixes are counted exactly as extracted: no case normalisation is
          applied, and a feature whose extracted sequence is shorter than
          three characters contributes that shorter string.
        * NOTE(review): features are selected via ``feature_lambda`` with
          ``feature_test_type`` and ``{"type": "CDS"}`` -- presumably this
          yields every CDS feature, including nested ones
          (``subfeatures=True``); confirm against the ``gff3`` helper module.
        * NOTE(review): ``feat.extract(rec)`` is presumably strand-aware, so
          the prefix should be the biological start codon; confirm against
          the Biopython version in use.
    """
    # Function-scope import so the block stays self-contained without
    # touching the file's import section.
    from collections import Counter

    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))

    # Counter supplies the implicit zero default, replacing the manual
    # try/except KeyError increment.
    codon_usage = Counter()

    for rec in gffParse(gff3, base_dict=seq_dict):
        for feat in feature_lambda(
            rec.features, feature_test_type, {"type": "CDS"}, subfeatures=True
        ):
            codon_usage[str(feat.extract(rec).seq)[0:3]] += 1

    # TODO: print all actg combinations? Or just ones that are there
    print("# Codon\tCount")
    for key in sorted(codon_usage):
        print("\t".join((key, str(codon_usage[key]))))
|  | 27 | 
|  | 28 | 
if __name__ == "__main__":
    # Command-line entry point: two positional arguments, each opened for
    # reading by argparse and forwarded to main() under its own name.
    cli = argparse.ArgumentParser(
        description="Summarise start codon usage",
        epilog="",
    )
    for arg_name, arg_help in (("fasta", "Fasta Genome"), ("gff3", "GFF3 File")):
        cli.add_argument(arg_name, type=argparse.FileType("r"), help=arg_help)

    options = cli.parse_args()
    main(fasta=options.fasta, gff3=options.gff3)