| 
0
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 import argparse
 | 
| 
 | 
     3 from CPT_GFFParser import gffParse, gffWrite
 | 
| 
 | 
     4 from Bio import SeqIO
 | 
| 
 | 
     5 from gff3 import feature_lambda, feature_test_type
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 
 | 
| 
 | 
     8 def main(fasta, gff3):
 | 
| 
 | 
     9     seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
 | 
| 
 | 
    10 
 | 
| 
 | 
    11     codon_usage = {}
 | 
| 
 | 
    12 
 | 
| 
 | 
    13     for rec in gffParse(gff3, base_dict=seq_dict):
 | 
| 
 | 
    14         for feat in feature_lambda(
 | 
| 
 | 
    15             rec.features, feature_test_type, {"type": "CDS"}, subfeatures=True
 | 
| 
 | 
    16         ):
 | 
| 
 | 
    17             seq = str(feat.extract(rec).seq)[0:3]
 | 
| 
 | 
    18             try:
 | 
| 
 | 
    19                 codon_usage[seq] += 1
 | 
| 
 | 
    20             except KeyError:
 | 
| 
 | 
    21                 codon_usage[seq] = 1
 | 
| 
 | 
    22 
 | 
| 
 | 
    23     # TODO: print all actg combinations? Or just ones that are there
 | 
| 
 | 
    24     print ("# Codon\tCount")
 | 
| 
 | 
    25     for key in sorted(codon_usage):
 | 
| 
 | 
    26         print ("\t".join((key, str(codon_usage[key]))))
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 if __name__ == "__main__":
 | 
| 
 | 
    30     parser = argparse.ArgumentParser(
 | 
| 
 | 
    31         description="Summarise start codon usage", epilog=""
 | 
| 
 | 
    32     )
 | 
| 
 | 
    33     parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")
 | 
| 
 | 
    34     parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File")
 | 
| 
 | 
    35     args = parser.parse_args()
 | 
| 
 | 
    36     main(**vars(args))
 |