comparison cpt_stops/stop_stats.py @ 0:89c78d269676 draft

Uploaded
author cpt
date Fri, 13 May 2022 05:39:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:89c78d269676
1 #!/usr/bin/env python
2 import argparse
3 from CPT_GFFParser import gffParse, gffWrite
4 from Bio import SeqIO
5 from gff3 import feature_lambda, feature_test_type
6
7
def main(fasta, gff3):
    """Print a tab-separated tally of the last three bases of each CDS.

    For every feature selected by ``feature_lambda`` with the ``CDS`` type
    filter, the feature's sequence is extracted from its record and its final
    three characters are counted. The result is written to stdout as a
    ``Name / Codon / Count`` table, sorted by codon.

    Args:
        fasta: Genome sequence source handed to ``SeqIO.parse`` in "fasta"
            format (the command-line entry point passes an open text file).
        gff3: Annotation source handed to ``gffParse`` (the command-line
            entry point passes an open text file).

    Notes:
        * Codons are tallied exactly as extracted, so differently-cased
          spellings of the same codon are counted on separate rows; only the
          name lookup is case-insensitive.
        * A feature whose extracted sequence is shorter than three characters
          contributes that shorter string as its key.
        * Trailing triplets that are not TAG/TAA/TGA are labelled "None".
    """
    # Imported at function scope so the block stays self-contained and the
    # file's import header does not have to change.
    from collections import Counter

    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))

    # Traditional names of the three standard stop codons.
    names = {
        "TAG": "Amber",
        "TAA": "Ochre",
        "TGA": "Opal",
    }

    # Counter replaces the manual try/except KeyError increment.
    codon_usage = Counter()
    for rec in gffParse(gff3, base_dict=seq_dict):
        # NOTE(review): presumably subfeatures=True makes feature_lambda
        # descend into nested features (e.g. CDS under gene/mRNA) -- confirm
        # against the gff3 helper module.
        for feat in feature_lambda(
            rec.features, feature_test_type, {"type": "CDS"}, subfeatures=True
        ):
            # Last three characters of the extracted feature sequence.
            codon_usage[str(feat.extract(rec).seq)[-3:]] += 1

    # TODO: print all actg combinations? Or just ones that are there
    print("# Name\tCodon\tCount")
    for key in sorted(codon_usage):
        print("\t".join((names.get(key.upper(), "None"), key, str(codon_usage[key]))))
33
34
if __name__ == "__main__":
    # Command-line entry point: two positional, readable text files.
    cli = argparse.ArgumentParser(description="Summarise stop codon usage", epilog="")
    cli.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")
    cli.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File")
    options = cli.parse_args()
    # The parsed namespace holds exactly these two attributes, so passing
    # them by keyword is equivalent to unpacking the whole namespace.
    main(fasta=options.fasta, gff3=options.gff3)