Mercurial > repos > cpt > cpt_stop_stats
comparison cpt_stops/stop_stats.py @ 0:89c78d269676 draft
Uploaded
author | cpt |
---|---|
date | Fri, 13 May 2022 05:39:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:89c78d269676 |
---|---|
1 #!/usr/bin/env python | |
2 import argparse | |
3 from CPT_GFFParser import gffParse, gffWrite | |
4 from Bio import SeqIO | |
5 from gff3 import feature_lambda, feature_test_type | |
6 | |
7 | |
8 def main(fasta, gff3): | |
9 seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta")) | |
10 | |
11 codon_usage = {} | |
12 | |
13 for rec in gffParse(gff3, base_dict=seq_dict): | |
14 for feat in feature_lambda( | |
15 rec.features, feature_test_type, {"type": "CDS"}, subfeatures=True | |
16 ): | |
17 seq = str(feat.extract(rec).seq)[-3:] | |
18 try: | |
19 codon_usage[seq] += 1 | |
20 except KeyError: | |
21 codon_usage[seq] = 1 | |
22 | |
23 names = { | |
24 "TAG": "Amber", | |
25 "TAA": "Ochre", | |
26 "TGA": "Opal", | |
27 } | |
28 | |
29 # TODO: print all actg combinations? Or just ones that are there | |
30 print ("# Name\tCodon\tCount") | |
31 for key in sorted(codon_usage): | |
32 print ("\t".join((names.get(key.upper(), "None"), key, str(codon_usage[key])))) | |
33 | |
34 | |
35 if __name__ == "__main__": | |
36 parser = argparse.ArgumentParser( | |
37 description="Summarise stop codon usage", epilog="" | |
38 ) | |
39 parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome") | |
40 parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File") | |
41 args = parser.parse_args() | |
42 main(**vars(args)) |