Mercurial > repos > cpt > cpt_stop_stats
comparison cpt_stops/stop_stats.py @ 0:89c78d269676 draft
Uploaded
| author | cpt |
|---|---|
| date | Fri, 13 May 2022 05:39:51 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:89c78d269676 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse | |
| 3 from CPT_GFFParser import gffParse, gffWrite | |
| 4 from Bio import SeqIO | |
| 5 from gff3 import feature_lambda, feature_test_type | |
| 6 | |
| 7 | |
| 8 def main(fasta, gff3): | |
| 9 seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta")) | |
| 10 | |
| 11 codon_usage = {} | |
| 12 | |
| 13 for rec in gffParse(gff3, base_dict=seq_dict): | |
| 14 for feat in feature_lambda( | |
| 15 rec.features, feature_test_type, {"type": "CDS"}, subfeatures=True | |
| 16 ): | |
| 17 seq = str(feat.extract(rec).seq)[-3:] | |
| 18 try: | |
| 19 codon_usage[seq] += 1 | |
| 20 except KeyError: | |
| 21 codon_usage[seq] = 1 | |
| 22 | |
| 23 names = { | |
| 24 "TAG": "Amber", | |
| 25 "TAA": "Ochre", | |
| 26 "TGA": "Opal", | |
| 27 } | |
| 28 | |
| 29 # TODO: print all actg combinations? Or just ones that are there | |
| 30 print ("# Name\tCodon\tCount") | |
| 31 for key in sorted(codon_usage): | |
| 32 print ("\t".join((names.get(key.upper(), "None"), key, str(codon_usage[key])))) | |
| 33 | |
| 34 | |
| 35 if __name__ == "__main__": | |
| 36 parser = argparse.ArgumentParser( | |
| 37 description="Summarise stop codon usage", epilog="" | |
| 38 ) | |
| 39 parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome") | |
| 40 parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File") | |
| 41 args = parser.parse_args() | |
| 42 main(**vars(args)) |
