Mercurial > repos > cpt > cpt_start_stats
comparison cpt_starts/start_stats.py @ 0:9f2517655a1e draft
Uploaded
| author | cpt |
|---|---|
| date | Fri, 13 May 2022 05:38:37 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9f2517655a1e |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse | |
| 3 from CPT_GFFParser import gffParse, gffWrite | |
| 4 from Bio import SeqIO | |
| 5 from gff3 import feature_lambda, feature_test_type | |
| 6 | |
| 7 | |
def main(fasta, gff3):
    """Print a tab-separated tally of the leading three bases of each CDS.

    Args:
        fasta: FASTA input handed to ``SeqIO.parse``; its records are
            indexed into a dict and supplied to the GFF parser as
            ``base_dict``.
        gff3: GFF3 input handed to ``gffParse``.

    Output goes to stdout: a ``# Codon<TAB>Count`` header line followed by
    one ``codon<TAB>count`` line per distinct three-character prefix,
    ordered by the prefix string.
    """
    genome = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))

    codon_usage = {}
    for record in gffParse(gff3, base_dict=genome):
        # NOTE(review): feature_lambda/feature_test_type come from the
        # project's gff3 helper; presumably this yields every CDS feature,
        # nested ones included -- confirm against that module.
        cds_features = feature_lambda(
            record.features, feature_test_type, {"type": "CDS"}, subfeatures=True
        )
        for cds in cds_features:
            # Only the first three characters of the extracted sequence
            # are tallied.
            start_codon = str(cds.extract(record).seq)[0:3]
            codon_usage[start_codon] = codon_usage.get(start_codon, 0) + 1

    # TODO: print all actg combinations? Or just ones that are there
    print("# Codon\tCount")
    for codon, count in sorted(codon_usage.items()):
        print("\t".join((codon, str(count))))
| 27 | |
| 28 | |
if __name__ == "__main__":
    # Command-line entry point: two positional inputs, each opened for
    # reading by argparse before being passed on to main().
    cli = argparse.ArgumentParser(
        description="Summarise start codon usage", epilog=""
    )
    positionals = (
        ("fasta", "Fasta Genome"),
        ("gff3", "GFF3 File"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, type=argparse.FileType("r"), help=arg_help)
    options = cli.parse_args()
    main(fasta=options.fasta, gff3=options.gff3)
