annotate Project_RM/codon_usage.py @ 6:1c2b04d72f9d draft

Uploaded
author gianmarco_piccinno
date Sun, 09 Dec 2018 06:14:35 -0500
parents 791a47d9a777
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
1 #!/home/gianmarco/galaxy-python/python
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
2
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
3 import Bio
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
4 from Bio import SeqIO
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
5 from Bio.Data import CodonTable
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
6 import re
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
7 import sys
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
8 import os
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
9 import pandas as pd
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
10
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
def read_input(data = "example.fna"):
    """Read a FASTA file and return all of its sequences as one string.

    Every record in the file is converted to a plain string and the
    records are concatenated in file order with no separator between them.

    Parameters:
        data: path of the FASTA file to read.

    Returns:
        A single ``str`` holding the concatenated sequences; the empty
        string when the file contains no records.
    """
    # Plain text mode: the legacy "U" flag is rejected by Python 3.11+, and
    # text mode already applies universal-newline handling on Python 3.
    with open(data, "r") as handle:
        # join() instead of repeated "+" avoids re-copying the growing string
        # once per record.
        return "".join(str(record.seq) for record in SeqIO.parse(handle, "fasta"))
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
19
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
def codon_usage(seqs, codonTable):
    """Count codon occurrences in ``seqs`` and tabulate them per amino acid.

    ``seqs`` is cut into consecutive, non-overlapping three-character words
    (regex ``\\w{3}``), starting at its first character; leftover characters
    that do not form a full word are ignored. Matching is case-sensitive, so
    only words spelled exactly like the table's codons are counted.

    Parameters:
        seqs: nucleotide sequence as a string.
        codonTable: name of a Biopython unambiguous DNA codon table, used as
            a key into ``CodonTable.unambiguous_dna_by_name``
            (e.g. ``"Bacterial"``).

    Returns:
        A dict with three views of the same counts:
          * ``"Dictionary"``: ``{amino_acid: {codon_label: count}}``
          * ``"Tuples"``: ``{(amino_acid, codon_label): count}``
          * ``"Table"``: a DataFrame indexed by (``AA``, ``Codon``), sorted
            by that index, with a ``Count`` column and a ``Proportion``
            column (the codon's share of its amino acid's total, rounded to
            two decimals; NaN when the amino acid was never seen).

        Stop codons are grouped under the pseudo amino acid ``"_Stop"``;
        start codons are labelled ``"<codon> Start"``.

    Raises:
        KeyError: if ``codonTable`` is not a known table name.
    """
    # Function-scope import so the block does not depend on the file header.
    from collections import Counter

    table = CodonTable.unambiguous_dna_by_name[codonTable]

    # Work on a copy: forward_table belongs to Biopython's shared table
    # object, and editing it in place would corrupt it for every later user
    # (a second call here would fail on the already-removed start codons).
    b_cod_table = dict(table.forward_table)

    for cod in table.stop_codons:
        b_cod_table[cod] = "_Stop"

    # Relabel start codons so they are distinguishable in the output.
    for cod in table.start_codons:
        b_cod_table[cod + " Start"] = b_cod_table.pop(cod)

    # One pass over the sequence instead of one full scan per codon.
    counts = Counter(re.findall(r"\w{3}", seqs))

    usage = {}
    for label, aa in b_cod_table.items():
        # Strip the optional " Start" suffix to recover the bare codon.
        usage.setdefault(aa, {})[label] = counts[label.split(" ")[0]]

    tups = {
        (aa, label): count
        for aa, by_codon in usage.items()
        for label, count in by_codon.items()
    }

    # Explicit sort makes the row order deterministic regardless of
    # dict/set iteration order ("_Stop" sorts after the amino-acid letters).
    keys = sorted(tups)
    codon_usage_ = pd.DataFrame(
        {"Count": [tups[key] for key in keys]},
        index=pd.MultiIndex.from_tuples(keys, names=["AA", "Codon"]),
    )

    # Share of each codon within its amino-acid group.
    codon_usage_["Proportion"] = codon_usage_.groupby(level=0)["Count"].transform(
        lambda x: (x / x.sum()).round(2)
    )

    return {"Dictionary": usage, "Tuples": tups, "Table": codon_usage_}
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
63
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
64
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
65
791a47d9a777 Uploaded-dev
gianmarco_piccinno
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point:
    #   argv[1] - input FASTA file
    #   argv[2] - output path for the tab-separated codon usage table
    if len(sys.argv) < 3:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: " + sys.argv[0] + " <input_fasta> <output_tsv>")

    seqs = read_input(data=sys.argv[1])
    out = codon_usage(seqs, "Bacterial")

    with open(sys.argv[2], "w") as outf:
        out["Table"].to_csv(outf, sep="\t")