annotate GTranslator.py @ 8:e5616d5101c0 draft default tip

Bug fix - Null strand give index out of bound error
author nedias
date Wed, 19 Oct 2016 14:24:31 -0400
parents c56b8a6bd02e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
1 """
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
2 Translate nucleotide code to polypeptide
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
3 Because the codon table never changes, for the sake of read/execution performance
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
4 it is hard-coded, not written to XML or another file format, nor stored in a database
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
5 TODO: Need to add a codon table for RNA
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
6
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
7 Author Nedias
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
8 """
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
9
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
10 from Bio.Alphabet import generic_protein
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
11 from Bio.Seq import Seq
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
12
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
13
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
14 # Nucleotide to Polypeptide mapping
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
def condon_table():
    """Build the codon lookup table used for the + strand.

    A new dict is created on every call. It maps each of the 64 DNA codons
    to a one-letter amino-acid code, with the three stop codons mapped to
    the string "stop". The same dict also maps every amino-acid code (and
    "stop") back to a single codon: the one visited last when iterating the
    table, which on insertion-ordered dicts is the last codon listed for
    that code below.
    """
    # (amino acid, codons) groups. The order is significant: it fixes the
    # insertion order of the table and therefore which codon the reverse
    # mapping keeps for each amino acid.
    groups = (
        ("F", ("TTT", "TTC")),
        ("L", ("TTA", "TTG", "CTT", "CTC", "CTA", "CTG")),
        ("I", ("ATT", "ATC", "ATA")),
        ("M", ("ATG",)),
        ("V", ("GTT", "GTC", "GTA", "GTG")),
        ("S", ("TCT", "TCA", "TCC", "TCG")),
        ("P", ("CCT", "CCC", "CCA", "CCG")),
        ("T", ("ACT", "ACC", "ACA", "ACG")),
        ("A", ("GCT", "GCC", "GCA", "GCG")),
        ("Y", ("TAT", "TAC")),
        ("stop", ("TAA", "TAG")),
        ("H", ("CAT", "CAC")),
        ("Q", ("CAA", "CAG")),
        ("N", ("AAT", "AAC")),
        ("K", ("AAA", "AAG")),
        ("D", ("GAT", "GAC")),
        ("E", ("GAA", "GAG")),
        ("C", ("TGT", "TGC")),
        ("stop", ("TGA",)),
        ("W", ("TGG",)),
        ("R", ("CGT", "CGC", "CGA", "CGG")),
        ("S", ("AGT", "AGC")),
        ("R", ("AGA", "AGG")),
        ("G", ("GGT", "GGC", "GGA", "GGG")),
    )

    table = {}
    for amino, codons in groups:
        for codon in codons:
            table[codon] = amino

    # Add the amino acid -> codon direction; later codons overwrite earlier
    # ones for the same amino acid.
    inverse = {}
    for codon in table:
        inverse[table[codon]] = codon
    table.update(inverse)

    return table
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
85
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
86
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
87 # Nucleotide to Polypeptide mapping for complementary sequence
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
def rev_condon_table():
    """Build the codon lookup table used for the complementary (-) strand.

    A new dict is created on every call. Each key is a 3-letter DNA string
    mapped to a one-letter amino-acid code, with "ATT", "ATC" and "ACT"
    mapped to the string "stop". The same dict also maps every amino-acid
    code (and "stop") back to a single codon: the one visited last when
    iterating the table, which on insertion-ordered dicts is the last codon
    listed for that code below.
    """
    # (amino acid, codons) groups. The order is significant: it fixes the
    # insertion order of the table and therefore which codon the reverse
    # mapping keeps for each amino acid.
    groups = (
        ("F", ("AAA", "AAG")),
        ("L", ("AAT", "AAC", "GAA", "GAG", "GAT", "GAC")),
        ("I", ("TAA", "TAG", "TAT")),
        ("M", ("TAC",)),
        ("V", ("CAA", "CAG", "CAT", "CAC")),
        ("S", ("AGA", "AGT", "AGG", "AGC")),
        ("P", ("GGA", "GGG", "GGT", "GGC")),
        ("T", ("TGA", "TGG", "TGT", "TGC")),
        ("A", ("CGA", "CGG", "CGT", "CGC")),
        ("Y", ("ATA", "ATG")),
        ("stop", ("ATT", "ATC")),
        ("H", ("GTA", "GTG")),
        ("Q", ("GTT", "GTC")),
        ("N", ("TTA", "TTG")),
        ("K", ("TTT", "TTC")),
        ("D", ("CTA", "CTG")),
        ("E", ("CTT", "CTC")),
        ("C", ("ACA", "ACG")),
        ("stop", ("ACT",)),
        ("W", ("ACC",)),
        ("R", ("GCA", "GCG", "GCT", "GCC")),
        ("S", ("TCA", "TCG")),
        ("R", ("TCT", "TCC")),
        ("G", ("CCA", "CCG", "CCT", "CCC")),
    )

    table = {}
    for amino, codons in groups:
        for codon in codons:
            table[codon] = amino

    # Add the amino acid -> codon direction; later codons overwrite earlier
    # ones for the same amino acid.
    inverse = {}
    for codon in table:
        inverse[table[codon]] = codon
    table.update(inverse)

    return table
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
159
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
160
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
161 # Check if the sequence is a multiple of 3
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
162 # input: Nucleotide sequence in SeqRecords format or string format
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
163 # output: check result
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
def check_seq(seq):
    """Report whether the length of *seq* is a multiple of 3.

    *seq* may be any object supporting ``len()``. Returns True when
    ``len(seq)`` is divisible by 3 (including a length of 0), otherwise
    False.
    """
    return len(seq) % 3 == 0
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
170
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
171
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
172 # Translate Nucleotide to Polypeptide
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
173 # input: 1.seq: Nucleotide sequence in SeqRecords format
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
174 # 2.rev: True for -strand (use complementary mapping), False for +strand (use normal mapping)
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
175 # return: Polypeptide sequence in Seq format
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
def nucleotide_to_polypeptide(seq, rev):
    """Translate a nucleotide sequence into a polypeptide sequence.

    Args:
        seq: Nucleotide sequence. It must support ``len()`` and ``str()``;
            ``str(seq)`` is what gets split into codons. (The file's notes
            say "SeqRecords format" -- TODO confirm what callers pass.)
        rev: Truthy to translate with the complementary (- strand) table
            from ``rev_condon_table()``; falsy to use the normal (+ strand)
            table from ``condon_table()``.

    Returns:
        A ``Seq`` created with ``generic_protein``. It is empty when the
        length of ``seq`` is not a multiple of 3. Codons that the table maps
        to "stop" contribute the literal text "stop" to the result.

    Raises:
        KeyError: if a 3-letter chunk of ``str(seq)`` is not a key of the
            selected table.
    """
    # Pick the mapping that matches the strand.
    c_table = rev_condon_table() if rev else condon_table()

    residues = []
    # Only sequences whose length is a multiple of 3 are translated.
    if check_seq(seq):
        str_seq = str(seq)
        # The upper bound ``len - 3`` leaves the final codon untranslated.
        # NOTE(review): presumably this drops a trailing stop codon --
        # confirm against callers before changing it.
        # ``range`` is used instead of ``xrange`` so the function also runs
        # on Python 3.
        for i in range(0, len(str_seq) - 3, 3):
            residues.append(c_table[str_seq[i:i + 3]])

    # Join once at the end rather than growing a string inside the loop.
    return Seq("".join(residues), generic_protein)
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
193
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
194
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
195
c56b8a6bd02e Uploaded
nedias
parents:
diff changeset
196