0
|
1 # Copyright (c) 2006 John Gilman
|
|
2 #
|
|
3 # This software is distributed under the MIT Open Source License.
|
|
4 # <http://www.opensource.org/licenses/mit-license.html>
|
|
5 #
|
|
6 # Permission is hereby granted, free of charge, to any person obtaining a
|
|
7 # copy of this software and associated documentation files (the "Software"),
|
|
8 # to deal in the Software without restriction, including without limitation
|
|
9 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
10 # and/or sell copies of the Software, and to permit persons to whom the
|
|
11 # Software is furnished to do so, subject to the following conditions:
|
|
12 #
|
|
13 # The above copyright notice and this permission notice shall be included
|
|
14 # in all copies or substantial portions of the Software.
|
|
15 #
|
|
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22 # THE SOFTWARE.
|
|
23
|
|
24 """ Transformations of Seqs (alphabetic sequences).
|
|
25
|
|
26
|
|
27
|
|
28 Classes :
|
|
29 - Transform -- Simple transforms of alphabetic strings.
|
|
30 - GeneticCode -- The genetic mapping of dna to protein.
|
|
31
|
|
32 Functions :
|
|
33 - mask_low_complexity -- Implementation of Seg algorithm to remove low complexity
|
|
34 regions from protein sequences.
|
|
35
|
|
36
|
|
37 """
|
|
38
|
|
39
|
|
40 from corebio.data import dna_extended_letters, dna_ambiguity
|
|
41 from corebio.seq import Seq, protein_alphabet, nucleic_alphabet, dna_alphabet
|
|
42 from string import maketrans
|
|
43 from corebio.moremath import log2 , entropy
|
|
44
|
|
45 __all__ = [
|
|
46 'Transform',
|
|
47 'mask_low_complexity',
|
|
48 'GeneticCode'
|
|
49 ]
|
|
50
|
|
class Transform(object):
    """A character-for-character translation between alphabetic strings.

    (This class is not called 'Translation' to avoid confusion with the
    biological translation of rna to protein.)

    A Transform is built from two equal-length sequences: each character of
    ``source`` is replaced by the character at the same position in
    ``target``. Calling the transform on a sequence returns a new sequence of
    the same class as ``target``, carrying ``target``'s alphabet and the
    input's name and description.

    Example:
        trans = Transform(
            Seq("ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~", dna_alphabet),
            Seq("ACGTRYSWKMNNNNN-acgtUuryswkmbnnnnXx?.~", reduced_nucleic_alphabet)
            )
        s0 = Seq("AAAAAV", nucleic_alphabet)
        s1 = trans(s0)
        assert(s1.alphabet == reduced_nucleic_alphabet)
        assert(s1 == Seq("AAAAAN", reduced_nucleic_alphabet))

    Status : Beta
    """

    __slots__ = ["table", "source", "target"]

    def __init__(self, source, target):
        """Build the translation table that maps source[i] onto target[i]."""
        # Build the table first so that a bad source/target pair fails before
        # any attribute is stored.
        self.table = maketrans(source, target)
        self.source = source
        self.target = target

    def __call__(self, seq):
        """Translate sequence.

        Raises :
            ValueError -- if seq is not alphabetic under the source alphabet.
        """
        if self.source.alphabet.alphabetic(seq):
            translated = str.translate(seq, self.table)
            template = self.target
            # The result takes the class and alphabet of the target sequence,
            # but keeps the identity (name, description) of the input.
            return template.__class__(
                translated, template.alphabet, seq.name, seq.description)
        raise ValueError("Incompatable alphabets")
# End class Transform
|
|
85
|
|
# FIXME: Test, document, add to seq.
# Maps every symbol of the source string onto its base-pairing complement,
# including the IUPAC ambiguity codes (R<->Y, K<->M, B<->V, D<->H; S, W and N
# are their own complements). Case is preserved, 'U'/'u' complement to 'A'/'a',
# and the gap/unknown symbols "-Xx?.~" map to themselves.
dna_complement = Transform(
    source=Seq("ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~", dna_alphabet),
    target=Seq("TGCAYRSWMKVHDBN-tgcaAayrswmkvhdbnXx?.~", dna_alphabet),
)
|
|
91
|
|
92
|
|
93
|
|
def mask_low_complexity(seq, width=12, trigger=1.8, extension=2.0, mask='X'):
    """Mask low complexity regions in protein sequences.

    Uses the method of Seg [1] by Wootton & Federhen [2] to divide a sequence
    into regions of high and low complexity. The sequence is divided into
    overlapping windows of ``width`` symbols. A window is low complexity if
    its entropy is less than the trigger complexity, or if its entropy is
    less than the extension complexity and the window scanned just before it
    was low complexity. The windows are scanned twice, left to right and then
    right to left, so that a low-complexity region can extend in both
    directions. Every symbol inside a low-complexity window is replaced with
    the mask character (default 'X'), and the masked sequence is returned.

    The default parameters, width=12, trigger=1.8, extension=2.0, mask='X' are
    suitable for masking protein sequences before a database search. The
    standard default seg parameters are width=12, trigger=2.2, extension=2.5

    Arguments:
        Seq seq         -- An alphabetic sequence
        int width       -- Window width
        float trigger   -- Entropy in bits between 0 and 4.3.. ( =log_2(20) )
        float extension -- Entropy in bits between trigger and 4.3..
                           ( =log_2(20) )
        char mask       -- The mask character (default: 'X'). It is looked up
                           in the alphabet of seq.
    Returns :
        Seq -- A masked alphabetic sequence. If the sequence is shorter than
               the window width, seq itself is returned unchanged.
    Raises :
        ValueError -- On invalid arguments
    Refs:
        [1] seg man page:
            http://bioportal.weizmann.ac.il/education/materials/gcg/seg.html
        [2] Wootton & Federhen (Computers and Chemistry 17; 149-163, (1993))
    Authors:
        GEC 2005
    Future :
        - Option to lower case masked symbols.
        - Remove arbitrary restriction to protein.
    """
    max_entropy = log2(20)
    if trigger < 0 or trigger > max_entropy:
        raise ValueError("Invalid trigger complexity: %f"% trigger)
    if extension < 0 or extension > max_entropy or extension < trigger:
        raise ValueError("Invalid extension complexity: %f"% extension)
    if width < 0:
        raise ValueError("Invalid width: %d"% width)

    # Too short to hold even a single window: nothing to mask.
    if width > len(seq):
        return seq

    ords = seq.ords()
    mask_ord = seq.alphabet.ord(mask)

    nwindows = len(seq) - width + 1

    # Symbol counts of the current window, updated incrementally as the
    # window slides one position to the right.
    counts = [0] * len(seq.alphabet)
    for symbol in ords[0:width]:
        counts[symbol] += 1

    window_entropy = [0] * nwindows
    window_entropy[0] = entropy(counts, 2)
    for start in range(1, nwindows):
        counts[ords[start - 1]] -= 1           # symbol leaving on the left
        counts[ords[start + width - 1]] += 1   # symbol entering on the right
        window_entropy[start] = entropy(counts, 2)

    # Forward pass, then the same pass backwards, so that a triggered region
    # extends into neighbouring windows on either side.
    for scan_order in (range(0, nwindows), range(nwindows - 1, -1, -1)):
        extending = False
        for start in scan_order:
            h = window_entropy[start]
            if h < trigger or (extending and h < extension):
                for pos in range(start, start + width):
                    ords[pos] = mask_ord
                extending = True
            else:
                extending = False

    return seq.alphabet.chrs(ords)
# end mask_low_complexity()
|
|
181
|
|
182
|
|
class GeneticCode(object):
    """An encoding of amino acids by DNA triplets.

    Example (the output of str() for the standard code; '(s)' marks a start
    codon) :

    Genetic Code [1]: Standard
           T         C         A         G
       +---------+---------+---------+---------+
     T | TTT F   | TCT S   | TAT Y   | TGT C   | T
     T | TTC F   | TCC S   | TAC Y   | TGC C   | C
     T | TTA L   | TCA S   | TAA Stop| TGA Stop| A
     T | TTG L(s)| TCG S   | TAG Stop| TGG W   | G
       +---------+---------+---------+---------+
     C | CTT L   | CCT P   | CAT H   | CGT R   | T
     C | CTC L   | CCC P   | CAC H   | CGC R   | C
     C | CTA L   | CCA P   | CAA Q   | CGA R   | A
     C | CTG L(s)| CCG P   | CAG Q   | CGG R   | G
       +---------+---------+---------+---------+
     A | ATT I   | ACT T   | AAT N   | AGT S   | T
     A | ATC I   | ACC T   | AAC N   | AGC S   | C
     A | ATA I   | ACA T   | AAA K   | AGA R   | A
     A | ATG M(s)| ACG T   | AAG K   | AGG R   | G
       +---------+---------+---------+---------+
     G | GTT V   | GCT A   | GAT D   | GGT G   | T
     G | GTC V   | GCC A   | GAC D   | GGC G   | C
     G | GTA V   | GCA A   | GAA E   | GGA G   | A
     G | GTG V   | GCG A   | GAG E   | GGG G   | G
       +---------+---------+---------+---------+

    See Also :
    -- http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
    -- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html#7.5
    Authors:
        JXG, GEC
    """
    # TODO: Explain use of '?' in translated sequence.
    # TODO: Does translate fail with an appropriate exception when fed gaps?
    # TODO: Can back_translate handle gaps?

    # Amino acid ambiguity codes, keyed by the sorted pair of amino acids
    # that an ambiguous codon may encode.
    _ambiguous_pairs = {('D', 'N'): 'B', ('E', 'Q'): 'Z', ('I', 'L'): 'J'}

    def __init__(self, ident, description,
                 amino_acid, start, base1, base2, base3):
        """Create a new GeneticCode.

        The five string arguments are parallel: position i of each string
        describes the same codon.

        Args:
        -- ident - Standard identifier (Or zero). An integer
        -- description
        -- amino acid - A sequence of amino acids and stop codons. e.g.
            "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
        -- start - A sequence indicating start codons, e.g.,
            "---M---------------M---------------M----------------------------"
        -- base1 - The first base of each codon. e.g.,
            "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG"
        -- base2 - The second base of each codon. e.g.,
            "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG"
        -- base3 - The last base of each codon. e.g.,
            "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"
        """
        self.ident = ident
        self.description = description

        self.amino_acid = amino_acid
        self.start = start
        self.base1 = base1
        self.base2 = base2
        self.base3 = base3

        # '*' in amino_acid marks a stop codon, 'M' in start a start codon.
        stop_codons = []
        start_codons = []
        for i, a in enumerate(amino_acid):
            codon = base1[i] + base2[i] + base3[i]
            if a == '*':
                stop_codons.append(codon)
            if start[i] == 'M':
                start_codons.append(codon)

        self.stop_codons = tuple(stop_codons)
        self.start_codons = tuple(start_codons)

        # Building the full translation table is expensive,
        # so we avoid doing so until necessary.
        self._table = None
        self._back_table = None

    @staticmethod
    def std_list():
        "Return a list of standard genetic codes."
        return _codon_tables

    @staticmethod
    def std():
        "The standard 'universal' genetic code."
        return _codon_tables[0]

    @staticmethod
    def by_name(name):
        """Find a genetic code in the code list by name or identifier.

        Raises :
            ValueError -- if no standard code has that ident or description.
        """
        for t in _codon_tables:
            if t.ident == name or t.description == name:
                return t
        raise ValueError("No such translation table: %s" % str(name))

    def _get_table(self):
        if self._table is None:
            self._create_table()
        return self._table
    table = property(_get_table, None, "A map between codons and amino acids")

    def _get_back_table(self):
        if self._back_table is None:
            # The reverse map only needs the 64 unambiguous codons, so do not
            # pay for the full ambiguous-codon table here.
            self._back_table = self._make_back_table(self._codon_map())
        return self._back_table
    back_table = property(_get_back_table, None,
                          "A map between amino acids and codons")

    def _codon_map(self):
        """Return a dict mapping each unambiguous codon to its amino acid."""
        base1 = self.base1
        base2 = self.base2
        base3 = self.base3
        codon_map = {}
        for i, a in enumerate(self.amino_acid):
            codon_map[base1[i] + base2[i] + base3[i]] = a
        return codon_map

    def _make_back_table(self, codon_map):
        """Return a dict mapping each amino acid to a single codon."""
        back_table = {}
        # Walk the codons in descending order so that the last assignment,
        # i.e. the alphabetically first codon, wins for each amino acid.
        # sorted() rather than list.sort(): dict.items() is not a list on
        # Python 3.
        for codon, amino in sorted(codon_map.items(), reverse=True):
            back_table[amino] = codon
        # Unknown and ambiguous amino acids back-translate to 'any codon'.
        for amino in ('X', 'B', 'Z', 'J'):
            back_table[amino] = 'NNN'
        return back_table

    def _make_table(self, codon_map):
        """Return a dict mapping every codon over the extended DNA letters
        (plus 'U') to an amino acid or ambiguity code."""
        letters = dna_extended_letters + 'U'  # include RNA in table
        pairs = self._ambiguous_pairs
        table = {}

        # For each ambiguous codon, construct all compatible unambiguous
        # codons. Translate and collect a set of all possible translated
        # amino acids. If more than one translation look for possible amino
        # acid ambiguity codes.
        for c1 in letters:
            for c2 in letters:
                for c3 in letters:
                    codon = c1 + c2 + c3
                    dna = codon.replace('U', 'T')  # Convert rna codon to dna
                    translated = set()
                    for n1 in dna_ambiguity[dna[0]]:
                        for n2 in dna_ambiguity[dna[1]]:
                            for n3 in dna_ambiguity[dna[2]]:
                                translated.add(codon_map[n1 + n2 + n3])
                    possible = tuple(sorted(translated))
                    if len(possible) == 1:
                        trans = possible[0]
                    elif possible in pairs:
                        trans = pairs[possible]
                    elif '*' in possible:
                        # May or may not be a stop codon.
                        trans = '?'
                    else:
                        trans = 'X'
                    table[codon] = trans
        return table

    def _create_table(self):
        """Build and cache both the back table and the full codon table."""
        codon_map = self._codon_map()
        self._back_table = self._make_back_table(codon_map)
        self._table = self._make_table(codon_map)
    # End create tables

    def translate(self, seq, frame=0):
        """Translate a DNA sequence to a polypeptide using full
        IUPAC ambiguities in DNA/RNA and amino acid codes.

        Translation starts at offset ``frame`` and proceeds codon by codon;
        an incomplete trailing codon is ignored. Codons are upper-cased
        before lookup. A codon that is not in the table raises KeyError.

        Returns :
        -- Seq - A polypeptide sequence
        """
        # TODO: Optimize.
        # TODO: Insanity check alphabet.
        seq = str(seq)
        table = self.table
        trans = [table[seq[i:i + 3].upper()]
                 for i in range(frame, len(seq) - 2, 3)]
        return Seq(''.join(trans), protein_alphabet)

    def back_translate(self, seq):
        """Convert protein back into coding DNA.

        Each amino acid is replaced by the single codon that back_table
        holds for it. A symbol that is not in back_table raises KeyError.

        Args:
        -- seq - A polypeptide sequence.

        Returns :
        -- Seq - A dna sequence
        """
        # TODO: Optimize
        # TODO: Insanity check alphabet.
        table = self.back_table
        trans = [table[a] for a in str(seq)]
        return Seq(''.join(trans), dna_alphabet)

    # TODO: translate_orf(self, seq, start) ?
    # TODO: translate_to_stop(self, seq, frame) ?
    # TODO: translate_all_frames(self,seq) -> 6 translations.

    def __repr__(self):
        pieces = [
            'GeneticCode( %d, "' % self.ident,
            self.description,
            '", \n',
            ' amino_acid = "', self.amino_acid, '",\n',
            ' start = "', self.start, '",\n',
            ' base1 = "', self.base1, '",\n',
            ' base2 = "', self.base2, '",\n',
            ' base3 = "', self.base3, '" )',
        ]
        return ''.join(pieces)

    def __str__(self):
        """Returns a text representation of this genetic code."""
        # Inspired by http://bugzilla.open-bio.org/show_bug.cgi?id=1963
        letters = "TCAG"  # Conventional ordering for codon tables.

        # Every cell is 9 characters wide, matching the 9 dashes of the
        # horizontal rule, so that '|' and '+' line up in every row.
        rule = "\n   +" + "+".join(["---------" for c2 in letters]) + "+ "

        if self.ident:
            pieces = ['Genetic Code [%d]: ' % self.ident]
        else:
            pieces = ['Genetic Code: ']
        pieces.append(self.description or '')

        pieces.append("\n    ")
        pieces.append(" ".join(["   %s     " % c2 for c2 in letters]))
        pieces.append(rule)

        table = self.table

        for c1 in letters:
            for c3 in letters:
                pieces.append('\n ' + c1 + " |")
                for c2 in letters:
                    codon = c1 + c2 + c3
                    if codon in self.stop_codons:
                        label = "Stop"
                    elif codon in self.start_codons:
                        label = "%s(s)" % table.get(codon, '?')
                    else:
                        label = "%s   " % table.get(codon, '?')
                    pieces.append(" %s %s|" % (codon, label))
                pieces.append(" " + c3)
            pieces.append(rule)
        pieces.append('\n')
        return ''.join(pieces)
# end class GeneticCode
|
|
472
|
|
473
|
|
# The standard genetic codes, in NCBI order. GeneticCode.std() returns the
# first entry and GeneticCode.by_name() searches the tuple by ident or
# description.
#
# Data from http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html#7.5
# Aug. 2006
# Genetic Code Tables
#
# Authority     International Sequence Databank Collaboration
# Contact       NCBI
# Scope         /transl_table qualifier
# URL           http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
_codon_tables = (
    GeneticCode(
        ident=1,
        description="Standard",
        amino_acid="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="---M---------------M---------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=2,
        description="Vertebrate Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
        start="--------------------------------MMMM---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=3,
        description="Yeast Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="----------------------------------MM----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=4,
        description="Mold, Protozoan, Coelenterate Mitochondrial & Mycoplasma/Spiroplasma",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="--MM---------------M------------MMMM---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=5,
        description="Invertebrate Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
        start="---M----------------------------MMMM---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=6,
        description="Ciliate, Dasycladacean and Hexamita Nuclear",
        amino_acid="FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=9,
        description="Echinoderm and Flatworm Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        start="-----------------------------------M---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=10,
        description="Euplotid Nuclear",
        amino_acid="FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=11,
        description="Bacterial and Plant Plastid",
        amino_acid="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="---M---------------M------------MMMM---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=12,
        description="Alternative Yeast Nuclear",
        amino_acid="FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-------------------M---------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=13,
        description="Ascidian Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=14,
        description="Alternative Flatworm Mitochondrial",
        amino_acid="FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=15,
        description="Blepharisma Nuclear",
        amino_acid="FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=16,
        description="Chlorophycean Mitochondrial",
        amino_acid="FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=21,
        description="Trematode Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        start="-----------------------------------M---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=22,
        description="Scenedesmus obliquus Mitochondrial",
        amino_acid="FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
    GeneticCode(
        ident=23,
        description="Thraustochytrium Mitochondrial",
        amino_acid="FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="--------------------------------M--M---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
)
|
|
602
|
|
603
|
|
604
|
|
605 |