Mercurial > repos > davidmurphy > codonlogo
comparison corebio/resource/stride.py @ 0:c55bdc2fb9fa
Uploaded
| author | davidmurphy |
|---|---|
| date | Thu, 27 Oct 2011 12:09:09 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c55bdc2fb9fa |
|---|---|
| 1 | |
| 2 # Copyright (c) 2003 Gavin E. Crooks | |
| 3 # Copyright (c) 2005 David D. Ding <dding@berkeley.edu> | |
| 4 # | |
| 5 # This software is distributed under the MIT Open Source License. | |
| 6 # <http://www.opensource.org/licenses/mit-license.html> | |
| 7 # | |
| 8 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 9 # copy of this software and associated documentation files (the "Software"), | |
| 10 # to deal in the Software without restriction, including without limitation | |
| 11 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
| 12 # and/or sell copies of the Software, and to permit persons to whom the | |
| 13 # Software is furnished to do so, subject to the following conditions: | |
| 14 # | |
| 15 # The above copyright notice and this permission notice shall be included | |
| 16 # in all copies or substantial portions of the Software. | |
| 17 # | |
| 18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| 24 # THE SOFTWARE. | |
| 25 | |
| 26 """STRIDE: Protein secondary structure assignment from atomic coordinates. | |
| 27 | |
| 28 This module provides an interface to STRIDE, a C program used to recognize | |
| 29 secondary structural elements in proteins from their atomic coordinates. | |
| 30 | |
| 31 """ | |
| 32 | |
| 33 from corebio.seq import Seq, protein_alphabet, Alphabet | |
| 34 from corebio.resource.astral import to_one_letter_code | |
| 35 | |
# Alphabet of the one-character secondary structure codes used in STRIDE
# output.
stride_alphabet = Alphabet("HGIEBC12345678@&T")

# Conversion table between one-character codes and descriptive names.
# This is a flat tuple (not a dict) of alternating entries:
# code, name, code, name, ...
# Both "b" and "B" are listed with the name "Bridge".
# NOTE(review): "b" appears here but not in the string passed to Alphabet
# above -- confirm whether stride_alphabet accepts a lower-case bridge code.
stride_alphabet_names = (
    "H", "AlphaHelix",
    "G", "310Helix",
    "I", "PiHelix",
    "E", "Strand",
    "b", "Bridge",
    "B", "Bridge",
    "C", "Coil",
    "1", "TurnI",
    "2", "TurnI'",
    "3", "TurnII",
    "4", "TurnII'",
    "5", "TurnVIa",
    "6", "TurnVIb",
    "7", "TurnVIII",
    "8", "TurnIV",
    "@", "GammaClassic",
    "&", "GammaInv",
    "T", "Turn"
)
| 60 | |
| 61 | |
class Stride(object):
    """Parsed contents of a STRIDE output file.

    attributes :
        - pdbid : The PDB id, taken from columns 75-78 of the first line.
        - res   : A list of Res objects, one per 'ASG' line, in file order.
    """

    def __init__(self, stride_file):
        """ Read and parse a STRIDE output file.

        args:
            - stride_file : An open file handle. The first line is consumed
                            for the PDB id; the remaining lines are iterated.
        attributes :
            - pdbid : The PDB id.
            - res   : A list of Res objects, one per PDB residue
        """
        # The PDB id sits in a fixed column range of the first line.
        self.pdbid = stride_file.readline()[75:79]

        # Only 'ASG' records describe residues; every other line is skipped.
        self.res = [Res(line) for line in stride_file if line[0:3] == "ASG"]

        # (chainid, resid) -> Res index, built lazily by get_res().
        self._res_dict = None

    def total_area(self):
        """ Return the solvent accessible area summed over all residues. """
        return sum(r.solvent_acc_area for r in self.res)

    def primary(self):
        """ Return the protein primary sequence as a Seq object. """
        return Seq(''.join([r.primary_seq for r in self.res]),
                   protein_alphabet)

    def secondary(self):
        """ Return the secondary structure of the protein as a Seq object. """
        return Seq(''.join([r.secondary_str for r in self.res]),
                   stride_alphabet)

    def get_res(self, chainid, resid):
        """ Return the residue with the given chain id and residue id.

        The lookup index is built on the first call and reused afterwards.
        If several residues share the same (chainid, resid), the last one in
        the file wins.

        Raises KeyError if no residue matches.
        """
        # Compare against None rather than truthiness: an empty index (a
        # structure without residues) is a valid, already-built cache and
        # must not be rebuilt on every call.
        if self._res_dict is None:
            self._res_dict = {(r.chainid, r.resid): r for r in self.res}

        return self._res_dict[(chainid, resid)]
| 108 | |
| 109 | |
| 110 | |
class Res(object):
    """ Structural information of a single residue. An ASG line from a stride
    output file.

    Attributes :
        - chainid          : res_line[9:10]; a "-" is mapped back to " "
        - resid            : res_line[10:15], stripped of white space
        - primary_seq      : one letter code looked up from res_line[5:8]
        - secondary_str    : res_line[24:25]
        - solvent_acc_area : float(res_line[64:71])
        - phi              : float(res_line[42:49])
        - psi              : float(res_line[52:59])
    """

    def __init__(self, res_line):
        """ Eats a single 'ASG' line from a stride file and splits it up
        into the attributes of this Res object.

        Raises:
            - ValueError  : the line is shorter than 70 characters, or the
                            phi, psi or area field is not a number.
            - KeyError    : the three letter residue code is not present in
                            to_one_letter_code.
            - LookupError : any other lookup failure raised by
                            to_one_letter_code.
        """
        if len(res_line) < 70:
            raise ValueError("Line not long enough")

        chainid = res_line[9:10]
        # STRIDE converts blank chain ids into dashes. Undo.
        self.chainid = " " if chainid == "-" else chainid

        # In rare cases STRIDE columns can be misaligned. Grab extra
        # white space to compensate.
        self.resid = res_line[10:15].strip()

        try:
            self.primary_seq = to_one_letter_code[res_line[5:8].capitalize()]
        except KeyError:
            raise KeyError("Can't find three letter code in dictionary")
        except LookupError:
            raise LookupError("One of the values is out of index of res_line")

        self.secondary_str = res_line[24:25]

        # float() signals an unparsable string with ValueError (it never
        # raises FloatingPointError), so that is the exception to translate.
        try:
            self.solvent_acc_area = float(res_line[64:71])
            self.phi = float(res_line[42:49].strip())
            self.psi = float(res_line[52:59].strip())
        except ValueError:
            raise ValueError("Can't float phi, psi, or area")
| 150 | |
| 151 | |
| 152 | |
| 153 | |
| 154 | |
| 155 | |
| 156 | |
| 157 | |
| 158 | |
| 159 | |
| 160 | |
| 161 | |
| 162 | |
| 163 | |
| 164 |
