| 0 | 1 | 
|  | 2 #  Copyright (c) 2003 Gavin E. Crooks | 
|  | 3 #  Copyright (c) 2005 David D. Ding <dding@berkeley.edu> | 
|  | 4 # | 
|  | 5 #  This software is distributed under the MIT Open Source License. | 
|  | 6 #  <http://www.opensource.org/licenses/mit-license.html> | 
|  | 7 # | 
|  | 8 #  Permission is hereby granted, free of charge, to any person obtaining a | 
|  | 9 #  copy of this software and associated documentation files (the "Software"), | 
|  | 10 #  to deal in the Software without restriction, including without limitation | 
|  | 11 #  the rights to use, copy, modify, merge, publish, distribute, sublicense, | 
|  | 12 #  and/or sell copies of the Software, and to permit persons to whom the | 
|  | 13 #  Software is furnished to do so, subject to the following conditions: | 
|  | 14 # | 
|  | 15 #  The above copyright notice and this permission notice shall be included | 
|  | 16 #  in all copies or substantial portions of the Software. | 
|  | 17 # | 
|  | 18 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | 19 #  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | 20 #  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
|  | 21 #  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|  | 22 #  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
|  | 23 #  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 
|  | 24 #  THE SOFTWARE. | 
|  | 25 | 
|  | 26 """STRIDE: Protein secondary structure assignment from atomic coordinates. | 
|  | 27 | 
|  | 28 This module provides an interface to STRIDE, a c program used to recognize | 
|  | 29 secondary structural elements in proteins from their atomic coordinates. | 
|  | 30 | 
|  | 31 """ | 
|  | 32 | 
|  | 33 from corebio.seq import Seq, protein_alphabet, Alphabet | 
|  | 34 from corebio.resource.astral import to_one_letter_code | 
|  | 35 | 
# Alphabet of the one-character secondary structure codes used by this module.
stride_alphabet = Alphabet("HGIEBC12345678@&T")

# Conversion table between alphabet letters and descriptive names.
# Note that this is a flat tuple, not a dict: entries alternate
# (letter, name, letter, name, ...), so the name for a letter is the element
# that immediately follows it.
# NOTE(review): "b" appears below but is not among the letters passed to
# stride_alphabet above -- confirm whether that omission is intentional.
stride_alphabet_names  = (
    "H", "AlphaHelix",
    "G", "310Helix",
    "I", "PiHelix",
    "E", "Strand",
    "b", "Bridge",
    "B", "Bridge",
    "C", "Coil",
    "1", "TurnI",
    "2", "TurnI'",
    "3", "TurnII",
    "4", "TurnII'",
    "5", "TurnVIa",
    "6", "TurnVIb",
    "7", "TurnVIII",
    "8", "TurnIV",
    "@", "GammaClassic",
    "&", "GammaInv",
    "T", "Turn"
    )
|  | 60 | 
|  | 61 | 
class Stride(object):
    """A parsed STRIDE output file: a PDB id plus one Res per 'ASG' line."""

    def __init__(self, stride_file):
        """ Read and parse a STRIDE output file.

        args:
            - stride_file   : An open file handle
        attributes :
            - pdbid     : The PDB id (characters [75:79] of the first line).
            - res       : A list of Res objects, one per PDB residue
        """
        # The first line is consumed here; only the remaining lines are
        # scanned for residue records.
        header = stride_file.readline()
        self.pdbid = header[75:79]

        # Every line whose first three characters are "ASG" becomes a Res.
        self.res = [Res(line) for line in stride_file if line[0:3] == "ASG"]

        # Lookup table for get_res(); built on first use.
        self._res_dict = None

    def total_area(self):
        """ Return the solvent accessible area summed over all residues. """
        total = 0
        for residue in self.res:
            total = total + residue.solvent_acc_area
        return total

    def primary(self):
        """ Return the protein primary sequence as a Seq object."""
        letters = ''.join(residue.primary_seq for residue in self.res)
        return Seq(letters, protein_alphabet)

    def secondary(self):
        """Return the secondary structure of the protein as a Seq object"""
        codes = ''.join(residue.secondary_str for residue in self.res)
        return Seq(codes, stride_alphabet)

    def get_res(self, chainid, resid):
        """ Return the residue stored under (chainid, resid).

        Raises KeyError if no residue has that chain id / residue id pair.
        """
        lookup = self._res_dict
        if not lookup:
            # Build the (chainid, resid) -> Res table and keep it for
            # subsequent calls. Later residues win on duplicate keys.
            lookup = dict(
                ((residue.chainid, residue.resid), residue)
                for residue in self.res
            )
            self._res_dict = lookup

        return lookup[(chainid, resid)]
|  | 108 | 
|  | 109 | 
|  | 110 | 
class Res(object):
    """ Structural information of a single residue. An ASG line from a stride
        output file.

        Attributes :
         - chainid          : Chain id; a "-" in the file is stored as " "
         - resid            : Residue id, as a whitespace-stripped string
         - primary_seq      : One letter residue code
         - secondary_str    : One character secondary structure code
         - solvent_acc_area : Solvent accessible area (float)
         - phi              : Phi value (float)
         - psi              : Psi value (float)
    """

    def __init__(self, res_line):
        """ Eats a single 'ASG' line from a stride file and splits it up
        into the attributes of this Res object.

        args:
            - res_line  : One 'ASG' line of STRIDE output
        raises:
            - ValueError : The line is shorter than 70 characters, or the
                           phi, psi, or area field is not a valid number.
            - KeyError   : The three letter residue code is not present in
                           to_one_letter_code.
        """
        if len(res_line) < 70:
            raise ValueError("Line not long enough")

        # Plain fixed-column slices; slicing never raises, so these need no
        # error handling.
        chainid = res_line[9:10]
        # STRIDE converts blank chain ids into dashes. Undo.
        if chainid == "-":
            chainid = " "
        self.chainid = chainid

        # In rare cases STRIDE columns can be misaligned. Grab extra
        # white space to compensate.
        self.resid = res_line[10:15].strip()
        self.secondary_str = res_line[24:25]

        three_letter = res_line[5:8].capitalize()
        try:
            self.primary_seq = to_one_letter_code[three_letter]
        except KeyError:
            raise KeyError("Can't find three letter code in dictionary")

        # float() signals a malformed or empty field with ValueError (not
        # FloatingPointError), so that is the exception translated here.
        try:
            self.solvent_acc_area = float(res_line[64:71])
            self.phi = float(res_line[42:49].strip())
            self.psi = float(res_line[52:59].strip())
        except ValueError:
            raise ValueError("Can't float phi, psi, or area")
|  | 150 | 
|  | 151 | 
|  | 152 | 
|  | 153 | 
|  | 154 | 
|  | 155 | 
|  | 156 | 
|  | 157 | 
|  | 158 | 
|  | 159 | 
|  | 160 | 
|  | 161 | 
|  | 162 | 
|  | 163 | 
|  | 164 |