comparison local_ctd.py @ 27:54af89adf87b draft

Uploaded
author jose_duarte
date Sun, 12 Dec 2021 10:49:56 +0000
parents
children
comparison
equal deleted inserted replaced
26:52e50de4c005 27:54af89adf87b
1 # -*- coding: utf-8 -*-
2 """
3 Compute the composition, transition and distribution (CTD) descriptors based on
4 different properties of amino acids (AADs).
5 Amino acids that share the same property class are marked with the same number.
6 This yields 147 descriptors (7 properties x 21 values) for a given protein sequence.
7 References
8 ----------
9 .. [1] Inna Dubchak, Ilya Muchnik, Stephen R. Holbrook and Sung-Hou Kim.
10 Prediction of protein folding class using global description of amino
11 acid sequence. Proc. Natl. Acad. Sci. USA, 1995, 92, 8700-8704.
12 .. [2] Inna Dubchak, Ilya Muchnik, Christopher Mayor, Igor Dralyuk and Sung-Hou
13 Kim. Recognition of a Protein Fold in the Context of the SCOP
14 classification. Proteins: Structure, Function and
15 Genetics, 1999, 35, 401-407.
16 Authors: Dongsheng Cao and Yizeng Liang.
17 Date: 2010.11.22
18 Email: oriental-cds@163.com
19 """
20
21 # Core Library
22 import copy
23 import math
24 from typing import Any, Dict
25
# Each table below maps a class label ("1", "2" or "3") to the one-letter
# amino-acid codes that belong to that class for the named property.

_Hydrophobicity = {"1": "RKEDQN", "2": "GASTPHY", "3": "CLVIMFW"}
# "1" stands for polar, "2" for neutral, "3" for hydrophobic

_NormalizedVDWV = {"1": "GASTPDC", "2": "NVEQIL", "3": "MHKFRYW"}
# "1" stands for (0-2.78), "2" for (2.95-4.0), "3" for (4.03-8.08)

_Polarity = {"1": "LIFWCMVY", "2": "PATGS", "3": "HQRKNED"}
# "1" stands for (4.9-6.2), "2" for (8.0-9.2), "3" for (10.4-13.0)

_Charge = {"1": "KR", "2": "ANCQGHILMFPSTWYV", "3": "DE"}
# "1" stands for positive, "2" for neutral, "3" for negative

_SecondaryStr = {"1": "EALMQKRH", "2": "VIYCWFT", "3": "GNPSD"}
# "1" stands for helix, "2" for strand, "3" for coil

_SolventAccessibility = {"1": "ALFCGIVW", "2": "RKQEND", "3": "MPSTHY"}
# "1" stands for buried, "2" for exposed, "3" for intermediate

_Polarizability = {"1": "GASDT", "2": "CPNVEQIL", "3": "KMHFRYW"}
# "1" stands for (0-0.108), "2" for (0.128-0.186), "3" for (0.219-0.409)


# Further amino-acid properties can be added here to compute additional
# descriptors of a protein sequence.

# All property tables, in declaration order.
_AATProperty = (
    _Hydrophobicity,
    _NormalizedVDWV,
    _Polarity,
    _Charge,
    _SecondaryStr,
    _SolventAccessibility,
    _Polarizability,
)

# Names of the tables above, listed in the same order as ``_AATProperty``.
_AATPropertyName = (
    "_Hydrophobicity",
    "_NormalizedVDWV",
    "_Polarity",
    "_Charge",
    "_SecondaryStr",
    "_SolventAccessibility",
    "_Polarizability",
)
70
71
def StringtoNum(ProteinSequence: str, AAProperty: Dict[Any, Any]) -> str:
    """Replace every residue letter by the label of its property class.

    Parameters
    ----------
    ProteinSequence:
        Protein sequence written with one-letter residue codes.
    AAProperty:
        Mapping ``class label -> residue codes`` (for example
        ``{"1": "KR", "2": "ANC...", "3": "DE"}``). Each value is iterated
        character by character.

    Returns
    -------
    str
        A string of the same length as ``ProteinSequence`` in which every
        residue listed in ``AAProperty`` is replaced by its class label.
        Characters that appear in no class are left unchanged.

    Notes
    -----
    The substitution is done in a single ``str.translate`` pass. Strings are
    immutable, so no defensive copy of the input is needed. Unlike a chain of
    ``str.replace`` calls, a label that is itself listed as a residue of a
    later class is not substituted a second time.
    """
    residue_to_label: Dict[str, str] = {}
    for label, residues in AAProperty.items():
        for residue in residues:
            # The first class that lists a residue wins, which is what the
            # sequential replacement produced as well.
            residue_to_label.setdefault(residue, str(label))

    return ProteinSequence.translate(str.maketrans(residue_to_label))
80
81
def CalculateComposition(
    ProteinSequence: str, AAProperty: Dict[Any, Any], AAPName: str
) -> Dict[Any, Any]:
    """Compute the composition descriptors of a sequence for one property.

    Parameters
    ----------
    ProteinSequence:
        Protein sequence written with one-letter residue codes.
    AAProperty:
        Mapping ``class label -> residue codes`` passed on to ``StringtoNum``.
    AAPName:
        Prefix used to build the result keys.

    Returns
    -------
    dict
        Three entries ``AAPName + "C1"``, ``"C2"`` and ``"C3"``: the fraction
        of sequence positions that fall in class 1, 2 and 3, rounded to
        three decimals. For an empty sequence all three values are ``0.0``
        instead of raising ``ZeroDivisionError``, matching how
        ``CalculateDistribution`` reports zeros for missing classes.
    """
    TProteinSequence = StringtoNum(ProteinSequence, AAProperty)
    num = len(TProteinSequence)

    result: Dict[Any, Any] = {}
    for label in ("1", "2", "3"):
        # An empty sequence has no residue in any class; avoid dividing by 0.
        fraction = TProteinSequence.count(label) / num if num else 0.0
        result[AAPName + "C" + label] = round(fraction, 3)
    return result
91
92
def CalculateTransition(
    ProteinSequence: str, AAProperty: Dict[Any, Any], AAPName: str
) -> Dict[Any, Any]:
    """Compute the transition descriptors of a sequence for one property.

    Parameters
    ----------
    ProteinSequence:
        Protein sequence written with one-letter residue codes.
    AAProperty:
        Mapping ``class label -> residue codes`` passed on to ``StringtoNum``.
    AAPName:
        Prefix used to build the result keys.

    Returns
    -------
    dict
        Three entries ``AAPName + "T12"``, ``"T13"`` and ``"T23"``. Each is
        the number of adjacent positions whose classes are the given pair
        (in either order) divided by the number of adjacent pairs
        (``len - 1``), rounded to three decimals. Sequences shorter than two
        residues have no adjacent pair, so all values are ``0.0`` instead of
        raising ``ZeroDivisionError``.
    """
    CTD = StringtoNum(ProteinSequence, AAProperty)
    pair_count = len(CTD) - 1

    Result: Dict[Any, Any] = {}
    for first, second in (("1", "2"), ("1", "3"), ("2", "3")):
        if pair_count > 0:
            changes = CTD.count(first + second) + CTD.count(second + first)
            frequency = round(float(changes) / pair_count, 3)
        else:
            # Fewer than two residues: there is nothing to transition between.
            frequency = 0.0
        Result[AAPName + "T" + first + second] = frequency
    return Result
110
111
def CalculateDistribution(
    ProteinSequence: str, AAProperty: Dict[Any, Any], AAPName: str
) -> Dict[Any, Any]:
    """Compute the distribution descriptors of a sequence for one property.

    For each class label ("1", "2", "3") the 1-based positions of the
    residues in that class are collected. Five values are reported per
    class: the position of the first residue, of the residues at the 25%,
    50% and 75% marks of the class, and of the last residue. Each position
    is expressed as a percentage of the sequence length and rounded to three
    decimals.

    Parameters
    ----------
    ProteinSequence:
        Protein sequence written with one-letter residue codes.
    AAProperty:
        Mapping ``class label -> residue codes`` passed on to ``StringtoNum``.
    AAPName:
        Prefix used to build the result keys.

    Returns
    -------
    dict
        Fifteen entries keyed ``AAPName + "D" + label + mark`` with ``mark``
        in ``"001"``, ``"025"``, ``"050"``, ``"075"``, ``"100"``. A class
        that does not occur in the sequence gets ``0`` for all five marks.

    Notes
    -----
    The quantile index ``floor(count * q) - 1`` is clamped to ``0``. Without
    the clamp a class with fewer than four residues yields index ``-1`` for
    the 25% mark, which silently selects the *last* occurrence.
    """
    TProteinSequence = StringtoNum(ProteinSequence, AAProperty)
    Num = len(TProteinSequence)
    quantiles = (("025", 0.25), ("050", 0.5), ("075", 0.75))

    Result: Dict[str, float] = {}
    for i in ("1", "2", "3"):
        prefix = AAPName + "D" + i
        # 1-based positions of every residue that belongs to class ``i``.
        cds = [
            position
            for position, label in enumerate(TProteinSequence, start=1)
            if label == i
        ]

        if not cds:
            for mark in ("001", "025", "050", "075", "100"):
                Result[prefix + mark] = 0
            continue

        num = len(cds)
        Result[prefix + "001"] = round(float(cds[0]) / Num * 100, 3)
        for mark, fraction in quantiles:
            # Clamp so that small classes fall back to the first occurrence
            # instead of wrapping around to cds[-1].
            index = max(int(math.floor(num * fraction)) - 1, 0)
            Result[prefix + mark] = round(float(cds[index]) / Num * 100, 3)
        Result[prefix + "100"] = round(float(cds[-1]) / Num * 100, 3)

    return Result
148
149
def CalculateCompositionHydrophobicity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors based on hydrophobicity classes.

    Delegates to ``CalculateComposition`` with the ``_Hydrophobicity`` table;
    the result keys are prefixed with ``"_Hydrophobicity"``.
    """
    return CalculateComposition(ProteinSequence, _Hydrophobicity, "_Hydrophobicity")
152
153
def CalculateCompositionNormalizedVDWV(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors based on the ``_NormalizedVDWV`` classes.

    Delegates to ``CalculateComposition`` with the ``_NormalizedVDWV`` table;
    the result keys are prefixed with ``"_NormalizedVDWV"``.
    """
    return CalculateComposition(ProteinSequence, _NormalizedVDWV, "_NormalizedVDWV")
156
157
def CalculateCompositionPolarity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors based on polarity classes.

    Delegates to ``CalculateComposition`` with the ``_Polarity`` table; the
    result keys are prefixed with ``"_Polarity"``.
    """
    return CalculateComposition(ProteinSequence, _Polarity, "_Polarity")
160
161
def CalculateCompositionCharge(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors based on charge classes.

    Thin wrapper around ``CalculateComposition`` using the ``_Charge`` table
    and the key prefix ``"_Charge"``.
    """
    descriptors = CalculateComposition(
        ProteinSequence=ProteinSequence,
        AAProperty=_Charge,
        AAPName="_Charge",
    )
    return descriptors
164
165
def CalculateCompositionSecondaryStr(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors based on secondary-structure classes.

    Thin wrapper around ``CalculateComposition`` using the ``_SecondaryStr``
    table and the key prefix ``"_SecondaryStr"``.
    """
    descriptors = CalculateComposition(
        ProteinSequence=ProteinSequence,
        AAProperty=_SecondaryStr,
        AAPName="_SecondaryStr",
    )
    return descriptors
168
169
def CalculateCompositionSolventAccessibility(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors based on solvent-accessibility classes.

    Thin wrapper around ``CalculateComposition`` using the
    ``_SolventAccessibility`` table and the key prefix
    ``"_SolventAccessibility"``.
    """
    property_name = "_SolventAccessibility"
    descriptors = CalculateComposition(
        ProteinSequence=ProteinSequence,
        AAProperty=_SolventAccessibility,
        AAPName=property_name,
    )
    return descriptors
174
175
def CalculateCompositionPolarizability(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors based on polarizability classes.

    Thin wrapper around ``CalculateComposition`` using the ``_Polarizability``
    table and the key prefix ``"_Polarizability"``.
    """
    descriptors = CalculateComposition(
        ProteinSequence=ProteinSequence,
        AAProperty=_Polarizability,
        AAPName="_Polarizability",
    )
    return descriptors
178
179
def CalculateTransitionHydrophobicity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors based on hydrophobicity classes.

    Thin wrapper around ``CalculateTransition`` using the ``_Hydrophobicity``
    table and the key prefix ``"_Hydrophobicity"``.
    """
    return CalculateTransition(
        ProteinSequence=ProteinSequence,
        AAProperty=_Hydrophobicity,
        AAPName="_Hydrophobicity",
    )
183
184
def CalculateTransitionNormalizedVDWV(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors based on the ``_NormalizedVDWV`` classes.

    Thin wrapper around ``CalculateTransition`` using the ``_NormalizedVDWV``
    table and the key prefix ``"_NormalizedVDWV"``.
    """
    return CalculateTransition(
        ProteinSequence=ProteinSequence,
        AAProperty=_NormalizedVDWV,
        AAPName="_NormalizedVDWV",
    )
188
189
def CalculateTransitionPolarity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors based on polarity classes.

    Thin wrapper around ``CalculateTransition`` using the ``_Polarity`` table
    and the key prefix ``"_Polarity"``.
    """
    return CalculateTransition(
        ProteinSequence=ProteinSequence,
        AAProperty=_Polarity,
        AAPName="_Polarity",
    )
193
194
def CalculateTransitionCharge(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors based on charge classes.

    Thin wrapper around ``CalculateTransition`` using the ``_Charge`` table
    and the key prefix ``"_Charge"``.
    """
    return CalculateTransition(
        ProteinSequence=ProteinSequence,
        AAProperty=_Charge,
        AAPName="_Charge",
    )
198
199
def CalculateTransitionSecondaryStr(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors based on secondary-structure classes.

    Thin wrapper around ``CalculateTransition`` using the ``_SecondaryStr``
    table and the key prefix ``"_SecondaryStr"``.
    """
    return CalculateTransition(
        ProteinSequence=ProteinSequence,
        AAProperty=_SecondaryStr,
        AAPName="_SecondaryStr",
    )
203
204
def CalculateTransitionSolventAccessibility(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors based on solvent-accessibility classes.

    Thin wrapper around ``CalculateTransition`` using the
    ``_SolventAccessibility`` table and the key prefix
    ``"_SolventAccessibility"``.
    """
    return CalculateTransition(
        ProteinSequence=ProteinSequence,
        AAProperty=_SolventAccessibility,
        AAPName="_SolventAccessibility",
    )
210
211
def CalculateTransitionPolarizability(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors based on polarizability classes.

    Thin wrapper around ``CalculateTransition`` using the ``_Polarizability``
    table and the key prefix ``"_Polarizability"``.
    """
    return CalculateTransition(
        ProteinSequence=ProteinSequence,
        AAProperty=_Polarizability,
        AAPName="_Polarizability",
    )
215
216
def CalculateDistributionHydrophobicity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors based on hydrophobicity classes.

    Thin wrapper around ``CalculateDistribution`` using the ``_Hydrophobicity``
    table and the key prefix ``"_Hydrophobicity"``.
    """
    return CalculateDistribution(
        ProteinSequence=ProteinSequence,
        AAProperty=_Hydrophobicity,
        AAPName="_Hydrophobicity",
    )
220
221
def CalculateDistributionNormalizedVDWV(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors based on the ``_NormalizedVDWV`` classes.

    Thin wrapper around ``CalculateDistribution`` using the ``_NormalizedVDWV``
    table and the key prefix ``"_NormalizedVDWV"``.
    """
    return CalculateDistribution(
        ProteinSequence=ProteinSequence,
        AAProperty=_NormalizedVDWV,
        AAPName="_NormalizedVDWV",
    )
225
226
def CalculateDistributionPolarity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors based on polarity classes.

    Thin wrapper around ``CalculateDistribution`` using the ``_Polarity``
    table and the key prefix ``"_Polarity"``.
    """
    return CalculateDistribution(
        ProteinSequence=ProteinSequence,
        AAProperty=_Polarity,
        AAPName="_Polarity",
    )
230
231
def CalculateDistributionCharge(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors based on charge classes.

    Thin wrapper around ``CalculateDistribution`` using the ``_Charge`` table
    and the key prefix ``"_Charge"``.
    """
    return CalculateDistribution(
        ProteinSequence=ProteinSequence,
        AAProperty=_Charge,
        AAPName="_Charge",
    )
235
236
def CalculateDistributionSecondaryStr(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors based on secondary-structure classes.

    Thin wrapper around ``CalculateDistribution`` using the ``_SecondaryStr``
    table and the key prefix ``"_SecondaryStr"``.
    """
    return CalculateDistribution(
        ProteinSequence=ProteinSequence,
        AAProperty=_SecondaryStr,
        AAPName="_SecondaryStr",
    )
240
241
def CalculateDistributionSolventAccessibility(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors based on solvent-accessibility classes.

    Thin wrapper around ``CalculateDistribution`` using the
    ``_SolventAccessibility`` table and the key prefix
    ``"_SolventAccessibility"``.
    """
    return CalculateDistribution(
        ProteinSequence=ProteinSequence,
        AAProperty=_SolventAccessibility,
        AAPName="_SolventAccessibility",
    )
247
248
def CalculateDistributionPolarizability(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors based on polarizability classes.

    Thin wrapper around ``CalculateDistribution`` using the ``_Polarizability``
    table and the key prefix ``"_Polarizability"``.
    """
    return CalculateDistribution(
        ProteinSequence=ProteinSequence,
        AAProperty=_Polarizability,
        AAPName="_Polarizability",
    )
252
253
def CalculateC(ProteinSequence: str) -> Dict[Any, Any]:
    """Collect the composition descriptors of all seven properties.

    The per-property helpers are called in the order listed below and their
    results are merged into a single dictionary.
    """
    per_property = (
        CalculateCompositionPolarizability,
        CalculateCompositionSolventAccessibility,
        CalculateCompositionSecondaryStr,
        CalculateCompositionCharge,
        CalculateCompositionPolarity,
        CalculateCompositionNormalizedVDWV,
        CalculateCompositionHydrophobicity,
    )
    merged: Dict[Any, Any] = {}
    for compute in per_property:
        merged.update(compute(ProteinSequence))
    return merged
264
265
def CalculateT(ProteinSequence: str) -> Dict[Any, Any]:
    """Collect the transition descriptors of all seven properties.

    The per-property helpers are called in the order listed below and their
    results are merged into a single dictionary.
    """
    per_property = (
        CalculateTransitionPolarizability,
        CalculateTransitionSolventAccessibility,
        CalculateTransitionSecondaryStr,
        CalculateTransitionCharge,
        CalculateTransitionPolarity,
        CalculateTransitionNormalizedVDWV,
        CalculateTransitionHydrophobicity,
    )
    merged: Dict[Any, Any] = {}
    for compute in per_property:
        merged.update(compute(ProteinSequence))
    return merged
276
277
def CalculateD(ProteinSequence: str) -> Dict[Any, Any]:
    """Collect the distribution descriptors of all seven properties.

    The per-property helpers are called in the order listed below and their
    results are merged into a single dictionary.
    """
    per_property = (
        CalculateDistributionPolarizability,
        CalculateDistributionSolventAccessibility,
        CalculateDistributionSecondaryStr,
        CalculateDistributionCharge,
        CalculateDistributionPolarity,
        CalculateDistributionNormalizedVDWV,
        CalculateDistributionHydrophobicity,
    )
    merged: Dict[Any, Any] = {}
    for compute in per_property:
        merged.update(compute(ProteinSequence))
    return merged
288
289
def CalculateCTD(ProteinSequence: str) -> Dict[Any, Any]:
    """Compute all composition, transition and distribution descriptors.

    Parameters
    ----------
    ProteinSequence:
        Protein sequence written with one-letter residue codes.

    Returns
    -------
    dict
        The union of the dictionaries returned by ``CalculateC``,
        ``CalculateT`` and ``CalculateD``, merged in that order. Delegating
        to these three functions replaces a duplicated list of the same 21
        per-property calls and keeps the key order unchanged.
    """
    result: Dict[Any, Any] = {}
    for compute in (CalculateC, CalculateT, CalculateD):
        result.update(compute(ProteinSequence))
    return result
314
315
316
317
318
319
320
321
322
323
324