Mercurial > repos > jose_duarte > phagedpo
comparison local_ctd.py @ 31:3d94608aea7a draft
Uploaded
author | jose_duarte |
---|---|
date | Mon, 13 Dec 2021 11:19:23 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
30:921004017f3c | 31:3d94608aea7a |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Compute the composition, transition and distribution descriptors based on the | |
4 different properties of AADs. | |
5 AADs with the same properties are marked with the same number. You can get 147 | |
6 descriptors for a given protein sequence. | |
7 References | |
8 ---------- | |
9 .. [1] Inna Dubchak, Ilya Muchnik, Stephen R. Holbrook and Sung-Hou Kim. | |
10 Prediction of protein folding class using global description of amino | |
11 acid sequence. Proc. Natl. Acad. Sci. USA, 1995, 92, 8700-8704. | |
12 .. [2] Inna Dubchak, Ilya Muchnik, Christopher Mayor, Igor Dralyuk and Sung-Hou | |
13 Kim. Recognition of a Protein Fold in the Context of the SCOP | |
14 classification. Proteins: Structure, Function and | |
15 Genetics, 1999, 35, 401-407. | |
16 Authors: Dongsheng Cao and Yizeng Liang. | |
17 Date: 2010.11.22 | |
18 Email: oriental-cds@163.com | |
19 """ | |
20 | |
21 # Core Library | |
22 import copy | |
23 import math | |
24 from typing import Any, Dict | |
25 | |
# Each table maps a class label ("1", "2" or "3") to the one-letter amino-acid
# codes that belong to that class for a single physicochemical property.

# "1": polar, "2": neutral, "3": hydrophobic
_Hydrophobicity = {
    "1": "RKEDQN",
    "2": "GASTPHY",
    "3": "CLVIMFW",
}

# "1": 0-2.78, "2": 2.95-4.0, "3": 4.03-8.08
# NOTE(review): "C" is not listed in any class of this table -- confirm
# against the cited references.
_NormalizedVDWV = {
    "1": "GASTPD",
    "2": "NVEQIL",
    "3": "MHKFRYW",
}

# "1": 4.9-6.2, "2": 8.0-9.2, "3": 10.4-13.0
# NOTE(review): classes "2" and "3" are identical to those of _Polarizability,
# the classes overlap (C, V, I, L are in "1" and "2"; M, F, W, Y are in "1"
# and "3") and A, D, G, S, T are in no class.  This looks like a copy-paste
# slip; confirm against the cited references before changing it, because any
# change alters the descriptor values.
_Polarity = {
    "1": "LIFWCMVY",
    "2": "CPNVEQIL",
    "3": "KMHFRYW",
}

# "1": positive, "2": neutral, "3": negative
_Charge = {
    "1": "KR",
    "2": "ANCQGHILMFPSTWYV",
    "3": "DE",
}

# "1": helix, "2": strand, "3": coil
_SecondaryStr = {
    "1": "EALMQKRH",
    "2": "VIYCWFT",
    "3": "GNPSD",
}

# "1": buried, "2": exposed, "3": intermediate
_SolventAccessibility = {
    "1": "ALFCGIVW",
    "2": "RKQEND",
    "3": "MPSTHY",
}

# "1": 0-0.108, "2": 0.128-0.186, "3": 0.219-0.409
_Polarizability = {
    "1": "GASDT",
    "2": "CPNVEQIL",
    "3": "KMHFRYW",
}


# Further amino-acid property tables can be added here to compute additional
# descriptors of a protein sequence.

# All property tables, in a fixed order ...
_AATProperty = (
    _Hydrophobicity,
    _NormalizedVDWV,
    _Polarity,
    _Charge,
    _SecondaryStr,
    _SolventAccessibility,
    _Polarizability,
)

# ... and the matching names, in the same order as _AATProperty.
_AATPropertyName = (
    "_Hydrophobicity",
    "_NormalizedVDWV",
    "_Polarity",
    "_Charge",
    "_SecondaryStr",
    "_SolventAccessibility",
    "_Polarizability",
)
70 | |
71 | |
def StringtoNum(ProteinSequence: str, AAProperty: Dict[Any, Any]) -> str:
    """Encode a protein sequence as a string of property-class labels.

    Every residue letter listed in one of the groups of ``AAProperty`` is
    replaced by that group's key (e.g. ``"1"``, ``"2"`` or ``"3"``).  Letters
    that are listed in no group are left unchanged.

    Parameters
    ----------
    ProteinSequence : str
        Amino-acid sequence in one-letter code.
    AAProperty : dict
        Maps a class label to a string holding the residue letters of that
        class.

    Returns
    -------
    str
        A string of the same length as ``ProteinSequence`` with every
        classified residue replaced by its class label.
    """
    # One translation pass replaces the former chain of str.replace calls
    # (and the needless deep copy of an immutable string).  setdefault keeps
    # the FIRST group that lists a letter, which is what the sequential
    # replacement produced when groups overlap.
    table: Dict[int, Any] = {}
    for label, letters in AAProperty.items():
        for letter in letters:
            table.setdefault(ord(letter), label)
    return ProteinSequence.translate(table)
80 | |
81 | |
def CalculateComposition(
    ProteinSequence: str, AAProperty: Dict[Any, Any], AAPName: str
) -> Dict[Any, Any]:
    """Compute the composition descriptors of a sequence for one property.

    The sequence is encoded with ``StringtoNum`` and the fraction of
    positions carrying each class label is reported.  The denominator is the
    full sequence length, so residues that belong to no class still count.

    Parameters
    ----------
    ProteinSequence : str
        Amino-acid sequence in one-letter code.
    AAProperty : dict
        Class label -> residue letters of that class.
    AAPName : str
        Prefix used to build the result keys.

    Returns
    -------
    dict
        ``{AAPName + "C1": f1, AAPName + "C2": f2, AAPName + "C3": f3}`` with
        each fraction rounded to 3 decimals.  All values are ``0.0`` for an
        empty sequence.
    """
    TProteinSequence = StringtoNum(ProteinSequence, AAProperty)
    num = len(TProteinSequence)
    result = {}
    for label in ("1", "2", "3"):
        # An empty sequence used to raise ZeroDivisionError; report 0.0.
        fraction = TProteinSequence.count(label) / num if num else 0.0
        result[AAPName + "C" + label] = round(fraction, 3)
    return result
91 | |
92 | |
def CalculateTransition(
    ProteinSequence: str, AAProperty: Dict[Any, Any], AAPName: str
) -> Dict[Any, Any]:
    """Compute the transition descriptors of a sequence for one property.

    The sequence is encoded with ``StringtoNum``.  For each unordered pair of
    classes, the number of adjacent positions that switch between the two
    classes (in either direction) is divided by the number of adjacent pairs
    in the sequence, ``len - 1``.

    Parameters
    ----------
    ProteinSequence : str
        Amino-acid sequence in one-letter code.
    AAProperty : dict
        Class label -> residue letters of that class.
    AAPName : str
        Prefix used to build the result keys.

    Returns
    -------
    dict
        Keys ``AAPName + "T12"``, ``AAPName + "T13"`` and ``AAPName + "T23"``
        with frequencies rounded to 3 decimals.  All values are ``0.0`` when
        the sequence has fewer than two residues.
    """
    CTD = StringtoNum(ProteinSequence, AAProperty)
    pairs = len(CTD) - 1
    Result = {}
    for first, second in (("1", "2"), ("1", "3"), ("2", "3")):
        if pairs > 0:
            changes = CTD.count(first + second) + CTD.count(second + first)
            frequency = round(changes / pairs, 3)
        else:
            # Length 1 used to raise ZeroDivisionError and length 0 divided
            # by -1; no adjacent pair exists, so the frequency is 0.
            frequency = 0.0
        Result[AAPName + "T" + first + second] = frequency
    return Result
110 | |
111 | |
def CalculateDistribution(
    ProteinSequence: str, AAProperty: Dict[Any, Any], AAPName: str
) -> Dict[Any, Any]:
    """Compute the distribution descriptors of a sequence for one property.

    The sequence is encoded with ``StringtoNum``.  For each class, the
    1-based positions of its first, 25%, 50%, 75% and last occurrence are
    reported as a percentage of the sequence length.

    Parameters
    ----------
    ProteinSequence : str
        Amino-acid sequence in one-letter code.
    AAProperty : dict
        Class label -> residue letters of that class.
    AAPName : str
        Prefix used to build the result keys.

    Returns
    -------
    dict
        For each class ``c`` in ``"1"``, ``"2"``, ``"3"``, the keys
        ``AAPName + "D" + c + tag`` for ``tag`` in ``"001"``, ``"025"``,
        ``"050"``, ``"075"``, ``"100"``.  Values are rounded to 3 decimals
        and are ``0`` when the class does not occur in the sequence.

    Notes
    -----
    The occurrence rank ``floor(count * fraction)`` is clamped to at least 1.
    Previously a rank of 0 was turned into index ``-1`` and silently picked
    the LAST occurrence, so for a class with two or three occurrences the
    25% value could be larger than the 50% value.  Values for such classes
    therefore differ from the earlier output.
    """
    TProteinSequence = StringtoNum(ProteinSequence, AAProperty)
    Num = len(TProteinSequence)
    tags = ("001", "025", "050", "075", "100")
    Result: Dict[str, float] = {}
    for label in ("1", "2", "3"):
        prefix = AAPName + "D" + label
        # 1-based positions of every occurrence of this class label.
        positions = [
            position
            for position, symbol in enumerate(TProteinSequence, start=1)
            if symbol == label
        ]
        if not positions:
            for tag in tags:
                Result[prefix + tag] = 0
            continue

        count = len(positions)
        ranks = (
            1,
            int(math.floor(count * 0.25)),
            int(math.floor(count * 0.5)),
            int(math.floor(count * 0.75)),
            count,
        )
        for tag, rank in zip(tags, ranks):
            # Clamp so that rank 0 cannot wrap around to the last occurrence.
            occurrence = positions[max(rank, 1) - 1]
            Result[prefix + tag] = round(float(occurrence) / Num * 100, 3)

    return Result
148 | |
149 | |
def CalculateCompositionHydrophobicity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors for the hydrophobicity classes.

    Thin wrapper around ``CalculateComposition`` using the
    ``_Hydrophobicity`` table; result keys start with ``"_HydrophobicityC"``.
    The return annotation was added to match the sibling wrappers.
    """
    return CalculateComposition(ProteinSequence, _Hydrophobicity, "_Hydrophobicity")
152 | |
153 | |
def CalculateCompositionNormalizedVDWV(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors for the normalized VDWV classes.

    Thin wrapper around ``CalculateComposition`` using the
    ``_NormalizedVDWV`` table; result keys start with ``"_NormalizedVDWVC"``.
    The return annotation was added to match the sibling wrappers.
    """
    return CalculateComposition(ProteinSequence, _NormalizedVDWV, "_NormalizedVDWV")
156 | |
157 | |
def CalculateCompositionPolarity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors for the polarity classes.

    Thin wrapper around ``CalculateComposition`` using the ``_Polarity``
    table; result keys start with ``"_PolarityC"``.  The return annotation
    was added to match the sibling wrappers.
    """
    return CalculateComposition(ProteinSequence, _Polarity, "_Polarity")
160 | |
161 | |
def CalculateCompositionCharge(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors for the charge classes.

    Delegates to ``CalculateComposition`` with the ``_Charge`` table; result
    keys start with ``"_ChargeC"``.
    """
    descriptors = CalculateComposition(ProteinSequence, _Charge, "_Charge")
    return descriptors
164 | |
165 | |
def CalculateCompositionSecondaryStr(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors for the secondary-structure classes.

    Delegates to ``CalculateComposition`` with the ``_SecondaryStr`` table;
    result keys start with ``"_SecondaryStrC"``.
    """
    descriptors = CalculateComposition(ProteinSequence, _SecondaryStr, "_SecondaryStr")
    return descriptors
168 | |
169 | |
def CalculateCompositionSolventAccessibility(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors for the solvent-accessibility classes.

    Delegates to ``CalculateComposition`` with the ``_SolventAccessibility``
    table; result keys start with ``"_SolventAccessibilityC"``.
    """
    property_name = "_SolventAccessibility"
    descriptors = CalculateComposition(
        ProteinSequence, _SolventAccessibility, property_name
    )
    return descriptors
174 | |
175 | |
def CalculateCompositionPolarizability(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the composition descriptors for the polarizability classes.

    Delegates to ``CalculateComposition`` with the ``_Polarizability`` table;
    result keys start with ``"_PolarizabilityC"``.
    """
    descriptors = CalculateComposition(
        ProteinSequence, _Polarizability, "_Polarizability"
    )
    return descriptors
178 | |
179 | |
def CalculateTransitionHydrophobicity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors for the hydrophobicity classes.

    Delegates to ``CalculateTransition`` with the ``_Hydrophobicity`` table;
    result keys start with ``"_HydrophobicityT"``.
    """
    return CalculateTransition(ProteinSequence, _Hydrophobicity, "_Hydrophobicity")
183 | |
184 | |
def CalculateTransitionNormalizedVDWV(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors for the normalized VDWV classes.

    Delegates to ``CalculateTransition`` with the ``_NormalizedVDWV`` table;
    result keys start with ``"_NormalizedVDWVT"``.
    """
    return CalculateTransition(ProteinSequence, _NormalizedVDWV, "_NormalizedVDWV")
188 | |
189 | |
def CalculateTransitionPolarity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors for the polarity classes.

    Delegates to ``CalculateTransition`` with the ``_Polarity`` table; result
    keys start with ``"_PolarityT"``.
    """
    return CalculateTransition(ProteinSequence, _Polarity, "_Polarity")
193 | |
194 | |
def CalculateTransitionCharge(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors for the charge classes.

    Delegates to ``CalculateTransition`` with the ``_Charge`` table; result
    keys start with ``"_ChargeT"``.
    """
    return CalculateTransition(ProteinSequence, _Charge, "_Charge")
198 | |
199 | |
def CalculateTransitionSecondaryStr(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors for the secondary-structure classes.

    Delegates to ``CalculateTransition`` with the ``_SecondaryStr`` table;
    result keys start with ``"_SecondaryStrT"``.
    """
    return CalculateTransition(ProteinSequence, _SecondaryStr, "_SecondaryStr")
203 | |
204 | |
def CalculateTransitionSolventAccessibility(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors for the solvent-accessibility classes.

    Delegates to ``CalculateTransition`` with the ``_SolventAccessibility``
    table; result keys start with ``"_SolventAccessibilityT"``.
    """
    property_name = "_SolventAccessibility"
    return CalculateTransition(ProteinSequence, _SolventAccessibility, property_name)
210 | |
211 | |
def CalculateTransitionPolarizability(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the transition descriptors for the polarizability classes.

    Delegates to ``CalculateTransition`` with the ``_Polarizability`` table;
    result keys start with ``"_PolarizabilityT"``.
    """
    return CalculateTransition(ProteinSequence, _Polarizability, "_Polarizability")
215 | |
216 | |
def CalculateDistributionHydrophobicity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors for the hydrophobicity classes.

    Delegates to ``CalculateDistribution`` with the ``_Hydrophobicity``
    table; result keys start with ``"_HydrophobicityD"``.
    """
    return CalculateDistribution(ProteinSequence, _Hydrophobicity, "_Hydrophobicity")
220 | |
221 | |
def CalculateDistributionNormalizedVDWV(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors for the normalized VDWV classes.

    Delegates to ``CalculateDistribution`` with the ``_NormalizedVDWV``
    table; result keys start with ``"_NormalizedVDWVD"``.
    """
    return CalculateDistribution(ProteinSequence, _NormalizedVDWV, "_NormalizedVDWV")
225 | |
226 | |
def CalculateDistributionPolarity(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors for the polarity classes.

    Delegates to ``CalculateDistribution`` with the ``_Polarity`` table;
    result keys start with ``"_PolarityD"``.
    """
    return CalculateDistribution(ProteinSequence, _Polarity, "_Polarity")
230 | |
231 | |
def CalculateDistributionCharge(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors for the charge classes.

    Delegates to ``CalculateDistribution`` with the ``_Charge`` table; result
    keys start with ``"_ChargeD"``.
    """
    return CalculateDistribution(ProteinSequence, _Charge, "_Charge")
235 | |
236 | |
def CalculateDistributionSecondaryStr(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors for the secondary-structure classes.

    Delegates to ``CalculateDistribution`` with the ``_SecondaryStr`` table;
    result keys start with ``"_SecondaryStrD"``.
    """
    return CalculateDistribution(ProteinSequence, _SecondaryStr, "_SecondaryStr")
240 | |
241 | |
def CalculateDistributionSolventAccessibility(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors for the solvent-accessibility classes.

    Delegates to ``CalculateDistribution`` with the ``_SolventAccessibility``
    table; result keys start with ``"_SolventAccessibilityD"``.
    """
    property_name = "_SolventAccessibility"
    return CalculateDistribution(ProteinSequence, _SolventAccessibility, property_name)
247 | |
248 | |
def CalculateDistributionPolarizability(ProteinSequence: str) -> Dict[Any, Any]:
    """Return the distribution descriptors for the polarizability classes.

    Delegates to ``CalculateDistribution`` with the ``_Polarizability``
    table; result keys start with ``"_PolarizabilityD"``.
    """
    return CalculateDistribution(ProteinSequence, _Polarizability, "_Polarizability")
252 | |
253 | |
def CalculateC(ProteinSequence: str) -> Dict[Any, Any]:
    """Collect the composition descriptors of all seven properties.

    The per-property results are merged into one dict in a fixed order
    (polarizability first, hydrophobicity last), which determines the key
    order of the returned dict.
    """
    calculators = (
        CalculateCompositionPolarizability,
        CalculateCompositionSolventAccessibility,
        CalculateCompositionSecondaryStr,
        CalculateCompositionCharge,
        CalculateCompositionPolarity,
        CalculateCompositionNormalizedVDWV,
        CalculateCompositionHydrophobicity,
    )
    result: Dict[Any, Any] = {}
    for calculate in calculators:
        result.update(calculate(ProteinSequence))
    return result
264 | |
265 | |
def CalculateT(ProteinSequence: str) -> Dict[Any, Any]:
    """Collect the transition descriptors of all seven properties.

    The per-property results are merged into one dict in a fixed order
    (polarizability first, hydrophobicity last), which determines the key
    order of the returned dict.
    """
    calculators = (
        CalculateTransitionPolarizability,
        CalculateTransitionSolventAccessibility,
        CalculateTransitionSecondaryStr,
        CalculateTransitionCharge,
        CalculateTransitionPolarity,
        CalculateTransitionNormalizedVDWV,
        CalculateTransitionHydrophobicity,
    )
    result: Dict[Any, Any] = {}
    for calculate in calculators:
        result.update(calculate(ProteinSequence))
    return result
276 | |
277 | |
def CalculateD(ProteinSequence: str) -> Dict[Any, Any]:
    """Collect the distribution descriptors of all seven properties.

    The per-property results are merged into one dict in a fixed order
    (polarizability first, hydrophobicity last), which determines the key
    order of the returned dict.
    """
    calculators = (
        CalculateDistributionPolarizability,
        CalculateDistributionSolventAccessibility,
        CalculateDistributionSecondaryStr,
        CalculateDistributionCharge,
        CalculateDistributionPolarity,
        CalculateDistributionNormalizedVDWV,
        CalculateDistributionHydrophobicity,
    )
    result: Dict[Any, Any] = {}
    for calculate in calculators:
        result.update(calculate(ProteinSequence))
    return result
288 | |
289 | |
def CalculateCTD(ProteinSequence: str) -> Dict[Any, Any]:
    """Collect all composition, transition and distribution descriptors.

    The result is the composition descriptors followed by the transition
    descriptors followed by the distribution descriptors, each group in the
    per-property order used by ``CalculateC``, ``CalculateT`` and
    ``CalculateD``.
    """
    result: Dict[Any, Any] = {}
    # Merge the three descriptor families in C, T, D order so the key order
    # matches calling every per-property function one after another.
    for calculate_family in (CalculateC, CalculateT, CalculateD):
        result.update(calculate_family(ProteinSequence))
    return result
314 | |
315 | |
316 | |
317 | |
318 | |
319 | |
320 | |
321 | |
322 | |
323 | |
324 |