Mercurial > repos > jose_duarte > phagedpo
comparison local_AAComposition.py @ 26:52e50de4c005 draft
Uploaded
| author | jose_duarte |
|---|---|
| date | Sun, 12 Dec 2021 10:49:43 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 25:ce0de724097a | 26:52e50de4c005 |
|---|---|
| 1 | |
| 2 # -*- coding: utf-8 -*- | |
| 3 """ | |
| 4 This module computes the composition of amino acids, dipeptides and | |
| 5 3-mers (tripeptides) for a given protein sequence. | |
| 6 References | |
| 7 ---------- | |
| 8 .. [1] Reczko, M. and Bohr, H. (1994) The DEF data base of sequence based protein | |
| 9 fold class predictions. Nucleic Acids Res, 22, 3616-3619. | |
| 10 .. [2] Hua, S. and Sun, Z. (2001) Support vector machine approach for protein | |
| 11 subcellular localization prediction. Bioinformatics, 17, 721-728. | |
| 12 .. [3] Grassmann, J., Reczko, M., Suhai, S. and Edler, L. (1999) Protein fold | |
| 13 class prediction: new methods of statistical classification. Proc Int Conf | |
| 14 Intell Syst Mol Biol, 106-112. | |
| 15 Authors: Dongsheng Cao and Yizeng Liang. | |
| 16 Date: 2012.3.27 | |
| 17 Email: oriental-cds@163.com | |
| 18 """ | |
| 19 | |
| 20 # Core Library | |
| 21 import re | |
| 22 from typing import Any, Dict, List | |
| 23 | |
# One-letter amino-acid codes (20 entries); the composition functions in this
# module iterate over this list to build their result keys.
| 24 AALetter: List[str] = list("ARNDCEQGHILKMFPSTWYV") | |
| 25 | |
# Parameter description text for a protein-sequence argument.
# NOTE(review): not referenced by any code visible in this file view --
# presumably meant to be reused in docstrings; confirm before removing.
| 26 ProteinSequence_docstring = """ProteinSequence: str | |
| 27 a pure protein sequence""" | |
| 28 | |
| 29 | |
def CalculateAAComposition(ProteinSequence: str) -> Dict[str, float]:
    """Compute the percentage composition of each amino acid in a sequence.

    Parameters
    ----------
    ProteinSequence: str
        a pure protein sequence

    Returns
    -------
    Dict[str, float]
        Maps every letter of ``AALetter`` to the share of sequence positions
        holding that letter, in percent, rounded to 3 decimals. For an empty
        sequence every value is 0.0.
    """
    sequence_length = len(ProteinSequence)
    if sequence_length == 0:
        # No residues to count; return zeros instead of dividing by zero.
        return {residue: 0.0 for residue in AALetter}
    return {
        residue: round(
            float(ProteinSequence.count(residue)) / sequence_length * 100, 3
        )
        for residue in AALetter
    }
| 36 | |
| 37 | |
def CalculateDipeptideComposition(ProteinSequence: str) -> Dict[str, float]:
    """Compute the percentage composition of each dipeptide in a sequence.

    Parameters
    ----------
    ProteinSequence: str
        a pure protein sequence

    Returns
    -------
    Dict[str, float]
        Maps every two-letter combination of ``AALetter`` to its occurrence
        count divided by ``len(ProteinSequence) - 1``, in percent, rounded to
        2 decimals. For sequences shorter than two residues every value is
        0.0.

    Notes
    -----
    Occurrences are counted with ``str.count``, which counts non-overlapping
    matches (``"AAA".count("AA") == 1``). This counting is kept unchanged so
    that the produced feature values stay the same as before.
    """
    # Number of adjacent residue pairs in the sequence.
    pair_positions = len(ProteinSequence) - 1
    if pair_positions < 1:
        # Fewer than two residues means no dipeptide exists; return zeros
        # instead of dividing by zero (or by -1 for an empty sequence).
        return {first + second: 0.0 for first in AALetter for second in AALetter}

    result: Dict[str, float] = {}
    for first in AALetter:
        for second in AALetter:
            dipeptide = first + second
            result[dipeptide] = round(
                float(ProteinSequence.count(dipeptide)) / pair_positions * 100, 2
            )
    return result
| 48 | |
| 49 | |
def Getkmers() -> List[str]:
    """Return all three-letter strings that can be built from ``AALetter``.

    Returns
    -------
    List[str]
        Every concatenation of three letters of ``AALetter``, ordered with the
        first letter varying slowest and the third letter varying fastest.
    """
    return [
        first + second + third
        for first in AALetter
        for second in AALetter
        for third in AALetter
    ]
| 57 | |
| 58 | |
def GetSpectrumDict(proteinsequence: str) -> Dict[str, int]:
    """Count the occurrences of every 3-mer in a protein sequence.

    Parameters
    ----------
    proteinsequence: str
        a pure protein sequence

    Returns
    -------
    Dict[str, int]
        Maps every 3-mer returned by ``Getkmers()`` to the number of
        non-overlapping occurrences of that 3-mer in the sequence.
    """
    # The k-mers are plain letter strings, so str.count yields the same
    # non-overlapping count as re.findall while avoiding the compilation of
    # one regular expression per k-mer.
    return {kmer: proteinsequence.count(kmer) for kmer in Getkmers()}
| 65 | |
| 66 | |
def CalculateAADipeptideComposition(ProteinSequence: str) -> Dict[str, float]:
    """Combine amino-acid, dipeptide and 3-mer features into one dictionary.

    Parameters
    ----------
    ProteinSequence: str
        a pure protein sequence

    Returns
    -------
    Dict[str, float]
        The entries of ``CalculateAAComposition``, followed by those of
        ``CalculateDipeptideComposition`` and then ``GetSpectrumDict``, all
        computed for the same sequence.
    """
    combined: Dict[Any, Any] = {
        **CalculateAAComposition(ProteinSequence),
        **CalculateDipeptideComposition(ProteinSequence),
        **GetSpectrumDict(ProteinSequence),
    }
    return combined
| 73 | |
| 74 | |
| 75 | |
| 76 | |
| 77 | |
| 78 |
