Mercurial > repos > jay > pdaug_sequence_similarity_network
comparison PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py @ 0:e650de82bcc7 draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author | jay |
---|---|
date | Wed, 28 Oct 2020 01:50:00 +0000 |
parents | |
children | 908b8ce0326c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e650de82bcc7 |
---|---|
1 import pandas as pd | |
2 from pydpi.pypro import PyPro | |
3 import os | |
4 | |
5 | |
def BinaryDescriptor(seq):
    """Encode peptide sequences as flattened one-hot (binary) descriptors.

    Every residue is expanded into 20 cells, one per amino-acid letter in the
    order ``ACDEFGHIKLMNPQRSTVWY``; the cell matching the residue holds "1"
    and the other 19 hold "0". The cells of all residues of a sequence are
    laid out side by side to form one row.

    Args:
        seq: Iterable of peptide sequence strings. Letters are upper-cased
            before lookup, so the input is case-insensitive.

    Returns:
        pandas.DataFrame with one row per input sequence and 20 columns per
        residue. Cell values are the strings "1" and "0". When sequences
        differ in length, pandas pads the shorter rows with missing values.

    Raises:
        KeyError: If a sequence contains a character that is not one of the
            20 letters listed above.
    """
    alphabet = "ACDEFGHIKLMNPQRSTVWY"

    # Build the lookup table from the alphabet instead of spelling out 20
    # literal rows. Values stay as the strings "1"/"0" so the resulting
    # DataFrame holds the same cell values as before.
    one_hot = {}
    for position, residue in enumerate(alphabet):
        code = ["0"] * len(alphabet)
        code[position] = "1"
        one_hot[residue] = code

    rows = []
    for s in seq:
        row = []
        for n in s:
            code = one_hot.get(n.upper())
            if code is None:
                # Same exception type as a failed dict lookup, but the
                # message now identifies the offending sequence as well.
                raise KeyError(
                    "Unsupported residue %r in sequence %r; expected one of %s"
                    % (n, s, alphabet)
                )
            row.extend(code)
        # An empty sequence yields an empty row rather than a single
        # empty-string cell.
        rows.append(row)

    return pd.DataFrame(rows)
45 | |
def Decriptor_generator(InFile, Lamda, Weight, DesType, Out_file):
    """Compute sequence descriptors for a peptide file and write them as TSV.

    Lines of ``InFile`` that contain ">" are treated as headers and skipped.
    Every other non-blank line is taken as one complete sequence (a sequence
    wrapped over several lines is therefore read as several sequences).

    Args:
        InFile: Path to the input peptide file.
        Lamda: Value converted with ``int()`` and passed as ``lamda`` to the
            PAAC and APAAC descriptors; only used by 'PAAC', 'APAAC', 'All'.
        Weight: Value converted with ``float()`` and passed as ``weight`` to
            the PAAC and APAAC descriptors; only used by 'PAAC', 'APAAC',
            'All'.
        DesType: One of 'PAAC', 'APAAC', 'CTD', 'DPComp', 'GearyAuto',
            'MoranAuto', 'MoreauBrotoAuto', 'QSO', 'SOCN', 'TPComp', 'All'
            (all ten merged into one row per sequence) or 'BinaryDescriptor'.
        Out_file: Path of the tab-separated output file (written without an
            index column).

    Raises:
        ValueError: If ``DesType`` is not one of the supported names.
    """
    # Descriptors whose PyPro getter takes the lamda/weight arguments.
    with_params = ('PAAC', 'APAAC')
    # Descriptors whose PyPro getter takes no arguments. The combined order
    # below is the order in which 'All' merges the individual results.
    without_params = ('CTD', 'DPComp', 'GearyAuto', 'MoranAuto',
                      'MoreauBrotoAuto', 'QSO', 'SOCN', 'TPComp')
    known = with_params + without_params

    if DesType not in known + ('All', 'BinaryDescriptor'):
        # Previously an unknown type only surfaced later as a NameError.
        raise ValueError("Unknown descriptor type: %r" % (DesType,))

    # Close the file deterministically. Strip all surrounding whitespace so
    # a trailing '\r' or a blank line does not become part of a sequence.
    with open(InFile) as handle:
        list_pep_name = []
        for line in handle:
            if ">" in line:
                continue
            cleaned = line.strip()
            if cleaned:
                list_pep_name.append(cleaned)

    if DesType == 'BinaryDescriptor':
        # The binary encoding works on the whole list at once, so build it a
        # single time instead of once per sequence.
        out_df = BinaryDescriptor(list_pep_name)
        out_df.to_csv(Out_file, index=False, sep='\t')
        return

    wanted = known if DesType == 'All' else (DesType,)

    frames = []
    for seq in list_pep_name:
        protein = PyPro()
        protein.ReadProteinSequence(seq)

        DS = {}
        for name in wanted:
            # Each descriptor name maps to the PyPro method "Get<name>".
            getter = getattr(protein, 'Get' + name)
            if name in with_params:
                DS.update(getter(lamda=int(Lamda), weight=float(Weight)))
            else:
                DS.update(getter())

        frames.append(pd.DataFrame(DS, index=[0]))

    # One concat at the end avoids re-copying the growing table every
    # iteration; an input without sequences still produces an empty table.
    out_df = pd.concat(frames, axis=0) if frames else pd.DataFrame()
    out_df.to_csv(Out_file, index=False, sep='\t')
121 | |
122 | |
if __name__ == "__main__":

    # Command-line entry point: parse the options and run the generator.
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument("-I", "--InFile",
                        required=True,
                        default=None,
                        help="Input peptide file")

    # Lamda and Weight are only forwarded to the PAAC / APAAC descriptors.
    parser.add_argument("-l", "--Lamda",
                        required=False,
                        default=50,
                        type=int,
                        help="lamda value for the PAAC/APAAC descriptors "
                             "(default: 50)")

    parser.add_argument("-w", "--Weight",
                        required=False,
                        default=0.5,
                        type=float,
                        help="weight value for the PAAC/APAAC descriptors "
                             "(default: 0.5)")

    parser.add_argument("-t", "--DesType",
                        required=True,
                        default=None,
                        help="Descriptor type: PAAC, APAAC, CTD, DPComp, "
                             "GearyAuto, MoranAuto, MoreauBrotoAuto, QSO, "
                             "SOCN, TPComp, All or BinaryDescriptor")

    parser.add_argument("-O", "--Out_file",
                        required=False,
                        default="Out.tsv",
                        help="Path to target tsv file")

    args = parser.parse_args()
    Decriptor_generator(args.InFile, args.Lamda, args.Weight, args.DesType, args.Out_file)
157 | |
158 |