1 import pandas as pd 
2 from pydpi.pypro import PyPro 
3 import os 
4 
5 
def BinaryDescriptor(seq):
    """Encode peptide sequences as concatenated 20-position binary codes.

    Every residue is replaced by 20 cells holding the strings '0' or '1',
    with the single '1' at the residue's position in the alphabet
    ``ACDEFGHIKLMNPQRSTVWY``.  Lookup is case-insensitive.

    Parameters
    ----------
    seq : iterable of str
        The sequences to encode, one string per peptide.

    Returns
    -------
    pandas.DataFrame
        One row per sequence and ``20 * len(sequence)`` filled cells per
        row.  Cells are strings, not integers.  Rows for shorter sequences
        are padded with missing values by pandas.  An empty sequence
        produces a row with no filled cells.

    Raises
    ------
    KeyError
        If a sequence contains a character outside the 20-letter alphabet.
        The message names the offending character and sequence.
    """
    alphabet = "ACDEFGHIKLMNPQRSTVWY"
    width = len(alphabet)

    # Pre-build the 20-cell code of every residue once per call.
    codes = {}
    for position, residue in enumerate(alphabet):
        code = ['0'] * width
        code[position] = '1'
        codes[residue] = code

    rows = []
    for peptide in seq:
        row = []
        for residue in peptide:
            try:
                row.extend(codes[residue.upper()])
            except KeyError:
                # Still a KeyError (as before), but now it says what failed.
                raise KeyError(
                    "Unsupported residue %r in sequence %r; expected one of %s"
                    % (residue, peptide, alphabet)
                )
        rows.append(row)

    return pd.DataFrame(rows)
45 
def Decriptor_generator(InFile, Lamda, Weight, DesType, Out_file):
    """Compute sequence descriptors for a peptide file and write them as TSV.

    Every line of ``InFile`` that does not contain ``>`` is treated as one
    sequence.  Surrounding whitespace is stripped and blank lines are
    skipped.  The result has one row per sequence and is written to
    ``Out_file`` tab-separated, without the index column.

    Parameters
    ----------
    InFile : str
        Path of the input file; lines containing ``>`` are ignored.
    Lamda : int or str
        Converted with ``int()`` and passed as ``lamda`` to
        ``PyPro.GetPAAC`` / ``PyPro.GetAPAAC``.  Only read for the
        'PAAC', 'APAAC' and 'All' descriptor types.
    Weight : float or str
        Converted with ``float()`` and passed as ``weight`` to the same
        two calls.  Only read for 'PAAC', 'APAAC' and 'All'.
    DesType : str
        One of 'PAAC', 'APAAC', 'CTD', 'DPComp', 'GearyAuto', 'MoranAuto',
        'MoreauBrotoAuto', 'QSO', 'SOCN', 'TPComp', 'All' (all of the
        former merged into one row) or 'BinaryDescriptor'.
    Out_file : str
        Path of the TSV file to write.

    Raises
    ------
    ValueError
        If ``DesType`` is not one of the names listed above.
    """
    # Ordered (name, getter) pairs; 'All' merges them in exactly this order.
    # Each getter is expected to return a mapping of descriptor name to value.
    calculators = (
        ('PAAC', lambda p: p.GetPAAC(lamda=int(Lamda), weight=float(Weight))),
        ('APAAC', lambda p: p.GetAPAAC(lamda=int(Lamda), weight=float(Weight))),
        ('CTD', lambda p: p.GetCTD()),
        ('DPComp', lambda p: p.GetDPComp()),
        ('GearyAuto', lambda p: p.GetGearyAuto()),
        ('MoranAuto', lambda p: p.GetMoranAuto()),
        ('MoreauBrotoAuto', lambda p: p.GetMoreauBrotoAuto()),
        ('QSO', lambda p: p.GetQSO()),
        ('SOCN', lambda p: p.GetSOCN()),
        ('TPComp', lambda p: p.GetTPComp()),
    )
    by_name = dict(calculators)

    # Reject unknown types up front instead of failing later on an
    # unassigned local once the first sequence is processed.
    if DesType not in by_name and DesType not in ('All', 'BinaryDescriptor'):
        valid = [name for name, _ in calculators] + ['All', 'BinaryDescriptor']
        raise ValueError(
            "Unknown DesType %r; expected one of: %s"
            % (DesType, ', '.join(valid))
        )

    # The context manager guarantees the input file is closed.
    with open(InFile) as handle:
        sequences = [
            line.strip()
            for line in handle
            if ">" not in line and line.strip()
        ]

    if DesType == 'BinaryDescriptor':
        # Encodes the whole list in one call; no PyPro object is needed.
        out_df = BinaryDescriptor(sequences)
    else:
        frames = []
        for seq in sequences:
            protein = PyPro()
            protein.ReadProteinSequence(seq)

            if DesType == 'All':
                DS = {}
                for _, compute in calculators:
                    DS.update(compute(protein))
            else:
                DS = by_name[DesType](protein)

            frames.append(pd.DataFrame(DS, index=[0]))

        # Concatenate once; pd.concat raises on an empty list, so fall back
        # to an empty frame when the file contained no sequences.
        out_df = pd.concat(frames, axis=0) if frames else pd.DataFrame()

    out_df.to_csv(Out_file, index=False, sep='\t')
121 
122 
def build_arg_parser():
    """Return the command-line parser for this script.

    Every option has a short ``-X`` and a long ``--Name`` flag.  argparse
    only accepts several names for one argument when each starts with a
    prefix character, and ``required=`` is only valid on such options.
    The parsed attributes are ``InFile``, ``Lamda``, ``Weight``,
    ``DesType`` and ``Out_file``.
    """
    import argparse

    descriptor_types = [
        'PAAC', 'APAAC', 'CTD', 'DPComp', 'GearyAuto', 'MoranAuto',
        'MoreauBrotoAuto', 'QSO', 'SOCN', 'TPComp', 'All',
        'BinaryDescriptor',
    ]

    parser = argparse.ArgumentParser()

    parser.add_argument("-I", "--InFile",
                        required=True,
                        help="pep file")

    parser.add_argument("-l", "--Lamda",
                        required=False,
                        type=int,
                        default=50,
                        help="lamda value passed to the PAAC/APAAC "
                             "descriptors (default: 50)")

    parser.add_argument("-w", "--Weight",
                        required=False,
                        type=float,
                        default=0.5,
                        help="weight value passed to the PAAC/APAAC "
                             "descriptors (default: 0.5)")

    parser.add_argument("-t", "--DesType",
                        required=True,
                        choices=descriptor_types,
                        help="descriptor type to compute")

    parser.add_argument("-O", "--Out_file",
                        required=False,
                        default="Out.tsv",
                        help="Path to target tsv file")

    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()
    Decriptor_generator(args.InFile, args.Lamda, args.Weight, args.DesType, args.Out_file)
157 
158 