comparison PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py @ 0:7557b48b2872 draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author jay
date Wed, 28 Oct 2020 02:10:12 +0000
parents
children d7e684975db3
comparison
equal deleted inserted replaced
-1:000000000000 0:7557b48b2872
1 import pandas as pd
2 from pydpi.pypro import PyPro
3 import os
4
5
def BinaryDescriptor(seq):
    """One-hot encode peptide sequences over the 20 standard amino acids.

    Every residue contributes 20 consecutive cells holding the strings
    "0"/"1"; the single "1" sits at the residue's position in the
    alphabetical one-letter alphabet ``ACDEFGHIKLMNPQRSTVWY``.  Residues
    are matched case-insensitively.

    Args:
        seq: iterable of peptide sequences (strings of one-letter codes).

    Returns:
        pandas.DataFrame with one row per sequence and 20 columns per
        residue.  Rows of sequences shorter than the longest one (an empty
        sequence included) are padded with missing values by pandas.

    Raises:
        KeyError: if a sequence contains a character that is not one of
            the 20 supported one-letter codes.
    """
    residues = "ACDEFGHIKLMNPQRSTVWY"
    width = len(residues)

    # Cells are kept as the strings "0"/"1" so the frame has the same
    # content as the comma-separated codes this table replaces.
    BinaryCode = {
        aa: ["1" if col == pos else "0" for col in range(width)]
        for pos, aa in enumerate(residues)
    }

    lines = []
    for s in seq:
        row = []
        for n in s:
            code = BinaryCode.get(n.upper())
            if code is None:
                # Same exception type as a plain dict lookup, but it names
                # the offending residue and the sequence it came from.
                raise KeyError(
                    "unsupported residue %r in sequence %r" % (n, s)
                )
            row.extend(code)
        # An empty sequence gives an empty row rather than a stray [''] cell.
        lines.append(row)

    return pd.DataFrame(lines)
45
# Descriptor families computed through PyPro.  The order is the one in which
# 'All' merges them, so it fixes both column order and which family wins
# when two of them produce the same key.
_PYPRO_DESCRIPTOR_TYPES = (
    'PAAC', 'APAAC', 'CTD', 'DPComp', 'GearyAuto', 'MoranAuto',
    'MoreauBrotoAuto', 'QSO', 'SOCN', 'TPComp',
)


def _pypro_family(protein, name, Lamda, Weight):
    """Return the descriptor dict of one family for a loaded PyPro object."""
    # Each family name maps onto the PyPro method "Get<name>".
    calculate = getattr(protein, 'Get' + name)
    if name in ('PAAC', 'APAAC'):
        # Only these two families take the lamda/weight tuning parameters.
        return calculate(lamda=int(Lamda), weight=float(Weight))
    return calculate()


def Decriptor_generator(InFile, Lamda, Weight, DesType, Out_file):
    """Compute sequence descriptors for a peptide file and write them as TSV.

    Args:
        InFile: path of the input file.  Lines containing ">" are treated
            as headers and skipped, blank lines are ignored, and every
            other line (stripped of surrounding whitespace, including a
            trailing carriage return) is one peptide sequence.
        Lamda: lamda value for the PAAC/APAAC descriptors; converted with
            int() when those descriptors are computed.
        Weight: weight value for the PAAC/APAAC descriptors; converted
            with float() when those descriptors are computed.
        DesType: one of 'PAAC', 'APAAC', 'CTD', 'DPComp', 'GearyAuto',
            'MoranAuto', 'MoreauBrotoAuto', 'QSO', 'SOCN', 'TPComp',
            'All' (every family above merged into one row per peptide) or
            'BinaryDescriptor' (one-hot encoding).
        Out_file: path of the tab-separated output file (one row per
            peptide, no index column).

    Raises:
        ValueError: if DesType is not one of the supported names.
    """
    supported = _PYPRO_DESCRIPTOR_TYPES + ('All', 'BinaryDescriptor')
    if DesType not in supported:
        # Fail early with a clear message instead of an UnboundLocalError
        # halfway through the first sequence.
        raise ValueError(
            "Unknown DesType %r; expected one of: %s"
            % (DesType, ', '.join(supported))
        )

    # The context manager closes the handle even if reading fails.
    with open(InFile) as handle:
        stripped = (line.strip() for line in handle)
        list_pep_name = [s for s in stripped if s and ">" not in s]

    if DesType == 'BinaryDescriptor':
        # Encodes all sequences in one call; no PyPro object is needed.
        out_df = BinaryDescriptor(list_pep_name)
    else:
        if DesType == 'All':
            wanted = _PYPRO_DESCRIPTOR_TYPES
        else:
            wanted = (DesType,)

        frames = []
        for seq in list_pep_name:
            protein = PyPro()
            protein.ReadProteinSequence(seq)

            DS = {}
            for name in wanted:
                DS.update(_pypro_family(protein, name, Lamda, Weight))
            frames.append(pd.DataFrame(DS, index=[0]))

        # Concatenate once; pd.concat rejects an empty list, so a file
        # without sequences still yields an (empty) output table.
        out_df = pd.concat(frames, axis=0) if frames else pd.DataFrame()

    out_df.to_csv(Out_file, index=False, sep='\t')
121
122
def build_arg_parser():
    """Build the command-line parser of the descriptor script.

    Returns:
        argparse.ArgumentParser exposing -I/--InFile, -l/--Lamda,
        -w/--Weight, -t/--DesType and -O/--Out_file.
    """
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument("-I", "--InFile",
                        required=True,
                        help="input peptide file; lines containing '>' are "
                             "skipped, every other line is one sequence")

    parser.add_argument("-l", "--Lamda",
                        required=False,
                        type=int,
                        default=50,
                        help="lamda value passed to the PAAC and APAAC "
                             "descriptors (integer, default 50)")

    parser.add_argument("-w", "--Weight",
                        required=False,
                        type=float,
                        default=0.5,
                        help="weight value passed to the PAAC and APAAC "
                             "descriptors (float, default 0.5)")

    parser.add_argument("-t", "--DesType",
                        required=True,
                        help="descriptor type: PAAC, APAAC, CTD, DPComp, "
                             "GearyAuto, MoranAuto, MoreauBrotoAuto, QSO, "
                             "SOCN, TPComp, All or BinaryDescriptor")

    parser.add_argument("-O", "--Out_file",
                        required=False,
                        default="Out.tsv",
                        help="Path to target tsv file")

    return parser


if __name__ == "__main__":

    args = build_arg_parser().parse_args()
    Decriptor_generator(args.InFile, args.Lamda, args.Weight, args.DesType, args.Out_file)
157
158