comparison kinatestid_r/Kinatest-R.R @ 14:da1012f014bd draft

Uploaded
author jfb
date Thu, 08 Feb 2018 15:29:56 -0500
parents
children 15b5d4ae4480
comparison
equal deleted inserted replaced
13:d71eb1d66a88 14:da1012f014bd
1
# ---- Inputs ------------------------------------------------------------------
# Three CSV tables are read from fixed relative paths. Text columns stay
# character vectors (stringsAsFactors = FALSE) instead of becoming factors.
ImportedSubstrateList <- read.csv("input1", stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv("input2", stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv("input3", stringsAsFactors = FALSE)

# ---- Names used for outputs / resources --------------------------------------
# These are plain strings; nothing is opened or written at this point.
ScreenerFilename <- "screener"

FILENAME <- "output1.csv"
FILENAME2 <- "output2.csv"
FILENAME3 <- "output3.csv"

OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
# NOTE(review): `SDtable` is a name string here, but the same variable is
# reassigned to a numeric matrix further down, so this value does not survive.
SDtable <- "SDtableforthisKinase"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"
41
42
43
# Build `substrates`: a character matrix with one row per imported substrate
# and 15 columns, filled from columns 4:18 of `ImportedSubstrateList`.
#   * Row 1 of the imported table is skipped (table row j + 1 -> matrix row j),
#     exactly as before.
#   * Column 8 of every motif is overwritten with "Y".
#
# The row count is clamped at zero and iterated with seq_len(), so a table
# with fewer than two rows yields an empty 0 x 15 matrix. The previous
# `2:nrow(...)` loop counted down (2, 1) in that case and failed with a
# subscript-out-of-bounds error.
n_substrates <- max(nrow(ImportedSubstrateList) - 1, 0)
substrates <- matrix("A", nrow = n_substrates, ncol = 15)

for (j in seq_len(n_substrates)) {
  # unlist() flattens the one-row data-frame slice into a length-15 vector.
  substratemotif <- unlist(ImportedSubstrateList[j + 1, 4:18])
  substratemotif[8] <- "Y"
  substrates[j, ] <- substratemotif
}
56
57 # SpacesToOs<-c(""="O",)
58 # substrates<-SpacesToOs[substrates]
59
# Value in the last row, second column of the background-frequency table.
# NOTE(review): this bare expression is not assigned to anything; when the
# script runs at top level it is presumably only auto-printed. Kept unchanged.
SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency), 2]

# Per-residue background statistics.
# Columns 2:21 of `SubstrateBackgroundFrequency` are summarised in this order;
# the letters follow the original variable names (Amean = column 2, ...,
# Ymean = column 21).
background_residues <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)
background_columns <- seq_along(background_residues) + 1L

# Each column is coerced with as.numeric() once and reused for both the mean
# and the SD; entries that do not parse become NA and are dropped by
# na.rm = TRUE.
background_values <- lapply(
  background_columns,
  function(col_idx) as.numeric(SubstrateBackgroundFrequency[, col_idx])
)

AllMeans <- vapply(background_values, mean, numeric(1), na.rm = TRUE)
# this is subbackfreq SDs
AllSDs <- vapply(background_values, sd, numeric(1), na.rm = TRUE)

# The rest of the script refers to the individual scalars Amean ... Ymean and
# Asd ... Ysd, so they are still defined, one pair per residue.
for (residue_idx in seq_along(background_residues)) {
  assign(paste0(background_residues[residue_idx], "mean"), AllMeans[residue_idx])
  assign(paste0(background_residues[residue_idx], "sd"), AllSDs[residue_idx])
}

# 20 x 2 matrix: column "AllMeans" and column "AllSDs".
SBF_statisticalvalues <- cbind(AllMeans, AllSDs)
111
# create the percent table
#
# For each of the 20 residues (rows) and each substrate position (columns),
# PercentTable holds
#   100 * (substrates with that residue at that position)
#       / (substrates whose entry at that position is not "")
# A position where every entry is "" gives 0 / 0 = NaN, as before.
percent_residues <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)
n_positions <- ncol(substrates)

# Number of entries equal to `residue` in every column of `seqs`.
# %in% is used so that NA entries simply count as "no match".
count_by_position <- function(seqs, residue) {
  vapply(
    seq_len(ncol(seqs)),
    function(pos) sum(seqs[, pos] %in% residue),
    integer(1)
  )
}

blank_counts <- count_by_position(substrates, "")
occupied_counts <- nrow(substrates) - blank_counts

# vapply() returns positions x residues; transpose to residues x positions.
residue_fractions <- t(vapply(
  percent_residues,
  function(aa) count_by_position(substrates, aa) / occupied_counts,
  numeric(n_positions)
))
dimnames(residue_fractions) <- NULL

# Backward compatibility: the original code created one object per column
# (Column1.., spaces1..), one scalar per cell (A1 .. Y15) and one 1-row matrix
# per residue (AllAs, CllCs, DllDs, ...). Code outside this section may still
# read them, so they are recreated here with the same names and values.
for (pos in seq_len(n_positions)) {
  assign(paste0("Column", pos), substrates[, pos])
  assign(paste0("spaces", pos), blank_counts[pos])
}
for (row_idx in seq_along(percent_residues)) {
  aa <- percent_residues[row_idx]
  cell_names <- paste0(aa, seq_len(n_positions))
  for (pos in seq_len(n_positions)) {
    assign(cell_names[pos], residue_fractions[row_idx, pos])
  }
  # Row objects follow the pattern <aa>ll<aa>s: AllAs, CllCs, ..., YllYs.
  assign(
    paste0(aa, "ll", aa, "s"),
    matrix(
      residue_fractions[row_idx, ],
      nrow = 1,
      dimnames = list(NULL, cell_names)
    )
  )
}

# this is substrate percents
# Row order: A C D E F G H I K L M N P Q R S T V W Y
PercentTable <- residue_fractions * 100
# The original rbind() took its column names from the first row object
# (A1 .. A15); they end up as column names of the output data frame.
colnames(PercentTable) <- paste0(percent_residues[1], seq_len(n_positions))
508
# create the SD table
#
# Each cell is the observed percentage expressed as a z-score against the
# background for the same residue:
#   (PercentTable[residue, position] - background mean) / background SD
#
# AllMeans / AllSDs hold Amean..Ymean / Asd..Ysd in the same order as the rows
# of PercentTable. A vector as long as the number of rows recycles down each
# column, so a single expression applies the matching mean and SD to every row.
# unname() strips the dimnames inherited from PercentTable, leaving a plain
# matrix with the same dimensions.
SDtable <- unname((PercentTable - AllMeans) / AllSDs)
534
535
# Label vector for the output tables: a header cell ("Letter") followed by the
# 20 residue codes in the same order as the rows of SDtable / PercentTable.
SetOfAAs <- c(
  "Letter",
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)

# For every position: how many substrates carry, at that position, a residue
# whose z-score there is above 2.
#
# Fixes relative to the previous nested loop:
#   * Off-by-one: row j of SDtable was paired with SetOfAAs[j], but SetOfAAs
#     starts with "Letter". Row 1 (A) was counted as "Letter", row 2 (C) as
#     "A", and so on, and "Y" was never counted. The residue codes are now
#     taken from SetOfAAs[-1].
#   * which() ignores NA / NaN z-scores instead of aborting in `if (NA)`.
#   * %in% is used for the comparison, as elsewhere in the script, so NA
#     entries in `substrates` do not turn the count into NA.
sigma_residues <- SetOfAAs[-1]

SumOfSigmaAAs <- vapply(
  seq_len(15),
  function(pos) {
    z_scores <- SDtable[seq_along(sigma_residues), pos]
    enriched <- sigma_residues[which(z_scores > 2)]
    # Residue codes are distinct, so one membership test equals the sum of
    # the per-residue counts.
    as.numeric(sum(substrates[, pos] %in% enriched))
  },
  numeric(1)
)
552
553 # AAs1<-length(substrates[,1])-sum(substrates[,1]=="")
554 # AAs2<-length(substrates[,2])-sum(substrates[,2]=="")
555 # AAs3<-length(substrates[,3])-sum(substrates[,3]=="")
556 # AAs4<-length(substrates[,4])-sum(substrates[,4]=="")
557 # AAs5<-length(substrates[,5])-sum(substrates[,5]=="")
558 # AAs6<-length(substrates[,6])-sum(substrates[,6]=="")
559 # AAs7<-length(substrates[,7])-sum(substrates[,7]=="")
560 # AAs8<-length(substrates[,8])-sum(substrates[,8]=="")
561 # AAs9<-length(substrates[,9])-sum(substrates[,9]=="")
562 #
563 #
564 #
565 # #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9)
566 # AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
567 # length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
568 # length(substrates[,9]))
569
# For every position: the count expected from the background frequencies for
# the residues whose z-score at that position is above 2.
#   sum(background mean percent of those residues)
#     * (number of substrates whose entry at that position is not "") / 100
#
# which() skips NA / NaN z-scores (for example 0 / 0 from a residue with zero
# background SD); the previous `if (SDtable[j, i] > 2)` stopped the script
# with "missing value where TRUE/FALSE needed" in that case.
SumOfExpectedSigmaAAs <- vapply(
  seq_len(15),
  function(pos) {
    enriched_rows <- which(SDtable[seq_along(AllMeans), pos] > 2)
    occupied <- sum(!(substrates[, pos] %in% ""))
    sum(AllMeans[enriched_rows]) * occupied / 100
  },
  numeric(1)
)
582
# Observed / expected ratio per position, stacked under its two inputs
# (3 rows x 15 positions).
SelectivityRow <- SumOfSigmaAAs / SumOfExpectedSigmaAAs
SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)

# Label column for the tables below, as a one-column matrix.
SetOfAAs <- matrix(SetOfAAs, ncol = 1)

# Header row: positions -7 .. 7.
HeaderSD <- -7:7

# Keep the un-headed z-score matrix in SDtableu, then put the header row on
# top and attach the label column. The argument names passed to data.frame()
# are left as they were because they become column names.
SDtableu <- SDtable
SDtable <- rbind(HeaderSD, SDtableu)
rownames(SDtable) <- NULL
SDtable <- data.frame(SetOfAAs, SDtable)

# Same header / label treatment for the percent table.
PercentTable <- rbind(HeaderSD, PercentTable)
rownames(PercentTable) <- NULL
PercentTable <- data.frame(SetOfAAs, PercentTable)

# Ratio of the summed `Number.of.pY` column to the summed `Number.of.Y`
# column, after numeric coercion and removal of NA entries.
numberofY <- as.numeric(SubstrateBackgroundFrequency$Number.of.Y)
numberofPY <- as.numeric(SubstrateBackgroundFrequency$Number.of.pY)
numberofY <- numberofY[!is.na(numberofY)]
numberofPY <- numberofPY[!is.na(numberofPY)]

NormalizationScore <- sum(numberofPY) / sum(numberofY)
604
605 # positions<-matrix(data = NA, nrow=20,ncol = 15)
606 #
607 # #column1
608 #
609 # for (q in 1:15) {
610 # sA<-sum(substrates[,i]=="A")
611 # positions[1,i]<-sA
612 # sC<-sum(substrates[,i]=="C")
613 # positions[2,i]<-sC
614 # sD<-sum(substrates[,i]=="D")
615 # positions[3,i]<-sD
616 # sE<-sum(substrates[,i]=="E")
617 # positions[4,i]<-sE
618 # sF<-sum(substrates[,i]=="F")
619 # sG<-sum(substrates[,i]=="G")
620 # sH<-sum(substrates[,i]=="H")
621 # sI<-sum(substrates[,i]=="I")
622 # sK<-sum(substrates[,i]=="K")
623 # sL<-sum(substrates[,i]=="L")
624 # sM<-sum(substrates[,i]=="M")
625 # sN<-sum(substrates[,i]=="N")
626 # sP<-sum(substrates[,i]=="P")
627 # sQ<-sum(substrates[,i]=="Q")
628 # sR<-sum(substrates[,i]=="R")
629 # sS<-sum(substrates[,i]=="S")
630 # sT<-sum(substrates[,i]=="T")
631 # sV<-sum(substrates[,i]=="V")
632 # sW<-sum(substrates[,i]=="W")
633 # sY<-sum(substrates[,i]=="Y")
634 # positions[5,i]<-sF
635 # positions[6,i]<-sG
636 # positions[7,i]<-sH
637 # positions[8,i]<-sI
638 # positions[9,i]<-sK
639 # positions[10,i]<-sL
640 # positions[11,i]<-sM
641 # positions[12,i]<-sN
642 # positions[13,i]<-sP
643 # positions[14,i]<-sQ
644 # positions[15,i]<-sR
645 # positions[16,i]<-sS
646 # positions[17,i]<-sT
647 # positions[18,i]<-sV
648 # positions[19,i]<-sW
649 # positions[20,i]<-sY
650 # }
651
# Position table: for every motif position (column of `substrates`), count how
# many substrates carry each of the 20 amino acids at that position. The table
# feeds the endogenous probability matrix computed further down.
#
# Rows follow the residue order A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R,
# S, T, V, W, Y; columns are the 15 motif positions.
position_residues <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                       "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
position_count <- 15

# Column names A1..A15 and absent row names match what the original
# rbind(cbind(A1, ..., A15), ...) construction produced.
PositionTable <- matrix(
  0L,
  nrow = length(position_residues),
  ncol = position_count,
  dimnames = list(NULL, paste0("A", seq_len(position_count)))
)

for (position_index in seq_len(position_count)) {
  position_column <- substrates[, position_index]

  # Legacy globals Column1..Column15 and spaces1..spaces15 are still published
  # because code below this block reads the Column* vectors.
  assign(paste0("Column", position_index), position_column)
  assign(paste0("spaces", position_index), sum(position_column %in% ""))

  for (residue_index in seq_along(position_residues)) {
    # %in% (rather than ==) so that NA entries count as "no match".
    residue_hits <- sum(position_column %in% position_residues[residue_index])

    # Legacy per-cell globals A1..Y15.
    assign(paste0(position_residues[residue_index], position_index),
           residue_hits)
    PositionTable[residue_index, position_index] <- residue_hits
  }
}

# Legacy per-residue 1 x 15 row matrices: AllAs, CllCs, DllDs, ..., YllYs.
for (residue_index in seq_along(position_residues)) {
  residue_code <- position_residues[residue_index]
  residue_row <- PositionTable[residue_index, , drop = FALSE]
  colnames(residue_row) <- paste0(residue_code, seq_len(position_count))
  assign(paste0(residue_code, "ll", residue_code, "s"), residue_row)
}
# Endogenous probability matrix (EPM): each residue count is divided by
# (number of non-empty entries at that position) * 0.01 * (background mean for
# that residue). A zero count is replaced by 1 / (non-empty entries at that
# position) before dividing, so no cell ends up exactly zero.
dim(PositionTable)  # top-level expression: prints the dimensions under Rscript
EPMtable <- PositionTable

# Non-empty entries per motif position. `==` is used on purpose (as before):
# an NA in a column makes that position's total NA.
columns <- vapply(
  seq_len(15),
  function(position_index) {
    length(substrates[, position_index]) -
      sum(substrates[, position_index] == "")
  },
  integer(1)
)

# denominators[y, z] = columns[z] * .01 * AllMeans[y]
denominators <- outer(as.numeric(AllMeans[seq_len(20)]), columns * .01)

# Numerators: observed counts, with the 1 / columns[z] pseudocount wherever
# the observed count is zero.
pseudocounts <- matrix(1 / columns, nrow = 20, ncol = 15, byrow = TRUE)
numerators <- PositionTable
storage.mode(numerators) <- "double"
zero_cells <- PositionTable == 0
numerators[zero_cells] <- pseudocounts[zero_cells]

# `[]<-` keeps the dimnames EPMtable inherited from PositionTable.
EPMtable[] <- numerators / denominators
1077 #here I created the endogenous probability matrix
1078 #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs
1079
1080
1081
1082
1083
1084 # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)
1085 # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)
1086 # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)
1087 # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)
1088 # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)
1089
# ---- Write result tables ----
# FILENAME  receives the SD table followed by the percent table.
# FILENAME2 receives the site selectivity matrix, the endogenous probability
# matrix and the normalization score.
# Everything is appended as comma-separated rows without column headers.

# Append `x` to `file` as CSV rows (no header line).
append_csv_rows <- function(x, file, row.names = FALSE) {
  write.table(x, file = file, append = TRUE, sep = ",",
              row.names = row.names, col.names = FALSE)
}

NormalizationScore <- c("Normalization Score", NormalizationScore)

append_csv_rows(c("SD Table"), FILENAME)
append_csv_rows(SDtable, FILENAME)
append_csv_rows(c("Percent Table"), FILENAME)
append_csv_rows(PercentTable, FILENAME)

# EPM with a leading row of position labels (-7..7) and a leading column of
# residue labels taken from SetOfAAs.
HeaderSD <- c(-7:7)
EPMtableu <- rbind(HeaderSD, EPMtable)
row.names(EPMtableu) <- NULL
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

append_csv_rows("Site Selectivity Matrix", FILENAME2)

# Two header rows: a blank spacer row, then "Position" followed by -7..7.
# NOTE(review): the global `head` masks utils::head wherever `head` is passed
# as a value (e.g. lapply(x, head)); it is kept because later code may read it.
head <- matrix(data = rep(" ", times = 16), nrow = 1)
SelectivityHeader <- rbind(
  head,
  matrix(data = c("Position", -7:7), nrow = 1)
)

append_csv_rows(SelectivityHeader, FILENAME2)
# Row names are written for the selectivity sheet only.
append_csv_rows(SelectivitySheet, FILENAME2, row.names = TRUE)
append_csv_rows(c("Endogenous Probability Matrix"), FILENAME2)
append_csv_rows(EPMtableu, FILENAME2)
append_csv_rows(NormalizationScore, FILENAME2)
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148 #test myself: this script should take in amino acids for each of the 9 positions and give out every single combination of those AAs
1149
# TODO: fix it so that the accession numbers stay with the substrates.
# Also, the neg-false constant is the totally unphosphorylated Ys found by the FASTA-2-CSV system (UniProt).
1152
# HOW MANY: if there are two AAs in each position you get 2^9, so I assume the total is:
# (number in position -4) * (number in position -3) * (number in position -2) * ... = total
1155 # require(rJava)
1156 # require(xlsxjars)
1157 # require(xlsx)
1158 # # require(readxl)
1159
1160 #View(SDtable)
# ---- Choose the residues allowed at each motif position ----
# For every motif position (-7..+7) keep the residues whose SD score is above
# 2; if none qualifies, fall back to the residue(s) with the highest score.
#
# NOTE(review): the 2:21 / 2:16 slice presumably skips a label row and a label
# column in SDtable - confirm. If SDtable is a character matrix, `> 2` and
# `max()` compare as strings; verify that the scores are numeric here.
bareSDs <- SDtable[2:21, 2:16]
goodones <- bareSDs > 2

# Row index within bareSDs -> one-letter amino-acid code.
aa_props2 <- c("1" = "A", "2" = "C", "3" = "D", "4" = "E", "5" = "F",
               "6" = "G", "7" = "H", "8" = "I", "9" = "K", "10" = "L",
               "11" = "M", "12" = "N", "13" = "P", "14" = "Q", "15" = "R",
               "16" = "S", "17" = "T", "18" = "V", "19" = "W", "20" = "Y")

# Return the one-letter codes selected for column `position_index` of bareSDs.
select_position_residues <- function(position_index) {
  scores <- bareSDs[, position_index]
  chosen <- which(goodones[, position_index] %in% TRUE)

  # Fallback: best-scoring residue(s). na.rm = TRUE matters here: a single NA
  # score used to turn max() into NA, which left the position with no residue.
  # An all-NA column still yields no residue, as before.
  if (length(chosen) < 1 && !all(is.na(scores))) {
    chosen <- which(scores == max(scores, na.rm = TRUE))
  }

  # sapply() is kept so the result has the same shape and names as before.
  sapply(chosen, function(x) aa_props2[x])
}

# m = minus, d0 = the central position, p = plus.
Positionm7 <- select_position_residues(1)
Positionm6 <- select_position_residues(2)
Positionm5 <- select_position_residues(3)
Positionm4 <- select_position_residues(4)
Positionm3 <- select_position_residues(5)
Positionm2 <- select_position_residues(6)
Positionm1 <- select_position_residues(7)
Positiond0 <- select_position_residues(8)
Positionp1 <- select_position_residues(9)
Positionp2 <- select_position_residues(10)
Positionp3 <- select_position_residues(11)
Positionp4 <- select_position_residues(12)
Positionp5 <- select_position_residues(13)
Positionp6 <- select_position_residues(14)
Positionp7 <- select_position_residues(15)
1215
1216
1217 # Positionm7<-c("D","H","N","V")
1218 # Positionm6<-c("E","V")
1219 # Positionm5<-c("D","H")
1220 # Positionm4<-c("D","N")
1221 # Positionm3<-c("D","E","F","Q")
1222 # Positionm2<-c("D","N","Q","S")
1223 # Positionm1<-c("F","I","L")
1224 # Positiond0<-c("Y")
1225 # Positionp1<-c("A","E")
1226 # Positionp2<-c("T","S","Q","E")
1227 # Positionp3<-c("V")
1228 # Positionp4<-c("K")
1229 # Positionp5<-c("K")
1230 # Positionp6<-c("K")
1231 # Positionp7<-c("R")
1232 #this is where the amino acids for each position are given. m means minus, p mean plus
1233 ########################################
1234 # ScreenerFilename<-"C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls"
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
# ---- Load the screener sheet and split it per kinase ----
# The screener CSV is read without a header row. Each kinase occupies 24
# consecutive rows, and a new block starts every 25 rows beginning at row 2
# (rows 2-25, 27-50, ..., 302-325); rows 1, 26, 51, ... are not used here.
screaner <- read.csv(ScreenerFilename, header = FALSE, stringsAsFactors = FALSE)

# Rows of `screaner` belonging to the `block_number`-th kinase (1-based).
screener_block <- function(block_number) {
  last_row <- 25 * block_number
  screaner[(last_row - 23):last_row, ]
}

Abl  <- screener_block(1)   # rows   2:25
Arg  <- screener_block(2)   # rows  27:50
Btk  <- screener_block(3)   # rows  52:75
Csk  <- screener_block(4)   # rows  77:100
Fyn  <- screener_block(5)   # rows 102:125
Hck  <- screener_block(6)   # rows 127:150
JAK2 <- screener_block(7)   # rows 152:175
Lck  <- screener_block(8)   # rows 177:200
Lyn  <- screener_block(9)   # rows 202:225
Pyk2 <- screener_block(10)  # rows 227:250
Src  <- screener_block(11)  # rows 252:275
Syk  <- screener_block(12)  # rows 277:300
Yes  <- screener_block(13)  # rows 302:325

# Two questions:
# 1. Why are we doing BTK when we already have a bioinformatics page about it?
# 2. I reran everything and only get 96 positions of interest in the SD table.

#Do_You_want_An_Excel_Output_Questionmark<-"NO"
GeneratedPeptidesFile <- "GeneratedPeptidesFile.csv"
1276
1277
1278 # Abl<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 4)
1279 # Arg<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 5)
1280 # Btk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 6)
1281 # Csk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 7)
1282 # Fyn<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 8)
1283 # Hck<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 9)
1284 # JAK2<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 10)
1285 # Lck<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 11)
1286 # Lyn<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 12)
1287 # Pyk2<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 13)
1288 # Src<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 14)
1289 # Syk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 15)
1290 # Yes<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 16)
1291 #
1292
1293
# ---- Residue code -> numeric index ----
# Each one-letter amino-acid code becomes a global variable holding its
# 1-based index (A = 1, C = 2, ..., Y = 20).
# NOTE(review): this overwrites the globals `T` and `F` (the TRUE/FALSE
# shorthands) with 17 and 5; code below must spell out TRUE/FALSE.
amino_acid_codes <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                      "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
for (code_index in seq_along(amino_acid_codes)) {
  assign(amino_acid_codes[code_index], as.numeric(code_index))
}

# Lookup table; "xY" shares Y's index and "O" gets the extra index 21.
aa_props <- c(
  setNames(as.numeric(seq_along(amino_acid_codes)), amino_acid_codes),
  "xY" = Y,
  "O" = 21
)

# Translate a vector of residue codes into their indices. sapply() is kept so
# the result has the same shape and names as before.
residues_to_indices <- function(residues) {
  sapply(residues, function(x) aa_props[x])
}

# number1..number9 cover positions -4..+4; positions +5..+7 follow as
# number10..number12, and positions -5..-7 come last as number13..number15.
number1  <- residues_to_indices(Positionm4)
number2  <- residues_to_indices(Positionm3)
number3  <- residues_to_indices(Positionm2)
number4  <- residues_to_indices(Positionm1)
number5  <- residues_to_indices(Positiond0)
number6  <- residues_to_indices(Positionp1)
number7  <- residues_to_indices(Positionp2)
number8  <- residues_to_indices(Positionp3)
number9  <- residues_to_indices(Positionp4)
number10 <- residues_to_indices(Positionp5)
number11 <- residues_to_indices(Positionp6)
number12 <- residues_to_indices(Positionp7)
number13 <- residues_to_indices(Positionm5)
number14 <- residues_to_indices(Positionm6)
number15 <- residues_to_indices(Positionm7)
1333
1334 # number1<-Positionm4
1335 # number2<-Positionm3
1336 # number3<-Positionm2
1337 # number4<-Positionm1
1338 # number5<-Positiond0
1339 # number6<-Positionp1
1340 # number7<-Positionp2
1341 # number8<-Positionp3
1342 # number9<-Positionp4
1343
1344 #############################
1345 #here I create the Abl seqs with proper value for each number
1346 if (1==0){
1347 Ablnumber1<- gsub("A",A,Ablnumber1,perl = TRUE)
1348 Ablnumber1<- gsub("C",C,Ablnumber1,perl = TRUE)
1349 Ablnumber1<- gsub("D",D,Ablnumber1,perl = TRUE)
1350 Ablnumber1<- gsub("E",E,Ablnumber1,perl = TRUE)
1351 Ablnumber1<- gsub("F",F,Ablnumber1,perl = TRUE)
1352 Ablnumber1<- gsub("G",G,Ablnumber1,perl = TRUE)
1353 Ablnumber1<- gsub("H",H,Ablnumber1,perl = TRUE)
1354 Ablnumber1<- gsub("I",I,Ablnumber1,perl = TRUE)
1355 Ablnumber1<- gsub("K",K,Ablnumber1,perl = TRUE)
1356 Ablnumber1<- gsub("L",L,Ablnumber1,perl = TRUE)
1357 Ablnumber1<- gsub("M",M,Ablnumber1,perl = TRUE)
1358 Ablnumber1<- gsub("N",N,Ablnumber1,perl = TRUE)
1359 Ablnumber1<- gsub("P",P,Ablnumber1,perl = TRUE)
1360 Ablnumber1<- gsub("Q",Q,Ablnumber1,perl = TRUE)
1361 Ablnumber1<- gsub("R",R,Ablnumber1,perl = TRUE)
1362 Ablnumber1<- gsub("S",S,Ablnumber1,perl = TRUE)
1363 Ablnumber1<- gsub("T",T,Ablnumber1,perl = TRUE)
1364 Ablnumber1<- gsub("V",V,Ablnumber1,perl = TRUE)
1365 Ablnumber1<- gsub("W",W,Ablnumber1,perl = TRUE)
1366 Ablnumber1<- gsub("Y",Y,Ablnumber1,perl = TRUE)
1367
1368 Ablnumber2<- gsub("A",A,Ablnumber2,perl = TRUE)
1369 Ablnumber2<- gsub("C",C,Ablnumber2,perl = TRUE)
1370 Ablnumber2<- gsub("D",D,Ablnumber2,perl = TRUE)
1371 Ablnumber2<- gsub("E",E,Ablnumber2,perl = TRUE)
1372 Ablnumber2<- gsub("F",F,Ablnumber2,perl = TRUE)
1373 Ablnumber2<- gsub("G",G,Ablnumber2,perl = TRUE)
1374 Ablnumber2<- gsub("H",H,Ablnumber2,perl = TRUE)
1375 Ablnumber2<- gsub("I",I,Ablnumber2,perl = TRUE)
1376 Ablnumber2<- gsub("K",K,Ablnumber2,perl = TRUE)
1377 Ablnumber2<- gsub("L",L,Ablnumber2,perl = TRUE)
1378 Ablnumber2<- gsub("M",M,Ablnumber2,perl = TRUE)
1379 Ablnumber2<- gsub("N",N,Ablnumber2,perl = TRUE)
1380 Ablnumber2<- gsub("P",P,Ablnumber2,perl = TRUE)
1381 Ablnumber2<- gsub("Q",Q,Ablnumber2,perl = TRUE)
1382 Ablnumber2<- gsub("R",R,Ablnumber2,perl = TRUE)
1383 Ablnumber2<- gsub("S",S,Ablnumber2,perl = TRUE)
1384 Ablnumber2<- gsub("T",T,Ablnumber2,perl = TRUE)
1385 Ablnumber2<- gsub("V",V,Ablnumber2,perl = TRUE)
1386 Ablnumber2<- gsub("W",W,Ablnumber2,perl = TRUE)
1387 Ablnumber2<- gsub("Y",Y,Ablnumber2,perl = TRUE)
1388
1389 Ablnumber3<- gsub("A",A,Ablnumber3,perl = TRUE)
1390 Ablnumber3<- gsub("C",C,Ablnumber3,perl = TRUE)
1391 Ablnumber3<- gsub("D",D,Ablnumber3,perl = TRUE)
1392 Ablnumber3<- gsub("E",E,Ablnumber3,perl = TRUE)
1393 Ablnumber3<- gsub("F",F,Ablnumber3,perl = TRUE)
1394 Ablnumber3<- gsub("G",G,Ablnumber3,perl = TRUE)
1395 Ablnumber3<- gsub("H",H,Ablnumber3,perl = TRUE)
1396 Ablnumber3<- gsub("I",I,Ablnumber3,perl = TRUE)
1397 Ablnumber3<- gsub("K",K,Ablnumber3,perl = TRUE)
1398 Ablnumber3<- gsub("L",L,Ablnumber3,perl = TRUE)
1399 Ablnumber3<- gsub("M",M,Ablnumber3,perl = TRUE)
1400 Ablnumber3<- gsub("N",N,Ablnumber3,perl = TRUE)
1401 Ablnumber3<- gsub("P",P,Ablnumber3,perl = TRUE)
1402 Ablnumber3<- gsub("Q",Q,Ablnumber3,perl = TRUE)
1403 Ablnumber3<- gsub("R",R,Ablnumber3,perl = TRUE)
1404 Ablnumber3<- gsub("S",S,Ablnumber3,perl = TRUE)
1405 Ablnumber3<- gsub("T",T,Ablnumber3,perl = TRUE)
1406 Ablnumber3<- gsub("V",V,Ablnumber3,perl = TRUE)
1407 Ablnumber3<- gsub("W",W,Ablnumber3,perl = TRUE)
1408 Ablnumber3<- gsub("Y",Y,Ablnumber3,perl = TRUE)
1409
1410 Ablnumber4<- gsub("A",A,Ablnumber4,perl = TRUE)
1411 Ablnumber4<- gsub("C",C,Ablnumber4,perl = TRUE)
1412 Ablnumber4<- gsub("D",D,Ablnumber4,perl = TRUE)
1413 Ablnumber4<- gsub("E",E,Ablnumber4,perl = TRUE)
1414 Ablnumber4<- gsub("F",F,Ablnumber4,perl = TRUE)
1415 Ablnumber4<- gsub("G",G,Ablnumber4,perl = TRUE)
1416 Ablnumber4<- gsub("H",H,Ablnumber4,perl = TRUE)
1417 Ablnumber4<- gsub("I",I,Ablnumber4,perl = TRUE)
1418 Ablnumber4<- gsub("K",K,Ablnumber4,perl = TRUE)
1419 Ablnumber4<- gsub("L",L,Ablnumber4,perl = TRUE)
1420 Ablnumber4<- gsub("M",M,Ablnumber4,perl = TRUE)
1421 Ablnumber4<- gsub("N",N,Ablnumber4,perl = TRUE)
1422 Ablnumber4<- gsub("P",P,Ablnumber4,perl = TRUE)
1423 Ablnumber4<- gsub("Q",Q,Ablnumber4,perl = TRUE)
1424 Ablnumber4<- gsub("R",R,Ablnumber4,perl = TRUE)
1425 Ablnumber4<- gsub("S",S,Ablnumber4,perl = TRUE)
1426 Ablnumber4<- gsub("T",T,Ablnumber4,perl = TRUE)
1427 Ablnumber4<- gsub("V",V,Ablnumber4,perl = TRUE)
1428 Ablnumber4<- gsub("W",W,Ablnumber4,perl = TRUE)
1429 Ablnumber4<- gsub("Y",Y,Ablnumber4,perl = TRUE)
1430
1431 Ablnumber5<- gsub("A",A,Ablnumber5,perl = TRUE)
1432 Ablnumber5<- gsub("C",C,Ablnumber5,perl = TRUE)
1433 Ablnumber5<- gsub("D",D,Ablnumber5,perl = TRUE)
1434 Ablnumber5<- gsub("E",E,Ablnumber5,perl = TRUE)
1435 Ablnumber5<- gsub("F",F,Ablnumber5,perl = TRUE)
1436 Ablnumber5<- gsub("G",G,Ablnumber5,perl = TRUE)
1437 Ablnumber5<- gsub("H",H,Ablnumber5,perl = TRUE)
1438 Ablnumber5<- gsub("I",I,Ablnumber5,perl = TRUE)
1439 Ablnumber5<- gsub("K",K,Ablnumber5,perl = TRUE)
1440 Ablnumber5<- gsub("L",L,Ablnumber5,perl = TRUE)
1441 Ablnumber5<- gsub("M",M,Ablnumber5,perl = TRUE)
1442 Ablnumber5<- gsub("N",N,Ablnumber5,perl = TRUE)
1443 Ablnumber5<- gsub("P",P,Ablnumber5,perl = TRUE)
1444 Ablnumber5<- gsub("Q",Q,Ablnumber5,perl = TRUE)
1445 Ablnumber5<- gsub("R",R,Ablnumber5,perl = TRUE)
1446 Ablnumber5<- gsub("S",S,Ablnumber5,perl = TRUE)
1447 Ablnumber5<- gsub("T",T,Ablnumber5,perl = TRUE)
1448 Ablnumber5<- gsub("V",V,Ablnumber5,perl = TRUE)
1449 Ablnumber5<- gsub("W",W,Ablnumber5,perl = TRUE)
1450 Ablnumber5<- gsub("Y",Y,Ablnumber5,perl = TRUE)
1451
1452 Ablnumber6<- gsub("A",A,Ablnumber6,perl = TRUE)
1453 Ablnumber6<- gsub("C",C,Ablnumber6,perl = TRUE)
1454 Ablnumber6<- gsub("D",D,Ablnumber6,perl = TRUE)
1455 Ablnumber6<- gsub("E",E,Ablnumber6,perl = TRUE)
1456 Ablnumber6<- gsub("F",F,Ablnumber6,perl = TRUE)
1457 Ablnumber6<- gsub("G",G,Ablnumber6,perl = TRUE)
1458 Ablnumber6<- gsub("H",H,Ablnumber6,perl = TRUE)
1459 Ablnumber6<- gsub("I",I,Ablnumber6,perl = TRUE)
1460 Ablnumber6<- gsub("K",K,Ablnumber6,perl = TRUE)
1461 Ablnumber6<- gsub("L",L,Ablnumber6,perl = TRUE)
1462 Ablnumber6<- gsub("M",M,Ablnumber6,perl = TRUE)
1463 Ablnumber6<- gsub("N",N,Ablnumber6,perl = TRUE)
1464 Ablnumber6<- gsub("P",P,Ablnumber6,perl = TRUE)
1465 Ablnumber6<- gsub("Q",Q,Ablnumber6,perl = TRUE)
1466 Ablnumber6<- gsub("R",R,Ablnumber6,perl = TRUE)
1467 Ablnumber6<- gsub("S",S,Ablnumber6,perl = TRUE)
1468 Ablnumber6<- gsub("T",T,Ablnumber6,perl = TRUE)
1469 Ablnumber6<- gsub("V",V,Ablnumber6,perl = TRUE)
1470 Ablnumber6<- gsub("W",W,Ablnumber6,perl = TRUE)
1471 Ablnumber6<- gsub("Y",Y,Ablnumber6,perl = TRUE)
1472
1473 Ablnumber7<- gsub("A",A,Ablnumber7,perl = TRUE)
1474 Ablnumber7<- gsub("C",C,Ablnumber7,perl = TRUE)
1475 Ablnumber7<- gsub("D",D,Ablnumber7,perl = TRUE)
1476 Ablnumber7<- gsub("E",E,Ablnumber7,perl = TRUE)
1477 Ablnumber7<- gsub("F",F,Ablnumber7,perl = TRUE)
1478 Ablnumber7<- gsub("G",G,Ablnumber7,perl = TRUE)
1479 Ablnumber7<- gsub("H",H,Ablnumber7,perl = TRUE)
1480 Ablnumber7<- gsub("I",I,Ablnumber7,perl = TRUE)
1481 Ablnumber7<- gsub("K",K,Ablnumber7,perl = TRUE)
1482 Ablnumber7<- gsub("L",L,Ablnumber7,perl = TRUE)
1483 Ablnumber7<- gsub("M",M,Ablnumber7,perl = TRUE)
1484 Ablnumber7<- gsub("N",N,Ablnumber7,perl = TRUE)
1485 Ablnumber7<- gsub("P",P,Ablnumber7,perl = TRUE)
1486 Ablnumber7<- gsub("Q",Q,Ablnumber7,perl = TRUE)
1487 Ablnumber7<- gsub("R",R,Ablnumber7,perl = TRUE)
1488 Ablnumber7<- gsub("S",S,Ablnumber7,perl = TRUE)
1489 Ablnumber7<- gsub("T",T,Ablnumber7,perl = TRUE)
1490 Ablnumber7<- gsub("V",V,Ablnumber7,perl = TRUE)
1491 Ablnumber7<- gsub("W",W,Ablnumber7,perl = TRUE)
1492 Ablnumber7<- gsub("Y",Y,Ablnumber7,perl = TRUE)
1493
1494 Ablnumber8<- gsub("A",A,Ablnumber8,perl = TRUE)
1495 Ablnumber8<- gsub("C",C,Ablnumber8,perl = TRUE)
1496 Ablnumber8<- gsub("D",D,Ablnumber8,perl = TRUE)
1497 Ablnumber8<- gsub("E",E,Ablnumber8,perl = TRUE)
1498 Ablnumber8<- gsub("F",F,Ablnumber8,perl = TRUE)
1499 Ablnumber8<- gsub("G",G,Ablnumber8,perl = TRUE)
1500 Ablnumber8<- gsub("H",H,Ablnumber8,perl = TRUE)
1501 Ablnumber8<- gsub("I",I,Ablnumber8,perl = TRUE)
1502 Ablnumber8<- gsub("K",K,Ablnumber8,perl = TRUE)
1503 Ablnumber8<- gsub("L",L,Ablnumber8,perl = TRUE)
1504 Ablnumber8<- gsub("M",M,Ablnumber8,perl = TRUE)
1505 Ablnumber8<- gsub("N",N,Ablnumber8,perl = TRUE)
1506 Ablnumber8<- gsub("P",P,Ablnumber8,perl = TRUE)
1507 Ablnumber8<- gsub("Q",Q,Ablnumber8,perl = TRUE)
1508 Ablnumber8<- gsub("R",R,Ablnumber8,perl = TRUE)
1509 Ablnumber8<- gsub("S",S,Ablnumber8,perl = TRUE)
1510 Ablnumber8<- gsub("T",T,Ablnumber8,perl = TRUE)
1511 Ablnumber8<- gsub("V",V,Ablnumber8,perl = TRUE)
1512 Ablnumber8<- gsub("W",W,Ablnumber8,perl = TRUE)
1513 Ablnumber8<- gsub("Y",Y,Ablnumber8,perl = TRUE)
1514
1515 Ablnumber9<- gsub("A",A,Ablnumber9,perl = TRUE)
1516 Ablnumber9<- gsub("C",C,Ablnumber9,perl = TRUE)
1517 Ablnumber9<- gsub("D",D,Ablnumber9,perl = TRUE)
1518 Ablnumber9<- gsub("E",E,Ablnumber9,perl = TRUE)
1519 Ablnumber9<- gsub("F",F,Ablnumber9,perl = TRUE)
1520 Ablnumber9<- gsub("G",G,Ablnumber9,perl = TRUE)
1521 Ablnumber9<- gsub("H",H,Ablnumber9,perl = TRUE)
1522 Ablnumber9<- gsub("I",I,Ablnumber9,perl = TRUE)
1523 Ablnumber9<- gsub("K",K,Ablnumber9,perl = TRUE)
1524 Ablnumber9<- gsub("L",L,Ablnumber9,perl = TRUE)
1525 Ablnumber9<- gsub("M",M,Ablnumber9,perl = TRUE)
1526 Ablnumber9<- gsub("N",N,Ablnumber9,perl = TRUE)
1527 Ablnumber9<- gsub("P",P,Ablnumber9,perl = TRUE)
1528 Ablnumber9<- gsub("Q",Q,Ablnumber9,perl = TRUE)
1529 Ablnumber9<- gsub("R",R,Ablnumber9,perl = TRUE)
1530 Ablnumber9<- gsub("S",S,Ablnumber9,perl = TRUE)
1531 Ablnumber9<- gsub("T",T,Ablnumber9,perl = TRUE)
1532 Ablnumber9<- gsub("V",V,Ablnumber9,perl = TRUE)
1533 Ablnumber9<- gsub("W",W,Ablnumber9,perl = TRUE)
1534 Ablnumber9<- gsub("Y",Y,Ablnumber9,perl = TRUE)
1535 }
1536 ########################################
1537
1538
# Enumerate every combination of the candidate residues chosen for the 15
# motif positions (-7 ... 0 ... +7). Column 1 (position -7) varies slowest and
# column 15 (position +7) varies fastest, i.e. the same row order that a
# 15-deep nested loop with Positionm7 outermost and Positionp7 innermost gives.
residue_sets <- list(Positionm7, Positionm6, Positionm5, Positionm4, Positionm3,
                     Positionm2, Positionm1, Positiond0, Positionp1, Positionp2,
                     Positionp3, Positionp4, Positionp5, Positionp6, Positionp7)

# Numeric codes paired with each residue set, in the same column order:
# number15/14/13 go with positions -7/-6/-5 and number1..number12 with
# positions -4 .. +7.
number_sets <- list(number15, number14, number13, number1, number2, number3,
                    number4, number5, number6, number7, number8, number9,
                    number10, number11, number12)

# Number of candidates per position. prod() works in double precision, so the
# total cannot overflow to NA the way a product of integer lengths can.
set_sizes <- lengths(residue_sets)
total <- prod(set_sizes)

# One row per generated peptide, one column per motif position.
GeneratedPeptides <- matrix(NA, nrow = total, ncol = 15)
NumeratedPeptides <- GeneratedPeptides

# Fill column by column instead of row by row. For a given column, each
# candidate is repeated once for every combination of the faster-varying
# columns to its right, and that whole pattern is repeated once for every
# combination of the slower-varying columns to its left.
# The guard makes an empty candidate set yield an empty 0 x 15 matrix instead
# of an out-of-bounds error.
if (total > 0) {
  for (column in seq_along(residue_sets)) {
    faster <- prod(set_sizes[-seq_len(column)])
    slower <- prod(set_sizes[seq_len(column - 1)])
    pick <- rep(seq_len(set_sizes[column]), each = faster, times = slower)
    GeneratedPeptides[, column] <- residue_sets[[column]][pick]
    NumeratedPeptides[, column] <- number_sets[[column]][pick]
  }
}

# Kept for parity with the loop-based version, where `count` ended up equal to
# the number of peptides generated.
count <- total
1599 ####################################################################
# Score every generated peptide against this kinase's endogenous probability
# matrix (EPMtableu, built earlier in the script).
ThisKinTable <- EPMtableu #[1:nrow(SDtable),]

# Prepend a column of 1s so that the scoring columns used below start at
# column 3 of ThisKinTable.
TKTcolumn <- c(data = rep(1, times = 21))
TKTcolumn <- as.matrix(TKTcolumn, ncol = 1)
ThisKinTable <- cbind(TKTcolumn, ThisKinTable)

ThisKinGeneratedScores <- rep(NA, times = nrow(GeneratedPeptides))
ThisKinGenWeirdScore <- rep(NA, times = nrow(GeneratedPeptides))

# Motif position 8 (the central residue) is deliberately left out of the
# product; motif position p is read from table column p + 2.
scored_positions <- c(1:7, 9:15)

# Loop-invariant term of the normalised score: score / (score + 1 / norm).
inverse_norm <- 1 / as.numeric(NormalizationScore[2])

# seq_len() keeps the loop from running when no peptides were generated.
for (x in seq_len(nrow(GeneratedPeptides))) {
  # Residue codes are shifted by one to address the matching table row.
  table_rows <- NumeratedPeptides[x, 1:15] + 1

  raw_score <- as.numeric(ThisKinTable[table_rows[1], 3])
  for (position in scored_positions[-1]) {
    raw_score <- raw_score *
      ThisKinTable[as.numeric(table_rows[position]), position + 2]
  }

  ThisKinGeneratedScores[x] <- raw_score
  ThisKinGenWeirdScore[x] <- raw_score / (raw_score + inverse_norm)
}
1622
# Score every generated peptide against one panel kinase table.
#
# kinase_table: table indexed as kinase_table[residue_code, column], where
#               motif position p is read from column p + 1.
# numerated:    matrix of residue codes, one peptide per row, 15 columns.
# n_peptides:   number of peptides to score.
#
# Returns a vector with one score per peptide: the product of the table
# entries for motif positions 1-7 and 9-15 (position 8, the central residue,
# is not scored).
ScoreAgainstPanelKinase <- function(kinase_table, numerated, n_peptides) {
  scored_positions <- c(1:7, 9:15)
  scores <- rep(NA, times = n_peptides)
  # seq_len() keeps the loop from running when there are no peptides.
  for (x in seq_len(n_peptides)) {
    residue_codes <- numerated[x, 1:15]
    score <- kinase_table[residue_codes[1], 2]
    for (position in scored_positions[-1]) {
      score <- score * kinase_table[residue_codes[position], position + 1]
    }
    scores[x] <- score
  }
  scores
}

n_generated <- nrow(GeneratedPeptides)

# One score vector per panel kinase; the variable names are relied on by the
# thresholding code that follows.
AblGeneratedScores <- ScoreAgainstPanelKinase(Abl, NumeratedPeptides, n_generated)
ArgGeneratedScores <- ScoreAgainstPanelKinase(Arg, NumeratedPeptides, n_generated)
BtkGeneratedScores <- ScoreAgainstPanelKinase(Btk, NumeratedPeptides, n_generated)
CskGeneratedScores <- ScoreAgainstPanelKinase(Csk, NumeratedPeptides, n_generated)
FynGeneratedScores <- ScoreAgainstPanelKinase(Fyn, NumeratedPeptides, n_generated)
HckGeneratedScores <- ScoreAgainstPanelKinase(Hck, NumeratedPeptides, n_generated)
JAK2GeneratedScores <- ScoreAgainstPanelKinase(JAK2, NumeratedPeptides, n_generated)
LckGeneratedScores <- ScoreAgainstPanelKinase(Lck, NumeratedPeptides, n_generated)
LynGeneratedScores <- ScoreAgainstPanelKinase(Lyn, NumeratedPeptides, n_generated)
Pyk2GeneratedScores <- ScoreAgainstPanelKinase(Pyk2, NumeratedPeptides, n_generated)
SrcGeneratedScores <- ScoreAgainstPanelKinase(Src, NumeratedPeptides, n_generated)
SykGeneratedScores <- ScoreAgainstPanelKinase(Syk, NumeratedPeptides, n_generated)
YesGeneratedScores <- ScoreAgainstPanelKinase(Yes, NumeratedPeptides, n_generated)
1710
1711
1712
# Convert a percentage threshold into the raw-score cut-off it corresponds to:
# (thresh * norm) / (100 - thresh).
PanelTrueThreshold <- function(norm, thresh) {
  (thresh * norm) / (100 - thresh)
}

# For every panel kinase: row 22, column 1 of its table is read as the value
# whose reciprocal is the normalisation term, and row 24, column 1 as the
# percentage threshold. A generated peptide is flagged "active" for a kinase
# when its raw score exceeds that kinase's cut-off.

AblNorm <- 1 / as.numeric(Abl[22, 1])
AblThresh <- as.numeric(Abl[24, 1])
AblTrueThresh <- PanelTrueThreshold(AblNorm, AblThresh)
AblActive <- unlist(AblGeneratedScores) > AblTrueThresh

ArgNorm <- 1 / as.numeric(Arg[22, 1])
ArgThresh <- as.numeric(Arg[24, 1])
ArgTrueThresh <- PanelTrueThreshold(ArgNorm, ArgThresh)
ArgActive <- unlist(ArgGeneratedScores) > ArgTrueThresh

BtkNorm <- 1 / as.numeric(Btk[22, 1])
BtkThresh <- as.numeric(Btk[24, 1])
BtkTrueThresh <- PanelTrueThreshold(BtkNorm, BtkThresh)
BtkActive <- unlist(BtkGeneratedScores) > BtkTrueThresh

CskNorm <- 1 / as.numeric(Csk[22, 1])
CskThresh <- as.numeric(Csk[24, 1])
CskTrueThresh <- PanelTrueThreshold(CskNorm, CskThresh)
CskActive <- CskGeneratedScores > CskTrueThresh

FynNorm <- 1 / as.numeric(Fyn[22, 1])
FynThresh <- as.numeric(Fyn[24, 1])
FynTrueThresh <- PanelTrueThreshold(FynNorm, FynThresh)
FynActive <- unlist(FynGeneratedScores) > FynTrueThresh

HckNorm <- 1 / as.numeric(Hck[22, 1])
HckThresh <- as.numeric(Hck[24, 1])
HckTrueThresh <- PanelTrueThreshold(HckNorm, HckThresh)
HckActive <- unlist(HckGeneratedScores) > HckTrueThresh

JAK2Norm <- 1 / as.numeric(JAK2[22, 1])
JAK2Thresh <- as.numeric(JAK2[24, 1])
JAK2TrueThresh <- PanelTrueThreshold(JAK2Norm, JAK2Thresh)
# The lower-case "k" is kept on purpose: this name becomes an output column.
JAk2Active <- unlist(JAK2GeneratedScores) > JAK2TrueThresh

LckNorm <- 1 / as.numeric(Lck[22, 1])
LckThresh <- as.numeric(Lck[24, 1])
LckTrueThresh <- PanelTrueThreshold(LckNorm, LckThresh)
LckActive <- unlist(LckGeneratedScores) > LckTrueThresh

LynNorm <- 1 / as.numeric(Lyn[22, 1])
LynThresh <- as.numeric(Lyn[24, 1])
LynTrueThresh <- PanelTrueThreshold(LynNorm, LynThresh)
LynActive <- unlist(LynGeneratedScores) > LynTrueThresh

Pyk2Norm <- 1 / as.numeric(Pyk2[22, 1])
Pyk2Thresh <- as.numeric(Pyk2[24, 1])
Pyk2TrueThresh <- PanelTrueThreshold(Pyk2Norm, Pyk2Thresh)
Pyk2Active <- unlist(Pyk2GeneratedScores) > Pyk2TrueThresh

SrcNorm <- 1 / as.numeric(Src[22, 1])
SrcThresh <- as.numeric(Src[24, 1])
SrcTrueThresh <- PanelTrueThreshold(SrcNorm, SrcThresh)
SrcActive <- unlist(SrcGeneratedScores) > SrcTrueThresh

SykNorm <- 1 / as.numeric(Syk[22, 1])
SykThresh <- as.numeric(Syk[24, 1])
SykTrueThresh <- PanelTrueThreshold(SykNorm, SykThresh)
SykActive <- unlist(SykGeneratedScores) > SykTrueThresh

YesNorm <- 1 / as.numeric(Yes[22, 1])
YesThresh <- as.numeric(Yes[24, 1])
YesTrueThresh <- PanelTrueThreshold(YesNorm, YesThresh)
YesActive <- unlist(YesGeneratedScores) > YesTrueThresh

# Number of panel kinases for which each peptide is flagged active (the
# logical flags are summed left to right).
AllActive <- Reduce(`+`, list(AblActive, ArgActive, BtkActive, CskActive,
                              FynActive, HckActive, JAk2Active, LckActive,
                              LynActive, Pyk2Active, SrcActive, SykActive,
                              YesActive))
1779 #Btkactive+
1780
# Assemble the table of generated peptides with their scores and per-kinase
# activity flags, ordered so peptides active for the fewest panel kinases come
# first.
Scores <- ThisKinGeneratedScores
ThresholdValues <- ThisKinGenWeirdScore

# Collapse the 15 residues of each generated peptide into one motif string.
# vapply over seq_len() returns character(0) for an empty peptide matrix
# instead of indexing a non-existent first row.
FullMotifs <- vapply(
  seq_len(nrow(GeneratedPeptides)),
  function(row) paste(GeneratedPeptides[row, 1:15], sep = "", collapse = ""),
  character(1)
)

# The argument names below become the column names of the output table, so the
# variables are passed by name rather than inlined.
PeptidesWithRanks <- cbind.data.frame(FullMotifs, GeneratedPeptides, Scores,
                                      ThresholdValues)
PeptidesWithRanks <- cbind.data.frame(PeptidesWithRanks, AllActive, AblActive,
                                      ArgActive, BtkActive, CskActive,
                                      FynActive, HckActive, JAk2Active,
                                      LckActive, LynActive, Pyk2Active,
                                      SrcActive, SykActive, YesActive)

ascending_by_activity <- order(PeptidesWithRanks$AllActive, decreasing = FALSE)
RanksPeptides <- PeptidesWithRanks[ascending_by_activity, ]
1794 # PepRankHead<-c(1:9,"Sequence","RPMS","PMS")
1795 # RanksPeptides<-rbind.data.frame(PepRankHead,PeptidesWithRanks)
1796 #head(RanksPeptides)
1797
1798
1799 #now I have to score the negative sequences... for some reason
1800 #write up how we transfect with lipofectamine
1801 #3,4,5 questions
1802
# TODO: paused here at 4:50, hoping that fixing MINERVOTHING so that left spaces work fixes a thing.
# Otherwise: the MCC table was modified part-way through and still needs to be finished.
1805
# Append a row of 1s to the scoring table. Multiplying by an entry from this
# row leaves a score unchanged.
# NOTE(review): presumably this is the row selected for the placeholder residue
# "O" (blank position) via aa_props -- confirm against the aa_props definition.
ThisKinBlanks <- rep(1, times = 17)
ThisKinTable$SetOfAAs <- as.character(ThisKinTable$SetOfAAs)
ThisKinTable <- rbind.data.frame(ThisKinTable, ThisKinBlanks)

# Score the negative (non-substrate) sequences with this kinase's table.
NegativeScores <- rep(NA, times = nrow(NegativeSubstrateList))
NegativeWeirdScores <- rep(NA, times = nrow(NegativeSubstrateList))

# Motif position 8 (the central residue) is not scored; motif position p is
# read from table column p + 2.
scored_positions <- c(1:7, 9:15)

# Loop-invariant term of the normalised score: score / (score + 1 / norm).
inverse_norm <- 1 / as.numeric(NormalizationScore[2])

# seq_len() keeps the loop from running when the negative list is empty.
for (v in seq_len(nrow(NegativeSubstrateList))) {
  # Column 2 holds the sequence as one string; split it into single residues
  # and replace spaces with the placeholder "O".
  motif <- NegativeSubstrateList[v, 2]
  motif <- unlist(strsplit(motif, ""))
  motif <- gsub(" ", "O", motif)

  # Translate residues to numeric codes, then shift by one to address the
  # matching table row.
  motif <- sapply(motif, function(x) aa_props[x])
  table_rows <- motif + 1

  raw_score <- as.numeric(ThisKinTable[table_rows[1], 3])
  for (position in scored_positions[-1]) {
    raw_score <- raw_score *
      ThisKinTable[as.numeric(table_rows[position]), position + 2]
  }

  NegativeScores[v] <- raw_score
  # The normalised score is stored as a percentage.
  NegativeWeirdScores[v] <- (raw_score / (raw_score + inverse_norm)) * 100
}

negativesubstrates <- NegativeSubstrateList[, 2]
NegativeWithScores <- cbind(negativesubstrates, as.character(NegativeScores),
                            as.character(NegativeWeirdScores))
1845
1846
1847 #NEED TO HAVE THE NEGATIVE SUBSTRATES BE OUTPUTTED
1848
# Score the known (positive) substrates with this kinase's table.
PositiveScores <- rep(NA, times = nrow(ImportedSubstrateList))
PositiveWeirdScores <- rep(NA, times = nrow(ImportedSubstrateList))

# Motif position 8 (the central residue) is not scored; motif position p is
# read from table column p + 2.
scored_positions <- c(1:7, 9:15)

# Loop-invariant term of the normalised score: score / (score + 1 / norm).
inverse_norm <- 1 / as.numeric(NormalizationScore[2])

# seq_len() keeps the loop from running when the substrate list is empty.
for (v in seq_len(nrow(ImportedSubstrateList))) {
  # Columns 4-18 hold the 15 motif residues, one per column; empty cells are
  # replaced with the placeholder "O".
  motif <- ImportedSubstrateList[v, 4:18]
  motif <- unlist(motif)
  motif <- gsub("^$", "O", motif)

  # Translate residues to numeric codes, then shift by one to address the
  # matching table row.
  motif <- sapply(motif, function(x) aa_props[x])
  table_rows <- motif + 1

  raw_score <- as.numeric(ThisKinTable[table_rows[1], 3])
  for (position in scored_positions[-1]) {
    raw_score <- raw_score *
      ThisKinTable[as.numeric(table_rows[position]), position + 2]
  }

  PositiveScores[v] <- raw_score
  # The normalised score is stored as a percentage.
  PositiveWeirdScores[v] <- (raw_score / (raw_score + inverse_norm)) * 100
}

positivesubstrates <- ImportedSubstrateList[, 4:18]
positivewithscores <- cbind.data.frame(positivesubstrates, PositiveScores,
                                       PositiveWeirdScores)
1872
1873
1874 #write down the transient transfection SOP and what we will be doing with them
1875 #write down the vector names I will be using
1876 #write down something about transforming bacteria and with what
1877
1878 #90% whatevernness
1879 # TPninetyone<-length(PositiveWeirdScores[PositiveWeirdScores>=0.91])
1880 # Senseninetyone<-TPninetyone/nrow(positivesubstrates)
1881 #
1882 # TNninetyone<-length(NegativeWeirdScores[NegativeWeirdScores<91])
1883 # Specninetyone<-TNninetyone/100
1884
1885 #create the MCC table
1886
# Build the characterization table: one row per percentage threshold, from 100
# down to 1, with the confusion-matrix counts and derived statistics obtained
# by classifying the positive and negative substrates at that threshold.
threshold <- 100:1

Truepositives <- integer(length(threshold))
Falsenegatives <- integer(length(threshold))
Sensitivity <- numeric(length(threshold))
TrueNegatives <- integer(length(threshold))
FalsePositives <- integer(length(threshold))
Specificity <- numeric(length(threshold))
Accuracy <- numeric(length(threshold))
MCC <- numeric(length(threshold))
EER <- numeric(length(threshold))

n_positive <- nrow(positivesubstrates)
n_negative <- nrow(NegativeSubstrateList)

for (z in seq_along(threshold)) {
  thres <- threshold[z]

  # A positive substrate scoring at or above the threshold is a true positive;
  # a negative substrate scoring below it is a true negative.
  tp <- length(PositiveWeirdScores[PositiveWeirdScores >= thres])
  fn <- n_positive - tp
  tn <- length(NegativeWeirdScores[NegativeWeirdScores < thres])
  fp <- n_negative - tn

  Truepositives[z] <- tp
  Falsenegatives[z] <- fn
  Sensitivity[z] <- tp / (fn + tp)
  TrueNegatives[z] <- tn
  FalsePositives[z] <- fp

  # NOTE(review): this is 1 - TN / (FP + TN), i.e. the false-positive rate,
  # stored under the name "Specificity". Left unchanged because the output
  # column may be consumed as-is -- confirm which quantity is intended.
  Specificity[z] <- 1 - (tn / (fp + tn))

  Accuracy[z] <- 100 * (tp + tn) / (fn + fp + tn + tp)

  # Matthews correlation coefficient:
  #   (TP*TN - FP*FN) / sqrt((TP+FN)(TN+FP)(TP+FP)(TN+FN))
  # Counts are converted to double before multiplying so the four-factor
  # product cannot overflow R's integer range. When any factor is zero the
  # coefficient is undefined (0/0); it is reported as 0 by convention.
  mcc_numerator <- as.numeric(tp) * as.numeric(tn) -
    as.numeric(fp) * as.numeric(fn)
  mcc_denominator <- sqrt(as.numeric(tp + fn) * as.numeric(tn + fp) *
                            as.numeric(tp + fp) * as.numeric(tn + fn))
  MCC[z] <- if (mcc_denominator > 0) mcc_numerator / mcc_denominator else 0

  # NOTE(review): formula kept exactly as found; the (1 - (TP + FN)) term
  # subtracts a count from 1, which looks unintended -- confirm.
  EER[z] <- .01 * (((1 - (Sensitivity[z])) * (tp + fn)) +
                     (Specificity[z] * (1 - (tp + fn))))
}

Characterization <- cbind.data.frame(threshold, Truepositives, Falsenegatives,
                                     Sensitivity, TrueNegatives, FalsePositives,
                                     Specificity, Accuracy, MCC, EER)
1916
# Put a header row on top of the positive-substrate table: the 15 motif
# positions followed by the raw and normalised score labels.
positiveheader <- c(as.character(1:15), "RPMS", "PMS")
positivewithscores <- rbind.data.frame(positiveheader, positivewithscores)

# Name the three columns of the negative-substrate score matrix.
negativeheader <- c("Substrate", "RPMS", "PMS")
colnames(NegativeWithScores) <- negativeheader

# The earlier write.xlsx() export of the negative, characterization, ranked
# and positive sheets into FILENAME is disabled; only the two CSV outputs
# below are written.

# FILENAME2: a title line, then the characterization table with its column
# names inserted as the first data row. Both writes append to the file.
write.table(
  x = c("Characterzation Table"),
  file = FILENAME2,
  append = TRUE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE
)
header <- colnames(Characterization)
Characterization <- rbind.data.frame(header, Characterization)
write.table(
  Characterization,
  file = FILENAME2,
  append = TRUE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE
)

# FILENAME3: the ranked generated peptides, overwriting any existing file and
# including the column names.
write.table(
  RanksPeptides,
  file = FILENAME3,
  append = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = TRUE
)