comparison kinatestid_r/Kinatest-R_part1.R @ 4:2f3df9b1c05b draft

Uploaded
author jfb
date Tue, 06 Feb 2018 17:16:05 -0500
parents
children bfbbed93df02
comparison
equal deleted inserted replaced
3:65f235b5fe14 4:2f3df9b1c05b
# ---- Script setup -----------------------------------------------------------
# Make the directory of this script the working directory.
# `parent.frame(2)$ofile` is only populated while the file is being run through
# source(); in any other situation it is NULL and dirname() would abort the
# script. In that case the current working directory is kept instead.
script_path <- parent.frame(2)$ofile
if (is.character(script_path) && length(script_path) == 1L &&
    !is.na(script_path) && nzchar(script_path)) {
  this.dir <- dirname(script_path)
  setwd(this.dir)
} else {
  this.dir <- getwd()
}

# `input1`, `input2`, `input3` and `screener` are not defined in this script;
# they must already exist in the calling environment.
ImportedSubstrateList <- read.csv(input1, stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv(input2, stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv(input3, stringsAsFactors = FALSE)

# Fail early with a readable message: later sections index these tables by
# column position (columns 4:18 and 2:21 respectively).
if (ncol(ImportedSubstrateList) < 18) {
  stop("input1 must have at least 18 columns (motif residues are read from ",
       "columns 4 to 18)", call. = FALSE)
}
if (ncol(SubstrateBackgroundFrequency) < 21) {
  stop("input3 must have at least 21 columns (amino-acid frequencies are ",
       "read from columns 2 to 21)", call. = FALSE)
}

ScreenerFilename <- screener

# Output identifiers.
FILENAME <- "output1"
FILENAME2 <- "output2"
FILENAME3 <- "output3"

OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
# NOTE(review): `SDtable` is reassigned to a numeric matrix further down in
# this script, so this file-name value does not survive. Confirm whether a
# separate variable name was intended.
SDtable <- "SDtableforthisKinase"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"
44
45
46
# Build `substrates`: a character matrix with one row per imported substrate
# and one column per motif position (columns 4:18 of the imported list).
# The first data row is skipped, exactly as the original loop did by starting
# at row 2 (presumably a sub-header row -- TODO confirm against the input
# format). Building the matrix column-wise also copes with inputs that have
# zero or one rows, where `2:nrow(...)` used to fail.
motif_columns <- ImportedSubstrateList[-1, 4:18, drop = FALSE]
substrates <- matrix(NA_character_, nrow = nrow(motif_columns), ncol = 15)
for (motif_position in seq_len(15)) {
  substrates[, motif_position] <- as.character(motif_columns[[motif_position]])
}
# Position 8 is always forced to "Y" (presumably the central phospho-acceptor
# tyrosine -- confirm).
substrates[, 8] <- "Y"
#SeqsToBeScored<-"asdasd"
59
60 # SpacesToOs<-c(""="O",)
61 # substrates<-SpacesToOs[substrates]
62
# ---- Background frequency statistics ---------------------------------------
# Columns 2:21 of SubstrateBackgroundFrequency are read as the per-amino-acid
# background values, in the order below (order taken from the variable names
# used by the original code: column 2 = A ... column 21 = Y).
background_aa <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                   "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# Coerce each column to numeric once; non-numeric cells become NA and are
# ignored by the na.rm = TRUE summaries below.
background_columns <- lapply(2:21, function(column_index) {
  as.numeric(SubstrateBackgroundFrequency[, column_index])
})

# Mean and standard deviation of every background column (unnamed vectors of
# length 20, same order as `background_aa`).
AllMeans <- vapply(background_columns, mean, numeric(1), na.rm = TRUE)
AllSDs <- vapply(background_columns, sd, numeric(1), na.rm = TRUE)
#this is subbackfreq SDs

# Keep the individual scalars (Amean ... Ymean, Asd ... Ysd) that other parts
# of the script refer to by name.
for (aa_index in seq_along(background_aa)) {
  assign(paste0(background_aa[aa_index], "mean"), AllMeans[aa_index])
  assign(paste0(background_aa[aa_index], "sd"), AllSDs[aa_index])
}

SBF_statisticalvalues <- cbind(AllMeans, AllSDs)
114
# ---- Percent table ----------------------------------------------------------
# PercentTable[r, p] is the percentage of substrates that carry amino acid r
# at motif position p. Blank cells ("") are excluded from the denominator, so
# each column is normalised by the number of occupied cells at that position.
# Row order:
# A C D E F G H I K L M N P Q R S T V W Y
percent_aa_order <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                      "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

PercentTable <- matrix(NA_real_, nrow = length(percent_aa_order), ncol = 15)
for (motif_position in seq_len(15)) {
  residues <- substrates[, motif_position]
  occupied <- length(residues) - sum(residues %in% "")
  # tabulate() silently ignores NA, i.e. residues that are not one of the 20
  # letters above (blanks, NA, unknown symbols) are simply not counted.
  residue_counts <- tabulate(match(residues, percent_aa_order),
                             nbins = length(percent_aa_order))
  # A position with no occupied cells yields 0/0 = NaN, as before.
  PercentTable[, motif_position] <- residue_counts / occupied
}
# The original table inherited the column names A1..A15 from its first
# (alanine) row; they are kept because the column names of the data frame
# built from PercentTable later in the script depend on them.
colnames(PercentTable) <- paste0("A", seq_len(15))
#this is substrate percents
PercentTable <- PercentTable * 100
# NOTE(review): the per-cell scalars (A1 ... Y15), Column1..15, spaces1..15 and
# the per-residue row matrices (AllAs, CllCs, ...) are no longer created here.
# The positions-table section further down recomputes them from `substrates`;
# confirm nothing in between reads the percentage versions.
511
# ---- SD (z-score) table -----------------------------------------------------
# For every row (amino acid): the percentage minus that amino acid's background
# mean, divided by that amino acid's background SD.
# AllMeans / AllSDs have one entry per row of PercentTable, so plain recycling
# down the columns applies the right mean and SD to every row.
stopifnot(
  length(AllMeans) == nrow(PercentTable),
  length(AllSDs) == nrow(PercentTable)
)
# unname(): the result must not carry PercentTable's dimnames, matching the
# dimname-free matrix the original code filled row by row.
SDtable <- unname((PercentTable - AllMeans) / AllSDs)
537
538
# Row labels for the output tables. The first entry, "Letter", is the header
# cell of the label column; entries 2..21 are the amino acids in the same
# order as the rows of SDtable.
SetOfAAs <- c("Letter", "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
              "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# For every motif position, count the observed substrate residues that belong
# to an amino acid whose z-score at that position is above 2.
#
# Row j of SDtable corresponds to SetOfAAs[j + 1] because of the leading
# "Letter" entry. Indexing SetOfAAs[j] directly (as this code used to) matched
# every row against the previous amino acid and never counted "Y".
sigma_row_letters <- SetOfAAs[-1]

SumOfSigmaAAs <- numeric(15)
for (i in 1:15) {
  # which() drops NA/NaN z-scores (zero background SD, fully blank position)
  # instead of aborting on `if (NA > 2)`.
  enriched_rows <- which(SDtable[, i] > 2)
  SumOfSigmaAAs[i] <- sum(substrates[, i] %in% sigma_row_letters[enriched_rows])
}
555
556 # AAs1<-length(substrates[,1])-sum(substrates[,1]=="")
557 # AAs2<-length(substrates[,2])-sum(substrates[,2]=="")
558 # AAs3<-length(substrates[,3])-sum(substrates[,3]=="")
559 # AAs4<-length(substrates[,4])-sum(substrates[,4]=="")
560 # AAs5<-length(substrates[,5])-sum(substrates[,5]=="")
561 # AAs6<-length(substrates[,6])-sum(substrates[,6]=="")
562 # AAs7<-length(substrates[,7])-sum(substrates[,7]=="")
563 # AAs8<-length(substrates[,8])-sum(substrates[,8]=="")
564 # AAs9<-length(substrates[,9])-sum(substrates[,9]=="")
565 #
566 #
567 #
568 # #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9)
569 # AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
570 # length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
571 # length(substrates[,9]))
572
# For every motif position, the number of residues expected from background
# alone for the amino acids whose z-score at that position is above 2:
# (sum of their background means) * (occupied cells at the position) / 100.
# The division by 100 suggests the background means are percentages -- confirm
# against the background-frequency input.
SumOfExpectedSigmaAAs <- numeric(15)
for (i in 1:15) {
  # which() ignores NA/NaN z-scores instead of aborting on `if (NA > 2)`.
  enriched_rows <- which(SDtable[, i] > 2)
  occupied <- length(substrates[, i]) - sum(substrates[, i] %in% "")
  SumOfExpectedSigmaAAs[i] <- sum(AllMeans[enriched_rows]) * occupied / 100
}

# Observed / expected per position. A position with no enriched amino acid
# gives 0 / 0 = NaN, as before.
SelectivityRow <- SumOfSigmaAAs / SumOfExpectedSigmaAAs
SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)
588
# ---- Labelled output tables -------------------------------------------------
# Reshape the label vector into a single-column matrix so it can serve as the
# first column of the data frames below.
dim(SetOfAAs) <- c(length(SetOfAAs), 1L)

# Header row holding the motif positions -7 .. 7.
HeaderSD <- -7:7

# Keep the bare z-score matrix as SDtableu, then put the header row on top and
# the label column in front.
SDtableu <- SDtable
SDtable <- rbind(HeaderSD, SDtableu)
SDtable <- data.frame(SetOfAAs, SDtable)

# Same decoration for the percent table.
PercentTable <- rbind(HeaderSD, PercentTable)
PercentTable <- data.frame(SetOfAAs, PercentTable)

# ---- Normalization score ----------------------------------------------------
# Ratio of the summed Number.of.pY column to the summed Number.of.Y column of
# the background table; cells that do not convert to a number are dropped.
.drop_missing <- function(values) values[!is.na(values)]

numberofY <- .drop_missing(as.numeric(SubstrateBackgroundFrequency$Number.of.Y))

numberofPY <- .drop_missing(as.numeric(SubstrateBackgroundFrequency$Number.of.pY))

NormalizationScore <- sum(numberofPY) / sum(numberofY)
605
606 # positions<-matrix(data = NA, nrow=20,ncol = 15)
607 #
608 # #column1
609 #
610 # for (q in 1:15) {
611 # sA<-sum(substrates[,i]=="A")
612 # positions[1,i]<-sA
613 # sC<-sum(substrates[,i]=="C")
614 # positions[2,i]<-sC
615 # sD<-sum(substrates[,i]=="D")
616 # positions[3,i]<-sD
617 # sE<-sum(substrates[,i]=="E")
618 # positions[4,i]<-sE
619 # sF<-sum(substrates[,i]=="F")
620 # sG<-sum(substrates[,i]=="G")
621 # sH<-sum(substrates[,i]=="H")
622 # sI<-sum(substrates[,i]=="I")
623 # sK<-sum(substrates[,i]=="K")
624 # sL<-sum(substrates[,i]=="L")
625 # sM<-sum(substrates[,i]=="M")
626 # sN<-sum(substrates[,i]=="N")
627 # sP<-sum(substrates[,i]=="P")
628 # sQ<-sum(substrates[,i]=="Q")
629 # sR<-sum(substrates[,i]=="R")
630 # sS<-sum(substrates[,i]=="S")
631 # sT<-sum(substrates[,i]=="T")
632 # sV<-sum(substrates[,i]=="V")
633 # sW<-sum(substrates[,i]=="W")
634 # sY<-sum(substrates[,i]=="Y")
635 # positions[5,i]<-sF
636 # positions[6,i]<-sG
637 # positions[7,i]<-sH
638 # positions[8,i]<-sI
639 # positions[9,i]<-sK
640 # positions[10,i]<-sL
641 # positions[11,i]<-sM
642 # positions[12,i]<-sN
643 # positions[13,i]<-sP
644 # positions[14,i]<-sQ
645 # positions[15,i]<-sR
646 # positions[16,i]<-sS
647 # positions[17,i]<-sT
648 # positions[18,i]<-sV
649 # positions[19,i]<-sW
650 # positions[20,i]<-sY
651 # }
652
653 # Here I create the positions table, which is needed for the endogenous probability matrix; it is simply "how many times did each amino acid show up at this position?"
654 if (6==6){
655 Column1<-substrates[,1]
656 Column2<-substrates[,2]
657 Column3<-substrates[,3]
658 Column4<-substrates[,4]
659 Column5<-substrates[,5]
660 Column6<-substrates[,6]
661 Column7<-substrates[,7]
662 Column8<-substrates[,8]
663 Column9<-substrates[,9]
664 Column10<-substrates[,10]
665 Column11<-substrates[,11]
666 Column12<-substrates[,12]
667 Column13<-substrates[,13]
668 Column14<-substrates[,14]
669 Column15<-substrates[,15]
670
671 spaces1<-sum((Column1%in% ""))
672 spaces2<-sum(Column2%in% "")
673 spaces3<-sum(Column3%in% "")
674 spaces4<-sum(Column4%in% "")
675 spaces5<-sum(Column5%in% "")
676 spaces6<-sum(Column6%in% "")
677 spaces7<-sum(Column7%in% "")
678 spaces8<-sum(Column8%in% "")
679 spaces9<-sum(Column9%in% "")
680 spaces10<-sum(Column10%in% "")
681 spaces11<-sum(Column11%in% "")
682 spaces12<-sum(Column12%in% "")
683 spaces13<-sum(Column13%in% "")
684 spaces14<-sum(Column14%in% "")
685 spaces15<-sum(Column15%in% "")
686
687 A1<-sum(Column1 %in% "A")
688 A2<-sum(Column2 %in% "A")
689 A3<-sum(Column3 %in% "A")
690 A4<-sum(Column4 %in% "A")
691 A5<-sum(Column5 %in% "A")
692 A6<-sum(Column6 %in% "A")
693 A7<-sum(Column7 %in% "A")
694 A8<-sum(Column8 %in% "A")
695 A9<-sum(Column9 %in% "A")
696 A10<-sum(Column10 %in% "A")
697 A11<-sum(Column11 %in% "A")
698 A12<-sum(Column12 %in% "A")
699 A13<-sum(Column13 %in% "A")
700 A14<-sum(Column14 %in% "A")
701 A15<-sum(Column15 %in% "A")
702 AllAs<-cbind(A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15)
703
704 C1<-sum(Column1 %in% "C")
705 C2<-sum(Column2 %in% "C")
706 C3<-sum(Column3 %in% "C")
707 C4<-sum(Column4 %in% "C")
708 C5<-sum(Column5 %in% "C")
709 C6<-sum(Column6 %in% "C")
710 C7<-sum(Column7 %in% "C")
711 C8<-sum(Column8 %in% "C")
712 C9<-sum(Column9 %in% "C")
713 C10<-sum(Column10 %in% "C")
714 C11<-sum(Column11 %in% "C")
715 C12<-sum(Column12 %in% "C")
716 C13<-sum(Column13 %in% "C")
717 C14<-sum(Column14 %in% "C")
718 C15<-sum(Column15 %in% "C")
719 CllCs<-cbind(C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15)
720
721 D1<-sum(Column1 %in% "D")
722 D2<-sum(Column2 %in% "D")
723 D3<-sum(Column3 %in% "D")
724 D4<-sum(Column4 %in% "D")
725 D5<-sum(Column5 %in% "D")
726 D6<-sum(Column6 %in% "D")
727 D7<-sum(Column7 %in% "D")
728 D8<-sum(Column8 %in% "D")
729 D9<-sum(Column9 %in% "D")
730 D10<-sum(Column10 %in% "D")
731 D11<-sum(Column11 %in% "D")
732 D12<-sum(Column12 %in% "D")
733 D13<-sum(Column13 %in% "D")
734 D14<-sum(Column14 %in% "D")
735 D15<-sum(Column15 %in% "D")
736 DllDs<-cbind(D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15)
737
738 E1<-sum(Column1 %in% "E")
739 E2<-sum(Column2 %in% "E")
740 E3<-sum(Column3 %in% "E")
741 E4<-sum(Column4 %in% "E")
742 E5<-sum(Column5 %in% "E")
743 E6<-sum(Column6 %in% "E")
744 E7<-sum(Column7 %in% "E")
745 E8<-sum(Column8 %in% "E")
746 E9<-sum(Column9 %in% "E")
747 E10<-sum(Column10 %in% "E")
748 E11<-sum(Column11 %in% "E")
749 E12<-sum(Column12 %in% "E")
750 E13<-sum(Column13 %in% "E")
751 E14<-sum(Column14 %in% "E")
752 E15<-sum(Column15 %in% "E")
753 EllEs<-cbind(E1,E2,E3,E4,E5,E6,E7,E8,E9,E10,E11,E12,E13,E14,E15)
754
755 F1<-sum(Column1 %in% "F")
756 F2<-sum(Column2 %in% "F")
757 F3<-sum(Column3 %in% "F")
758 F4<-sum(Column4 %in% "F")
759 F5<-sum(Column5 %in% "F")
760 F6<-sum(Column6 %in% "F")
761 F7<-sum(Column7 %in% "F")
762 F8<-sum(Column8 %in% "F")
763 F9<-sum(Column9 %in% "F")
764 F10<-sum(Column10 %in% "F")
765 F11<-sum(Column11 %in% "F")
766 F12<-sum(Column12 %in% "F")
767 F13<-sum(Column13 %in% "F")
768 F14<-sum(Column14 %in% "F")
769 F15<-sum(Column15 %in% "F")
770 FllFs<-cbind(F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15)
771
772 G1<-sum(Column1 %in% "G")
773 G2<-sum(Column2 %in% "G")
774 G3<-sum(Column3 %in% "G")
775 G4<-sum(Column4 %in% "G")
776 G5<-sum(Column5 %in% "G")
777 G6<-sum(Column6 %in% "G")
778 G7<-sum(Column7 %in% "G")
779 G8<-sum(Column8 %in% "G")
780 G9<-sum(Column9 %in% "G")
781 G10<-sum(Column10 %in% "G")
782 G11<-sum(Column11 %in% "G")
783 G12<-sum(Column12 %in% "G")
784 G13<-sum(Column13 %in% "G")
785 G14<-sum(Column14 %in% "G")
786 G15<-sum(Column15 %in% "G")
787 GllGs<-cbind(G1,G2,G3,G4,G5,G6,G7,G8,G9,G10,G11,G12,G13,G14,G15)
788
789 H1<-sum(Column1 %in% "H")
790 H2<-sum(Column2 %in% "H")
791 H3<-sum(Column3 %in% "H")
792 H4<-sum(Column4 %in% "H")
793 H5<-sum(Column5 %in% "H")
794 H6<-sum(Column6 %in% "H")
795 H7<-sum(Column7 %in% "H")
796 H8<-sum(Column8 %in% "H")
797 H9<-sum(Column9 %in% "H")
798 H10<-sum(Column10 %in% "H")
799 H11<-sum(Column11 %in% "H")
800 H12<-sum(Column12 %in% "H")
801 H13<-sum(Column13 %in% "H")
802 H14<-sum(Column14 %in% "H")
803 H15<-sum(Column15 %in% "H")
804 HllHs<-cbind(H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,H14,H15)
805
806 I1<-sum(Column1 %in% "I")
807 I2<-sum(Column2 %in% "I")
808 I3<-sum(Column3 %in% "I")
809 I4<-sum(Column4 %in% "I")
810 I5<-sum(Column5 %in% "I")
811 I6<-sum(Column6 %in% "I")
812 I7<-sum(Column7 %in% "I")
813 I8<-sum(Column8 %in% "I")
814 I9<-sum(Column9 %in% "I")
815 I10<-sum(Column10 %in% "I")
816 I11<-sum(Column11 %in% "I")
817 I12<-sum(Column12 %in% "I")
818 I13<-sum(Column13 %in% "I")
819 I14<-sum(Column14 %in% "I")
820 I15<-sum(Column15 %in% "I")
821 IllIs<-cbind(I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,I14,I15)
822
823 K1<-sum(Column1 %in% "K")
824 K2<-sum(Column2 %in% "K")
825 K3<-sum(Column3 %in% "K")
826 K4<-sum(Column4 %in% "K")
827 K5<-sum(Column5 %in% "K")
828 K6<-sum(Column6 %in% "K")
829 K7<-sum(Column7 %in% "K")
830 K8<-sum(Column8 %in% "K")
831 K9<-sum(Column9 %in% "K")
832 K10<-sum(Column10 %in% "K")
833 K11<-sum(Column11 %in% "K")
834 K12<-sum(Column12 %in% "K")
835 K13<-sum(Column13 %in% "K")
836 K14<-sum(Column14 %in% "K")
837 K15<-sum(Column15 %in% "K")
838 KllKs<-cbind(K1,K2,K3,K4,K5,K6,K7,K8,K9,K10,K11,K12,K13,K14,K15)
839
840 L1<-sum(Column1 %in% "L")
841 L2<-sum(Column2 %in% "L")
842 L3<-sum(Column3 %in% "L")
843 L4<-sum(Column4 %in% "L")
844 L5<-sum(Column5 %in% "L")
845 L6<-sum(Column6 %in% "L")
846 L7<-sum(Column7 %in% "L")
847 L8<-sum(Column8 %in% "L")
848 L9<-sum(Column9 %in% "L")
849 L10<-sum(Column10 %in% "L")
850 L11<-sum(Column11 %in% "L")
851 L12<-sum(Column12 %in% "L")
852 L13<-sum(Column13 %in% "L")
853 L14<-sum(Column14 %in% "L")
854 L15<-sum(Column15 %in% "L")
855 LllLs<-cbind(L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15)
856
857 M1<-sum(Column1 %in% "M")
858 M2<-sum(Column2 %in% "M")
859 M3<-sum(Column3 %in% "M")
860 M4<-sum(Column4 %in% "M")
861 M5<-sum(Column5 %in% "M")
862 M6<-sum(Column6 %in% "M")
863 M7<-sum(Column7 %in% "M")
864 M8<-sum(Column8 %in% "M")
865 M9<-sum(Column9 %in% "M")
866 M10<-sum(Column10 %in% "M")
867 M11<-sum(Column11 %in% "M")
868 M12<-sum(Column12 %in% "M")
869 M13<-sum(Column13 %in% "M")
870 M14<-sum(Column14 %in% "M")
871 M15<-sum(Column15 %in% "M")
872 MllMs<-cbind(M1,M2,M3,M4,M5,M6,M7,M8,M9,M10,M11,M12,M13,M14,M15)
873
874 N1<-sum(Column1 %in% "N")
875 N2<-sum(Column2 %in% "N")
876 N3<-sum(Column3 %in% "N")
877 N4<-sum(Column4 %in% "N")
878 N5<-sum(Column5 %in% "N")
879 N6<-sum(Column6 %in% "N")
880 N7<-sum(Column7 %in% "N")
881 N8<-sum(Column8 %in% "N")
882 N9<-sum(Column9 %in% "N")
883 N10<-sum(Column10 %in% "N")
884 N11<-sum(Column11 %in% "N")
885 N12<-sum(Column12 %in% "N")
886 N13<-sum(Column13 %in% "N")
887 N14<-sum(Column14 %in% "N")
888 N15<-sum(Column15 %in% "N")
889 NllNs<-cbind(N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12,N13,N14,N15)
890
891 P1<-sum(Column1 %in% "P")
892 P2<-sum(Column2 %in% "P")
893 P3<-sum(Column3 %in% "P")
894 P4<-sum(Column4 %in% "P")
895 P5<-sum(Column5 %in% "P")
896 P6<-sum(Column6 %in% "P")
897 P7<-sum(Column7 %in% "P")
898 P8<-sum(Column8 %in% "P")
899 P9<-sum(Column9 %in% "P")
900 P10<-sum(Column10 %in% "P")
901 P11<-sum(Column11 %in% "P")
902 P12<-sum(Column12 %in% "P")
903 P13<-sum(Column13 %in% "P")
904 P14<-sum(Column14 %in% "P")
905 P15<-sum(Column15 %in% "P")
906 PllPs<-cbind(P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15)
907
908 Q1<-sum(Column1 %in% "Q")
909 Q2<-sum(Column2 %in% "Q")
910 Q3<-sum(Column3 %in% "Q")
911 Q4<-sum(Column4 %in% "Q")
912 Q5<-sum(Column5 %in% "Q")
913 Q6<-sum(Column6 %in% "Q")
914 Q7<-sum(Column7 %in% "Q")
915 Q8<-sum(Column8 %in% "Q")
916 Q9<-sum(Column9 %in% "Q")
917 Q10<-sum(Column10 %in% "Q")
918 Q11<-sum(Column11 %in% "Q")
919 Q12<-sum(Column12 %in% "Q")
920 Q13<-sum(Column13 %in% "Q")
921 Q14<-sum(Column14 %in% "Q")
922 Q15<-sum(Column15 %in% "Q")
923 QllQs<-cbind(Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15)
924
925 R1<-sum(Column1 %in% "R")
926 R2<-sum(Column2 %in% "R")
927 R3<-sum(Column3 %in% "R")
928 R4<-sum(Column4 %in% "R")
929 R5<-sum(Column5 %in% "R")
930 R6<-sum(Column6 %in% "R")
931 R7<-sum(Column7 %in% "R")
932 R8<-sum(Column8 %in% "R")
933 R9<-sum(Column9 %in% "R")
934 R10<-sum(Column10 %in% "R")
935 R11<-sum(Column11 %in% "R")
936 R12<-sum(Column12 %in% "R")
937 R13<-sum(Column13 %in% "R")
938 R14<-sum(Column14 %in% "R")
939 R15<-sum(Column15 %in% "R")
940 RllRs<-cbind(R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,R13,R14,R15)
941
942 S1<-sum(Column1 %in% "S")
943 S2<-sum(Column2 %in% "S")
944 S3<-sum(Column3 %in% "S")
945 S4<-sum(Column4 %in% "S")
946 S5<-sum(Column5 %in% "S")
947 S6<-sum(Column6 %in% "S")
948 S7<-sum(Column7 %in% "S")
949 S8<-sum(Column8 %in% "S")
950 S9<-sum(Column9 %in% "S")
951 S10<-sum(Column10 %in% "S")
952 S11<-sum(Column11 %in% "S")
953 S12<-sum(Column12 %in% "S")
954 S13<-sum(Column13 %in% "S")
955 S14<-sum(Column14 %in% "S")
956 S15<-sum(Column15 %in% "S")
957 SllSs<-cbind(S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15)
958
959 T1<-sum(Column1 %in% "T")
960 T2<-sum(Column2 %in% "T")
961 T3<-sum(Column3 %in% "T")
962 T4<-sum(Column4 %in% "T")
963 T5<-sum(Column5 %in% "T")
964 T6<-sum(Column6 %in% "T")
965 T7<-sum(Column7 %in% "T")
966 T8<-sum(Column8 %in% "T")
967 T9<-sum(Column9 %in% "T")
968 T10<-sum(Column10 %in% "T")
969 T11<-sum(Column11 %in% "T")
970 T12<-sum(Column12 %in% "T")
971 T13<-sum(Column13 %in% "T")
972 T14<-sum(Column14 %in% "T")
973 T15<-sum(Column15 %in% "T")
974 TllTs<-cbind(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15)
975
976 V1<-sum(Column1 %in% "V")
977 V2<-sum(Column2 %in% "V")
978 V3<-sum(Column3 %in% "V")
979 V4<-sum(Column4 %in% "V")
980 V5<-sum(Column5 %in% "V")
981 V6<-sum(Column6 %in% "V")
982 V7<-sum(Column7 %in% "V")
983 V8<-sum(Column8 %in% "V")
984 V9<-sum(Column9 %in% "V")
985 V10<-sum(Column10 %in% "V")
986 V11<-sum(Column11 %in% "V")
987 V12<-sum(Column12 %in% "V")
988 V13<-sum(Column13 %in% "V")
989 V14<-sum(Column14 %in% "V")
990 V15<-sum(Column15 %in% "V")
991 VllVs<-cbind(V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15)
992
993 W1<-sum(Column1 %in% "W")
994 W2<-sum(Column2 %in% "W")
995 W3<-sum(Column3 %in% "W")
996 W4<-sum(Column4 %in% "W")
997 W5<-sum(Column5 %in% "W")
998 W6<-sum(Column6 %in% "W")
999 W7<-sum(Column7 %in% "W")
1000 W8<-sum(Column8 %in% "W")
1001 W9<-sum(Column9 %in% "W")
1002 W10<-sum(Column10 %in% "W")
1003 W11<-sum(Column11 %in% "W")
1004 W12<-sum(Column12 %in% "W")
1005 W13<-sum(Column13 %in% "W")
1006 W14<-sum(Column14 %in% "W")
1007 W15<-sum(Column15 %in% "W")
1008 WllWs<-cbind(W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14,W15)
1009
1010 Y1<-sum(Column1 %in% "Y")
1011 Y2<-sum(Column2 %in% "Y")
1012 Y3<-sum(Column3 %in% "Y")
1013 Y4<-sum(Column4 %in% "Y")
1014 Y5<-sum(Column5 %in% "Y")
1015 Y6<-sum(Column6 %in% "Y")
1016 Y7<-sum(Column7 %in% "Y")
1017 Y8<-sum(Column8 %in% "Y")
1018 Y9<-sum(Column9 %in% "Y")
1019 Y10<-sum(Column10 %in% "Y")
1020 Y11<-sum(Column11 %in% "Y")
1021 Y12<-sum(Column12 %in% "Y")
1022 Y13<-sum(Column13 %in% "Y")
1023 Y14<-sum(Column14 %in% "Y")
1024 Y15<-sum(Column15 %in% "Y")
1025 YllYs<-cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y8,Y9,Y10,Y11,Y12,Y13,Y14,Y15)
1026 PositionTable<-rbind(AllAs,CllCs,DllDs,EllEs,FllFs,GllGs,HllHs,IllIs,KllKs,LllLs,MllMs,NllNs,PllPs,QllQs,RllRs,SllSs,TllTs,VllVs,WllWs,YllYs)
1027 }
# Endogenous probability matrix (EPM): residue counts per motif position
# divided by the background term.
# For residue row y and motif position z:
#   EPMtable[y, z] = count / (columns[z] * .01 * AllMeans[y])
# where columns[z] is the number of non-blank entries at position z and
# `count` is PositionTable[y, z], or 1 / columns[z] when that count is zero.
# AllMeans is defined earlier in the file (presumably the per-residue
# background-frequency means, in the same residue order as PositionTable).

# Top-level expression: prints the dimensions when the script runs with
# auto-printing (e.g. Rscript); it has no other effect.
dim(PositionTable)
EPMtable <- PositionTable

# Number of non-blank entries at each of the 15 motif positions.
columns <- nrow(substrates) -
  vapply(seq_len(15),
         function(position) sum(substrates[, position] == ""),
         integer(1))

# Same arithmetic as a cell-by-cell double loop, done on whole matrices.
# `EPMtable[] <-` keeps the dimensions and column names copied from
# PositionTable while replacing every cell.
EPMtable[] <- local({
  entries_per_position <- matrix(columns, nrow = 20, ncol = 15, byrow = TRUE)
  background_mean <- matrix(AllMeans[1:20], nrow = 20, ncol = 15)
  numerator <- ifelse(PositionTable > 0, PositionTable, 1 / entries_per_position)
  numerator / (entries_per_position * .01 * background_mean)
})
1078 #here I created the endogenous probability matrix
1079 #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs
1080
1081
1082
1083
1084
1085 # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)
1086 # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)
1087 # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)
1088 # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)
1089 # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)
1090
# ---- Export the results as comma-separated sections --------------------------
# Every section is appended to its output file without column names.
# FILENAME  receives the SD table and the percent table.
# FILENAME2 receives the site selectivity matrix, the endogenous probability
#           matrix and the normalization score.

# Append `x` to `file` as comma-separated rows (no header row).
AppendCsv <- function(x, file, row.names = FALSE) {
  write.table(x, file = file, append = TRUE, sep = ",",
              row.names = row.names, col.names = FALSE)
}

# Prefix the score with its label so the label is written with it.
NormalizationScore <- c("Normalization Score", NormalizationScore)

AppendCsv("SD Table", FILENAME)
AppendCsv(SDtable, FILENAME)
AppendCsv("Percent Table", FILENAME)
AppendCsv(PercentTable, FILENAME)

# EPM with a leading row of motif positions (-7..7) and the residue labels
# from SetOfAAs (defined earlier in the file) as the first column.
HeaderSD <- -7:7
EPMtableu <- rbind(HeaderSD, EPMtable)
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

AppendCsv("Site Selectivity Matrix", FILENAME2)

# Two header rows for the selectivity sheet: a row of 16 single-space cells,
# then "Position" followed by the motif positions.
SelectivityHeader <- rbind(rep(" ", times = 16), c("Position", -7:7))
AppendCsv(SelectivityHeader, FILENAME2)

#colnames(SelectivitySheet)<-c("-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7")
# Row names are written for this sheet only.
AppendCsv(SelectivitySheet, FILENAME2, row.names = TRUE)
AppendCsv("Endogenous Probability Matrix", FILENAME2)
AppendCsv(EPMtableu, FILENAME2)
AppendCsv(NormalizationScore, FILENAME2)