Mercurial > repos > jfb > kinatest_r_7_7testing
comparison kinatestid_r/Kinatest-R.R @ 14:da1012f014bd draft
Uploaded
author | jfb |
---|---|
date | Thu, 08 Feb 2018 15:29:56 -0500 |
parents | |
children | 15b5d4ae4480 |
comparison
equal
deleted
inserted
replaced
13:d71eb1d66a88 | 14:da1012f014bd |
---|---|
1 | |
# ---- Inputs -----------------------------------------------------------------
# Three CSV files are read from fixed relative paths. Strings are kept as
# character vectors (no factor conversion).
# NOTE(review): "input1"/"input2"/"input3" are presumably staged by the calling
# tool wrapper -- confirm against the wrapper definition.
ImportedSubstrateList <- read.csv(file = "input1", stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv(file = "input2", stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv(file = "input3", stringsAsFactors = FALSE)

# ---- File-name constants ----------------------------------------------------
ScreenerFilename <- "screener"

FILENAME <- "output1.csv"
FILENAME2 <- "output2.csv"
FILENAME3 <- "output3.csv"

OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
# NOTE: the name SDtable is reused further down for the numeric z-score matrix,
# which replaces this string.
SDtable <- "SDtableforthisKinase"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"
41 | |
42 | |
43 | |
# ---- Substrate motif matrix -------------------------------------------------
# Builds `substrates`: a character matrix with one row per imported substrate
# and 15 columns, filled from columns 4-18 of ImportedSubstrateList.
# The first row of ImportedSubstrateList is skipped, exactly as before.
# NOTE(review): row 1 is presumably a secondary header/label row -- confirm.
#
# The row count is clamped at zero and the loop uses seq_len(), so an import
# with fewer than two rows yields an empty 0 x 15 matrix. The previous
# `2:nrow(...)` counted downwards (2, 1) for a one-row import and failed with
# an out-of-bounds assignment; a zero-row import failed inside rep().
n_motifs <- max(nrow(ImportedSubstrateList) - 1L, 0L)
substrates <- matrix("A", nrow = n_motifs, ncol = 15)

for (j in seq_len(n_motifs)) {
  i <- j + 1L                                   # source row in the import
  substratemotif <- ImportedSubstrateList[i, 4:18]
  substratemotif[8] <- "Y"                      # 8th motif position is always set to "Y"
  substratemotif <- unlist(substratemotif)      # one-row data frame -> plain vector
  substrates[j, 1:15] <- substratemotif
}
56 | |
57 # SpacesToOs<-c(""="O",) | |
58 # substrates<-SpacesToOs[substrates] | |
59 | |
# Bare expression kept from the original: it evaluates the entry in the last
# row, second column of the background table and assigns nothing.
SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency), 2]

# ---- Background frequency statistics ----------------------------------------
# Columns 2..21 of SubstrateBackgroundFrequency are summarised one by one.
# Following the original variable names, column 2 feeds Amean/Asd, column 3
# Cmean/Csd, and so on through column 21 for Ymean/Ysd.
# Values are coerced with as.numeric() and NAs are dropped before summarising.
BackgroundColumnSummary <- function(stat) {
  vapply(
    2:21,
    function(col) {
      stat(as.numeric(SubstrateBackgroundFrequency[, col]), na.rm = TRUE)
    },
    numeric(1)
  )
}

AllMeans <- BackgroundColumnSummary(mean)
AllSDs <- BackgroundColumnSummary(sd)
#this is subbackfreq SDs

# The per-residue scalars (Amean ... Ymean, Asd ... Ysd) are still published
# under their original names because other parts of the script refer to them.
bg_codes <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
              "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
for (bg_k in seq_along(bg_codes)) {
  assign(paste0(bg_codes[bg_k], "mean"), AllMeans[bg_k])
  assign(paste0(bg_codes[bg_k], "sd"), AllSDs[bg_k])
}

# Two-column summary: background mean and SD, one row per residue.
SBF_statisticalvalues <- cbind(AllMeans, AllSDs)
111 | |
# ---- Percent table ----------------------------------------------------------
# For each of the 15 motif positions (columns of `substrates`) and each of the
# 20 residue codes, compute
#     (number of entries equal to the code) / (column length - number of "")
# i.e. blank entries are excluded from the denominator. The fractions are then
# scaled to percentages.
#
# Every variable the original long-hand version created is still created under
# the same name: Column1..Column15, spaces1..spaces15, A1..Y15 and the 1 x 15
# row matrices AllAs, CllCs, DllDs, ..., YllYs.

# Row order of the table: A C D E F G H I K L M N P Q R S T V W Y
pct_codes <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
               "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# Column names A1..A15 are what rbind() took from the first row block (AllAs)
# in the original, so they are reproduced here.
pct_fraction <- matrix(
  0,
  nrow = length(pct_codes),
  ncol = 15,
  dimnames = list(NULL, paste0("A", 1:15))
)

for (pct_pos in 1:15) {
  pct_column <- substrates[, pct_pos]
  pct_blank <- sum(pct_column %in% "")
  assign(paste0("Column", pct_pos), pct_column)
  assign(paste0("spaces", pct_pos), pct_blank)

  for (pct_row in seq_along(pct_codes)) {
    pct_value <- sum(pct_column %in% pct_codes[pct_row]) /
      (length(pct_column) - pct_blank)
    assign(paste0(pct_codes[pct_row], pct_pos), pct_value)
    pct_fraction[pct_row, pct_pos] <- pct_value
  }
}

# Per-residue 1 x 15 row matrices, named AllAs, CllCs, ..., YllYs, with column
# names <code>1..<code>15 as cbind() produced them.
for (pct_row in seq_along(pct_codes)) {
  pct_code <- pct_codes[pct_row]
  assign(
    paste0(pct_code, "ll", pct_code, "s"),
    matrix(
      pct_fraction[pct_row, ],
      nrow = 1,
      dimnames = list(NULL, paste0(pct_code, 1:15))
    )
  )
}
#this is substrate percents

PercentTable <- pct_fraction
PercentTable <- PercentTable * 100
508 | |
# ---- SD (z-score) table -----------------------------------------------------
# For every row: the percentage minus that residue's background mean, divided
# by that residue's background SD. Row j of PercentTable is paired with
# AllMeans[j] / AllSDs[j] (the same values as Amean/Asd ... Ymean/Ysd).
pct_centred <- sweep(PercentTable, 1, AllMeans, "-")
pct_scaled <- sweep(pct_centred, 1, AllSDs, "/")

# The table carries no row or column names, so the A1..A15 column names
# inherited from PercentTable are stripped.
SDtable <- unname(pct_scaled)
534 | |
535 | |
# ---- Observed counts of over-represented residues ---------------------------
# SetOfAAs is the label column for the output tables: a "Letter" header entry
# followed by the 20 residue codes in table row order.
SetOfAAs <- c("Letter", "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
              "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# Row j of SDtable belongs to the j-th residue code, which sits at
# SetOfAAs[j + 1] because of the leading "Letter" entry. The previous loop
# indexed SetOfAAs[j], so every row was paired with the wrong residue (row 1
# with "Letter", row 2 with "A", ...) and "Y" was never counted.
SigmaResidues <- SetOfAAs[-1]

# For each motif position, count the substrates whose residue at that position
# has a z-score above 2.
#  * NA/NaN z-scores are treated as "not over-represented" instead of making
#    `if` fail with a missing-value error.
#  * %in% is used for the comparison so NA entries in `substrates` count as
#    non-matches rather than turning the whole sum into NA.
SumOfSigmaAAs <- vapply(
  1:15,
  function(position) {
    zscores <- SDtable[1:20, position]
    enriched <- !is.na(zscores) & zscores > 2
    as.numeric(sum(substrates[, position] %in% SigmaResidues[enriched]))
  },
  numeric(1)
)
552 | |
553 # AAs1<-length(substrates[,1])-sum(substrates[,1]=="") | |
554 # AAs2<-length(substrates[,2])-sum(substrates[,2]=="") | |
555 # AAs3<-length(substrates[,3])-sum(substrates[,3]=="") | |
556 # AAs4<-length(substrates[,4])-sum(substrates[,4]=="") | |
557 # AAs5<-length(substrates[,5])-sum(substrates[,5]=="") | |
558 # AAs6<-length(substrates[,6])-sum(substrates[,6]=="") | |
559 # AAs7<-length(substrates[,7])-sum(substrates[,7]=="") | |
560 # AAs8<-length(substrates[,8])-sum(substrates[,8]=="") | |
561 # AAs9<-length(substrates[,9])-sum(substrates[,9]=="") | |
562 # | |
563 # | |
564 # | |
565 # #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9) | |
566 # AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]), | |
567 # length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]), | |
568 # length(substrates[,9])) | |
569 | |
# ---- Expected counts and selectivity ----------------------------------------
# For each motif position: add up the background mean percentages (AllMeans)
# of every residue whose z-score in SDtable exceeds 2, then scale by the number
# of non-blank entries at that position (divided by 100 because the means are
# percentages).
#
# NA/NaN z-scores are treated as "not over-represented"; previously a single
# NA/NaN cell made `if (SDtable[j, i] > 2)` stop the script with a
# missing-value error.
SumOfExpectedSigmaAAs <- vapply(
  1:15,
  function(position) {
    zscores <- SDtable[1:20, position]
    enriched <- !is.na(zscores) & zscores > 2
    non_blank <- length(substrates[, position]) -
      sum(substrates[, position] %in% "")
    sum(AllMeans[enriched]) * non_blank / 100
  },
  numeric(1)
)

# Observed / expected per position. A position with no over-represented
# residue gives 0 / 0 = NaN, as before.
SelectivityRow <- SumOfSigmaAAs / SumOfExpectedSigmaAAs
SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)
585 | |
# ---- Labelled output tables -------------------------------------------------
# Turn the label vector into a one-column matrix so it can be bound as the
# first column of the output data frames.
SetOfAAs <- matrix(SetOfAAs, ncol = 1)

# Header row holding the values -7 ... 7, one per motif column.
HeaderSD <- -7:7

# Keep the bare z-score matrix under SDtableu, then prepend the header row and
# the label column to build the SDtable data frame.
SDtableu <- SDtable
SDtable <- rbind(HeaderSD, SDtableu)
rownames(SDtable) <- NULL
SDtable <- data.frame(SetOfAAs, SDtable)

# Same treatment for the percent table.
PercentTable <- rbind(HeaderSD, PercentTable)
rownames(PercentTable) <- NULL
PercentTable <- data.frame(SetOfAAs, PercentTable)

# ---- Normalization score ----------------------------------------------------
# Ratio of the summed Number.of.pY column to the summed Number.of.Y column of
# the background table; entries that are NA after numeric coercion are dropped.
DropMissing <- function(values) {
  values[!is.na(values)]
}

numberofY <- DropMissing(as.numeric(SubstrateBackgroundFrequency$Number.of.Y))
numberofPY <- DropMissing(as.numeric(SubstrateBackgroundFrequency$Number.of.pY))

NormalizationScore <- sum(numberofPY) / sum(numberofY)
604 | |
605 # positions<-matrix(data = NA, nrow=20,ncol = 15) | |
606 # | |
607 # #column1 | |
608 # | |
609 # for (q in 1:15) { | |
610 # sA<-sum(substrates[,i]=="A") | |
611 # positions[1,i]<-sA | |
612 # sC<-sum(substrates[,i]=="C") | |
613 # positions[2,i]<-sC | |
614 # sD<-sum(substrates[,i]=="D") | |
615 # positions[3,i]<-sD | |
616 # sE<-sum(substrates[,i]=="E") | |
617 # positions[4,i]<-sE | |
618 # sF<-sum(substrates[,i]=="F") | |
619 # sG<-sum(substrates[,i]=="G") | |
620 # sH<-sum(substrates[,i]=="H") | |
621 # sI<-sum(substrates[,i]=="I") | |
622 # sK<-sum(substrates[,i]=="K") | |
623 # sL<-sum(substrates[,i]=="L") | |
624 # sM<-sum(substrates[,i]=="M") | |
625 # sN<-sum(substrates[,i]=="N") | |
626 # sP<-sum(substrates[,i]=="P") | |
627 # sQ<-sum(substrates[,i]=="Q") | |
628 # sR<-sum(substrates[,i]=="R") | |
629 # sS<-sum(substrates[,i]=="S") | |
630 # sT<-sum(substrates[,i]=="T") | |
631 # sV<-sum(substrates[,i]=="V") | |
632 # sW<-sum(substrates[,i]=="W") | |
633 # sY<-sum(substrates[,i]=="Y") | |
634 # positions[5,i]<-sF | |
635 # positions[6,i]<-sG | |
636 # positions[7,i]<-sH | |
637 # positions[8,i]<-sI | |
638 # positions[9,i]<-sK | |
639 # positions[10,i]<-sL | |
640 # positions[11,i]<-sM | |
641 # positions[12,i]<-sN | |
642 # positions[13,i]<-sP | |
643 # positions[14,i]<-sQ | |
644 # positions[15,i]<-sR | |
645 # positions[16,i]<-sS | |
646 # positions[17,i]<-sT | |
647 # positions[18,i]<-sV | |
648 # positions[19,i]<-sW | |
649 # positions[20,i]<-sY | |
650 # } | |
651 | |
# Build PositionTable, needed for the endogenous probability matrix: a 20 x 15
# integer matrix answering "how many times did this amino acid show up at this
# motif position?". Rows follow the residue order A, C, D, ..., Y; columns are
# the 15 motif positions and are named A1..A15 (as before, from the first row).
#
# The per-cell statements that used to be spelled out by hand are generated in
# loops. The individual globals the old code created (Column1..Column15,
# spaces1..spaces15, A1..Y15, AllAs, CllCs, ..., YllYs) are still assigned so
# that any later part of the script that reads them keeps working.
if (6==6){
  aa_letters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
  n_positions <- 15

  # One character vector per motif position, plus the count of empty ("") cells.
  for (pos in seq_len(n_positions)) {
    residues_here <- substrates[, pos]
    assign(paste0("Column", pos), residues_here)
    assign(paste0("spaces", pos), sum(residues_here %in% ""))
  }

  # One 1 x 15 count matrix per residue, with columns named <letter><position>.
  count_rows <- lapply(aa_letters, function(aa) {
    counts <- vapply(
      seq_len(n_positions),
      function(pos) sum(substrates[, pos] %in% aa),
      integer(1)
    )
    matrix(counts, nrow = 1,
           dimnames = list(NULL, paste0(aa, seq_len(n_positions))))
  })

  # Legacy globals: scalar counts (A1, ..., Y15) and per-residue rows
  # (AllAs, CllCs, ..., YllYs).
  for (k in seq_along(aa_letters)) {
    aa <- aa_letters[k]
    aa_counts <- count_rows[[k]]
    for (pos in seq_len(n_positions)) {
      assign(paste0(aa, pos), aa_counts[1, pos])
    }
    assign(paste0(aa, "ll", aa, "s"), aa_counts)
  }

  PositionTable <- do.call(rbind, count_rows)
}
# Endogenous probability matrix (EPM): observed residue count at a position
# divided by the count expected from the background frequency, i.e.
#   count / (filled cells at the position * 0.01 * background percent).
# A residue never seen at a position gets the pseudo-count 1 / (filled cells)
# instead of 0.
dim(PositionTable)
EPMtable <- PositionTable

# Number of non-empty substrate cells at each of the 15 positions. `%in%` is
# used (rather than `==`) so the count agrees with how empty cells are detected
# everywhere else in the script and can never become NA.
columns <- vapply(
  seq_len(15),
  function(z) sum(!(substrates[, z] %in% "")),
  integer(1)
)

background <- AllMeans[seq_len(nrow(PositionTable))]
for (z in seq_len(ncol(PositionTable))) {
  observed <- PositionTable[, z]
  # Swap zero counts for the pseudo-count before dividing.
  observed <- ifelse(observed > 0, observed, 1 / columns[z])
  EPMtable[, z] <- observed / (columns[z] * .01 * background)
}
1077 #here I created the endogenous probability matrix | |
1078 #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs | |
1079 | |
1080 | |
1081 | |
1082 | |
1083 | |
1084 # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE) | |
1085 # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE) | |
1086 # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE) | |
1087 # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE) | |
1088 # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE) | |
1089 | |
# Write the result tables as comma-separated sections. Every write appends to
# the target file without column names; a one-cell label line precedes each
# table.
append_csv <- function(x, file, row.names = FALSE) {
  write.table(x, file = file, append = TRUE, sep = ",",
              row.names = row.names, col.names = FALSE)
}

NormalizationScore <- c("Normalization Score", NormalizationScore)

# First output file: SD table and percent table.
append_csv("SD Table", FILENAME)
append_csv(SDtable, FILENAME)
append_csv("Percent Table", FILENAME)
append_csv(PercentTable, FILENAME)

# EPM with the position header row and the label column attached.
HeaderSD <- c(-7:7)
EPMtableu <- rbind(HeaderSD, EPMtable)
row.names(EPMtableu) <- NULL
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

# Second output file: site selectivity, EPM and normalization score.
append_csv("Site Selectivity Matrix", FILENAME2)

# A row of blanks followed by the "Position, -7 .. 7" header row.
blank_row <- matrix(data = rep(" ", times = 16), nrow = 1)
SelectivityHeader <- rbind(blank_row,
                           matrix(data = c("Position", -7:7), nrow = 1))
append_csv(SelectivityHeader, FILENAME2)

#colnames(SelectivitySheet)<-c("-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7")
# Row names are written here: they label the three selectivity rows.
append_csv(SelectivitySheet, FILENAME2, row.names = TRUE)
append_csv("Endogenous Probability Matrix", FILENAME2)
append_csv(EPMtableu, FILENAME2)
append_csv(NormalizationScore, FILENAME2)
1114 | |
1115 | |
1116 | |
1117 | |
1118 | |
1119 | |
1120 | |
1121 | |
1122 | |
1123 | |
1124 | |
1125 | |
1126 | |
1127 | |
1128 | |
1129 | |
1130 | |
1131 | |
1132 | |
1133 | |
1134 | |
1135 | |
1136 | |
1137 | |
1138 | |
1139 | |
1140 | |
1141 | |
1142 | |
1143 | |
1144 | |
1145 | |
1146 | |
1147 | |
#test myself: this script should take in the amino acids for each of the 9 positions and give out every single combination of those AAs

#need to do the following: fix it so that the accession numbers stay with the substrates,
#also the neg false constant is the totally unphosphorylated Ys found by the FASTA-2-CSV system (UniProt)

#HOW MANY: if there are two AAs in each position you get 2^9, so I assume the numbers are:
#(number in position -4)*(number in position -3)*(number in position -2)...=total
1155 # require(rJava) | |
1156 # require(xlsxjars) | |
1157 # require(xlsx) | |
1158 # # require(readxl) | |
1159 | |
1160 #View(SDtable) | |
# Choose the residues to use at each motif position (-7 .. +7).
# m = minus, d0 = the central position, p = plus.
#
# bareSDs is SDtable without its header row and label column; goodones flags
# the entries greater than 2. For each position every flagged residue is kept;
# if none is flagged, the residue(s) holding the column maximum are used.
bareSDs <- SDtable[2:21, 2:16]
goodones <- bareSDs > 2

# Row index (as a name) -> one-letter residue code.
aa_props2 <- c("1"="A", "2"="C", "3"="D", "4"="E", "5"="F", "6"="G", "7"="H", "8"="I", "9"="K", "10"="L", "11"="M", "12"="N",
               "13"="P", "14"="Q", "15"="R", "16"="S", "17"="T", "18"="V", "19"="W", "20"="Y")

# Residue letters selected for one column of bareSDs.
residues_for_position <- function(column_index) {
  sd_values <- bareSDs[, column_index]
  chosen <- which(goodones[, column_index] %in% TRUE)
  # Fallback to the column maximum. NA entries are ignored so that one missing
  # value cannot leave the position without any residue; a column with no
  # usable value still yields an empty selection, as before.
  if (length(chosen) < 1 && any(!is.na(sd_values))) {
    chosen <- which(sd_values == max(sd_values, na.rm = TRUE))
  }
  sapply(chosen, function(x) aa_props2[x])
}

Positionm7 <- residues_for_position(1)
Positionm6 <- residues_for_position(2)
Positionm5 <- residues_for_position(3)
Positionm4 <- residues_for_position(4)
Positionm3 <- residues_for_position(5)
Positionm2 <- residues_for_position(6)
Positionm1 <- residues_for_position(7)

Positiond0 <- residues_for_position(8)

Positionp1 <- residues_for_position(9)
Positionp2 <- residues_for_position(10)
Positionp3 <- residues_for_position(11)
Positionp4 <- residues_for_position(12)
Positionp5 <- residues_for_position(13)
Positionp6 <- residues_for_position(14)
Positionp7 <- residues_for_position(15)
1215 | |
1216 | |
1217 # Positionm7<-c("D","H","N","V") | |
1218 # Positionm6<-c("E","V") | |
1219 # Positionm5<-c("D","H") | |
1220 # Positionm4<-c("D","N") | |
1221 # Positionm3<-c("D","E","F","Q") | |
1222 # Positionm2<-c("D","N","Q","S") | |
1223 # Positionm1<-c("F","I","L") | |
1224 # Positiond0<-c("Y") | |
1225 # Positionp1<-c("A","E") | |
1226 # Positionp2<-c("T","S","Q","E") | |
1227 # Positionp3<-c("V") | |
1228 # Positionp4<-c("K") | |
1229 # Positionp5<-c("K") | |
1230 # Positionp6<-c("K") | |
1231 # Positionp7<-c("R") | |
1232 #this is where the amino acids for each position are given. m means minus, p mean plus | |
1233 ######################################## | |
1234 # ScreenerFilename<-"C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls" | |
1235 | |
1236 | |
1237 | |
1238 | |
1239 | |
1240 | |
1241 | |
1242 | |
1243 | |
1244 | |
1245 | |
# Read the screener sheet (no header row) and cut it into one 24-row table per
# kinase. The tables sit at a stride of 25 rows: rows 2-25, 27-50, ..., 302-325.
screaner <- read.csv(ScreenerFilename, header = FALSE, stringsAsFactors = FALSE)

# Rows belonging to the slot-th kinase table (slot = 1 for the first table).
kinase_rows <- function(slot) {
  last_row <- 25 * slot
  screaner[(last_row - 23):last_row, ]
}

Abl  <- kinase_rows(1)
Arg  <- kinase_rows(2)
Btk  <- kinase_rows(3)
Csk  <- kinase_rows(4)
Fyn  <- kinase_rows(5)
Hck  <- kinase_rows(6)
JAK2 <- kinase_rows(7)
Lck  <- kinase_rows(8)
Lyn  <- kinase_rows(9)
Pyk2 <- kinase_rows(10)
Src  <- kinase_rows(11)
Syk  <- kinase_rows(12)
Yes  <- kinase_rows(13)

#two questions: why are we doing BTK when we already have a bioninformatics page about it?
#two I reran everything and only get 96 positions of interest in the SD table

#Do_You_want_An_Excel_Output_Questionmark<-"NO"
# Name of the file for the generated peptides.
GeneratedPeptidesFile <- "GeneratedPeptidesFile.csv"
1276 | |
1277 | |
1278 # Abl<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 4) | |
1279 # Arg<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 5) | |
1280 # Btk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 6) | |
1281 # Csk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 7) | |
1282 # Fyn<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 8) | |
1283 # Hck<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 9) | |
1284 # JAK2<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 10) | |
1285 # Lck<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 11) | |
1286 # Lyn<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 12) | |
1287 # Pyk2<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 13) | |
1288 # Src<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 14) | |
1289 # Syk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 15) | |
1290 # Yes<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 16) | |
1291 # | |
1292 | |

# Numeric code for each one-letter residue, in the row order used by the
# tables above (A = 1 ... Y = 20). These are created as global variables
# because other parts of the script refer to them by letter.
# NOTE: this overwrites the built-in shorthands T and F (and masks the names
# C, D and I), so from here on only TRUE / FALSE may be used as logicals.
A <- 1
C <- 2
D <- 3
E <- 4
F <- 5
G <- 6
H <- 7
I <- 8
K <- 9
L <- 10
M <- 11
N <- 12
P <- 13
Q <- 14
R <- 15
S <- 16
T <- 17
V <- 18
W <- 19
Y <- 20

# Lookup from residue letter to its code. "xY" shares the code of Y, and "O"
# gets the extra code 21.
aa_props <- c(
  "A" = A, "C" = C, "D" = D, "E" = E, "F" = F,
  "G" = G, "H" = H, "I" = I, "K" = K, "L" = L,
  "M" = M, "N" = N, "P" = P, "Q" = Q, "R" = R,
  "S" = S, "T" = T, "V" = V, "W" = W, "Y" = Y,
  "xY" = Y, "O" = 21
)
1317 | |
# Translate the residue letters chosen for each position into their numeric
# codes via aa_props. Numbering is not in position order: number1..number9
# cover positions -4 .. +4, number10..number12 cover +5 .. +7, and
# number13..number15 cover -5 .. -7.
residue_codes <- function(residues) {
  sapply(residues, function(x) aa_props[x])
}

number15 <- residue_codes(Positionm7)
number14 <- residue_codes(Positionm6)
number13 <- residue_codes(Positionm5)
number1  <- residue_codes(Positionm4)
number2  <- residue_codes(Positionm3)
number3  <- residue_codes(Positionm2)
number4  <- residue_codes(Positionm1)
number5  <- residue_codes(Positiond0)
number6  <- residue_codes(Positionp1)
number7  <- residue_codes(Positionp2)
number8  <- residue_codes(Positionp3)
number9  <- residue_codes(Positionp4)
number10 <- residue_codes(Positionp5)
number11 <- residue_codes(Positionp6)
number12 <- residue_codes(Positionp7)
1333 | |
1334 # number1<-Positionm4 | |
1335 # number2<-Positionm3 | |
1336 # number3<-Positionm2 | |
1337 # number4<-Positionm1 | |
1338 # number5<-Positiond0 | |
1339 # number6<-Positionp1 | |
1340 # number7<-Positionp2 | |
1341 # number8<-Positionp3 | |
1342 # number9<-Positionp4 | |
1343 | |
1344 ############################# | |
1345 #here I create the Abl seqs with proper value for each number | |
# Disabled legacy step: the guard below is always FALSE, so nothing in this
# block ever runs. If enabled it would, for each of Ablnumber1 .. Ablnumber9,
# replace every one-letter code listed below with the value of the variable
# of the same name (A, C, D, ...). Note that `F` and `T` are looked up as
# ordinary variables here, exactly as the original expressions did.
if (1 == 0) {
  residue_codes <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                     "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
  for (slot in 1:9) {
    target_name <- paste0("Ablnumber", slot)
    converted <- get(target_name)
    # Replacements are applied one after another in the fixed order above, so
    # each pattern operates on the output of the previous substitution.
    for (code in residue_codes) {
      converted <- gsub(code, get(code), converted, perl = TRUE)
    }
    assign(target_name, converted)
  }
}
1536 ######################################## | |
1537 | |
1538 | |
# Enumerate every combination of one entry per position vector
# (Positionm7 .. Positionp7, 15 columns) and, in parallel, the matching entries
# of the number vectors. Column pairing follows the original script:
#   Positionm7/6/5 -> number15/14/13, Positionm4 .. Positionp4 -> number1 .. 9,
#   Positionp5/6/7 -> number10/11/12.
#
# Row order is identical to the original 15 nested loops: column 1 varies
# slowest and column 15 varies fastest. The matrices are built column by column
# with rep() instead of being filled one row at a time, and an empty position
# vector now yields 0-row matrices instead of a subscript error.
position_sets <- list(Positionm7, Positionm6, Positionm5, Positionm4, Positionm3,
                      Positionm2, Positionm1, Positiond0, Positionp1, Positionp2,
                      Positionp3, Positionp4, Positionp5, Positionp6, Positionp7)
number_sets <- list(number15, number14, number13, number1, number2,
                    number3, number4, number5, number6, number7,
                    number8, number9, number10, number11, number12)

set_sizes <- vapply(position_sets, length, integer(1))

# Number of generated peptides. prod() works in double precision, so a very
# large combination count no longer overflows integer arithmetic.
total <- prod(set_sizes)

# block_len[c] = how many consecutive rows share the same choice in column c,
# i.e. the product of the set sizes of all columns to the right of c.
block_len <- rev(cumprod(c(1, rev(set_sizes)[-length(set_sizes)])))

# Expand a list of 15 per-column value vectors into a total x 15 matrix using
# the shared row pattern. Values are indexed by position within their vector,
# so a number vector shorter than its position vector yields NA (as before).
expand_choices <- function(value_sets) {
  columns <- lapply(seq_along(value_sets), function(col) {
    row_choice <- rep_len(rep(seq_len(set_sizes[col]), each = block_len[col]),
                          total)
    value_sets[[col]][row_choice]
  })
  # Guard against a missing (NULL) value vector silently shifting the columns.
  stopifnot(all(vapply(columns, length, integer(1)) == total))
  matrix(unlist(columns, use.names = FALSE), ncol = length(value_sets))
}

if (total == 0) {
  GeneratedPeptides <- matrix(NA, nrow = 0, ncol = 15)
  NumeratedPeptides <- GeneratedPeptides
} else {
  GeneratedPeptides <- expand_choices(position_sets)
  NumeratedPeptides <- expand_choices(number_sets)
}

# The original loop left `count` equal to the number of rows written.
count <- nrow(GeneratedPeptides)
1599 #################################################################### | |
####################################################################
# Score every generated peptide against (a) the probability table built
# earlier in this script (EPMtableu) and (b) each of the 13 panel kinase
# tables (Abl .. Yes).
#
# A peptide's score is the product of one table cell per position, for
# positions 1-7 and 9-15; position 8 is deliberately left out, as in the
# original expressions. The original code spelled this 14-factor product out
# 14 times and evaluated it one peptide at a time; the helper below performs
# the same look-ups for all peptides at once and multiplies the factors in the
# same left-to-right order.

# weights:          table (data frame or matrix) of per-residue weights.
# residue_rows:     n x 15 matrix of row indices into `weights`.
# column_shift:     column of `weights` used for position p is p + column_shift.
# first_as_numeric: apply as.numeric() to the first factor only (the original
#                   ThisKinTable expression did exactly that).
# Returns a numeric vector with one product per row of `residue_rows`.
position_product <- function(weights, residue_rows, column_shift,
                             first_as_numeric = FALSE) {
  n_peptides <- nrow(residue_rows)
  running <- NULL
  for (pos in c(1:7, 9:15)) {
    looked_up <- weights[residue_rows[, pos], pos + column_shift]
    if (pos == 1 && first_as_numeric) {
      looked_up <- as.numeric(looked_up)
    }
    # Zero indices would be dropped silently by `[` and misalign the peptides.
    stopifnot(length(looked_up) == n_peptides)
    running <- if (is.null(running)) looked_up else running * looked_up
  }
  running
}

# Prepend a column of ones to the table. These three statements are kept as in
# the original so that the resulting column name and row names are unchanged.
# NOTE(review): the hard-coded 21 presumably equals nrow(EPMtableu) - confirm.
ThisKinTable <- EPMtableu #[1:nrow(SDtable),]
TKTcolumn <- c(data = rep(1, times = 21))
TKTcolumn <- as.matrix(TKTcolumn, ncol = 1)
ThisKinTable <- cbind(TKTcolumn, ThisKinTable)

# Residue code k is looked up in row k + 1, and position p in column p + 2.
ThisKinGeneratedScores <- position_product(
  ThisKinTable,
  NumeratedPeptides[, 1:15, drop = FALSE] + 1,
  column_shift = 2,
  first_as_numeric = TRUE
)
ThisKinGenWeirdScore <- ThisKinGeneratedScores /
  (ThisKinGeneratedScores + 1 / as.numeric(NormalizationScore[2]))

# Panel kinases: residue code k is looked up in row k, position p in column p + 1.
panel_rows <- NumeratedPeptides[, 1:15, drop = FALSE]
AblGeneratedScores  <- position_product(Abl,  panel_rows, column_shift = 1)
ArgGeneratedScores  <- position_product(Arg,  panel_rows, column_shift = 1)
BtkGeneratedScores  <- position_product(Btk,  panel_rows, column_shift = 1)
CskGeneratedScores  <- position_product(Csk,  panel_rows, column_shift = 1)
FynGeneratedScores  <- position_product(Fyn,  panel_rows, column_shift = 1)
HckGeneratedScores  <- position_product(Hck,  panel_rows, column_shift = 1)
JAK2GeneratedScores <- position_product(JAK2, panel_rows, column_shift = 1)
LckGeneratedScores  <- position_product(Lck,  panel_rows, column_shift = 1)
LynGeneratedScores  <- position_product(Lyn,  panel_rows, column_shift = 1)
Pyk2GeneratedScores <- position_product(Pyk2, panel_rows, column_shift = 1)
SrcGeneratedScores  <- position_product(Src,  panel_rows, column_shift = 1)
SykGeneratedScores  <- position_product(Syk,  panel_rows, column_shift = 1)
YesGeneratedScores  <- position_product(Yes,  panel_rows, column_shift = 1)
1710 | |
1711 | |
1712 | |
# For each panel kinase, read two values from column 1 of its table (row 22 and
# row 24), derive the raw-score cutoff from them, and flag every generated
# peptide whose raw score exceeds that cutoff.
# NOTE(review): row 22 looks like a normalisation value and row 24 a percent
# threshold, judging only by the variable names - confirm against the screener
# file layout.
#
# The results are stored under the same global names the script has always
# used (<Kinase>Norm, <Kinase>Thresh, <Kinase>TrueThresh, <Kinase>Active),
# which is why get()/assign() are used instead of a list.
panel_kinases <- c("Abl", "Arg", "Btk", "Csk", "Fyn", "Hck", "JAK2",
                   "Lck", "Lyn", "Pyk2", "Src", "Syk", "Yes")

# The JAK2 flag has historically been spelled "JAk2Active"; the spelling is
# kept because it is also the column name written to the output file.
active_names <- paste0(panel_kinases, "Active")
active_names[panel_kinases == "JAK2"] <- "JAk2Active"

for (panel_pos in seq_along(panel_kinases)) {
  kinase <- panel_kinases[panel_pos]
  panel_table <- get(kinase)

  norm_value <- 1 / as.numeric(panel_table[22, 1])
  percent_cutoff <- as.numeric(panel_table[24, 1])
  raw_cutoff <- ((percent_cutoff * norm_value) / (100 - percent_cutoff))

  assign(paste0(kinase, "Norm"), norm_value)
  assign(paste0(kinase, "Thresh"), percent_cutoff)
  assign(paste0(kinase, "TrueThresh"), raw_cutoff)
  assign(active_names[panel_pos],
         unlist(get(paste0(kinase, "GeneratedScores"))) > raw_cutoff)
}

# Number of panel kinases predicted to accept each peptide (sum of the flags,
# added in the same kinase order as before).
AllActive <- Reduce(`+`, mget(active_names))
1779 #Btkactive+ | |
1780 | |
# Assemble the table of generated peptides: full motif string, the 15
# individual residues, the two scores for this kinase, and the panel flags.
Scores <- ThisKinGeneratedScores
ThresholdValues <- ThisKinGenWeirdScore

# Concatenate the 15 residue columns of each row into a single string.
# paste0() is applied column-wise to all rows at once; this gives the same
# strings as pasting each row separately and also works for a 0-row matrix
# (the previous 1:nrow() loop failed in that case).
FullMotifs <- do.call(
  paste0,
  lapply(1:15, function(column) GeneratedPeptides[, column])
)

# The variable names passed to cbind.data.frame() become the column names of
# the output file, so they are kept exactly as before.
PeptidesWithRanks <- cbind.data.frame(FullMotifs, GeneratedPeptides, Scores, ThresholdValues)
PeptidesWithRanks <- cbind.data.frame(PeptidesWithRanks, AllActive, AblActive, ArgActive, BtkActive, CskActive, FynActive, HckActive, JAk2Active, LckActive, LynActive, Pyk2Active, SrcActive, SykActive, YesActive)

# Peptides flagged by the fewest panel kinases come first; order() keeps the
# generation order among ties.
RanksPeptides <- PeptidesWithRanks[order(PeptidesWithRanks$AllActive, decreasing = FALSE), ]
1794 # PepRankHead<-c(1:9,"Sequence","RPMS","PMS") | |
1795 # RanksPeptides<-rbind.data.frame(PepRankHead,PeptidesWithRanks) | |
1796 #head(RanksPeptides) | |
1797 | |
1798 | |
1799 #now I have to score the negative sequences... for some reason | |
1800 #write up how we transfect with lipofectamine | |
1801 #3,4,5 questions | |
1802 | |
1803 #PAUSED HERE AT 4:50, HOPING THAT FIXING MINERVOTHING SO THAT LEFT SPACES WORKS FIXES A THING. OTHERWISE | |
1804 #I MESSED WITH THE MCC TABLE AND NEED TO FINISH IT | |
1805 | |
# Score the known negative and positive substrate sequences against
# ThisKinTable, using the same rule as for the generated peptides: the product
# of one table cell per position for positions 1-7 and 9-15 (position 8 is
# skipped).

# Append a row of ones to the table. SetOfAAs is converted to character first
# so that the appended value can be stored in that column.
# NOTE(review): the appended row is presumably the one selected for blank
# positions (code "O") - confirm against aa_props.
ThisKinBlanks <- rep(1, times = 17)
ThisKinTable$SetOfAAs <- as.character(ThisKinTable$SetOfAAs)
ThisKinTable <- rbind.data.frame(ThisKinTable, ThisKinBlanks)

# Raw score of a single motif.
#   weights:       the scoring table (ThisKinTable).
#   residue_index: numeric residue codes of the motif, one per position.
# Residue code k is looked up in row k + 1 and position p in column p + 2.
# Factors are multiplied left to right and as.numeric() is applied to the
# first factor only, exactly as the original expression did. A motif with
# fewer than 15 residues gives NA.
score_motif <- function(weights, residue_index) {
  rows <- as.numeric(residue_index) + 1
  score <- as.numeric(weights[rows[1], 3])
  for (pos in c(2:7, 9:15)) {
    score <- score * weights[rows[pos], pos + 2]
  }
  score
}

# Convert a raw score to the 0-100 scale used for thresholding.
to_percent_score <- function(raw_score) {
  (raw_score / (raw_score + 1 / as.numeric(NormalizationScore[2]))) * 100
}

# --- Negative substrates: column 2 holds the motif as a single string -------
NegativeScores <- rep(NA, times = nrow(NegativeSubstrateList))
NegativeWeirdScores <- rep(NA, times = nrow(NegativeSubstrateList))
for (v in seq_len(nrow(NegativeSubstrateList))) {
  motif <- unlist(strsplit(NegativeSubstrateList[v, 2], ""))
  motif <- gsub(" ", "O", motif)  # a space marks a blank position
  # Direct named look-up; unlike sapply() this stays numeric for an empty motif.
  raw_score <- score_motif(ThisKinTable, aa_props[motif])
  NegativeScores[v] <- raw_score
  NegativeWeirdScores[v] <- to_percent_score(raw_score)
}

negativesubstrates <- NegativeSubstrateList[, 2]
NegativeWithScores <- cbind(negativesubstrates, as.character(NegativeScores), as.character(NegativeWeirdScores))

#NEED TO HAVE THE NEGATIVE SUBSTRATES BE OUTPUTTED

# --- Positive substrates: columns 4-18 hold one residue per column ----------
PositiveScores <- rep(NA, times = nrow(ImportedSubstrateList))
PositiveWeirdScores <- rep(NA, times = nrow(ImportedSubstrateList))
for (v in seq_len(nrow(ImportedSubstrateList))) {
  motif <- unlist(ImportedSubstrateList[v, 4:18])
  motif <- gsub("^$", "O", motif)  # an empty cell marks a blank position
  raw_score <- score_motif(ThisKinTable, aa_props[motif])
  PositiveScores[v] <- raw_score
  PositiveWeirdScores[v] <- to_percent_score(raw_score)
}

positivesubstrates <- ImportedSubstrateList[, 4:18]
positivewithscores <- cbind.data.frame(positivesubstrates, PositiveScores, PositiveWeirdScores)
1872 | |
1873 | |
1874 #write down the transient transfection SOP and what we will be doing with them | |
1875 #write down the vector names I will be using | |
1876 #write down something about transforming bacteria and with what | |
1877 | |
1878 #90% whatevernness | |
1879 # TPninetyone<-length(PositiveWeirdScores[PositiveWeirdScores>=0.91]) | |
1880 # Senseninetyone<-TPninetyone/nrow(positivesubstrates) | |
1881 # | |
1882 # TNninetyone<-length(NegativeWeirdScores[NegativeWeirdScores<91]) | |
1883 # Specninetyone<-TNninetyone/100 | |
1884 | |
1885 #create the MCC table | |
1886 | |
# Build the characterization table: one row per score threshold from 100 down
# to 1, with the confusion counts and derived statistics at that threshold.
# A positive substrate counts as a true positive when its 0-100 score is
# >= the threshold; a negative substrate counts as a true negative when its
# score is < the threshold.
threshold <- 100:1

# NOTE(review): length(x[cond]) also counts elements whose score is NA, so an
# unscorable sequence is counted as a true positive / true negative at every
# threshold. That counting is kept unchanged here - confirm whether such
# sequences should be excluded instead.
Truepositives <- vapply(
  threshold,
  function(cutoff) length(PositiveWeirdScores[PositiveWeirdScores >= cutoff]),
  integer(1)
)
Falsenegatives <- nrow(positivesubstrates) - Truepositives
Sensitivity <- Truepositives / (Falsenegatives + Truepositives)

TrueNegatives <- vapply(
  threshold,
  function(cutoff) length(NegativeWeirdScores[NegativeWeirdScores < cutoff]),
  integer(1)
)
# at thresh 100 this should be 0, because it is total minus true negatives
FalsePositives <- nrow(NegativeSubstrateList) - TrueNegatives

# NOTE(review): as written this column is 1 - TN/(FP+TN), i.e. the false
# positive rate rather than the specificity. Left unchanged because EER below
# is computed from it - confirm the intended definition.
Specificity <- 1 - (TrueNegatives / (FalsePositives + TrueNegatives))

Accuracy <- 100 * (Truepositives + TrueNegatives) /
  (Falsenegatives + FalsePositives + TrueNegatives + Truepositives)

# Matthews correlation coefficient:
#   (TP*TN - FP*FN) / sqrt((TP+FN) * (TN+FP) * (TP+FP) * (TN+FN))
# The previous numerator, (TP+TN) - (FN+FP), is not the MCC. The counts are
# converted to double first because the product of four integer counts can
# overflow R's integer range and become NA. When a marginal total is zero the
# result is NaN (0/0).
tp <- as.numeric(Truepositives)
tn <- as.numeric(TrueNegatives)
fp <- as.numeric(FalsePositives)
fn <- as.numeric(Falsenegatives)
MCC <- (tp * tn - fp * fn) /
  sqrt((tp + fn) * (tn + fp) * (tp + fp) * (tn + fn))

# NOTE(review): formula kept exactly as in the original script; its derivation
# is not visible here.
EER <- .01 * (((1 - (Sensitivity)) * (Truepositives + Falsenegatives)) +
                (Specificity * (1 - (Truepositives + Falsenegatives))))

Characterization <- cbind.data.frame(threshold, Truepositives, Falsenegatives, Sensitivity, TrueNegatives, FalsePositives, Specificity, Accuracy, MCC, EER)
1916 | |
# Attach header rows / column names to the scored substrate tables, then write
# the characterization table (appended to FILENAME2) and the ranked generated
# peptides (FILENAME3, overwritten).

# Header row for the positive substrates: position numbers 1-15 followed by
# the two score labels, stored as the first row of the table.
positiveheader <- c(as.character(1:15), "RPMS", "PMS")
positivewithscores <- rbind.data.frame(positiveheader, positivewithscores)

negativeheader <- c("Substrate", "RPMS", "PMS")
colnames(NegativeWithScores) <- negativeheader

# write.xlsx(NegativeWithScores,file = FILENAME, sheetName = "Negative Sequences Scored",col.names = TRUE,row.names = FALSE,append = TRUE)
# write.xlsx(Characterization,file = FILENAME,sheetName = "Characterization Table",col.names = TRUE,row.names = FALSE,append = TRUE)
# write.xlsx(RanksPeptides,file = FILENAME,sheetName = "Ranked Generated Peptides",col.names = FALSE,row.names = FALSE,append = TRUE)
# write.xlsx(positivewithscores,file = FILENAME, sheetName = "Positive Sequences Scored",col.names = FALSE,row.names = FALSE,append = TRUE)

# Section title line, appended to whatever FILENAME2 already contains.
write.table(
  x = c("Characterzation Table"),
  file = FILENAME2,
  sep = ",",
  append = TRUE,
  row.names = FALSE,
  col.names = FALSE
)

# The column names are written as an ordinary first data row, so the table
# itself is emitted without a separate header line.
header <- colnames(Characterization)
Characterization <- rbind.data.frame(header, Characterization)
write.table(
  Characterization,
  file = FILENAME2,
  sep = ",",
  append = TRUE,
  row.names = FALSE,
  col.names = FALSE
)

# header<-colnames(RanksPeptides)
# RanksPeptides<-rbind.data.frame(header,RanksPeptides)
write.table(
  RanksPeptides,
  file = FILENAME3,
  sep = ",",
  append = FALSE,
  row.names = FALSE,
  col.names = TRUE
)