1
|
# Run from the directory that holds this script so relative paths resolve.
# `ofile` is a variable that exists on the frame created by source(); when the
# code is started any other way (Rscript, pasted into a console) the lookup
# yields NULL and dirname(NULL) would abort the whole script. In that case the
# current working directory is kept and recorded in `this.dir` instead.
script.file <- parent.frame(2)$ofile
if (is.null(script.file)) {
  this.dir <- getwd()
} else {
  this.dir <- dirname(script.file)
  setwd(this.dir)
}
|
|
3
|
|
4
|
|
# ---- Inputs ----------------------------------------------------------------
# `input1`, `input2`, `input3` and `screener` are not defined in this section;
# they have to exist already when it runs. All three tables are read with
# strings kept as plain character columns.
ImportedSubstrateList <- read.csv(file = input1, stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv(file = input2, stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv(
  file = input3,
  stringsAsFactors = FALSE
)

ScreenerFilename <- screener

# ---- Output name stems -----------------------------------------------------
FILENAME <- "output1"
FILENAME2 <- "output2"
FILENAME3 <- "output3"
|
|
16
|
|
17
|
|
18
|
|
19
|
|
20
|
|
21
|
|
22
|
|
23
|
|
24
|
|
25
|
|
26
|
|
27
|
|
28
|
|
29
|
|
30
|
|
31
|
|
32
|
|
33
|
|
34
|
|
35
|
|
36
|
|
37
|
|
38
|
|
39
|
|
# ---- Names for the result tables -------------------------------------------
# The first two names carry a ".csv" extension, the last two do not.
# NOTE: `SDtable` only holds this name temporarily; further down the script
# the same variable is reassigned to the numeric SD matrix.
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
OutputMatrix <- "KinaseMatrix.csv"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"
SDtable <- "SDtableforthisKinase"
|
|
44
|
|
45
|
|
46
|
|
# ---- Substrate motif matrix ------------------------------------------------
# Build `substrates`: a character matrix with one row per imported substrate
# and one column per motif position. The 15 positions are read from columns
# 4:18 of ImportedSubstrateList. The first row of the imported table is
# skipped, exactly as before.
motif_columns <- 4:18
motif_width <- length(motif_columns)

# Rows of the imported table that hold substrates (everything after row 1).
# This replaces `2:nrow(...)`, which counts backwards (2, 1) for a one-row
# table and then fails with a subscript error; an empty or one-row table now
# simply produces a 0 x 15 matrix.
substrate_rows <- seq_len(nrow(ImportedSubstrateList))[-1]

# Pre-filled with "A"; every cell is overwritten in the loop below.
substrates <- matrix(
  rep("A", times = length(substrate_rows) * motif_width),
  ncol = motif_width
)

for (j in seq_along(substrate_rows)) {
  i <- substrate_rows[j]
  substratemotif <- ImportedSubstrateList[i, motif_columns]
  # The 8th (central) motif position is always recorded as "Y".
  substratemotif[8] <- "Y"
  # Flatten the one-row data frame into a plain vector before storing it.
  substratemotif <- unlist(substratemotif)
  substrates[j, 1:15] <- substratemotif
}
|
|
59
|
|
60 # SpacesToOs<-c(""="O",)
|
|
61 # substrates<-SpacesToOs[substrates]
|
|
62
|
|
# Value in the last row, second column of the background table. The result is
# not stored; the expression is kept so top-level evaluation is unchanged.
SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency), 2]

# ---- Background frequency statistics ---------------------------------------
# Columns 2..21 of SubstrateBackgroundFrequency are summarised in this order;
# the letter is the prefix used for the per-column scalars (Amean, Asd, ...).
background_codes <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)
background_columns <- 2:21

if (ncol(SubstrateBackgroundFrequency) < max(background_columns)) {
  stop(
    "SubstrateBackgroundFrequency needs at least 21 columns, found ",
    ncol(SubstrateBackgroundFrequency),
    call. = FALSE
  )
}

# Coerce every column to numeric once (previously done twice per column).
# Cells that cannot be read as numbers become NA and are ignored below.
background_values <- lapply(
  background_columns,
  function(k) as.numeric(SubstrateBackgroundFrequency[[k]])
)

# Mean and standard deviation per column, NA values removed.
AllMeans <- vapply(background_values, mean, numeric(1), na.rm = TRUE)
AllSDs <- vapply(background_values, sd, numeric(1), na.rm = TRUE)

# Export the per-column scalars (Amean ... Ymean, Asd ... Ysd) under their
# original names, because later parts of the script refer to them directly.
for (code_index in seq_along(background_codes)) {
  assign(paste0(background_codes[code_index], "mean"), AllMeans[code_index])
  assign(paste0(background_codes[code_index], "sd"), AllSDs[code_index])
}

# 20 x 2 matrix: column "AllMeans" and column "AllSDs".
SBF_statisticalvalues <- cbind(AllMeans, AllSDs)
|
|
114
|
|
# ---- Percent table ---------------------------------------------------------
# For each of the 15 motif positions (columns of `substrates`) and each of the
# 20 amino acids, compute
#   (number of entries equal to that amino acid) /
#   (number of entries at that position that are not the empty string "")
# and finally scale the whole table to percentages. A position with no
# non-blank entries gives 0 / 0 = NaN.
percent_aa_codes <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)
percent_positions <- 1:15

# Column1..Column15 (one motif position each) and spaces1..spaces15 (how many
# entries of that position are ""), exported under their original names.
for (pos in percent_positions) {
  assign(paste0("Column", pos), substrates[, pos])
  assign(paste0("spaces", pos), sum(substrates[, pos] %in% ""))
}

# One 1 x 15 row per amino acid. Each row is also exported as AllAs, CllCs,
# DllDs, ... YllYs and each cell as A1..A15, C1..C15, ... Y1..Y15, matching
# the names the hand-written version defined.
percent_rows <- vector("list", length(percent_aa_codes))
for (aa_index in seq_along(percent_aa_codes)) {
  aa <- percent_aa_codes[aa_index]

  aa_fractions <- vapply(
    percent_positions,
    function(pos) {
      position_column <- substrates[, pos]
      non_blank <- length(position_column) - sum(position_column %in% "")
      sum(position_column %in% aa) / non_blank
    },
    numeric(1)
  )

  for (pos in percent_positions) {
    assign(paste0(aa, pos), aa_fractions[pos])
  }

  aa_row <- matrix(
    aa_fractions,
    nrow = 1,
    dimnames = list(NULL, paste0(aa, percent_positions))
  )
  assign(paste0(aa, "ll", aa, "s"), aa_row)
  percent_rows[[aa_index]] <- aa_row
}

# Rows are in the order A C D E F G H I K L M N P Q R S T V W Y. Column
# names come from the first row bound, i.e. A1..A15; there are no row names.
PercentTable <- do.call(rbind, percent_rows)
PercentTable <- PercentTable * 100
|
|
511
|
|
# ---- SD table --------------------------------------------------------------
# Express every percentage as its distance from the background mean of the
# same amino acid, in units of that amino acid's background SD:
#   SDtable[r, p] = (PercentTable[r, p] - AllMeans[r]) / AllSDs[r]
# AllMeans and AllSDs hold one value per row of PercentTable (A, C, D, ... Y),
# so R's column-wise recycling pairs each value with its row. unname() drops
# the A1..A15 column labels inherited from PercentTable, leaving a plain
# 20 x 15 numeric matrix.
SDtable <- unname((PercentTable - AllMeans) / AllSDs)
|
|
537
|
|
538
|
|
# Labels for the exported tables: a "Letter" header cell followed by the 20
# amino-acid codes in the same order as the rows of SDtable.
SetOfAAs <- c(
  "Letter", "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)

# Amino acid belonging to each SDtable row. Because SetOfAAs[1] is the
# "Letter" header, row j of SDtable corresponds to SetOfAAs[j + 1]. Indexing
# SetOfAAs with the bare row number (as was done before) counted the previous
# residue in the list for every row and never counted "Y".
sigma_residues <- SetOfAAs[-1]

# For each of the 15 motif positions: how many substrates carry, at that
# position, a residue whose SD score there is above 2.
SumOfSigmaAAs <- vapply(
  seq_len(15),
  function(position) {
    # which() ignores NA/NaN scores, which would otherwise abort an `if`.
    enriched_rows <- which(SDtable[, position] > 2)
    sum(substrates[, position] %in% sigma_residues[enriched_rows])
  },
  numeric(1)
)
|
|
555
|
|
556 # AAs1<-length(substrates[,1])-sum(substrates[,1]=="")
|
|
557 # AAs2<-length(substrates[,2])-sum(substrates[,2]=="")
|
|
558 # AAs3<-length(substrates[,3])-sum(substrates[,3]=="")
|
|
559 # AAs4<-length(substrates[,4])-sum(substrates[,4]=="")
|
|
560 # AAs5<-length(substrates[,5])-sum(substrates[,5]=="")
|
|
561 # AAs6<-length(substrates[,6])-sum(substrates[,6]=="")
|
|
562 # AAs7<-length(substrates[,7])-sum(substrates[,7]=="")
|
|
563 # AAs8<-length(substrates[,8])-sum(substrates[,8]=="")
|
|
564 # AAs9<-length(substrates[,9])-sum(substrates[,9]=="")
|
|
565 #
|
|
566 #
|
|
567 #
|
|
568 # #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9)
|
|
569 # AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
|
|
570 # length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
|
|
571 # length(substrates[,9]))
|
|
572
|
|
# For each of the 15 motif positions: the count expected from background
# frequencies alone for the residues whose SD score at that position is
# above 2. This is the sum of their background mean percentages, applied to
# the number of non-blank entries at the position.
SumOfExpectedSigmaAAs <- vapply(
  seq_len(15),
  function(position) {
    # which() skips NA/NaN scores; testing them in an `if` stops the script
    # with "missing value where TRUE/FALSE needed".
    enriched_rows <- which(SDtable[, position] > 2)
    position_column <- substrates[, position]
    filled_count <- length(position_column) - sum(position_column %in% "")
    sum(AllMeans[enriched_rows]) * filled_count / 100
  },
  numeric(1)
)

# Observed / expected per position (NaN where both are 0), then the three
# rows stacked; row names are taken from the variable names passed to rbind().
SelectivityRow <- SumOfSigmaAAs / SumOfExpectedSigmaAAs
SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)
|
|
588
|
|
# ---- Labelled output tables ------------------------------------------------
# Reshape the label vector into a single-column matrix for data.frame() below.
SetOfAAs <- matrix(SetOfAAs, ncol = 1)

# Keep the unlabelled SD matrix under its own name before SDtable is replaced.
SDtableu <- SDtable

# Position header running from -7 to 7; it becomes the first row of both
# tables, and the label column is put in front.
HeaderSD <- -7:7

SDtable <- rbind(HeaderSD, SDtableu)
SDtable <- data.frame(SetOfAAs, SDtable)

PercentTable <- rbind(HeaderSD, PercentTable)
PercentTable <- data.frame(SetOfAAs, PercentTable)

# ---- Normalization score ---------------------------------------------------
# Ratio of the summed Number.of.pY column to the summed Number.of.Y column of
# the background table; entries that are not numeric (NA after coercion) are
# left out of both sums.
numberofY <- as.numeric(SubstrateBackgroundFrequency$Number.of.Y)
numberofPY <- as.numeric(SubstrateBackgroundFrequency$Number.of.pY)

numberofY <- numberofY[!is.na(numberofY)]
numberofPY <- numberofPY[!is.na(numberofPY)]

NormalizationScore <- sum(numberofPY) / sum(numberofY)
|
|
605
|
|
606 # positions<-matrix(data = NA, nrow=20,ncol = 15)
|
|
607 #
|
|
608 # #column1
|
|
609 #
|
|
610 # for (q in 1:15) {
|
|
611 # sA<-sum(substrates[,i]=="A")
|
|
612 # positions[1,i]<-sA
|
|
613 # sC<-sum(substrates[,i]=="C")
|
|
614 # positions[2,i]<-sC
|
|
615 # sD<-sum(substrates[,i]=="D")
|
|
616 # positions[3,i]<-sD
|
|
617 # sE<-sum(substrates[,i]=="E")
|
|
618 # positions[4,i]<-sE
|
|
619 # sF<-sum(substrates[,i]=="F")
|
|
620 # sG<-sum(substrates[,i]=="G")
|
|
621 # sH<-sum(substrates[,i]=="H")
|
|
622 # sI<-sum(substrates[,i]=="I")
|
|
623 # sK<-sum(substrates[,i]=="K")
|
|
624 # sL<-sum(substrates[,i]=="L")
|
|
625 # sM<-sum(substrates[,i]=="M")
|
|
626 # sN<-sum(substrates[,i]=="N")
|
|
627 # sP<-sum(substrates[,i]=="P")
|
|
628 # sQ<-sum(substrates[,i]=="Q")
|
|
629 # sR<-sum(substrates[,i]=="R")
|
|
630 # sS<-sum(substrates[,i]=="S")
|
|
631 # sT<-sum(substrates[,i]=="T")
|
|
632 # sV<-sum(substrates[,i]=="V")
|
|
633 # sW<-sum(substrates[,i]=="W")
|
|
634 # sY<-sum(substrates[,i]=="Y")
|
|
635 # positions[5,i]<-sF
|
|
636 # positions[6,i]<-sG
|
|
637 # positions[7,i]<-sH
|
|
638 # positions[8,i]<-sI
|
|
639 # positions[9,i]<-sK
|
|
640 # positions[10,i]<-sL
|
|
641 # positions[11,i]<-sM
|
|
642 # positions[12,i]<-sN
|
|
643 # positions[13,i]<-sP
|
|
644 # positions[14,i]<-sQ
|
|
645 # positions[15,i]<-sR
|
|
646 # positions[16,i]<-sS
|
|
647 # positions[17,i]<-sT
|
|
648 # positions[18,i]<-sV
|
|
649 # positions[19,i]<-sW
|
|
650 # positions[20,i]<-sY
|
|
651 # }
|
|
652
|
|
# Here I create the positions table, which is needed for the endogenous
# probability matrix. It answers one question: "how many times did each amino
# acid show up at each of the 15 motif positions?"
#
# PositionTable is a 20 x 15 integer matrix. Rows follow PositionAminoAcids
# (A, C, D, ..., Y); columns follow columns 1..15 of `substrates`.
# The counts are produced by one table-driven pass instead of one hand-written
# assignment per residue/position pair.

PositionAminoAcids <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                        "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

PositionTable <- vapply(
  seq_len(15),
  function(PositionIndex) {
    vapply(
      PositionAminoAcids,
      # %in% never yields NA, so NA cells simply count as "not this residue"
      function(Residue) sum(substrates[, PositionIndex] %in% Residue),
      integer(1),
      USE.NAMES = FALSE
    )
  },
  integer(length(PositionAminoAcids))
)

# Backward compatibility: the previous implementation left these objects in
# the workspace, and code outside this block may still read them:
#   Column1..Column15  - the individual columns of `substrates`
#   spaces1..spaces15  - number of "" entries per column
#   A1..Y15            - the individual counts
#   AllAs, CllCs, ...  - one 1 x 15 row matrix per residue
# NOTE(review): drop this section once a search of the whole script confirms
# none of these names is used anywhere else.
for (PositionIndex in seq_len(15)) {
  assign(paste0("Column", PositionIndex), substrates[, PositionIndex])
  assign(paste0("spaces", PositionIndex),
         sum(substrates[, PositionIndex] %in% ""))
  for (ResidueIndex in seq_along(PositionAminoAcids)) {
    assign(paste0(PositionAminoAcids[ResidueIndex], PositionIndex),
           PositionTable[ResidueIndex, PositionIndex])
  }
}
for (ResidueIndex in seq_along(PositionAminoAcids)) {
  Residue <- PositionAminoAcids[ResidueIndex]
  assign(
    paste0(Residue, "ll", Residue, "s"),
    matrix(PositionTable[ResidueIndex, ], nrow = 1,
           dimnames = list(NULL, paste0(Residue, seq_len(15))))
  )
}
rm(PositionIndex, ResidueIndex, Residue)

# Same dimnames the row-bound table used to carry: no row names, and the
# column names of its first row (A1..A15). Set last so the scalar counts
# exported above stay unnamed.
colnames(PositionTable) <- paste0("A", seq_len(15))
|
|
# Endogenous probability matrix (EPM): the count of a residue at a position
# divided by the count expected from the background frequency means.
# Start from the raw counts so EPMtable has the same shape as PositionTable;
# the entries are overwritten when the matrix is filled in.
EPMtable <- PositionTable

# Number of non-empty entries at each of the 15 motif positions.
# Empties are counted with %in% rather than ==: with ==, a single NA cell
# makes the sum NA, and that NA would then spread to every EPM value of the
# column. %in% is also how empty entries are counted elsewhere in this script.
columns <- vapply(
  seq_len(15),
  function(PositionIndex) {
    PositionColumn <- substrates[, PositionIndex]
    length(PositionColumn) - sum(PositionColumn %in% "")
  },
  integer(1)
)
|
|
1067
|
|
# Fill the endogenous probability matrix cell by cell
# (z = motif position, y = residue row).
for (z in seq_len(15)) {
  for (y in seq_len(20)) {
    # denominator shared by both cases below
    ExpectedCount <- columns[z] * .01 * AllMeans[y]

    if (PositionTable[y, z] > 0) {
      EPMtable[y, z] <- PositionTable[y, z] / ExpectedCount
    } else if (PositionTable[y, z] == 0) {
      # residue never seen at this position: use 1 / columns[z] in place of
      # the zero count so the cell does not end up as 0
      EPMtable[y, z] <- (1 / columns[z]) / ExpectedCount
    }
  }
}
|
|
1078 #here I created the endogenous probability matrix
|
|
1079 #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs
|
|
1080
|
|
1081
|
|
1082
|
|
1083
|
|
1084
|
|
1085 # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
1086 # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
1087 # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
1088 # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
1089 # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
1090
|
|
# Turn the score into a two-element character vector: label, then value.
NormalizationScore <- c("Normalization Score", NormalizationScore)

# Append to FILENAME, in this order: a one-cell title, the SD table, a
# one-cell title, the percent table. Everything is comma separated and written
# without row or column names.
for (Section in list("SD Table", SDtable, "Percent Table", PercentTable)) {
  write.table(Section, file = FILENAME, append = TRUE, sep = ",",
              row.names = FALSE, col.names = FALSE)
}
|
|
1097
|
|
# Labelled copy of the endogenous probability matrix: header row of position
# offsets -7..7 on top, SetOfAAs as the leading column.
EPMtableu <- EPMtable
HeaderSD <- -7:7
EPMtableu <- rbind(HeaderSD, EPMtableu)
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

# Two-row header for the selectivity section: a row of 16 single-space cells,
# then "Position" followed by the offsets -7..7.
# NOTE(review): `head` shares its name with base::head; the name is kept
# because later code may refer to this variable.
head <- matrix(rep(" ", times = 16), nrow = 1)
SelectivityHeader <- rbind(head, matrix(c("Position", -7:7), nrow = 1))

# Appends one comma-separated section to FILENAME2, never with column names.
AppendToSelectivityFile <- function(x, row.names = FALSE) {
  write.table(x, file = FILENAME2, append = TRUE, sep = ",",
              row.names = row.names, col.names = FALSE)
}

AppendToSelectivityFile("Site Selectivity Matrix")
AppendToSelectivityFile(SelectivityHeader)
# colnames(SelectivitySheet) <- c("-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7")
# only this section is written with its row names
AppendToSelectivityFile(SelectivitySheet, row.names = TRUE)
AppendToSelectivityFile("Endogenous Probability Matrix")
AppendToSelectivityFile(EPMtableu)
AppendToSelectivityFile(NormalizationScore)
|