# HG changeset patch
# User jfb
# Date 1517955190 18000
# Node ID f1bbd121dfb7d227baa9985681d8446b0119bfde
# Parent c0de8ed09b7d95fd3ba6961c3004a539fd491880
# Uploaded
# diff -r c0de8ed09b7d -r f1bbd121dfb7 Kinatest-R_part1.R
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/Kinatest-R_part1.R Tue Feb 06 17:13:10 2018 -0500
# @@ -0,0 +1,1113 @@

# Kinatest-R, part 1.
#
# Reads a list of substrate motifs plus a background-frequency table and
# derives, for each of the 15 motif positions and each of the 20 amino acids:
#   * PercentTable  - percentage of non-blank residues that are that amino acid
#   * SDtable       - (percentage - background mean) / background SD
#   * SumOfSigmaAAs - per position, how many substrates carry a residue whose
#                     SDtable value exceeds 2

# ---- Setup ----

# Work from the directory that contains this script. `ofile` only exists in
# the frame set up by source(), so this line fails under Rscript or at the
# interactive prompt.
this.dir <- dirname(parent.frame(2)$ofile)
setwd(this.dir)

# `input1`, `input2`, `input3` and `screener` are not defined in the visible
# part of this file; they must already exist when the script is sourced.
ImportedSubstrateList <- read.csv(input1, stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv(input2, stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv(input3, stringsAsFactors = FALSE)

ScreenerFilename <- screener

FILENAME <- "output1"
FILENAME2 <- "output2"
FILENAME3 <- "output3"

OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
# NOTE(review): this name is reused for the z-score matrix built further down,
# so the file-name string below is lost before anything reads it.
SDtable <- "SDtableforthisKinase"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"

# First element is the header label used when the tables are printed; the
# remaining 20 entries are the amino acids in table row order.
SetOfAAs <- c(
  "Letter",
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)
kinatest_residues <- SetOfAAs[-1]
kinatest_positions <- 15L

# Count, for every column of a character matrix, how many entries equal
# `residue`. `%in%` is used so that NA entries count as "no match".
kinatest_count_by_position <- function(sequences, residue) {
  vapply(
    seq_len(ncol(sequences)),
    function(position) sum(sequences[, position] %in% residue),
    integer(1)
  )
}

# ---- Substrate matrix ----

# One row per imported substrate, one column per motif position. Row 1 of the
# import is skipped and columns 4:18 hold the motif (presumably row 1 is a
# secondary header - TODO confirm against the input format).
kinatest_n_substrates <- max(nrow(ImportedSubstrateList) - 1L, 0L)
substrates <- matrix("A", nrow = kinatest_n_substrates, ncol = kinatest_positions)
# SeqsToBeScored<-"asdasd"

# seq_len() keeps the loop empty when there is nothing to import; `2:nrow(...)`
# would count backwards and index out of bounds.
for (i in seq_len(kinatest_n_substrates) + 1L) {
  substratemotif <- ImportedSubstrateList[i, 4:18]
  substratemotif[8] <- "Y" # the central position is always forced to Y
  # substratemotif<-paste(substratemotif,sep = "",collapse = "")
  substrates[i - 1L, ] <- unlist(substratemotif)
}

# SpacesToOs<-c(""="O",)
# substrates<-SpacesToOs[substrates]

# ---- Background statistics ----

# Columns 2:21 of the background table are taken, in order, as the 20 amino
# acids A..Y. Non-numeric cells become NA and are ignored.
kinatest_background <- lapply(
  2:21,
  function(column) as.numeric(SubstrateBackgroundFrequency[[column]])
)
AllMeans <- vapply(kinatest_background, mean, numeric(1), na.rm = TRUE)
AllSDs <- vapply(kinatest_background, sd, numeric(1), na.rm = TRUE)
# this is subbackfreq SDs

SBF_statisticalvalues <- cbind(AllMeans, AllSDs)

# ---- Percent table ----

# Blank cells are excluded from the denominator of every position.
kinatest_blank_counts <- kinatest_count_by_position(substrates, "")
kinatest_filled_counts <- nrow(substrates) - kinatest_blank_counts

# 20 x 15 integer matrix: occurrences of each amino acid at each position.
kinatest_residue_counts <- t(vapply(
  kinatest_residues,
  function(residue) kinatest_count_by_position(substrates, residue),
  integer(kinatest_positions),
  USE.NAMES = FALSE
))

# Divide every column by the number of non-blank residues at that position.
kinatest_fractions <- sweep(kinatest_residue_counts, 2, kinatest_filled_counts, "/")
# this is substrate percents

# Rows: A C D E F G H I K L M N P Q R S T V W Y
PercentTable <- kinatest_fractions * 100
# The original rbind() inherited these column names from its first row
# (A1..A15); they are kept because they end up in data.frame headers later.
colnames(PercentTable) <- paste0("A", seq_len(kinatest_positions))

# ---- SD table ----

# For every row: (percentage - background mean) / background SD of the same
# amino acid. The length-20 vectors recycle down the columns, i.e. per row.
SDtable <- (unname(PercentTable) - AllMeans) / AllSDs

# ---- Observed count of over-represented residues per position ----

# A residue is "over-represented" at a position when its SDtable value is > 2.
# Undefined z-scores (zero SD, empty column) are treated as not significant
# instead of aborting the script.
# Row j of SDtable is amino acid j, i.e. SetOfAAs[j + 1]; the counts therefore
# use the residue that actually belongs to the significant row.
kinatest_enriched <- !is.na(SDtable) & SDtable > 2
SumOfSigmaAAs <- colSums(kinatest_residue_counts * kinatest_enriched)

# ---- Legacy aliases ----

# The rest of this script is not visible from here, so the individual
# variables the original code created (Amean/Asd, A1..Y15, AllAs..YllYs,
# Column1..15, spaces1..15) are still published under their old names.
for (kinatest_row in seq_along(kinatest_residues)) {
  kinatest_aa <- kinatest_residues[kinatest_row]
  assign(paste0(kinatest_aa, "mean"), AllMeans[kinatest_row])
  assign(paste0(kinatest_aa, "sd"), AllSDs[kinatest_row])

  kinatest_cells <- paste0(kinatest_aa, seq_len(kinatest_positions))
  for (kinatest_col in seq_len(kinatest_positions)) {
    assign(kinatest_cells[kinatest_col], kinatest_fractions[kinatest_row, kinatest_col])
  }

  # Historical row names: "AllAs" for A, then "CllCs", "DllDs", ...
  kinatest_alias <- if (kinatest_aa == "A") {
    "AllAs"
  } else {
    paste0(kinatest_aa, "ll", kinatest_aa, "s")
  }
  assign(
    kinatest_alias,
    matrix(
      kinatest_fractions[kinatest_row, ],
      nrow = 1L,
      dimnames = list(NULL, kinatest_cells)
    )
  )
}
for (kinatest_col in seq_len(kinatest_positions)) {
  assign(paste0("Column", kinatest_col), substrates[, kinatest_col])
  assign(paste0("spaces", kinatest_col), kinatest_blank_counts[kinatest_col])
}

# AAs1<-length(substrates[,1])-sum(substrates[,1]=="")
# AAs2<-length(substrates[,2])-sum(substrates[,2]=="")
# AAs3<-length(substrates[,3])-sum(substrates[,3]=="")
# AAs4<-length(substrates[,4])-sum(substrates[,4]=="")
# AAs5<-length(substrates[,5])-sum(substrates[,5]=="")
# AAs6<-length(substrates[,6])-sum(substrates[,6]=="")
# AAs7<-length(substrates[,7])-sum(substrates[,7]=="")
# AAs8<-length(substrates[,8])-sum(substrates[,8]=="")
# AAs9<-length(substrates[,9])-sum(substrates[,9]=="")
#
#
#
# #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9)
# AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
#                   length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
#                   length(substrates[,9]))

+SumOfExpectedSigmaAAs<-c(1:15) +for (i in 1:15){ + ExpectedValue<-0 + for (j in 1:20){ + value<-0 + if (SDtable[j,i]>2){ + value<-AllMeans[j] + } + ExpectedValue<-ExpectedValue+value + } + SumOfExpectedSigmaAAs[i]<-ExpectedValue*(length(substrates[,i])-sum(substrates[,i]%in% ""))/100 +} + +SelectivityRow<-SumOfSigmaAAs/SumOfExpectedSigmaAAs +SelectivitySheet<-rbind(SumOfSigmaAAs,SumOfExpectedSigmaAAs,SelectivityRow) + +SetOfAAs<-matrix(data = SetOfAAs,ncol = 1) + +SDtableu<-SDtable +HeaderSD<-c(-7:7) +SDtable<-rbind(HeaderSD,SDtableu) +SDtable<-data.frame(SetOfAAs,SDtable) + +PercentTable<-rbind(HeaderSD,PercentTable) +PercentTable<-data.frame(SetOfAAs,PercentTable) +numberofY<-as.numeric(SubstrateBackgroundFrequency$Number.of.Y) +numberofY<-numberofY[!is.na(numberofY)] + +numberofPY<-as.numeric(SubstrateBackgroundFrequency$Number.of.pY) +numberofPY<-numberofPY[!is.na(numberofPY)] + +NormalizationScore<-sum(numberofPY)/sum(numberofY) + +# positions<-matrix(data = NA, nrow=20,ncol = 15) +# +# #column1 +# +# for (q in 1:15) { +# sA<-sum(substrates[,i]=="A") +# positions[1,i]<-sA +# sC<-sum(substrates[,i]=="C") +# positions[2,i]<-sC +# sD<-sum(substrates[,i]=="D") +# positions[3,i]<-sD +# sE<-sum(substrates[,i]=="E") +# positions[4,i]<-sE +# sF<-sum(substrates[,i]=="F") +# sG<-sum(substrates[,i]=="G") +# sH<-sum(substrates[,i]=="H") +# sI<-sum(substrates[,i]=="I") +# sK<-sum(substrates[,i]=="K") +# sL<-sum(substrates[,i]=="L") +# sM<-sum(substrates[,i]=="M") +# sN<-sum(substrates[,i]=="N") +# sP<-sum(substrates[,i]=="P") +# sQ<-sum(substrates[,i]=="Q") +# sR<-sum(substrates[,i]=="R") +# sS<-sum(substrates[,i]=="S") +# sT<-sum(substrates[,i]=="T") +# sV<-sum(substrates[,i]=="V") +# sW<-sum(substrates[,i]=="W") +# sY<-sum(substrates[,i]=="Y") +# positions[5,i]<-sF +# positions[6,i]<-sG +# positions[7,i]<-sH +# positions[8,i]<-sI +# positions[9,i]<-sK +# positions[10,i]<-sL +# positions[11,i]<-sM +# positions[12,i]<-sN +# positions[13,i]<-sP +# positions[14,i]<-sQ +# 
# positions[15,i]<-sR
# positions[16,i]<-sS
# positions[17,i]<-sT
# positions[18,i]<-sV
# positions[19,i]<-sW
# positions[20,i]<-sY
# }

# Position table, endogenous probability matrix (EPM) and CSV report ----
#
# Reads objects defined earlier in this script: substrates, AllMeans, SDtable,
# PercentTable, SelectivitySheet, SetOfAAs, NormalizationScore, FILENAME and
# FILENAME2.
#
# NOTE(review): this section used to leave ~350 scratch globals behind
# (Column1..Column15, spaces1..spaces15, A1..Y15, AllAs..YllYs, head, y, z).
# They are no longer created; confirm nothing downstream reads them.

# One-letter codes of the 20 residues that are tallied. The order fixes the row
# order of PositionTable and EPMtable and has to match the order of AllMeans.
ResidueCodes <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)

# The substrate motif columns that are tabulated; the report labels them with
# the offsets -7..7.
MotifPositions <- seq_len(15L)

# Count how often each residue occurs at each motif position.
#
# motifs:    character matrix, one substrate per row, one position per column.
# residues:  character vector of residue codes to count.
# positions: column indices of `motifs` to tabulate.
#
# Returns an integer matrix with one row per residue and one column per
# position. Cells that match none of `residues` (for example "") are simply
# not counted.
count_residues_by_position <- function(motifs, residues, positions) {
  vapply(
    positions,
    function(pos) {
      site <- motifs[, pos]
      vapply(
        residues,
        function(code) sum(site %in% code),
        integer(1),
        USE.NAMES = FALSE
      )
    },
    integer(length(residues))
  )
}

# Turn a position table into the endogenous probability matrix.
#
# position_table:   residue x position matrix of non-negative counts.
# filled_counts:    number of non-blank entries per position (one per column).
# background_means: mean background frequency per residue (one per row).
#
# Each cell is
#   count / (filled_counts[position] * .01 * background_means[residue])
# where a count of zero is replaced by the pseudo-count
# 1 / filled_counts[position]. The .01 factor suggests the background means
# are percentages -- TODO confirm against the background frequency file.
# A position with no non-blank entries divides by zero and therefore yields
# non-finite values, and NA inputs propagate to the result.
#
# Returns a numeric matrix with the dim and dimnames of `position_table`.
endogenous_probability_matrix <- function(position_table, filled_counts,
                                          background_means) {
  residue <- as.vector(row(position_table))
  site <- as.vector(col(position_table))
  observed <- as.vector(position_table)

  expected <- filled_counts[site] * .01 * background_means[residue]
  pseudo_count <- 1 / filled_counts[site]
  numerator <- ifelse(observed > 0, observed, pseudo_count)

  epm <- position_table
  epm[] <- numerator / expected
  epm
}

# Append `x` to `file` as comma-separated rows, never writing a column header.
append_csv <- function(x, file, row.names = FALSE) {
  write.table(
    x,
    file = file, append = TRUE, sep = ",",
    row.names = row.names, col.names = FALSE
  )
}

# Here the positions table is created: "how many times did an AA show up in
# this spot?". It is needed for the endogenous probability matrix.
PositionTable <- count_residues_by_position(
  substrates, ResidueCodes, MotifPositions
)
# Keep the labels this table has always carried (columns A1..A15, unnamed rows)
# so that objects derived from it keep the same dimnames.
dimnames(PositionTable) <- list(NULL, paste0("A", MotifPositions))

# Number of non-blank entries at each motif position. `==` is used on purpose:
# an NA cell makes the total for that position NA, which then propagates into
# that EPM column.
columns <- vapply(
  MotifPositions,
  function(pos) nrow(substrates) - sum(substrates[, pos] == ""),
  integer(1)
)

# The endogenous probability matrix is the AA count at a position over the
# expected count derived from the mean substrate background frequency.
EPMtable <- endogenous_probability_matrix(PositionTable, columns, AllMeans)

# TODO: make the program automatically determine which SDs are > 2, and then
# perform the screener and sorter on those SDs.

NormalizationScore <- c("Normalization Score", NormalizationScore)

# First report file: SD table followed by the percent table.
append_csv("SD Table", FILENAME)
append_csv(SDtable, FILENAME)
append_csv("Percent Table", FILENAME)
append_csv(PercentTable, FILENAME)

# EPM with an offset header row and the residue labels as first column.
# SetOfAAs must supply one label per row (header row plus 20 residues).
HeaderSD <- -7:7
EPMtableu <- rbind(HeaderSD, EPMtable)
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

# Second report file: site selectivity, EPM and normalization score.
append_csv("Site Selectivity Matrix", FILENAME2)

# A row of single-space cells, then the "Position" label and the 15 offsets.
BlankHeaderRow <- matrix(rep(" ", times = 16), nrow = 1)
SelectivityHeader <- rbind(
  BlankHeaderRow,
  matrix(c("Position", HeaderSD), nrow = 1)
)

append_csv(SelectivityHeader, FILENAME2)
# The selectivity sheet is the only table written with its row names.
append_csv(SelectivitySheet, FILENAME2, row.names = TRUE)
append_csv("Endogenous Probability Matrix", FILENAME2)
append_csv(EPMtableu, FILENAME2)
append_csv(NormalizationScore, FILENAME2)