Mercurial > repos > jfb > kinatest_r_7_7testing
view kinatestid_r/Kinatest-R.R @ 14:da1012f014bd draft
Uploaded
author | jfb |
---|---|
date | Thu, 08 Feb 2018 15:29:56 -0500 |
parents | |
children | 15b5d4ae4480 |
line wrap: on
line source
ImportedSubstrateList<- read.csv("input1", stringsAsFactors=FALSE) NegativeSubstrateList<- read.csv("input2", stringsAsFactors=FALSE) SubstrateBackgroundFrequency<- read.csv("input3", stringsAsFactors=FALSE) ScreenerFilename<-"screener" FILENAME<-"output1.csv" FILENAME2<-"output2.csv" FILENAME3<-"output3.csv" OutputMatrix<-"KinaseMatrix.csv" CharacterizationTable<-"CharacterizationTableForThisKinase.csv" SDtable<-"SDtableforthisKinase" SiteSelectivityTable<-"SiteSelectivityForThisKinase" substrates<-matrix(rep("A",times=((nrow(ImportedSubstrateList)-1)*15)),ncol = 15) #SeqsToBeScored<-"asdasd" for (i in 2:nrow(ImportedSubstrateList)) { substratemotif<-ImportedSubstrateList[i,4:18] substratemotif[8]<-"Y" #substratemotif<-paste(substratemotif,sep = "",collapse = "") j=i-1 substratemotif<-unlist(substratemotif) substrates[j,1:15]<-substratemotif } # SpacesToOs<-c(""="O",) # substrates<-SpacesToOs[substrates] SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency),2] if(2==2){ Amean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE) Cmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE) Dmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE) Emean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE) Fmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),6]), na.rm=TRUE) Gmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),7]), na.rm=TRUE) Hmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),8]), na.rm=TRUE) Imean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),9]), na.rm=TRUE) Kmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),10]), na.rm=TRUE) Lmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),11]), na.rm=TRUE) Mmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),12]), na.rm=TRUE) Nmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),13]), na.rm=TRUE) Pmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),14]), na.rm=TRUE) Qmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),15]), na.rm=TRUE) Rmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),16]), na.rm=TRUE) Smean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),17]), na.rm=TRUE) Tmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),18]), na.rm=TRUE) Vmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),19]), na.rm=TRUE) Wmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),20]), na.rm=TRUE) Ymean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),21]), na.rm=TRUE) AllMeans<-c(Amean,Cmean,Dmean,Emean,Fmean,Gmean,Hmean,Imean,Kmean,Lmean,Mmean,Nmean,Pmean,Qmean,Rmean,Smean,Tmean,Vmean,Wmean,Ymean) Asd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE) Csd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE) Dsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE) Esd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE) Fsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),6]), na.rm=TRUE) Gsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),7]), na.rm=TRUE) Hsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),8]), na.rm=TRUE) Isd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),9]), na.rm=TRUE) Ksd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),10]), na.rm=TRUE) Lsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),11]), na.rm=TRUE) Msd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),12]), na.rm=TRUE) Nsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),13]), na.rm=TRUE) Psd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),14]), na.rm=TRUE) Qsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),15]), na.rm=TRUE) Rsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),16]), na.rm=TRUE) Ssd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),17]), na.rm=TRUE) Tsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),18]), na.rm=TRUE) Vsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),19]), na.rm=TRUE) Wsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),20]), na.rm=TRUE) Ysd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),21]), na.rm=TRUE) } AllSDs<-c(Asd,Csd,Dsd,Esd,Fsd,Gsd,Hsd,Isd,Ksd,Lsd,Msd,Nsd,Psd,Qsd,Rsd,Ssd,Tsd,Vsd,Wsd,Ysd) #this is subbackfreq SDs SBF_statisticalvalues<-cbind(AllMeans,AllSDs) #create the percent table if (1==1){ Column1<-substrates[,1] Column2<-substrates[,2] Column3<-substrates[,3] Column4<-substrates[,4] Column5<-substrates[,5] Column6<-substrates[,6] Column7<-substrates[,7] Column8<-substrates[,8] Column9<-substrates[,9] Column10<-substrates[,10] Column11<-substrates[,11] Column12<-substrates[,12] Column13<-substrates[,13] Column14<-substrates[,14] Column15<-substrates[,15] spaces1<-sum((Column1%in% "")) spaces2<-sum(Column2%in% "") spaces3<-sum(Column3%in% "") spaces4<-sum(Column4%in% "") spaces5<-sum(Column5%in% "") spaces6<-sum(Column6%in% "") spaces7<-sum(Column7%in% "") spaces8<-sum(Column8%in% "") spaces9<-sum(Column9%in% "") spaces10<-sum(Column10%in% "") spaces11<-sum(Column11%in% "") spaces12<-sum(Column12%in% "") spaces13<-sum(Column13%in% "") spaces14<-sum(Column14%in% "") spaces15<-sum(Column15%in% "") A1<-sum(Column1 %in% "A")/(length(Column1)-spaces1) A2<-sum(Column2 %in% "A")/(length(Column2)-spaces2) A3<-sum(Column3 %in% "A")/(length(Column3)-spaces3) A4<-sum(Column4 %in% "A")/(length(Column4)-spaces4) A5<-sum(Column5 %in% "A")/(length(Column5)-spaces5) A6<-sum(Column6 %in% "A")/(length(Column6)-spaces6) A7<-sum(Column7 %in% "A")/(length(Column7)-spaces7) A8<-sum(Column8 %in% "A")/(length(Column8)-spaces8) A9<-sum(Column9 %in% "A")/(length(Column9)-spaces9) A10<-sum(Column10 %in% "A")/(length(Column10)-spaces10) A11<-sum(Column11 %in% "A")/(length(Column11)-spaces11) A12<-sum(Column12 %in% "A")/(length(Column12)-spaces12) A13<-sum(Column13 %in% "A")/(length(Column13)-spaces13) A14<-sum(Column14 %in% "A")/(length(Column14)-spaces14) A15<-sum(Column15 %in% "A")/(length(Column15)-spaces15) AllAs<-cbind(A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15) C1<-sum(Column1 %in% "C")/(length(Column1)-spaces1) C2<-sum(Column2 %in% "C")/(length(Column2)-spaces2) C3<-sum(Column3 %in% "C")/(length(Column3)-spaces3) C4<-sum(Column4 %in% "C")/(length(Column4)-spaces4) C5<-sum(Column5 %in% "C")/(length(Column5)-spaces5) C6<-sum(Column6 %in% "C")/(length(Column6)-spaces6) C7<-sum(Column7 %in% "C")/(length(Column7)-spaces7) C8<-sum(Column8 %in% "C")/(length(Column8)-spaces8) C9<-sum(Column9 %in% "C")/(length(Column9)-spaces9) C10<-sum(Column10 %in% "C")/(length(Column10)-spaces10) C11<-sum(Column11 %in% "C")/(length(Column11)-spaces11) C12<-sum(Column12 %in% "C")/(length(Column12)-spaces12) C13<-sum(Column13 %in% "C")/(length(Column13)-spaces13) C14<-sum(Column14 %in% "C")/(length(Column14)-spaces14) C15<-sum(Column15 %in% "C")/(length(Column15)-spaces15) CllCs<-cbind(C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15) D1<-sum(Column1 %in% "D")/(length(Column1)-spaces1) D2<-sum(Column2 %in% "D")/(length(Column2)-spaces2) D3<-sum(Column3 %in% "D")/(length(Column3)-spaces3) D4<-sum(Column4 %in% "D")/(length(Column4)-spaces4) D5<-sum(Column5 %in% "D")/(length(Column5)-spaces5) D6<-sum(Column6 %in% "D")/(length(Column6)-spaces6) D7<-sum(Column7 %in% "D")/(length(Column7)-spaces7) D8<-sum(Column8 %in% "D")/(length(Column8)-spaces8) D9<-sum(Column9 %in% "D")/(length(Column9)-spaces9) D10<-sum(Column10 %in% "D")/(length(Column10)-spaces10) D11<-sum(Column11 %in% "D")/(length(Column11)-spaces11) D12<-sum(Column12 %in% "D")/(length(Column12)-spaces12) D13<-sum(Column13 %in% "D")/(length(Column13)-spaces13) D14<-sum(Column14 %in% "D")/(length(Column14)-spaces14) D15<-sum(Column15 %in% "D")/(length(Column15)-spaces15) DllDs<-cbind(D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15) E1<-sum(Column1 %in% "E")/(length(Column1)-spaces1) E2<-sum(Column2 %in% "E")/(length(Column2)-spaces2) E3<-sum(Column3 %in% "E")/(length(Column3)-spaces3) E4<-sum(Column4 %in% "E")/(length(Column4)-spaces4) E5<-sum(Column5 %in% "E")/(length(Column5)-spaces5) E6<-sum(Column6 %in% "E")/(length(Column6)-spaces6) E7<-sum(Column7 %in% "E")/(length(Column7)-spaces7) E8<-sum(Column8 %in% "E")/(length(Column8)-spaces8) E9<-sum(Column9 %in% "E")/(length(Column9)-spaces9) E10<-sum(Column10 %in% "E")/(length(Column10)-spaces10) E11<-sum(Column11 %in% "E")/(length(Column11)-spaces11) E12<-sum(Column12 %in% "E")/(length(Column12)-spaces12) E13<-sum(Column13 %in% "E")/(length(Column13)-spaces13) E14<-sum(Column14 %in% "E")/(length(Column14)-spaces14) E15<-sum(Column15 %in% "E")/(length(Column15)-spaces15) EllEs<-cbind(E1,E2,E3,E4,E5,E6,E7,E8,E9,E10,E11,E12,E13,E14,E15) F1<-sum(Column1 %in% "F")/(length(Column1)-spaces1) F2<-sum(Column2 %in% "F")/(length(Column2)-spaces2) F3<-sum(Column3 %in% "F")/(length(Column3)-spaces3) F4<-sum(Column4 %in% "F")/(length(Column4)-spaces4) F5<-sum(Column5 %in% "F")/(length(Column5)-spaces5) F6<-sum(Column6 %in% "F")/(length(Column6)-spaces6) F7<-sum(Column7 %in% "F")/(length(Column7)-spaces7) F8<-sum(Column8 %in% "F")/(length(Column8)-spaces8) F9<-sum(Column9 %in% "F")/(length(Column9)-spaces9) F10<-sum(Column10 %in% "F")/(length(Column10)-spaces10) F11<-sum(Column11 %in% "F")/(length(Column11)-spaces11) F12<-sum(Column12 %in% "F")/(length(Column12)-spaces12) F13<-sum(Column13 %in% "F")/(length(Column13)-spaces13) F14<-sum(Column14 %in% "F")/(length(Column14)-spaces14) F15<-sum(Column15 %in% "F")/(length(Column15)-spaces15) FllFs<-cbind(F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15) G1<-sum(Column1 %in% "G")/(length(Column1)-spaces1) G2<-sum(Column2 %in% "G")/(length(Column2)-spaces2) G3<-sum(Column3 %in% "G")/(length(Column3)-spaces3) G4<-sum(Column4 %in% "G")/(length(Column4)-spaces4) G5<-sum(Column5 %in% "G")/(length(Column5)-spaces5) G6<-sum(Column6 %in% "G")/(length(Column6)-spaces6) G7<-sum(Column7 %in% "G")/(length(Column7)-spaces7) G8<-sum(Column8 %in% "G")/(length(Column8)-spaces8) G9<-sum(Column9 %in% "G")/(length(Column9)-spaces9) G10<-sum(Column10 %in% "G")/(length(Column10)-spaces10) G11<-sum(Column11 %in% "G")/(length(Column11)-spaces11) G12<-sum(Column12 %in% "G")/(length(Column12)-spaces12) G13<-sum(Column13 %in% "G")/(length(Column13)-spaces13) G14<-sum(Column14 %in% "G")/(length(Column14)-spaces14) G15<-sum(Column15 %in% "G")/(length(Column15)-spaces15) GllGs<-cbind(G1,G2,G3,G4,G5,G6,G7,G8,G9,G10,G11,G12,G13,G14,G15) H1<-sum(Column1 %in% "H")/(length(Column1)-spaces1) H2<-sum(Column2 %in% "H")/(length(Column2)-spaces2) H3<-sum(Column3 %in% "H")/(length(Column3)-spaces3) H4<-sum(Column4 %in% "H")/(length(Column4)-spaces4) H5<-sum(Column5 %in% "H")/(length(Column5)-spaces5) H6<-sum(Column6 %in% "H")/(length(Column6)-spaces6) H7<-sum(Column7 %in% "H")/(length(Column7)-spaces7) H8<-sum(Column8 %in% "H")/(length(Column8)-spaces8) H9<-sum(Column9 %in% "H")/(length(Column9)-spaces9) H10<-sum(Column10 %in% "H")/(length(Column10)-spaces10) H11<-sum(Column11 %in% "H")/(length(Column11)-spaces11) H12<-sum(Column12 %in% "H")/(length(Column12)-spaces12) H13<-sum(Column13 %in% "H")/(length(Column13)-spaces13) H14<-sum(Column14 %in% "H")/(length(Column14)-spaces14) H15<-sum(Column15 %in% "H")/(length(Column15)-spaces15) HllHs<-cbind(H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,H14,H15) I1<-sum(Column1 %in% "I")/(length(Column1)-spaces1) I2<-sum(Column2 %in% "I")/(length(Column2)-spaces2) I3<-sum(Column3 %in% "I")/(length(Column3)-spaces3) I4<-sum(Column4 %in% "I")/(length(Column4)-spaces4) I5<-sum(Column5 %in% "I")/(length(Column5)-spaces5) I6<-sum(Column6 %in% "I")/(length(Column6)-spaces6) I7<-sum(Column7 %in% "I")/(length(Column7)-spaces7) I8<-sum(Column8 %in% "I")/(length(Column8)-spaces8) I9<-sum(Column9 %in% "I")/(length(Column9)-spaces9) I10<-sum(Column10 %in% "I")/(length(Column10)-spaces10) I11<-sum(Column11 %in% "I")/(length(Column11)-spaces11) I12<-sum(Column12 %in% "I")/(length(Column12)-spaces12) I13<-sum(Column13 %in% "I")/(length(Column13)-spaces13) I14<-sum(Column14 %in% "I")/(length(Column14)-spaces14) I15<-sum(Column15 %in% "I")/(length(Column15)-spaces15) IllIs<-cbind(I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,I14,I15) K1<-sum(Column1 %in% "K")/(length(Column1)-spaces1) K2<-sum(Column2 %in% "K")/(length(Column2)-spaces2) K3<-sum(Column3 %in% "K")/(length(Column3)-spaces3) K4<-sum(Column4 %in% "K")/(length(Column4)-spaces4) K5<-sum(Column5 %in% "K")/(length(Column5)-spaces5) K6<-sum(Column6 %in% "K")/(length(Column6)-spaces6) K7<-sum(Column7 %in% "K")/(length(Column7)-spaces7) K8<-sum(Column8 %in% "K")/(length(Column8)-spaces8) K9<-sum(Column9 %in% "K")/(length(Column9)-spaces9) K10<-sum(Column10 %in% "K")/(length(Column10)-spaces10) K11<-sum(Column11 %in% "K")/(length(Column11)-spaces11) K12<-sum(Column12 %in% "K")/(length(Column12)-spaces12) K13<-sum(Column13 %in% "K")/(length(Column13)-spaces13) K14<-sum(Column14 %in% "K")/(length(Column14)-spaces14) K15<-sum(Column15 %in% "K")/(length(Column15)-spaces15) KllKs<-cbind(K1,K2,K3,K4,K5,K6,K7,K8,K9,K10,K11,K12,K13,K14,K15) L1<-sum(Column1 %in% "L")/(length(Column1)-spaces1) L2<-sum(Column2 %in% "L")/(length(Column2)-spaces2) L3<-sum(Column3 %in% "L")/(length(Column3)-spaces3) L4<-sum(Column4 %in% "L")/(length(Column4)-spaces4) L5<-sum(Column5 %in% "L")/(length(Column5)-spaces5) L6<-sum(Column6 %in% "L")/(length(Column6)-spaces6) L7<-sum(Column7 %in% "L")/(length(Column7)-spaces7) L8<-sum(Column8 %in% "L")/(length(Column8)-spaces8) L9<-sum(Column9 %in% "L")/(length(Column9)-spaces9) L10<-sum(Column10 %in% "L")/(length(Column10)-spaces10) L11<-sum(Column11 %in% "L")/(length(Column11)-spaces11) L12<-sum(Column12 %in% "L")/(length(Column12)-spaces12) L13<-sum(Column13 %in% "L")/(length(Column13)-spaces13) L14<-sum(Column14 %in% "L")/(length(Column14)-spaces14) L15<-sum(Column15 %in% "L")/(length(Column15)-spaces15) LllLs<-cbind(L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15) M1<-sum(Column1 %in% "M")/(length(Column1)-spaces1) M2<-sum(Column2 %in% "M")/(length(Column2)-spaces2) M3<-sum(Column3 %in% "M")/(length(Column3)-spaces3) M4<-sum(Column4 %in% "M")/(length(Column4)-spaces4) M5<-sum(Column5 %in% "M")/(length(Column5)-spaces5) M6<-sum(Column6 %in% "M")/(length(Column6)-spaces6) M7<-sum(Column7 %in% "M")/(length(Column7)-spaces7) M8<-sum(Column8 %in% "M")/(length(Column8)-spaces8) M9<-sum(Column9 %in% "M")/(length(Column9)-spaces9) M10<-sum(Column10 %in% "M")/(length(Column10)-spaces10) M11<-sum(Column11 %in% "M")/(length(Column11)-spaces11) M12<-sum(Column12 %in% "M")/(length(Column12)-spaces12) M13<-sum(Column13 %in% "M")/(length(Column13)-spaces13) M14<-sum(Column14 %in% "M")/(length(Column14)-spaces14) M15<-sum(Column15 %in% "M")/(length(Column15)-spaces15) MllMs<-cbind(M1,M2,M3,M4,M5,M6,M7,M8,M9,M10,M11,M12,M13,M14,M15) N1<-sum(Column1 %in% "N")/(length(Column1)-spaces1) N2<-sum(Column2 %in% "N")/(length(Column2)-spaces2) N3<-sum(Column3 %in% "N")/(length(Column3)-spaces3) N4<-sum(Column4 %in% "N")/(length(Column4)-spaces4) N5<-sum(Column5 %in% "N")/(length(Column5)-spaces5) N6<-sum(Column6 %in% "N")/(length(Column6)-spaces6) N7<-sum(Column7 %in% "N")/(length(Column7)-spaces7) N8<-sum(Column8 %in% "N")/(length(Column8)-spaces8) N9<-sum(Column9 %in% "N")/(length(Column9)-spaces9) N10<-sum(Column10 %in% "N")/(length(Column10)-spaces10) N11<-sum(Column11 %in% "N")/(length(Column11)-spaces11) N12<-sum(Column12 %in% "N")/(length(Column12)-spaces12) N13<-sum(Column13 %in% "N")/(length(Column13)-spaces13) N14<-sum(Column14 %in% "N")/(length(Column14)-spaces14) N15<-sum(Column15 %in% "N")/(length(Column15)-spaces15) NllNs<-cbind(N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12,N13,N14,N15) P1<-sum(Column1 %in% "P")/(length(Column1)-spaces1) P2<-sum(Column2 %in% "P")/(length(Column2)-spaces2) P3<-sum(Column3 %in% "P")/(length(Column3)-spaces3) P4<-sum(Column4 %in% "P")/(length(Column4)-spaces4) P5<-sum(Column5 %in% "P")/(length(Column5)-spaces5) P6<-sum(Column6 %in% "P")/(length(Column6)-spaces6) P7<-sum(Column7 %in% "P")/(length(Column7)-spaces7) P8<-sum(Column8 %in% "P")/(length(Column8)-spaces8) P9<-sum(Column9 %in% "P")/(length(Column9)-spaces9) P10<-sum(Column10 %in% "P")/(length(Column10)-spaces10) P11<-sum(Column11 %in% "P")/(length(Column11)-spaces11) P12<-sum(Column12 %in% "P")/(length(Column12)-spaces12) P13<-sum(Column13 %in% "P")/(length(Column13)-spaces13) P14<-sum(Column14 %in% "P")/(length(Column14)-spaces14) P15<-sum(Column15 %in% "P")/(length(Column15)-spaces15) PllPs<-cbind(P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15) Q1<-sum(Column1 %in% "Q")/(length(Column1)-spaces1) Q2<-sum(Column2 %in% "Q")/(length(Column2)-spaces2) Q3<-sum(Column3 %in% "Q")/(length(Column3)-spaces3) Q4<-sum(Column4 %in% "Q")/(length(Column4)-spaces4) Q5<-sum(Column5 %in% "Q")/(length(Column5)-spaces5) Q6<-sum(Column6 %in% "Q")/(length(Column6)-spaces6) Q7<-sum(Column7 %in% "Q")/(length(Column7)-spaces7) Q8<-sum(Column8 %in% "Q")/(length(Column8)-spaces8) Q9<-sum(Column9 %in% "Q")/(length(Column9)-spaces9) Q10<-sum(Column10 %in% "Q")/(length(Column10)-spaces10) Q11<-sum(Column11 %in% "Q")/(length(Column11)-spaces11) Q12<-sum(Column12 %in% "Q")/(length(Column12)-spaces12) Q13<-sum(Column13 %in% "Q")/(length(Column13)-spaces13) Q14<-sum(Column14 %in% "Q")/(length(Column14)-spaces14) Q15<-sum(Column15 %in% "Q")/(length(Column15)-spaces15) QllQs<-cbind(Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15) R1<-sum(Column1 %in% "R")/(length(Column1)-spaces1) R2<-sum(Column2 %in% "R")/(length(Column2)-spaces2) R3<-sum(Column3 %in% "R")/(length(Column3)-spaces3) R4<-sum(Column4 %in% "R")/(length(Column4)-spaces4) R5<-sum(Column5 %in% "R")/(length(Column5)-spaces5) R6<-sum(Column6 %in% "R")/(length(Column6)-spaces6) R7<-sum(Column7 %in% "R")/(length(Column7)-spaces7) R8<-sum(Column8 %in% "R")/(length(Column8)-spaces8) R9<-sum(Column9 %in% "R")/(length(Column9)-spaces9) R10<-sum(Column10 %in% "R")/(length(Column10)-spaces10) R11<-sum(Column11 %in% "R")/(length(Column11)-spaces11) R12<-sum(Column12 %in% "R")/(length(Column12)-spaces12) R13<-sum(Column13 %in% "R")/(length(Column13)-spaces13) R14<-sum(Column14 %in% "R")/(length(Column14)-spaces14) R15<-sum(Column15 %in% "R")/(length(Column15)-spaces15) RllRs<-cbind(R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,R13,R14,R15) S1<-sum(Column1 %in% "S")/(length(Column1)-spaces1) S2<-sum(Column2 %in% "S")/(length(Column2)-spaces2) S3<-sum(Column3 %in% "S")/(length(Column3)-spaces3) S4<-sum(Column4 %in% "S")/(length(Column4)-spaces4) S5<-sum(Column5 %in% "S")/(length(Column5)-spaces5) S6<-sum(Column6 %in% "S")/(length(Column6)-spaces6) S7<-sum(Column7 %in% "S")/(length(Column7)-spaces7) S8<-sum(Column8 %in% "S")/(length(Column8)-spaces8) S9<-sum(Column9 %in% "S")/(length(Column9)-spaces9) S10<-sum(Column10 %in% "S")/(length(Column10)-spaces10) S11<-sum(Column11 %in% "S")/(length(Column11)-spaces11) S12<-sum(Column12 %in% "S")/(length(Column12)-spaces12) S13<-sum(Column13 %in% "S")/(length(Column13)-spaces13) S14<-sum(Column14 %in% "S")/(length(Column14)-spaces14) S15<-sum(Column15 %in% "S")/(length(Column15)-spaces15) SllSs<-cbind(S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15) T1<-sum(Column1 %in% "T")/(length(Column1)-spaces1) T2<-sum(Column2 %in% "T")/(length(Column2)-spaces2) T3<-sum(Column3 %in% "T")/(length(Column3)-spaces3) T4<-sum(Column4 %in% "T")/(length(Column4)-spaces4) T5<-sum(Column5 %in% "T")/(length(Column5)-spaces5) T6<-sum(Column6 %in% "T")/(length(Column6)-spaces6) T7<-sum(Column7 %in% "T")/(length(Column7)-spaces7) T8<-sum(Column8 %in% "T")/(length(Column8)-spaces8) T9<-sum(Column9 %in% "T")/(length(Column9)-spaces9) T10<-sum(Column10 %in% "T")/(length(Column10)-spaces10) T11<-sum(Column11 %in% "T")/(length(Column11)-spaces11) T12<-sum(Column12 %in% "T")/(length(Column12)-spaces12) T13<-sum(Column13 %in% "T")/(length(Column13)-spaces13) T14<-sum(Column14 %in% "T")/(length(Column14)-spaces14) T15<-sum(Column15 %in% "T")/(length(Column15)-spaces15) TllTs<-cbind(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15) V1<-sum(Column1 %in% "V")/(length(Column1)-spaces1) V2<-sum(Column2 %in% "V")/(length(Column2)-spaces2) V3<-sum(Column3 %in% "V")/(length(Column3)-spaces3) V4<-sum(Column4 %in% "V")/(length(Column4)-spaces4) V5<-sum(Column5 %in% "V")/(length(Column5)-spaces5) V6<-sum(Column6 %in% "V")/(length(Column6)-spaces6) V7<-sum(Column7 %in% "V")/(length(Column7)-spaces7) V8<-sum(Column8 %in% "V")/(length(Column8)-spaces8) V9<-sum(Column9 %in% "V")/(length(Column9)-spaces9) V10<-sum(Column10 %in% "V")/(length(Column10)-spaces10) V11<-sum(Column11 %in% "V")/(length(Column11)-spaces11) V12<-sum(Column12 %in% "V")/(length(Column12)-spaces12) V13<-sum(Column13 %in% "V")/(length(Column13)-spaces13) V14<-sum(Column14 %in% "V")/(length(Column14)-spaces14) V15<-sum(Column15 %in% "V")/(length(Column15)-spaces15) VllVs<-cbind(V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15) W1<-sum(Column1 %in% "W")/(length(Column1)-spaces1) W2<-sum(Column2 %in% "W")/(length(Column2)-spaces2) W3<-sum(Column3 %in% "W")/(length(Column3)-spaces3) W4<-sum(Column4 %in% "W")/(length(Column4)-spaces4) W5<-sum(Column5 %in% "W")/(length(Column5)-spaces5) W6<-sum(Column6 %in% "W")/(length(Column6)-spaces6) W7<-sum(Column7 %in% "W")/(length(Column7)-spaces7) W8<-sum(Column8 %in% "W")/(length(Column8)-spaces8) W9<-sum(Column9 %in% "W")/(length(Column9)-spaces9) W10<-sum(Column10 %in% "W")/(length(Column10)-spaces10) W11<-sum(Column11 %in% "W")/(length(Column11)-spaces11) W12<-sum(Column12 %in% "W")/(length(Column12)-spaces12) W13<-sum(Column13 %in% "W")/(length(Column13)-spaces13) W14<-sum(Column14 %in% "W")/(length(Column14)-spaces14) W15<-sum(Column15 %in% "W")/(length(Column15)-spaces15) WllWs<-cbind(W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14,W15) Y1<-sum(Column1 %in% "Y")/(length(Column1)-spaces1) Y2<-sum(Column2 %in% "Y")/(length(Column2)-spaces2) Y3<-sum(Column3 %in% "Y")/(length(Column3)-spaces3) Y4<-sum(Column4 %in% "Y")/(length(Column4)-spaces4) Y5<-sum(Column5 %in% "Y")/(length(Column5)-spaces5) Y6<-sum(Column6 %in% "Y")/(length(Column6)-spaces6) Y7<-sum(Column7 %in% "Y")/(length(Column7)-spaces7) Y8<-sum(Column8 %in% "Y")/(length(Column8)-spaces8) Y9<-sum(Column9 %in% "Y")/(length(Column9)-spaces9) Y10<-sum(Column10 %in% "Y")/(length(Column10)-spaces10) Y11<-sum(Column11 %in% "Y")/(length(Column11)-spaces11) Y12<-sum(Column12 %in% "Y")/(length(Column12)-spaces12) Y13<-sum(Column13 %in% "Y")/(length(Column13)-spaces13) Y14<-sum(Column14 %in% "Y")/(length(Column14)-spaces14) Y15<-sum(Column15 %in% "Y")/(length(Column15)-spaces15) YllYs<-cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y8,Y9,Y10,Y11,Y12,Y13,Y14,Y15) } #this is substrate percents #A C D E F G H I K L N P Q R S T V W Y PercentTable<-rbind(AllAs,CllCs,DllDs,EllEs,FllFs,GllGs,HllHs,IllIs,KllKs,LllLs,MllMs,NllNs,PllPs,QllQs,RllRs,SllSs,TllTs,VllVs,WllWs,YllYs) PercentTable<-PercentTable*100 #create the SD table SDtable<-matrix(data = rep(1,times=(nrow(PercentTable)*ncol(PercentTable))),nrow = nrow(PercentTable),ncol = ncol(PercentTable)) #for every row, a percertage minus the same mean over the same SD if(1==1){ SDtable[1,]<-(PercentTable[1,]-Amean)/Asd SDtable[2,]<-(PercentTable[2,]-Cmean)/Csd SDtable[3,]<-(PercentTable[3,]-Dmean)/Dsd SDtable[4,]<-(PercentTable[4,]-Emean)/Esd SDtable[5,]<-(PercentTable[5,]-Fmean)/Fsd SDtable[6,]<-(PercentTable[6,]-Gmean)/Gsd SDtable[7,]<-(PercentTable[7,]-Hmean)/Hsd SDtable[8,]<-(PercentTable[8,]-Imean)/Isd SDtable[9,]<-(PercentTable[9,]-Kmean)/Ksd SDtable[10,]<-(PercentTable[10,]-Lmean)/Lsd SDtable[11,]<-(PercentTable[11,]-Mmean)/Msd SDtable[12,]<-(PercentTable[12,]-Nmean)/Nsd SDtable[13,]<-(PercentTable[13,]-Pmean)/Psd SDtable[14,]<-(PercentTable[14,]-Qmean)/Qsd SDtable[15,]<-(PercentTable[15,]-Rmean)/Rsd SDtable[16,]<-(PercentTable[16,]-Smean)/Ssd SDtable[17,]<-(PercentTable[17,]-Tmean)/Tsd SDtable[18,]<-(PercentTable[18,]-Vmean)/Vsd SDtable[19,]<-(PercentTable[19,]-Wmean)/Wsd SDtable[20,]<-(PercentTable[20,]-Ymean)/Ysd } SetOfAAs<-c("Letter","A","C","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y") SumOfSigmaAAs<-c(1:15) for (i in 1:15){ SumOfSigmasValue<-0 for (j in 1:20){ value<-0 if (SDtable[j,i]>2){ value<-sum(substrates[,i]==SetOfAAs[j]) } SumOfSigmasValue<-SumOfSigmasValue+value } SumOfSigmaAAs[i]<-SumOfSigmasValue } # AAs1<-length(substrates[,1])-sum(substrates[,1]=="") # AAs2<-length(substrates[,2])-sum(substrates[,2]=="") # AAs3<-length(substrates[,3])-sum(substrates[,3]=="") # AAs4<-length(substrates[,4])-sum(substrates[,4]=="") # AAs5<-length(substrates[,5])-sum(substrates[,5]=="") # AAs6<-length(substrates[,6])-sum(substrates[,6]=="") # AAs7<-length(substrates[,7])-sum(substrates[,7]=="") # AAs8<-length(substrates[,8])-sum(substrates[,8]=="") # AAs9<-length(substrates[,9])-sum(substrates[,9]=="") # # # # #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9) # AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]), # length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]), # length(substrates[,9])) SumOfExpectedSigmaAAs<-c(1:15) for (i in 1:15){ ExpectedValue<-0 for (j in 1:20){ value<-0 if (SDtable[j,i]>2){ value<-AllMeans[j] } ExpectedValue<-ExpectedValue+value } SumOfExpectedSigmaAAs[i]<-ExpectedValue*(length(substrates[,i])-sum(substrates[,i]%in% ""))/100 } SelectivityRow<-SumOfSigmaAAs/SumOfExpectedSigmaAAs SelectivitySheet<-rbind(SumOfSigmaAAs,SumOfExpectedSigmaAAs,SelectivityRow) SetOfAAs<-matrix(data = SetOfAAs,ncol = 1) SDtableu<-SDtable HeaderSD<-c(-7:7) SDtable<-rbind(HeaderSD,SDtableu) row.names(SDtable)<-NULL SDtable<-data.frame(SetOfAAs,SDtable) PercentTable<-rbind(HeaderSD,PercentTable) row.names(PercentTable)<-NULL PercentTable<-data.frame(SetOfAAs,PercentTable) numberofY<-as.numeric(SubstrateBackgroundFrequency$Number.of.Y) numberofY<-numberofY[!is.na(numberofY)] numberofPY<-as.numeric(SubstrateBackgroundFrequency$Number.of.pY) numberofPY<-numberofPY[!is.na(numberofPY)] NormalizationScore<-sum(numberofPY)/sum(numberofY) # positions<-matrix(data = NA, nrow=20,ncol = 15) # # #column1 # # for (q in 1:15) { # sA<-sum(substrates[,i]=="A") # positions[1,i]<-sA # sC<-sum(substrates[,i]=="C") # positions[2,i]<-sC # sD<-sum(substrates[,i]=="D") # positions[3,i]<-sD # sE<-sum(substrates[,i]=="E") # positions[4,i]<-sE # sF<-sum(substrates[,i]=="F") # sG<-sum(substrates[,i]=="G") # sH<-sum(substrates[,i]=="H") # sI<-sum(substrates[,i]=="I") # sK<-sum(substrates[,i]=="K") # sL<-sum(substrates[,i]=="L") # sM<-sum(substrates[,i]=="M") # sN<-sum(substrates[,i]=="N") # sP<-sum(substrates[,i]=="P") # sQ<-sum(substrates[,i]=="Q") # sR<-sum(substrates[,i]=="R") # sS<-sum(substrates[,i]=="S") # sT<-sum(substrates[,i]=="T") # sV<-sum(substrates[,i]=="V") # sW<-sum(substrates[,i]=="W") # sY<-sum(substrates[,i]=="Y") # positions[5,i]<-sF # positions[6,i]<-sG # positions[7,i]<-sH # positions[8,i]<-sI # positions[9,i]<-sK # positions[10,i]<-sL # positions[11,i]<-sM # positions[12,i]<-sN # positions[13,i]<-sP # positions[14,i]<-sQ # positions[15,i]<-sR # positions[16,i]<-sS # positions[17,i]<-sT # positions[18,i]<-sV # positions[19,i]<-sW # positions[20,i]<-sY # } #here I create the positions table which is needed for the endogenous prob matrix and is simply "how many times did an AA show up in this spot? if (6==6){ Column1<-substrates[,1] Column2<-substrates[,2] Column3<-substrates[,3] Column4<-substrates[,4] Column5<-substrates[,5] Column6<-substrates[,6] Column7<-substrates[,7] Column8<-substrates[,8] Column9<-substrates[,9] Column10<-substrates[,10] Column11<-substrates[,11] Column12<-substrates[,12] Column13<-substrates[,13] Column14<-substrates[,14] Column15<-substrates[,15] spaces1<-sum((Column1%in% "")) spaces2<-sum(Column2%in% "") spaces3<-sum(Column3%in% "") spaces4<-sum(Column4%in% "") spaces5<-sum(Column5%in% "") spaces6<-sum(Column6%in% "") spaces7<-sum(Column7%in% "") spaces8<-sum(Column8%in% "") spaces9<-sum(Column9%in% "") spaces10<-sum(Column10%in% "") spaces11<-sum(Column11%in% "") spaces12<-sum(Column12%in% "") spaces13<-sum(Column13%in% "") spaces14<-sum(Column14%in% "") spaces15<-sum(Column15%in% "") A1<-sum(Column1 %in% "A") A2<-sum(Column2 %in% "A") A3<-sum(Column3 %in% "A") A4<-sum(Column4 %in% "A") A5<-sum(Column5 %in% "A") A6<-sum(Column6 %in% "A") A7<-sum(Column7 %in% "A") A8<-sum(Column8 %in% "A") A9<-sum(Column9 %in% "A") A10<-sum(Column10 %in% "A") A11<-sum(Column11 %in% "A") A12<-sum(Column12 %in% "A") A13<-sum(Column13 %in% "A") A14<-sum(Column14 %in% "A") A15<-sum(Column15 %in% "A") AllAs<-cbind(A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15) C1<-sum(Column1 %in% "C") C2<-sum(Column2 %in% "C") C3<-sum(Column3 %in% "C") C4<-sum(Column4 %in% "C") C5<-sum(Column5 %in% "C") C6<-sum(Column6 %in% "C") C7<-sum(Column7 %in% "C") C8<-sum(Column8 %in% "C") C9<-sum(Column9 %in% "C") C10<-sum(Column10 %in% "C") C11<-sum(Column11 %in% "C") C12<-sum(Column12 %in% "C") C13<-sum(Column13 %in% "C") C14<-sum(Column14 %in% "C") C15<-sum(Column15 %in% "C") CllCs<-cbind(C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15) D1<-sum(Column1 %in% "D") D2<-sum(Column2 %in% "D") D3<-sum(Column3 %in% "D") D4<-sum(Column4 %in% "D") D5<-sum(Column5 %in% "D") D6<-sum(Column6 %in% "D") D7<-sum(Column7 %in% "D") D8<-sum(Column8 %in% "D") D9<-sum(Column9 %in% "D") D10<-sum(Column10 %in% "D") D11<-sum(Column11 %in% "D") D12<-sum(Column12 %in% "D") D13<-sum(Column13 %in% "D") D14<-sum(Column14 %in% "D") D15<-sum(Column15 %in% "D") DllDs<-cbind(D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15) E1<-sum(Column1 %in% "E") E2<-sum(Column2 %in% "E") E3<-sum(Column3 %in% "E") E4<-sum(Column4 %in% "E") E5<-sum(Column5 %in% "E") E6<-sum(Column6 %in% "E") E7<-sum(Column7 %in% "E") E8<-sum(Column8 %in% "E") E9<-sum(Column9 %in% "E") E10<-sum(Column10 %in% "E") E11<-sum(Column11 %in% "E") E12<-sum(Column12 %in% "E") E13<-sum(Column13 %in% "E") E14<-sum(Column14 %in% "E") E15<-sum(Column15 %in% "E") EllEs<-cbind(E1,E2,E3,E4,E5,E6,E7,E8,E9,E10,E11,E12,E13,E14,E15) F1<-sum(Column1 %in% "F") F2<-sum(Column2 %in% "F") F3<-sum(Column3 %in% "F") F4<-sum(Column4 %in% "F") F5<-sum(Column5 %in% "F") F6<-sum(Column6 %in% "F") F7<-sum(Column7 %in% "F") F8<-sum(Column8 %in% "F") F9<-sum(Column9 %in% "F") F10<-sum(Column10 %in% "F") F11<-sum(Column11 %in% "F") F12<-sum(Column12 %in% "F") F13<-sum(Column13 %in% "F") F14<-sum(Column14 %in% "F") F15<-sum(Column15 %in% "F") FllFs<-cbind(F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15) G1<-sum(Column1 %in% "G") G2<-sum(Column2 %in% "G") G3<-sum(Column3 %in% "G") G4<-sum(Column4 %in% "G") G5<-sum(Column5 %in% "G") G6<-sum(Column6 %in% "G") G7<-sum(Column7 %in% "G") G8<-sum(Column8 %in% "G") G9<-sum(Column9 %in% "G") G10<-sum(Column10 %in% "G") G11<-sum(Column11 %in% "G") G12<-sum(Column12 %in% "G") G13<-sum(Column13 %in% "G") G14<-sum(Column14 %in% "G") G15<-sum(Column15 %in% "G") GllGs<-cbind(G1,G2,G3,G4,G5,G6,G7,G8,G9,G10,G11,G12,G13,G14,G15) H1<-sum(Column1 %in% "H") H2<-sum(Column2 %in% "H") H3<-sum(Column3 %in% "H") H4<-sum(Column4 %in% "H") H5<-sum(Column5 %in% "H") H6<-sum(Column6 %in% "H") H7<-sum(Column7 %in% "H") H8<-sum(Column8 %in% "H") H9<-sum(Column9 %in% "H") H10<-sum(Column10 %in% "H") H11<-sum(Column11 %in% "H") H12<-sum(Column12 %in% "H") H13<-sum(Column13 %in% "H") H14<-sum(Column14 %in% "H") H15<-sum(Column15 %in% "H") HllHs<-cbind(H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,H14,H15) I1<-sum(Column1 %in% "I") I2<-sum(Column2 %in% "I") I3<-sum(Column3 %in% "I") I4<-sum(Column4 %in% "I") I5<-sum(Column5 %in% "I") I6<-sum(Column6 %in% "I") I7<-sum(Column7 %in% "I") I8<-sum(Column8 %in% "I") I9<-sum(Column9 %in% "I") I10<-sum(Column10 %in% "I") I11<-sum(Column11 %in% "I") I12<-sum(Column12 %in% "I") I13<-sum(Column13 %in% "I") I14<-sum(Column14 %in% "I") I15<-sum(Column15 %in% "I") IllIs<-cbind(I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,I14,I15) K1<-sum(Column1 %in% "K") K2<-sum(Column2 %in% "K") K3<-sum(Column3 %in% "K") K4<-sum(Column4 %in% "K") K5<-sum(Column5 %in% "K") K6<-sum(Column6 %in% "K") K7<-sum(Column7 %in% "K") K8<-sum(Column8 %in% "K") K9<-sum(Column9 %in% "K") K10<-sum(Column10 %in% "K") K11<-sum(Column11 %in% "K") K12<-sum(Column12 %in% "K") K13<-sum(Column13 %in% "K") K14<-sum(Column14 %in% "K") K15<-sum(Column15 %in% "K") KllKs<-cbind(K1,K2,K3,K4,K5,K6,K7,K8,K9,K10,K11,K12,K13,K14,K15) L1<-sum(Column1 %in% "L") L2<-sum(Column2 %in% "L") L3<-sum(Column3 %in% "L") L4<-sum(Column4 %in% "L") L5<-sum(Column5 %in% "L") L6<-sum(Column6 %in% "L") L7<-sum(Column7 %in% "L") L8<-sum(Column8 %in% "L") L9<-sum(Column9 %in% "L") L10<-sum(Column10 %in% "L") L11<-sum(Column11 %in% "L") L12<-sum(Column12 %in% "L") L13<-sum(Column13 %in% "L") L14<-sum(Column14 %in% "L") L15<-sum(Column15 %in% "L") LllLs<-cbind(L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15) M1<-sum(Column1 %in% "M") M2<-sum(Column2 %in% "M") M3<-sum(Column3 %in% "M") M4<-sum(Column4 %in% "M") M5<-sum(Column5 %in% "M") M6<-sum(Column6 %in% "M") M7<-sum(Column7 %in% "M") M8<-sum(Column8 %in% "M") M9<-sum(Column9 %in% "M") M10<-sum(Column10 %in% "M") M11<-sum(Column11 %in% "M") M12<-sum(Column12 %in% "M") M13<-sum(Column13 %in% "M") M14<-sum(Column14 %in% "M") M15<-sum(Column15 %in% "M") MllMs<-cbind(M1,M2,M3,M4,M5,M6,M7,M8,M9,M10,M11,M12,M13,M14,M15) N1<-sum(Column1 %in% "N") N2<-sum(Column2 %in% "N") N3<-sum(Column3 %in% "N") N4<-sum(Column4 %in% "N") N5<-sum(Column5 %in% "N") N6<-sum(Column6 %in% "N") N7<-sum(Column7 %in% "N") N8<-sum(Column8 %in% "N") N9<-sum(Column9 %in% "N") N10<-sum(Column10 %in% "N") N11<-sum(Column11 %in% "N") N12<-sum(Column12 %in% "N") N13<-sum(Column13 %in% "N") N14<-sum(Column14 %in% "N") N15<-sum(Column15 %in% "N") NllNs<-cbind(N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12,N13,N14,N15) P1<-sum(Column1 %in% "P") P2<-sum(Column2 %in% "P") P3<-sum(Column3 %in% "P") P4<-sum(Column4 %in% "P") P5<-sum(Column5 %in% "P") P6<-sum(Column6 %in% "P") P7<-sum(Column7 %in% "P") P8<-sum(Column8 %in% "P") P9<-sum(Column9 %in% "P") P10<-sum(Column10 %in% "P") P11<-sum(Column11 %in% "P") P12<-sum(Column12 %in% "P") P13<-sum(Column13 %in% "P") P14<-sum(Column14 %in% "P") P15<-sum(Column15 %in% "P") PllPs<-cbind(P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15) Q1<-sum(Column1 %in% "Q") Q2<-sum(Column2 %in% "Q") Q3<-sum(Column3 %in% "Q") Q4<-sum(Column4 %in% "Q") Q5<-sum(Column5 %in% "Q") Q6<-sum(Column6 %in% "Q") Q7<-sum(Column7 %in% "Q") Q8<-sum(Column8 %in% "Q") Q9<-sum(Column9 %in% "Q") Q10<-sum(Column10 %in% "Q") Q11<-sum(Column11 %in% "Q") Q12<-sum(Column12 %in% "Q") Q13<-sum(Column13 %in% "Q") Q14<-sum(Column14 %in% "Q") Q15<-sum(Column15 %in% "Q") QllQs<-cbind(Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15) R1<-sum(Column1 %in% "R") R2<-sum(Column2 %in% "R") R3<-sum(Column3 %in% "R") R4<-sum(Column4 %in% "R") R5<-sum(Column5 %in% "R") R6<-sum(Column6 %in% "R") R7<-sum(Column7 %in% "R") R8<-sum(Column8 %in% "R") R9<-sum(Column9 %in% "R") R10<-sum(Column10 %in% "R") R11<-sum(Column11 %in% "R") R12<-sum(Column12 %in% "R") R13<-sum(Column13 %in% "R") R14<-sum(Column14 %in% "R") R15<-sum(Column15 %in% "R") RllRs<-cbind(R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,R13,R14,R15) S1<-sum(Column1 %in% "S") S2<-sum(Column2 %in% "S") S3<-sum(Column3 %in% "S") S4<-sum(Column4 %in% "S") S5<-sum(Column5 %in% "S") S6<-sum(Column6 %in% "S") S7<-sum(Column7 %in% "S") S8<-sum(Column8 %in% "S") S9<-sum(Column9 %in% "S") S10<-sum(Column10 %in% "S") S11<-sum(Column11 %in% "S") S12<-sum(Column12 %in% "S") S13<-sum(Column13 %in% "S") S14<-sum(Column14 %in% "S") S15<-sum(Column15 %in% "S") SllSs<-cbind(S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15) T1<-sum(Column1 %in% "T") T2<-sum(Column2 %in% "T") T3<-sum(Column3 %in% "T") T4<-sum(Column4 %in% "T") T5<-sum(Column5 %in% "T") T6<-sum(Column6 %in% "T") T7<-sum(Column7 %in% "T") T8<-sum(Column8 %in% "T") T9<-sum(Column9 %in% "T") T10<-sum(Column10 %in% "T") T11<-sum(Column11 %in% "T") T12<-sum(Column12 %in% "T") T13<-sum(Column13 %in% "T") T14<-sum(Column14 %in% "T") T15<-sum(Column15 %in% "T") TllTs<-cbind(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15) V1<-sum(Column1 %in% "V") V2<-sum(Column2 %in% "V") V3<-sum(Column3 %in% "V") V4<-sum(Column4 %in% "V") V5<-sum(Column5 %in% "V") V6<-sum(Column6 %in% "V") V7<-sum(Column7 %in% "V") V8<-sum(Column8 %in% "V") V9<-sum(Column9 %in% "V") V10<-sum(Column10 %in% "V") V11<-sum(Column11 %in% "V") V12<-sum(Column12 %in% "V") V13<-sum(Column13 %in% "V") V14<-sum(Column14 %in% "V") V15<-sum(Column15 %in% "V") VllVs<-cbind(V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15) W1<-sum(Column1 %in% "W") W2<-sum(Column2 %in% "W") W3<-sum(Column3 %in% "W") W4<-sum(Column4 %in% "W") W5<-sum(Column5 %in% "W") W6<-sum(Column6 %in% "W") W7<-sum(Column7 %in% "W") W8<-sum(Column8 %in% "W") W9<-sum(Column9 %in% "W") W10<-sum(Column10 %in% "W") W11<-sum(Column11 %in% "W") W12<-sum(Column12 %in% "W") W13<-sum(Column13 %in% "W") W14<-sum(Column14 %in% "W") W15<-sum(Column15 %in% "W") WllWs<-cbind(W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14,W15) Y1<-sum(Column1 %in% "Y") Y2<-sum(Column2 %in% "Y") Y3<-sum(Column3 %in% "Y") Y4<-sum(Column4 %in% "Y") Y5<-sum(Column5 %in% "Y") Y6<-sum(Column6 %in% "Y") Y7<-sum(Column7 %in% "Y") Y8<-sum(Column8 %in% "Y") Y9<-sum(Column9 %in% "Y") Y10<-sum(Column10 %in% "Y") Y11<-sum(Column11 %in% "Y") Y12<-sum(Column12 %in% "Y") Y13<-sum(Column13 %in% "Y") Y14<-sum(Column14 %in% "Y") Y15<-sum(Column15 %in% "Y") YllYs<-cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y8,Y9,Y10,Y11,Y12,Y13,Y14,Y15) PositionTable<-rbind(AllAs,CllCs,DllDs,EllEs,FllFs,GllGs,HllHs,IllIs,KllKs,LllLs,MllMs,NllNs,PllPs,QllQs,RllRs,SllSs,TllTs,VllVs,WllWs,YllYs) } #endogenous prob matrix is AA position over subbackfreqmean dim(PositionTable) EPMtable<-PositionTable # EPMtable[1,]<-(PositionTable[1,]/(PositionTable[1,]*.01*Amean)) # EPMtable[2,]<-(PositionTable[2,]/(PositionTable[2,]*.01*Cmean)) # EPMtable[3,]<-(PositionTable[3,]/(PositionTable[3,]*.01*Dmean)) # EPMtable[4,]<-(PositionTable[4,]/(PositionTable[4,]*.01*Emean)) # EPMtable[5,]<-(PositionTable[5,]/(PositionTable[5,]*.01*Fmean)) # EPMtable[6,]<-(PositionTable[6,]/(PositionTable[6,]*.01*Gmean)) # EPMtable[7,]<-(PositionTable[7,]/(PositionTable[7,]*.01*Hmean)) # EPMtable[8,]<-(PositionTable[8,]/(PositionTable[8,]*.01*Imean)) # EPMtable[9,]<-(PositionTable[9,]/(PositionTable[9,]*.01*Kmean)) # EPMtable[10,]<-(PositionTable[10,]/(PositionTable[10,]*.01*Lmean)) # EPMtable[11,]<-(PositionTable[11,]/(PositionTable[11,]*.01*Mmean)) # EPMtable[12,]<-(PositionTable[12,]/(PositionTable[12,]*.01*Nmean)) # EPMtable[13,]<-(PositionTable[13,]/(PositionTable[13,]*.01*Pmean)) # EPMtable[14,]<-(PositionTable[14,]/(PositionTable[14,]*.01*Qmean)) # EPMtable[15,]<-(PositionTable[15,]/(PositionTable[15,]*.01*Rmean)) # EPMtable[16,]<-(PositionTable[16,]/(PositionTable[16,]*.01*Smean)) # EPMtable[17,]<-(PositionTable[17,]/(PositionTable[17,]*.01*Tmean)) # EPMtable[18,]<-(PositionTable[18,]/(PositionTable[18,]*.01*Vmean)) # EPMtable[19,]<-(PositionTable[19,]/(PositionTable[19,]*.01*Wmean)) # EPMtable[20,]<-(PositionTable[20,]/(PositionTable[20,]*.01*Ymean)) columns<-c(length(Column1)-sum(Column1==""), length(Column2)-sum(Column2==""), length(Column3)-sum(Column3==""), length(Column4)-sum(Column4==""), length(Column5)-sum(Column5==""), length(Column6)-sum(Column6==""), length(Column7)-sum(Column7==""), length(Column8)-sum(Column8==""), length(Column9)-sum(Column9==""), length(Column10)-sum(Column10==""), length(Column11)-sum(Column11==""), length(Column12)-sum(Column12==""), length(Column13)-sum(Column13==""), length(Column14)-sum(Column14==""), length(Column15)-sum(Column15=="")) for (z in 1:15) { for (y in 1:20) { if (PositionTable[y,z]>0){ EPMtable[y,z]<-PositionTable[y,z]/((columns[z]*.01*AllMeans[y])) } if (PositionTable[y,z]==0){ EPMtable[y,z]<-(1/columns[z])/((columns[z]*.01*AllMeans[y])) } } } #here I created the endogenous probability matrix #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE) # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE) # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE) # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE) # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE) NormalizationScore<-c("Normalization Score",NormalizationScore) write.table(x=c("SD Table"),file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE) write.table(SDtable,file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE) write.table(x=c("Percent Table"),file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE) write.table(PercentTable,file=FILENAME, append = TRUE,sep=",",row.names = FALSE, col.names = FALSE) EPMtableu<-EPMtable HeaderSD<-c(-7:7) EPMtableu<-rbind(HeaderSD,EPMtableu) row.names(EPMtableu)<-NULL EPMtableu<-data.frame(SetOfAAs,EPMtableu) write.table("Site Selectivity Matrix", file = FILENAME2, append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE) SelectivityHeader=matrix(data = c("Position",-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7),nrow = 1) head<-matrix(data=rep(" ",times=16),nrow = 1) SelectivityHeader<-rbind(head,SelectivityHeader) write.table(SelectivityHeader, file = FILENAME2, append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE) #colnames(SelectivitySheet)<-c("-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7") write.table(SelectivitySheet,file = FILENAME2, append = TRUE,sep = ",",row.names = TRUE, col.names = FALSE) write.table(x=c("Endogenous Probability Matrix"),file=FILENAME2,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE) write.table(EPMtableu,file = FILENAME2, append = TRUE,sep = ",",row.names = FALSE, col.names = FALSE) write.table(NormalizationScore, file = FILENAME2, append = TRUE,sep = ",",row.names = FALSE, col.names = FALSE) #test myself: this script should take in amino acids for each of the 9 positions and give out every single combination of those AAs #need to do following: fix it so that the accession numbers stay with the substrates, #also the neg false constant is totaly unphos'd Ys found by FASTA-2-CSV system# uniprot #HOW MANY: IF THERE'S two aas in each position you get 2^9, so I assume the numbers are: #(number in position-4)*(number in position -3)*(number in position -2)...=total # require(rJava) # require(xlsxjars) # require(xlsx) # # require(readxl) #View(SDtable) bareSDs<-SDtable[2:21,2:16] goodones<-bareSDs>2 Positionm7<-which(goodones[,1] %in% TRUE) if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))} Positionm6<-which(goodones[,2] %in% TRUE) if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))} Positionm5<-which(goodones[,3] %in% TRUE) if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))} Positionm4<-which(goodones[,4] %in% TRUE) if (length(Positionm4)<1){Positionm4<-which(bareSDs[,4]==max(bareSDs[,4]))} Positionm3<-which(goodones[,5] %in% TRUE) if (length(Positionm3)<1){Positionm3<-which(bareSDs[,5]==max(bareSDs[,5]))} Positionm2<-which(goodones[,6] %in% TRUE) if (length(Positionm2)<1){Positionm2<-which(bareSDs[,6]==max(bareSDs[,6]))} Positionm1<-which(goodones[,7] %in% TRUE) if (length(Positionm1)<1){Positionm1<-which(bareSDs[,7]==max(bareSDs[,7]))} Positiond0<-which(goodones[,8] %in% TRUE) if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))} Positionp1<-which(goodones[,9] %in% TRUE) if (length(Positionp1)<1){Positionp1<-which(bareSDs[,9]==max(bareSDs[,9]))} Positionp2<-which(goodones[,10] %in% TRUE) if (length(Positionp2)<1){Positionp2<-which(bareSDs[,10]==max(bareSDs[,10]))} Positionp3<-which(goodones[,11] %in% TRUE) if (length(Positionp3)<1){Positionp3<-which(bareSDs[,11]==max(bareSDs[,11]))} Positionp4<-which(goodones[,12] %in% TRUE) if (length(Positionp4)<1){Positionp4<-which(bareSDs[,12]==max(bareSDs[,12]))} Positionp5<-which(goodones[,13] %in% TRUE) if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))} Positionp6<-which(goodones[,14] %in% TRUE) if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))} Positionp7<-which(goodones[,15] %in% TRUE) if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))} aa_props2 <- c("1"="A", "2"="C", "3"="D", "4"="E", "5"="F", "6"="G", "7"="H", "8"="I", "9"="K", "10"="L", "11"="M", "12"="N", "13"="P", "14"="Q", "15"="R", "16"="S", "17"="T", "18"="V", "19"="W", "20"="Y") Positionm7<-sapply(Positionm7, function (x) aa_props2[x]) Positionm6<-sapply(Positionm6, function (x) aa_props2[x]) Positionm5<-sapply(Positionm5, function (x) aa_props2[x]) Positionm4<-sapply(Positionm4, function (x) aa_props2[x]) Positionm3<-sapply(Positionm3, function (x) aa_props2[x]) Positionm2<-sapply(Positionm2, function (x) aa_props2[x]) Positionm1<-sapply(Positionm1, function (x) aa_props2[x]) Positiond0<-sapply(Positiond0, function (x) aa_props2[x]) Positionp1<-sapply(Positionp1, function (x) aa_props2[x]) Positionp2<-sapply(Positionp2, function (x) aa_props2[x]) Positionp3<-sapply(Positionp3, function (x) aa_props2[x]) Positionp4<-sapply(Positionp4, function (x) aa_props2[x]) Positionp5<-sapply(Positionp5, function (x) aa_props2[x]) Positionp6<-sapply(Positionp6, function (x) aa_props2[x]) Positionp7<-sapply(Positionp7, function (x) aa_props2[x]) # Positionm7<-c("D","H","N","V") # Positionm6<-c("E","V") # Positionm5<-c("D","H") # Positionm4<-c("D","N") # Positionm3<-c("D","E","F","Q") # Positionm2<-c("D","N","Q","S") # Positionm1<-c("F","I","L") # Positiond0<-c("Y") # Positionp1<-c("A","E") # Positionp2<-c("T","S","Q","E") # Positionp3<-c("V") # Positionp4<-c("K") # Positionp5<-c("K") # Positionp6<-c("K") # Positionp7<-c("R") #this is where the amino acids for each position are given. m means minus, p mean plus ######################################## # ScreenerFilename<-"C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls" screaner<-read.csv(ScreenerFilename, header = FALSE, stringsAsFactors = FALSE) Abl<-screaner[2:25,] Arg<-screaner[27:50,] Btk<-screaner[52:75,] Csk<-screaner[77:100,] Fyn<-screaner[102:125,] Hck<-screaner[127:150,] JAK2<-screaner[152:175,] Lck<-screaner[177:200,] Lyn<-screaner[202:225,] Pyk2<-screaner[227:250,] Src<-screaner[252:275,] Syk<-screaner[277:300,] Yes<-screaner[302:325,] #two questions: why are we doing BTK when we already have a bioninformatics page about it? #two I reran everything and only get 96 positions of interest in the SD table #Do_You_want_An_Excel_Output_Questionmark<-"NO" GeneratedPeptidesFile<-"GeneratedPeptidesFile.csv" # Abl<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 4) # Arg<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 5) # Btk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 6) # Csk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 7) # Fyn<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 8) # Hck<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 9) # JAK2<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 10) # Lck<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 11) # Lyn<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 12) # Pyk2<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 13) # Src<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 14) # Syk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 15) # Yes<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 16) # "A"=1 "C"=2 "D"=3 "E"=4 "F"=5 "G"=6 "H"=7 "I"=8 "K"=9 "L"=10 "M"=11 "N"=12 "P"=13 "Q"=14 "R"=15 "S"=16 "T"=17 "V"=18 "W"=19 "Y"=20 aa_props <- c("A"=A, "C"=C, "D"=D, "E"=E, "F"=F,"G"=G,"H"=H,"I"=I,"K"=K,"L"=L,"M"=M,"N"=N,"P"=P,"Q"=Q,"R"=R, "S"=S,"T"=T,"V"=V,"W"=W,"Y"=Y,"xY"=Y,"O"=21) number15<-sapply(Positionm7, function (x) aa_props[x]) number14<-sapply(Positionm6, function (x) aa_props[x]) number13<-sapply(Positionm5, function (x) aa_props[x]) number1 <- sapply(Positionm4, function (x) aa_props[x]) number2 <- sapply(Positionm3, function (x) aa_props[x]) number3 <- sapply(Positionm2, function (x) aa_props[x]) number4 <- sapply(Positionm1, function (x) aa_props[x]) number5 <- sapply(Positiond0, function (x) aa_props[x]) number6 <- sapply(Positionp1, function (x) aa_props[x]) number7 <- sapply(Positionp2, function (x) aa_props[x]) number8 <- sapply(Positionp3, function (x) aa_props[x]) number9 <- sapply(Positionp4, function (x) aa_props[x]) number10<-sapply(Positionp5, function (x) aa_props[x]) number11<-sapply(Positionp6, function (x) aa_props[x]) number12<-sapply(Positionp7, function (x) aa_props[x]) # number1<-Positionm4 # number2<-Positionm3 # number3<-Positionm2 # number4<-Positionm1 # number5<-Positiond0 # number6<-Positionp1 # number7<-Positionp2 # number8<-Positionp3 # number9<-Positionp4 ############################# #here I create the Abl seqs with proper value for each number if (1==0){ Ablnumber1<- gsub("A",A,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("C",C,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("D",D,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("E",E,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("F",F,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("G",G,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("H",H,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("I",I,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("K",K,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("L",L,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("M",M,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("N",N,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("P",P,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("Q",Q,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("R",R,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("S",S,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("T",T,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("V",V,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("W",W,Ablnumber1,perl = TRUE) Ablnumber1<- gsub("Y",Y,Ablnumber1,perl = TRUE) Ablnumber2<- gsub("A",A,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("C",C,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("D",D,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("E",E,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("F",F,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("G",G,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("H",H,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("I",I,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("K",K,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("L",L,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("M",M,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("N",N,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("P",P,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("Q",Q,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("R",R,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("S",S,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("T",T,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("V",V,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("W",W,Ablnumber2,perl = TRUE) Ablnumber2<- gsub("Y",Y,Ablnumber2,perl = TRUE) Ablnumber3<- gsub("A",A,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("C",C,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("D",D,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("E",E,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("F",F,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("G",G,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("H",H,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("I",I,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("K",K,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("L",L,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("M",M,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("N",N,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("P",P,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("Q",Q,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("R",R,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("S",S,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("T",T,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("V",V,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("W",W,Ablnumber3,perl = TRUE) Ablnumber3<- gsub("Y",Y,Ablnumber3,perl = TRUE) Ablnumber4<- gsub("A",A,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("C",C,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("D",D,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("E",E,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("F",F,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("G",G,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("H",H,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("I",I,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("K",K,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("L",L,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("M",M,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("N",N,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("P",P,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("Q",Q,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("R",R,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("S",S,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("T",T,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("V",V,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("W",W,Ablnumber4,perl = TRUE) Ablnumber4<- gsub("Y",Y,Ablnumber4,perl = TRUE) Ablnumber5<- gsub("A",A,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("C",C,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("D",D,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("E",E,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("F",F,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("G",G,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("H",H,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("I",I,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("K",K,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("L",L,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("M",M,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("N",N,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("P",P,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("Q",Q,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("R",R,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("S",S,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("T",T,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("V",V,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("W",W,Ablnumber5,perl = TRUE) Ablnumber5<- gsub("Y",Y,Ablnumber5,perl = TRUE) Ablnumber6<- gsub("A",A,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("C",C,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("D",D,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("E",E,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("F",F,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("G",G,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("H",H,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("I",I,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("K",K,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("L",L,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("M",M,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("N",N,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("P",P,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("Q",Q,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("R",R,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("S",S,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("T",T,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("V",V,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("W",W,Ablnumber6,perl = TRUE) Ablnumber6<- gsub("Y",Y,Ablnumber6,perl = TRUE) Ablnumber7<- gsub("A",A,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("C",C,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("D",D,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("E",E,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("F",F,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("G",G,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("H",H,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("I",I,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("K",K,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("L",L,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("M",M,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("N",N,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("P",P,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("Q",Q,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("R",R,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("S",S,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("T",T,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("V",V,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("W",W,Ablnumber7,perl = TRUE) Ablnumber7<- gsub("Y",Y,Ablnumber7,perl = TRUE) Ablnumber8<- gsub("A",A,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("C",C,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("D",D,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("E",E,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("F",F,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("G",G,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("H",H,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("I",I,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("K",K,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("L",L,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("M",M,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("N",N,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("P",P,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("Q",Q,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("R",R,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("S",S,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("T",T,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("V",V,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("W",W,Ablnumber8,perl = TRUE) Ablnumber8<- gsub("Y",Y,Ablnumber8,perl = TRUE) Ablnumber9<- gsub("A",A,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("C",C,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("D",D,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("E",E,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("F",F,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("G",G,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("H",H,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("I",I,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("K",K,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("L",L,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("M",M,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("N",N,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("P",P,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("Q",Q,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("R",R,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("S",S,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("T",T,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("V",V,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("W",W,Ablnumber9,perl = TRUE) Ablnumber9<- gsub("Y",Y,Ablnumber9,perl = TRUE) } ######################################## total=length(Positionp7)*length(Positionp6)*length(Positionp5)*length(Positionp4)*length(Positionp3)*(length(Positionp2))*length(Positionp1)* length(Positiond0)*length(Positionm1)*length(Positionm2)*length(Positionm3)*length(Positionm4)*length(Positionm5)*length(Positionm6)*length(Positionm7) #this is just a way to doublecheck that the length of the generated peptides vector is correct GeneratedPeptides<-rep(NA, times=total*15) GeneratedPeptides<-matrix(data = GeneratedPeptides,ncol = 15) NumeratedPeptides<-GeneratedPeptides #create an empty vector of correct length by finding the number of each AAs per position and multiplying them count<-0 for (t in 1:length(Positionm7)) { for (s in 1:length(Positionm6)) { for (r in 1:length(Positionm5)) { for (i in 1:length(Positionm4)) { for (j in 1:length(Positionm3)) { for (k in 1:length(Positionm2)) { for (l in 1:length(Positionm1)) { for (m in 1:length(Positiond0)) { for (n in 1:length(Positionp1)) { for (o in 1:length(Positionp2)) { for (p in 1:length(Positionp3)) { for (q in 1:length(Positionp4)) { for (u in 1:length(Positionp5)) { for (v in 1:length(Positionp6)) { for (w in 1:length(Positionp7)) { # i=1 # j=1 # k=1 # l=1 # m=1 # n=1 # o=1 # p=1 # q=1 # #for every single position, increment the count number, create a peptide using the AAs at that position #then put them together into the generated peptides sequencex count<-count+1 tabulation<-c(Positionm7[t],Positionm6[s],Positionm5[r],Positionm4[i],Positionm3[j],Positionm2[k],Positionm1[l],Positiond0[m],Positionp1[n], Positionp2[o],Positionp3[p],Positionp4[q],Positionp5[u],Positionp6[v],Positionp7[w]) numeration<-c(number15[t],number14[s],number13[r],number1[i],number2[j],number3[k],number4[l],number5[m],number6[n],number7[o],number8[p],number9[q],number10[u],number11[v], number12[w]) #tabulation<-paste(tabulation, sep="", collapse="") GeneratedPeptides[count,1:15]<-tabulation NumeratedPeptides[count,1:15]<-numeration } } } } } } } } } } } } } } } #################################################################### #now here I use the Endogenous Probabilty matrix from the previous script, which is called EMPtable #to score the created peptides ThisKinTable<-EPMtableu#[1:nrow(SDtable),] TKTcolumn<-c(data=rep(1,times=21)) TKTcolumn<-as.matrix(TKTcolumn,ncol=1) ThisKinTable<-cbind(TKTcolumn,ThisKinTable) ThisKinGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) ThisKinGenWeirdScore<-rep(NA,times=nrow(GeneratedPeptides)) for (x in 1:nrow(GeneratedPeptides)){ Scoringpeptide<-NumeratedPeptides[x,1:15] Scoringpeptide<-Scoringpeptide+1 ThisKinTableScore<-as.numeric(ThisKinTable[Scoringpeptide[1],3])*ThisKinTable[as.numeric(Scoringpeptide[2]),4]*ThisKinTable[as.numeric(Scoringpeptide[3]),5]* ThisKinTable[as.numeric(Scoringpeptide[4]),6]*ThisKinTable[as.numeric(Scoringpeptide[5]),7]*ThisKinTable[as.numeric(Scoringpeptide[6]),8]*ThisKinTable[as.numeric(Scoringpeptide[7]),9]* #ThisKinTable[as.numeric(Scoringpeptide[8]),10]* ThisKinTable[as.numeric(Scoringpeptide[9]),11]*ThisKinTable[as.numeric(Scoringpeptide[10]),12]*ThisKinTable[as.numeric(Scoringpeptide[11]),13]* ThisKinTable[as.numeric(Scoringpeptide[12]),14]*ThisKinTable[as.numeric(Scoringpeptide[13]),15]*ThisKinTable[as.numeric(Scoringpeptide[14]),16]*ThisKinTable[as.numeric(Scoringpeptide[15]),17] ThisKinGeneratedScores[x]<-ThisKinTableScore ThisKinTableScore<-(ThisKinTableScore/(ThisKinTableScore+1/as.numeric(NormalizationScore[2]))) ThisKinGenWeirdScore[x]<-ThisKinTableScore } AblGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) ArgGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) BtkGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) CskGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) FynGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) HckGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) JAK2GeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) LckGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) LynGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) Pyk2GeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) SrcGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) SykGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) YesGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) for (x in 1:nrow(GeneratedPeptides)){ Scoringpeptide<-NumeratedPeptides[x,1:15] AblScore<-Abl[Scoringpeptide[1],2]*Abl[Scoringpeptide[2],3]*Abl[Scoringpeptide[3],4]*Abl[Scoringpeptide[4],5]*Abl[Scoringpeptide[5],6]*Abl[Scoringpeptide[6],7]* Abl[Scoringpeptide[7],8]*Abl[Scoringpeptide[9],10]*Abl[Scoringpeptide[10],11]*Abl[Scoringpeptide[11],12]*Abl[Scoringpeptide[12],13]* Abl[Scoringpeptide[13],14]*Abl[Scoringpeptide[14],15]*Abl[Scoringpeptide[15],16] AblGeneratedScores[x]<-AblScore ArgScore<-Arg[Scoringpeptide[1],2]*Arg[Scoringpeptide[2],3]*Arg[Scoringpeptide[3],4]*Arg[Scoringpeptide[4],5]*Arg[Scoringpeptide[5],6]*Arg[Scoringpeptide[6],7]* Arg[Scoringpeptide[7],8]*Arg[Scoringpeptide[9],10]*Arg[Scoringpeptide[10],11]*Arg[Scoringpeptide[11],12]*Arg[Scoringpeptide[12],13]* Arg[Scoringpeptide[13],14]*Arg[Scoringpeptide[14],15]*Arg[Scoringpeptide[15],16] ArgGeneratedScores[x]<-ArgScore BtkScore<-Btk[Scoringpeptide[1],2]*Btk[Scoringpeptide[2],3]*Btk[Scoringpeptide[3],4]*Btk[Scoringpeptide[4],5]*Btk[Scoringpeptide[5],6]*Btk[Scoringpeptide[6],7]* Btk[Scoringpeptide[7],8]*Btk[Scoringpeptide[9],10]*Btk[Scoringpeptide[10],11]*Btk[Scoringpeptide[11],12]*Btk[Scoringpeptide[12],13]* Btk[Scoringpeptide[13],14]*Btk[Scoringpeptide[14],15]*Btk[Scoringpeptide[15],16] BtkGeneratedScores[x]<-BtkScore CskScore<-Csk[Scoringpeptide[1],2]*Csk[Scoringpeptide[2],3]*Csk[Scoringpeptide[3],4]*Csk[Scoringpeptide[4],5]*Csk[Scoringpeptide[5],6]*Csk[Scoringpeptide[6],7]* Csk[Scoringpeptide[7],8]*Csk[Scoringpeptide[9],10]*Csk[Scoringpeptide[10],11]*Csk[Scoringpeptide[11],12]*Csk[Scoringpeptide[12],13]* Csk[Scoringpeptide[13],14]*Csk[Scoringpeptide[14],15]*Csk[Scoringpeptide[15],16] CskGeneratedScores[x]<-CskScore FynScore<-Fyn[Scoringpeptide[1],2]*Fyn[Scoringpeptide[2],3]*Fyn[Scoringpeptide[3],4]*Fyn[Scoringpeptide[4],5]*Fyn[Scoringpeptide[5],6]*Fyn[Scoringpeptide[6],7]* Fyn[Scoringpeptide[7],8]*Fyn[Scoringpeptide[9],10]*Fyn[Scoringpeptide[10],11]*Fyn[Scoringpeptide[11],12]*Fyn[Scoringpeptide[12],13]* Fyn[Scoringpeptide[13],14]*Fyn[Scoringpeptide[14],15]*Fyn[Scoringpeptide[15],16] FynGeneratedScores[x]<-FynScore HckScore<-Hck[Scoringpeptide[1],2]*Hck[Scoringpeptide[2],3]*Hck[Scoringpeptide[3],4]*Hck[Scoringpeptide[4],5]*Hck[Scoringpeptide[5],6]*Hck[Scoringpeptide[6],7]* Hck[Scoringpeptide[7],8]*Hck[Scoringpeptide[9],10]*Hck[Scoringpeptide[10],11]*Hck[Scoringpeptide[11],12]*Hck[Scoringpeptide[12],13]* Hck[Scoringpeptide[13],14]*Hck[Scoringpeptide[14],15]*Hck[Scoringpeptide[15],16] HckGeneratedScores[x]<-HckScore JAK2Score<-JAK2[Scoringpeptide[1],2]*JAK2[Scoringpeptide[2],3]*JAK2[Scoringpeptide[3],4]*JAK2[Scoringpeptide[4],5]*JAK2[Scoringpeptide[5],6]*JAK2[Scoringpeptide[6],7]* JAK2[Scoringpeptide[7],8]*JAK2[Scoringpeptide[9],10]*JAK2[Scoringpeptide[10],11]*JAK2[Scoringpeptide[11],12]*JAK2[Scoringpeptide[12],13]* JAK2[Scoringpeptide[13],14]*JAK2[Scoringpeptide[14],15]*JAK2[Scoringpeptide[15],16] JAK2GeneratedScores[x]<-JAK2Score LckScore<-Lck[Scoringpeptide[1],2]*Lck[Scoringpeptide[2],3]*Lck[Scoringpeptide[3],4]*Lck[Scoringpeptide[4],5]*Lck[Scoringpeptide[5],6]*Lck[Scoringpeptide[6],7]* Lck[Scoringpeptide[7],8]*Lck[Scoringpeptide[9],10]*Lck[Scoringpeptide[10],11]*Lck[Scoringpeptide[11],12]*Lck[Scoringpeptide[12],13]* Lck[Scoringpeptide[13],14]*Lck[Scoringpeptide[14],15]*Lck[Scoringpeptide[15],16] LckGeneratedScores[x]<-LckScore LynScore<-Lyn[Scoringpeptide[1],2]*Lyn[Scoringpeptide[2],3]*Lyn[Scoringpeptide[3],4]*Lyn[Scoringpeptide[4],5]*Lyn[Scoringpeptide[5],6]*Lyn[Scoringpeptide[6],7]* Lyn[Scoringpeptide[7],8]*Lyn[Scoringpeptide[9],10]*Lyn[Scoringpeptide[10],11]*Lyn[Scoringpeptide[11],12]*Lyn[Scoringpeptide[12],13]* Lyn[Scoringpeptide[13],14]*Lyn[Scoringpeptide[14],15]*Lyn[Scoringpeptide[15],16] LynGeneratedScores[x]<-LynScore Pyk2Score<-Pyk2[Scoringpeptide[1],2]*Pyk2[Scoringpeptide[2],3]*Pyk2[Scoringpeptide[3],4]*Pyk2[Scoringpeptide[4],5]*Pyk2[Scoringpeptide[5],6]*Pyk2[Scoringpeptide[6],7]* Pyk2[Scoringpeptide[7],8]*Pyk2[Scoringpeptide[9],10]*Pyk2[Scoringpeptide[10],11]*Pyk2[Scoringpeptide[11],12]*Pyk2[Scoringpeptide[12],13]* Pyk2[Scoringpeptide[13],14]*Pyk2[Scoringpeptide[14],15]*Pyk2[Scoringpeptide[15],16] Pyk2GeneratedScores[x]<-Pyk2Score SrcScore<-Src[Scoringpeptide[1],2]*Src[Scoringpeptide[2],3]*Src[Scoringpeptide[3],4]*Src[Scoringpeptide[4],5]*Src[Scoringpeptide[5],6]*Src[Scoringpeptide[6],7]* Src[Scoringpeptide[7],8]*Src[Scoringpeptide[9],10]*Src[Scoringpeptide[10],11]*Src[Scoringpeptide[11],12]*Src[Scoringpeptide[12],13]* Src[Scoringpeptide[13],14]*Src[Scoringpeptide[14],15]*Src[Scoringpeptide[15],16] SrcGeneratedScores[x]<-SrcScore SykScore<-Syk[Scoringpeptide[1],2]*Syk[Scoringpeptide[2],3]*Syk[Scoringpeptide[3],4]*Syk[Scoringpeptide[4],5]*Syk[Scoringpeptide[5],6]*Syk[Scoringpeptide[6],7]* Syk[Scoringpeptide[7],8]*Syk[Scoringpeptide[9],10]*Syk[Scoringpeptide[10],11]*Syk[Scoringpeptide[11],12]*Syk[Scoringpeptide[12],13]* Syk[Scoringpeptide[13],14]*Syk[Scoringpeptide[14],15]*Syk[Scoringpeptide[15],16] SykGeneratedScores[x]<-SykScore YesScore<-Yes[Scoringpeptide[1],2]*Yes[Scoringpeptide[2],3]*Yes[Scoringpeptide[3],4]*Yes[Scoringpeptide[4],5]*Yes[Scoringpeptide[5],6]*Yes[Scoringpeptide[6],7]* Yes[Scoringpeptide[7],8]*Yes[Scoringpeptide[9],10]*Yes[Scoringpeptide[10],11]*Yes[Scoringpeptide[11],12]*Yes[Scoringpeptide[12],13]* Yes[Scoringpeptide[13],14]*Yes[Scoringpeptide[14],15]*Yes[Scoringpeptide[15],16] YesGeneratedScores[x]<-YesScore # ThisKinTableScore<-ThisKinTable[as.numeric(Scoringpeptide[1]),3]*ThisKinTable[as.numeric(Scoringpeptide[2]),4]*ThisKinTable[as.numeric(Scoringpeptide[3]),5]* # ThisKinTable[as.numeric(Scoringpeptide[4]),6]*ThisKinTable[as.numeric(Scoringpeptide[6]),8]* # ThisKinTable[as.numeric(Scoringpeptide[7]),9]*ThisKinTable[as.numeric(Scoringpeptide[8]),10]*ThisKinTable[as.numeric(Scoringpeptide[9]),11] # ThisKinGeneratedScores[x]<-ThisKinTableScore } AblNorm<-1/as.numeric(Abl[22,1]) AblThresh<-as.numeric(Abl[24,1]) AblTrueThresh<-((AblThresh*AblNorm)/(100-AblThresh)) AblActive<-unlist(AblGeneratedScores)>AblTrueThresh ArgNorm<-1/as.numeric(Arg[22,1]) ArgThresh<-as.numeric(Arg[24,1]) ArgTrueThresh<-((ArgThresh*ArgNorm)/(100-ArgThresh)) ArgActive<-unlist(ArgGeneratedScores)>ArgTrueThresh BtkNorm<-1/as.numeric(Btk[22,1]) BtkThresh<-as.numeric(Btk[24,1]) BtkTrueThresh<-((BtkThresh*BtkNorm)/(100-BtkThresh)) BtkActive<-unlist(BtkGeneratedScores)>BtkTrueThresh CskNorm<-1/as.numeric(Csk[22,1]) CskThresh<-as.numeric(Csk[24,1]) CskTrueThresh<-((CskThresh*CskNorm)/(100-CskThresh)) CskActive<-(CskGeneratedScores)>CskTrueThresh FynNorm<-1/as.numeric(Fyn[22,1]) FynThresh<-as.numeric(Fyn[24,1]) FynTrueThresh<-((FynThresh*FynNorm)/(100-FynThresh)) FynActive<-unlist(FynGeneratedScores)>FynTrueThresh HckNorm<-1/as.numeric(Hck[22,1]) HckThresh<-as.numeric(Hck[24,1]) HckTrueThresh<-((HckThresh*HckNorm)/(100-HckThresh)) HckActive<-unlist(HckGeneratedScores)>HckTrueThresh JAK2Norm<-1/as.numeric(JAK2[22,1]) JAK2Thresh<-as.numeric(JAK2[24,1]) JAK2TrueThresh<-((JAK2Thresh*JAK2Norm)/(100-JAK2Thresh)) JAk2Active<-unlist(JAK2GeneratedScores)>JAK2TrueThresh LckNorm<-1/as.numeric(Lck[22,1]) LckThresh<-as.numeric(Lck[24,1]) LckTrueThresh<-((LckThresh*LckNorm)/(100-LckThresh)) LckActive<-unlist(LckGeneratedScores)>LckTrueThresh LynNorm<-1/as.numeric(Lyn[22,1]) LynThresh<-as.numeric(Lyn[24,1]) LynTrueThresh<-((LynThresh*LynNorm)/(100-LynThresh)) LynActive<-unlist(LynGeneratedScores)>LynTrueThresh Pyk2Norm<-1/as.numeric(Pyk2[22,1]) Pyk2Thresh<-as.numeric(Pyk2[24,1]) Pyk2TrueThresh<-((Pyk2Thresh*Pyk2Norm)/(100-Pyk2Thresh)) Pyk2Active<-unlist(Pyk2GeneratedScores)>Pyk2TrueThresh SrcNorm<-1/as.numeric(Src[22,1]) SrcThresh<-as.numeric(Src[24,1]) SrcTrueThresh<-((SrcThresh*SrcNorm)/(100-SrcThresh)) SrcActive<-unlist(SrcGeneratedScores)>SrcTrueThresh SykNorm<-1/as.numeric(Syk[22,1]) SykThresh<-as.numeric(Syk[24,1]) SykTrueThresh<-((SykThresh*SykNorm)/(100-SykThresh)) SykActive<-unlist(SykGeneratedScores)>SykTrueThresh YesNorm<-1/as.numeric(Yes[22,1]) YesThresh<-as.numeric(Yes[24,1]) YesTrueThresh<-((YesThresh*YesNorm)/(100-YesThresh)) YesActive<-unlist(YesGeneratedScores)>YesTrueThresh AllActive<-AblActive+ArgActive+BtkActive+CskActive+FynActive+HckActive+JAk2Active+LckActive+LynActive+Pyk2Active+SrcActive+SykActive+YesActive #Btkactive+ Scores<-ThisKinGeneratedScores ThresholdValues<-ThisKinGenWeirdScore FullMotifs<-rep("Z",times=nrow(GeneratedPeptides)) for (i in 1:nrow(GeneratedPeptides)) { motif<-GeneratedPeptides[i,1:15] motif<-paste(motif,sep = "", collapse = "") FullMotifs[i]<-motif } PeptidesWithRanks<-cbind.data.frame(FullMotifs,GeneratedPeptides,Scores,ThresholdValues) PeptidesWithRanks<-cbind.data.frame(PeptidesWithRanks,AllActive,AblActive,ArgActive,BtkActive,CskActive,FynActive,HckActive,JAk2Active,LckActive,LynActive,Pyk2Active,SrcActive,SykActive,YesActive) RanksPeptides<-PeptidesWithRanks[order(PeptidesWithRanks$AllActive,decreasing = FALSE),] # PepRankHead<-c(1:9,"Sequence","RPMS","PMS") # RanksPeptides<-rbind.data.frame(PepRankHead,PeptidesWithRanks) #head(RanksPeptides) #now I have to score the negative sequences... for some reason #write up how we transfect with lipofectamine #3,4,5 questions #PAUSED EHRE AT 4:50, HOPING THAT FIXING MINERVOTHING SO THAT LEFT SPACES WORKS FIXES A THING. OTHERWISE #I FUCKED WITH THE MCC TABLE AND NEED TO FINISH IT ThisKinBlanks<-rep(1,times=17) #indx <- sapply(breast, is.factor) #ThisKinTable[indx] <- lapply(ThisKinTable[indx], function(x) as.character(x)) ThisKinTable$SetOfAAs<-as.character(ThisKinTable$SetOfAAs) #ThisKinTest<-rbind.data.frame(ThisKinTable,ThisKinBlanks) ThisKinTable<-rbind.data.frame(ThisKinTable,ThisKinBlanks) NegativeScores<-rep(NA,times=nrow(NegativeSubstrateList)) NegativeWeirdScores<-rep(NA,times=nrow(NegativeSubstrateList)) for (v in 1:nrow(NegativeSubstrateList)) { motif<-NegativeSubstrateList[v,2] motif<-unlist(strsplit(motif,"")) #if (length(motif)<9){print(v)}} # motif[1] <- sapply(motif[1], function (x) aa_props[x]) # motif[2] <- sapply(motif[2], function (x) aa_props[x]) # motif[3] <- sapply(motif[3], function (x) aa_props[x]) # motif[4] <- sapply(motif[4], function (x) aa_props[x]) # motif[5] <- sapply(motif[5], function (x) aa_props[x]) # motif[6] <- sapply(motif[6], function (x) aa_props[x]) # motif[7] <- sapply(motif[7], function (x) aa_props[x]) # motif[8] <- sapply(motif[8], function (x) aa_props[x]) # motif[9] <- sapply(motif[9], function (x) aa_props[x]) motif<- gsub(" ","O",motif) motif <- sapply(motif, function (x) aa_props[x]) Scoringpeptide<-motif Scoringpeptide<-Scoringpeptide+1 ThisKinTableScore<-as.numeric(ThisKinTable[Scoringpeptide[1],3])*ThisKinTable[as.numeric(Scoringpeptide[2]),4]*ThisKinTable[as.numeric(Scoringpeptide[3]),5]* ThisKinTable[as.numeric(Scoringpeptide[4]),6]*ThisKinTable[as.numeric(Scoringpeptide[5]),7]*ThisKinTable[as.numeric(Scoringpeptide[6]),8]*ThisKinTable[as.numeric(Scoringpeptide[7]),9]* #ThisKinTable[as.numeric(Scoringpeptide[8]),10]* ThisKinTable[as.numeric(Scoringpeptide[9]),11]*ThisKinTable[as.numeric(Scoringpeptide[10]),12]*ThisKinTable[as.numeric(Scoringpeptide[11]),13]* ThisKinTable[as.numeric(Scoringpeptide[12]),14]*ThisKinTable[as.numeric(Scoringpeptide[13]),15]*ThisKinTable[as.numeric(Scoringpeptide[14]),16]*ThisKinTable[as.numeric(Scoringpeptide[15]),17] NegativeScores[v]<-ThisKinTableScore ThisKinTableScore<-(ThisKinTableScore/(ThisKinTableScore+1/as.numeric(NormalizationScore[2]))) NegativeWeirdScores[v]<-ThisKinTableScore*100 } negativesubstrates<-NegativeSubstrateList[,2] NegativeWithScores<-cbind(negativesubstrates,as.character(NegativeScores),as.character(NegativeWeirdScores)) #NEED TO HAVE THE NEGATIVE SUBSTRATES BE OUTPUTTED PositiveScores<-rep(NA,times=nrow(ImportedSubstrateList)) PositiveWeirdScores<-rep(NA,times=nrow(ImportedSubstrateList)) for (v in 1:nrow(ImportedSubstrateList)) { motif<-ImportedSubstrateList[v,4:18] motif<-unlist(motif) motif<- gsub("^$","O",motif) motif <- sapply(motif, function (x) aa_props[x]) Scoringpeptide<-motif Scoringpeptide<-Scoringpeptide+1 ThisKinTableScore<-as.numeric(ThisKinTable[Scoringpeptide[1],3])*ThisKinTable[as.numeric(Scoringpeptide[2]),4]*ThisKinTable[as.numeric(Scoringpeptide[3]),5]* ThisKinTable[as.numeric(Scoringpeptide[4]),6]*ThisKinTable[as.numeric(Scoringpeptide[5]),7]*ThisKinTable[as.numeric(Scoringpeptide[6]),8]*ThisKinTable[as.numeric(Scoringpeptide[7]),9]* #ThisKinTable[as.numeric(Scoringpeptide[8]),10]* ThisKinTable[as.numeric(Scoringpeptide[9]),11]*ThisKinTable[as.numeric(Scoringpeptide[10]),12]*ThisKinTable[as.numeric(Scoringpeptide[11]),13]* ThisKinTable[as.numeric(Scoringpeptide[12]),14]*ThisKinTable[as.numeric(Scoringpeptide[13]),15]*ThisKinTable[as.numeric(Scoringpeptide[14]),16]*ThisKinTable[as.numeric(Scoringpeptide[15]),17] PositiveScores[v]<-ThisKinTableScore ThisKinTableScore<-(ThisKinTableScore/(ThisKinTableScore+1/as.numeric(NormalizationScore[2]))) PositiveWeirdScores[v]<-ThisKinTableScore*100 } positivesubstrates<-ImportedSubstrateList[,4:18] positivewithscores<-cbind.data.frame(positivesubstrates,PositiveScores,PositiveWeirdScores) #write down the transient transfection SOP and what we will be doing with them #write down the vector names I will be using #write down something about transforming bacteria and with what #90% whatevernness # TPninetyone<-length(PositiveWeirdScores[PositiveWeirdScores>=0.91]) # Senseninetyone<-TPninetyone/nrow(positivesubstrates) # # TNninetyone<-length(NegativeWeirdScores[NegativeWeirdScores<91]) # Specninetyone<-TNninetyone/100 #create the MCC table threshold<-c(1:100) threshold<-order(threshold,decreasing = TRUE) Truepositives<-c(1:100) Falsenegatives<-c(1:100) Sensitivity<-c(1:100) TrueNegatives<-c(1:100) FalsePositives<-c(1:100) Specificity<-c(1:100) Accuracy<-c(1:100) MCC<-c(1:100) EER<-c(1:100) #MAKE DAMN SURE THAT THE ACCESSION NUMBERS FOLLOW THE MOTIFS for (z in 1:100) { thres<-101-z Truepositives[z]<-length(PositiveWeirdScores[PositiveWeirdScores>=(thres)]) Falsenegatives[z]<-nrow(positivesubstrates)-Truepositives[z] Sensitivity[z]<-Truepositives[z]/(Falsenegatives[z]+Truepositives[z]) TrueNegatives[z]<-length(NegativeWeirdScores[NegativeWeirdScores<(thres)]) # at thresh 100 this should be 0, because it is total minus true negatives FalsePositives[z]<-nrow(NegativeSubstrateList)-TrueNegatives[z] Specificity[z]<-1-(TrueNegatives[z]/(FalsePositives[z]+TrueNegatives[z])) Accuracy[z]<-100*(Truepositives[z]+TrueNegatives[z])/(Falsenegatives[z]+FalsePositives[z]+TrueNegatives[z]+Truepositives[z]) MCC[z]<-((Truepositives[z]+TrueNegatives[z])-(Falsenegatives[z]+FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z]))) EER[z]<-.01*(((1-(Sensitivity[z]))*(Truepositives[z]+Falsenegatives[z]))+(Specificity[z]*(1-(Truepositives[z]+Falsenegatives[z])))) } Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,Specificity,Accuracy,MCC,EER) positiveheader<-c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,"RPMS","PMS") positivewithscores<-rbind.data.frame(positiveheader,positivewithscores) negativeheader<-c("Substrate","RPMS","PMS") colnames(NegativeWithScores)<-negativeheader # write.xlsx(NegativeWithScores,file = FILENAME, sheetName = "Negative Sequences Scored",col.names = TRUE,row.names = FALSE,append = TRUE) # write.xlsx(Characterization,file = FILENAME,sheetName = "Characterization Table",col.names = TRUE,row.names = FALSE,append = TRUE) # write.xlsx(RanksPeptides,file = FILENAME,sheetName = "Ranked Generated Peptides",col.names = FALSE,row.names = FALSE,append = TRUE) # write.xlsx(positivewithscores,file = FILENAME, sheetName = "Positive Sequences Scored",col.names = FALSE,row.names = FALSE,append = TRUE) write.table(x=c("Characterzation Table"),file = FILENAME2, col.names = FALSE,row.names = FALSE, append = TRUE,sep = ",") header<-colnames(Characterization) Characterization<-rbind.data.frame(header,Characterization) write.table(Characterization,file = FILENAME2, col.names = FALSE,row.names = FALSE, append = TRUE,sep = ",") # header<-colnames(RanksPeptides) # RanksPeptides<-rbind.data.frame(header,RanksPeptides) write.table(RanksPeptides,file = FILENAME3,append = FALSE,row.names = FALSE,col.names = TRUE,sep = ",")