# HG changeset patch # User jfb # Date 1518119472 18000 # Node ID a36f9cce16a36a408fb3b0afd277327b098d99f0 # Parent de59605e960a68fbaf9e00f947a2995cfd9530fd Deleted selected files diff -r de59605e960a -r a36f9cce16a3 Kinatest-R_part1.R --- a/Kinatest-R_part1.R Thu Feb 08 14:51:06 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1114 +0,0 @@ - -ImportedSubstrateList<- read.csv("input1", stringsAsFactors=FALSE) -NegativeSubstrateList<- read.csv("input2", stringsAsFactors=FALSE) -SubstrateBackgroundFrequency<- read.csv("input3", stringsAsFactors=FALSE) - -ScreenerFilename<-"screener" - - - -FILENAME<-"output1.csv" -FILENAME2<-"output2.csv" -FILENAME3<-"output3.csv" - - - - - - - - - - - - - - - - - - - - - - - - -OutputMatrix<-"KinaseMatrix.csv" -CharacterizationTable<-"CharacterizationTableForThisKinase.csv" -SDtable<-"SDtableforthisKinase" -SiteSelectivityTable<-"SiteSelectivityForThisKinase" - - - -substrates<-matrix(rep("A",times=((nrow(ImportedSubstrateList)-1)*15)),ncol = 15) -#SeqsToBeScored<-"asdasd" - -for (i in 2:nrow(ImportedSubstrateList)) -{ - substratemotif<-ImportedSubstrateList[i,4:18] - substratemotif[8]<-"Y" - #substratemotif<-paste(substratemotif,sep = "",collapse = "") - j=i-1 - substratemotif<-unlist(substratemotif) - substrates[j,1:15]<-substratemotif -} - -# SpacesToOs<-c(""="O",) -# substrates<-SpacesToOs[substrates] - -SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency),2] - -if(2==2){ -Amean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE) -Cmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE) -Dmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE) -Emean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE) -Fmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),6]), na.rm=TRUE) -Gmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),7]), na.rm=TRUE) -Hmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),8]), na.rm=TRUE) -Imean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),9]), na.rm=TRUE) -Kmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),10]), na.rm=TRUE) -Lmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),11]), na.rm=TRUE) -Mmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),12]), na.rm=TRUE) -Nmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),13]), na.rm=TRUE) -Pmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),14]), na.rm=TRUE) -Qmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),15]), na.rm=TRUE) -Rmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),16]), na.rm=TRUE) -Smean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),17]), na.rm=TRUE) -Tmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),18]), na.rm=TRUE) -Vmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),19]), na.rm=TRUE) -Wmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),20]), na.rm=TRUE) -Ymean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),21]), na.rm=TRUE) - -AllMeans<-c(Amean,Cmean,Dmean,Emean,Fmean,Gmean,Hmean,Imean,Kmean,Lmean,Mmean,Nmean,Pmean,Qmean,Rmean,Smean,Tmean,Vmean,Wmean,Ymean) - -Asd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE) -Csd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE) -Dsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE) -Esd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE) -Fsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),6]), na.rm=TRUE) -Gsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),7]), na.rm=TRUE) -Hsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),8]), na.rm=TRUE) -Isd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),9]), na.rm=TRUE) -Ksd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),10]), na.rm=TRUE) -Lsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),11]), na.rm=TRUE) -Msd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),12]), na.rm=TRUE) -Nsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),13]), na.rm=TRUE) -Psd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),14]), na.rm=TRUE) -Qsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),15]), na.rm=TRUE) -Rsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),16]), na.rm=TRUE) -Ssd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),17]), na.rm=TRUE) -Tsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),18]), na.rm=TRUE) -Vsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),19]), na.rm=TRUE) -Wsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),20]), na.rm=TRUE) -Ysd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),21]), na.rm=TRUE) -} -AllSDs<-c(Asd,Csd,Dsd,Esd,Fsd,Gsd,Hsd,Isd,Ksd,Lsd,Msd,Nsd,Psd,Qsd,Rsd,Ssd,Tsd,Vsd,Wsd,Ysd) -#this is subbackfreq SDs - -SBF_statisticalvalues<-cbind(AllMeans,AllSDs) - -#create the percent table -if (1==1){ - Column1<-substrates[,1] - Column2<-substrates[,2] - Column3<-substrates[,3] - Column4<-substrates[,4] - Column5<-substrates[,5] - Column6<-substrates[,6] - Column7<-substrates[,7] - Column8<-substrates[,8] - Column9<-substrates[,9] - Column10<-substrates[,10] - Column11<-substrates[,11] - Column12<-substrates[,12] - Column13<-substrates[,13] - Column14<-substrates[,14] - Column15<-substrates[,15] - - spaces1<-sum((Column1%in% "")) - spaces2<-sum(Column2%in% "") - spaces3<-sum(Column3%in% "") - spaces4<-sum(Column4%in% "") - spaces5<-sum(Column5%in% "") - spaces6<-sum(Column6%in% "") - spaces7<-sum(Column7%in% "") - spaces8<-sum(Column8%in% "") - spaces9<-sum(Column9%in% "") - spaces10<-sum(Column10%in% "") - spaces11<-sum(Column11%in% "") - spaces12<-sum(Column12%in% "") - spaces13<-sum(Column13%in% "") - spaces14<-sum(Column14%in% "") - spaces15<-sum(Column15%in% "") - - A1<-sum(Column1 %in% "A")/(length(Column1)-spaces1) - A2<-sum(Column2 %in% "A")/(length(Column2)-spaces2) - A3<-sum(Column3 %in% "A")/(length(Column3)-spaces3) - A4<-sum(Column4 %in% "A")/(length(Column4)-spaces4) - A5<-sum(Column5 %in% "A")/(length(Column5)-spaces5) - A6<-sum(Column6 %in% "A")/(length(Column6)-spaces6) - A7<-sum(Column7 %in% "A")/(length(Column7)-spaces7) - A8<-sum(Column8 %in% "A")/(length(Column8)-spaces8) - A9<-sum(Column9 %in% "A")/(length(Column9)-spaces9) - A10<-sum(Column10 %in% "A")/(length(Column10)-spaces10) - A11<-sum(Column11 %in% "A")/(length(Column11)-spaces11) - A12<-sum(Column12 %in% "A")/(length(Column12)-spaces12) - A13<-sum(Column13 %in% "A")/(length(Column13)-spaces13) - A14<-sum(Column14 %in% "A")/(length(Column14)-spaces14) - A15<-sum(Column15 %in% "A")/(length(Column15)-spaces15) - AllAs<-cbind(A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15) - - C1<-sum(Column1 %in% "C")/(length(Column1)-spaces1) - C2<-sum(Column2 %in% "C")/(length(Column2)-spaces2) - C3<-sum(Column3 %in% "C")/(length(Column3)-spaces3) - C4<-sum(Column4 %in% "C")/(length(Column4)-spaces4) - C5<-sum(Column5 %in% "C")/(length(Column5)-spaces5) - C6<-sum(Column6 %in% "C")/(length(Column6)-spaces6) - C7<-sum(Column7 %in% "C")/(length(Column7)-spaces7) - C8<-sum(Column8 %in% "C")/(length(Column8)-spaces8) - C9<-sum(Column9 %in% "C")/(length(Column9)-spaces9) - C10<-sum(Column10 %in% "C")/(length(Column10)-spaces10) - C11<-sum(Column11 %in% "C")/(length(Column11)-spaces11) - C12<-sum(Column12 %in% "C")/(length(Column12)-spaces12) - C13<-sum(Column13 %in% "C")/(length(Column13)-spaces13) - C14<-sum(Column14 %in% "C")/(length(Column14)-spaces14) - C15<-sum(Column15 %in% "C")/(length(Column15)-spaces15) - CllCs<-cbind(C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15) - - D1<-sum(Column1 %in% "D")/(length(Column1)-spaces1) - D2<-sum(Column2 %in% "D")/(length(Column2)-spaces2) - D3<-sum(Column3 %in% "D")/(length(Column3)-spaces3) - D4<-sum(Column4 %in% "D")/(length(Column4)-spaces4) - D5<-sum(Column5 %in% "D")/(length(Column5)-spaces5) - D6<-sum(Column6 %in% "D")/(length(Column6)-spaces6) - D7<-sum(Column7 %in% "D")/(length(Column7)-spaces7) - D8<-sum(Column8 %in% "D")/(length(Column8)-spaces8) - D9<-sum(Column9 %in% "D")/(length(Column9)-spaces9) - D10<-sum(Column10 %in% "D")/(length(Column10)-spaces10) - D11<-sum(Column11 %in% "D")/(length(Column11)-spaces11) - D12<-sum(Column12 %in% "D")/(length(Column12)-spaces12) - D13<-sum(Column13 %in% "D")/(length(Column13)-spaces13) - D14<-sum(Column14 %in% "D")/(length(Column14)-spaces14) - D15<-sum(Column15 %in% "D")/(length(Column15)-spaces15) - DllDs<-cbind(D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15) - - E1<-sum(Column1 %in% "E")/(length(Column1)-spaces1) - E2<-sum(Column2 %in% "E")/(length(Column2)-spaces2) - E3<-sum(Column3 %in% "E")/(length(Column3)-spaces3) - E4<-sum(Column4 %in% "E")/(length(Column4)-spaces4) - E5<-sum(Column5 %in% "E")/(length(Column5)-spaces5) - E6<-sum(Column6 %in% "E")/(length(Column6)-spaces6) - E7<-sum(Column7 %in% "E")/(length(Column7)-spaces7) - E8<-sum(Column8 %in% "E")/(length(Column8)-spaces8) - E9<-sum(Column9 %in% "E")/(length(Column9)-spaces9) - E10<-sum(Column10 %in% "E")/(length(Column10)-spaces10) - E11<-sum(Column11 %in% "E")/(length(Column11)-spaces11) - E12<-sum(Column12 %in% "E")/(length(Column12)-spaces12) - E13<-sum(Column13 %in% "E")/(length(Column13)-spaces13) - E14<-sum(Column14 %in% "E")/(length(Column14)-spaces14) - E15<-sum(Column15 %in% "E")/(length(Column15)-spaces15) - EllEs<-cbind(E1,E2,E3,E4,E5,E6,E7,E8,E9,E10,E11,E12,E13,E14,E15) - - - F1<-sum(Column1 %in% "F")/(length(Column1)-spaces1) - F2<-sum(Column2 %in% "F")/(length(Column2)-spaces2) - F3<-sum(Column3 %in% "F")/(length(Column3)-spaces3) - F4<-sum(Column4 %in% "F")/(length(Column4)-spaces4) - F5<-sum(Column5 %in% "F")/(length(Column5)-spaces5) - F6<-sum(Column6 %in% "F")/(length(Column6)-spaces6) - F7<-sum(Column7 %in% "F")/(length(Column7)-spaces7) - F8<-sum(Column8 %in% "F")/(length(Column8)-spaces8) - F9<-sum(Column9 %in% "F")/(length(Column9)-spaces9) - F10<-sum(Column10 %in% "F")/(length(Column10)-spaces10) - F11<-sum(Column11 %in% "F")/(length(Column11)-spaces11) - F12<-sum(Column12 %in% "F")/(length(Column12)-spaces12) - F13<-sum(Column13 %in% "F")/(length(Column13)-spaces13) - F14<-sum(Column14 %in% "F")/(length(Column14)-spaces14) - F15<-sum(Column15 %in% "F")/(length(Column15)-spaces15) - FllFs<-cbind(F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15) - - - G1<-sum(Column1 %in% "G")/(length(Column1)-spaces1) - G2<-sum(Column2 %in% "G")/(length(Column2)-spaces2) - G3<-sum(Column3 %in% "G")/(length(Column3)-spaces3) - G4<-sum(Column4 %in% "G")/(length(Column4)-spaces4) - G5<-sum(Column5 %in% "G")/(length(Column5)-spaces5) - G6<-sum(Column6 %in% "G")/(length(Column6)-spaces6) - G7<-sum(Column7 %in% "G")/(length(Column7)-spaces7) - G8<-sum(Column8 %in% "G")/(length(Column8)-spaces8) - G9<-sum(Column9 %in% "G")/(length(Column9)-spaces9) - G10<-sum(Column10 %in% "G")/(length(Column10)-spaces10) - G11<-sum(Column11 %in% "G")/(length(Column11)-spaces11) - G12<-sum(Column12 %in% "G")/(length(Column12)-spaces12) - G13<-sum(Column13 %in% "G")/(length(Column13)-spaces13) - G14<-sum(Column14 %in% "G")/(length(Column14)-spaces14) - G15<-sum(Column15 %in% "G")/(length(Column15)-spaces15) - GllGs<-cbind(G1,G2,G3,G4,G5,G6,G7,G8,G9,G10,G11,G12,G13,G14,G15) - - - H1<-sum(Column1 %in% "H")/(length(Column1)-spaces1) - H2<-sum(Column2 %in% "H")/(length(Column2)-spaces2) - H3<-sum(Column3 %in% "H")/(length(Column3)-spaces3) - H4<-sum(Column4 %in% "H")/(length(Column4)-spaces4) - H5<-sum(Column5 %in% "H")/(length(Column5)-spaces5) - H6<-sum(Column6 %in% "H")/(length(Column6)-spaces6) - H7<-sum(Column7 %in% "H")/(length(Column7)-spaces7) - H8<-sum(Column8 %in% "H")/(length(Column8)-spaces8) - H9<-sum(Column9 %in% "H")/(length(Column9)-spaces9) - H10<-sum(Column10 %in% "H")/(length(Column10)-spaces10) - H11<-sum(Column11 %in% "H")/(length(Column11)-spaces11) - H12<-sum(Column12 %in% "H")/(length(Column12)-spaces12) - H13<-sum(Column13 %in% "H")/(length(Column13)-spaces13) - H14<-sum(Column14 %in% "H")/(length(Column14)-spaces14) - H15<-sum(Column15 %in% "H")/(length(Column15)-spaces15) - HllHs<-cbind(H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,H14,H15) - - - I1<-sum(Column1 %in% "I")/(length(Column1)-spaces1) - I2<-sum(Column2 %in% "I")/(length(Column2)-spaces2) - I3<-sum(Column3 %in% "I")/(length(Column3)-spaces3) - I4<-sum(Column4 %in% "I")/(length(Column4)-spaces4) - I5<-sum(Column5 %in% "I")/(length(Column5)-spaces5) - I6<-sum(Column6 %in% "I")/(length(Column6)-spaces6) - I7<-sum(Column7 %in% "I")/(length(Column7)-spaces7) - I8<-sum(Column8 %in% "I")/(length(Column8)-spaces8) - I9<-sum(Column9 %in% "I")/(length(Column9)-spaces9) - I10<-sum(Column10 %in% "I")/(length(Column10)-spaces10) - I11<-sum(Column11 %in% "I")/(length(Column11)-spaces11) - I12<-sum(Column12 %in% "I")/(length(Column12)-spaces12) - I13<-sum(Column13 %in% "I")/(length(Column13)-spaces13) - I14<-sum(Column14 %in% "I")/(length(Column14)-spaces14) - I15<-sum(Column15 %in% "I")/(length(Column15)-spaces15) - IllIs<-cbind(I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,I14,I15) - - - K1<-sum(Column1 %in% "K")/(length(Column1)-spaces1) - K2<-sum(Column2 %in% "K")/(length(Column2)-spaces2) - K3<-sum(Column3 %in% "K")/(length(Column3)-spaces3) - K4<-sum(Column4 %in% "K")/(length(Column4)-spaces4) - K5<-sum(Column5 %in% "K")/(length(Column5)-spaces5) - K6<-sum(Column6 %in% "K")/(length(Column6)-spaces6) - K7<-sum(Column7 %in% "K")/(length(Column7)-spaces7) - K8<-sum(Column8 %in% "K")/(length(Column8)-spaces8) - K9<-sum(Column9 %in% "K")/(length(Column9)-spaces9) - K10<-sum(Column10 %in% "K")/(length(Column10)-spaces10) - K11<-sum(Column11 %in% "K")/(length(Column11)-spaces11) - K12<-sum(Column12 %in% "K")/(length(Column12)-spaces12) - K13<-sum(Column13 %in% "K")/(length(Column13)-spaces13) - K14<-sum(Column14 %in% "K")/(length(Column14)-spaces14) - K15<-sum(Column15 %in% "K")/(length(Column15)-spaces15) - KllKs<-cbind(K1,K2,K3,K4,K5,K6,K7,K8,K9,K10,K11,K12,K13,K14,K15) - - - L1<-sum(Column1 %in% "L")/(length(Column1)-spaces1) - L2<-sum(Column2 %in% "L")/(length(Column2)-spaces2) - L3<-sum(Column3 %in% "L")/(length(Column3)-spaces3) - L4<-sum(Column4 %in% "L")/(length(Column4)-spaces4) - L5<-sum(Column5 %in% "L")/(length(Column5)-spaces5) - L6<-sum(Column6 %in% "L")/(length(Column6)-spaces6) - L7<-sum(Column7 %in% "L")/(length(Column7)-spaces7) - L8<-sum(Column8 %in% "L")/(length(Column8)-spaces8) - L9<-sum(Column9 %in% "L")/(length(Column9)-spaces9) - L10<-sum(Column10 %in% "L")/(length(Column10)-spaces10) - L11<-sum(Column11 %in% "L")/(length(Column11)-spaces11) - L12<-sum(Column12 %in% "L")/(length(Column12)-spaces12) - L13<-sum(Column13 %in% "L")/(length(Column13)-spaces13) - L14<-sum(Column14 %in% "L")/(length(Column14)-spaces14) - L15<-sum(Column15 %in% "L")/(length(Column15)-spaces15) - LllLs<-cbind(L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15) - - - M1<-sum(Column1 %in% "M")/(length(Column1)-spaces1) - M2<-sum(Column2 %in% "M")/(length(Column2)-spaces2) - M3<-sum(Column3 %in% "M")/(length(Column3)-spaces3) - M4<-sum(Column4 %in% "M")/(length(Column4)-spaces4) - M5<-sum(Column5 %in% "M")/(length(Column5)-spaces5) - M6<-sum(Column6 %in% "M")/(length(Column6)-spaces6) - M7<-sum(Column7 %in% "M")/(length(Column7)-spaces7) - M8<-sum(Column8 %in% "M")/(length(Column8)-spaces8) - M9<-sum(Column9 %in% "M")/(length(Column9)-spaces9) - M10<-sum(Column10 %in% "M")/(length(Column10)-spaces10) - M11<-sum(Column11 %in% "M")/(length(Column11)-spaces11) - M12<-sum(Column12 %in% "M")/(length(Column12)-spaces12) - M13<-sum(Column13 %in% "M")/(length(Column13)-spaces13) - M14<-sum(Column14 %in% "M")/(length(Column14)-spaces14) - M15<-sum(Column15 %in% "M")/(length(Column15)-spaces15) - MllMs<-cbind(M1,M2,M3,M4,M5,M6,M7,M8,M9,M10,M11,M12,M13,M14,M15) - - - N1<-sum(Column1 %in% "N")/(length(Column1)-spaces1) - N2<-sum(Column2 %in% "N")/(length(Column2)-spaces2) - N3<-sum(Column3 %in% "N")/(length(Column3)-spaces3) - N4<-sum(Column4 %in% "N")/(length(Column4)-spaces4) - N5<-sum(Column5 %in% "N")/(length(Column5)-spaces5) - N6<-sum(Column6 %in% "N")/(length(Column6)-spaces6) - N7<-sum(Column7 %in% "N")/(length(Column7)-spaces7) - N8<-sum(Column8 %in% "N")/(length(Column8)-spaces8) - N9<-sum(Column9 %in% "N")/(length(Column9)-spaces9) - N10<-sum(Column10 %in% "N")/(length(Column10)-spaces10) - N11<-sum(Column11 %in% "N")/(length(Column11)-spaces11) - N12<-sum(Column12 %in% "N")/(length(Column12)-spaces12) - N13<-sum(Column13 %in% "N")/(length(Column13)-spaces13) - N14<-sum(Column14 %in% "N")/(length(Column14)-spaces14) - N15<-sum(Column15 %in% "N")/(length(Column15)-spaces15) - NllNs<-cbind(N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12,N13,N14,N15) - - - P1<-sum(Column1 %in% "P")/(length(Column1)-spaces1) - P2<-sum(Column2 %in% "P")/(length(Column2)-spaces2) - P3<-sum(Column3 %in% "P")/(length(Column3)-spaces3) - P4<-sum(Column4 %in% "P")/(length(Column4)-spaces4) - P5<-sum(Column5 %in% "P")/(length(Column5)-spaces5) - P6<-sum(Column6 %in% "P")/(length(Column6)-spaces6) - P7<-sum(Column7 %in% "P")/(length(Column7)-spaces7) - P8<-sum(Column8 %in% "P")/(length(Column8)-spaces8) - P9<-sum(Column9 %in% "P")/(length(Column9)-spaces9) - P10<-sum(Column10 %in% "P")/(length(Column10)-spaces10) - P11<-sum(Column11 %in% "P")/(length(Column11)-spaces11) - P12<-sum(Column12 %in% "P")/(length(Column12)-spaces12) - P13<-sum(Column13 %in% "P")/(length(Column13)-spaces13) - P14<-sum(Column14 %in% "P")/(length(Column14)-spaces14) - P15<-sum(Column15 %in% "P")/(length(Column15)-spaces15) - PllPs<-cbind(P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15) - - - Q1<-sum(Column1 %in% "Q")/(length(Column1)-spaces1) - Q2<-sum(Column2 %in% "Q")/(length(Column2)-spaces2) - Q3<-sum(Column3 %in% "Q")/(length(Column3)-spaces3) - Q4<-sum(Column4 %in% "Q")/(length(Column4)-spaces4) - Q5<-sum(Column5 %in% "Q")/(length(Column5)-spaces5) - Q6<-sum(Column6 %in% "Q")/(length(Column6)-spaces6) - Q7<-sum(Column7 %in% "Q")/(length(Column7)-spaces7) - Q8<-sum(Column8 %in% "Q")/(length(Column8)-spaces8) - Q9<-sum(Column9 %in% "Q")/(length(Column9)-spaces9) - Q10<-sum(Column10 %in% "Q")/(length(Column10)-spaces10) - Q11<-sum(Column11 %in% "Q")/(length(Column11)-spaces11) - Q12<-sum(Column12 %in% "Q")/(length(Column12)-spaces12) - Q13<-sum(Column13 %in% "Q")/(length(Column13)-spaces13) - Q14<-sum(Column14 %in% "Q")/(length(Column14)-spaces14) - Q15<-sum(Column15 %in% "Q")/(length(Column15)-spaces15) - QllQs<-cbind(Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15) - - - R1<-sum(Column1 %in% "R")/(length(Column1)-spaces1) - R2<-sum(Column2 %in% "R")/(length(Column2)-spaces2) - R3<-sum(Column3 %in% "R")/(length(Column3)-spaces3) - R4<-sum(Column4 %in% "R")/(length(Column4)-spaces4) - R5<-sum(Column5 %in% "R")/(length(Column5)-spaces5) - R6<-sum(Column6 %in% "R")/(length(Column6)-spaces6) - R7<-sum(Column7 %in% "R")/(length(Column7)-spaces7) - R8<-sum(Column8 %in% "R")/(length(Column8)-spaces8) - R9<-sum(Column9 %in% "R")/(length(Column9)-spaces9) - R10<-sum(Column10 %in% "R")/(length(Column10)-spaces10) - R11<-sum(Column11 %in% "R")/(length(Column11)-spaces11) - R12<-sum(Column12 %in% "R")/(length(Column12)-spaces12) - R13<-sum(Column13 %in% "R")/(length(Column13)-spaces13) - R14<-sum(Column14 %in% "R")/(length(Column14)-spaces14) - R15<-sum(Column15 %in% "R")/(length(Column15)-spaces15) - RllRs<-cbind(R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,R13,R14,R15) - - - S1<-sum(Column1 %in% "S")/(length(Column1)-spaces1) - S2<-sum(Column2 %in% "S")/(length(Column2)-spaces2) - S3<-sum(Column3 %in% "S")/(length(Column3)-spaces3) - S4<-sum(Column4 %in% "S")/(length(Column4)-spaces4) - S5<-sum(Column5 %in% "S")/(length(Column5)-spaces5) - S6<-sum(Column6 %in% "S")/(length(Column6)-spaces6) - S7<-sum(Column7 %in% "S")/(length(Column7)-spaces7) - S8<-sum(Column8 %in% "S")/(length(Column8)-spaces8) - S9<-sum(Column9 %in% "S")/(length(Column9)-spaces9) - S10<-sum(Column10 %in% "S")/(length(Column10)-spaces10) - S11<-sum(Column11 %in% "S")/(length(Column11)-spaces11) - S12<-sum(Column12 %in% "S")/(length(Column12)-spaces12) - S13<-sum(Column13 %in% "S")/(length(Column13)-spaces13) - S14<-sum(Column14 %in% "S")/(length(Column14)-spaces14) - S15<-sum(Column15 %in% "S")/(length(Column15)-spaces15) - SllSs<-cbind(S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15) - - - T1<-sum(Column1 %in% "T")/(length(Column1)-spaces1) - T2<-sum(Column2 %in% "T")/(length(Column2)-spaces2) - T3<-sum(Column3 %in% "T")/(length(Column3)-spaces3) - T4<-sum(Column4 %in% "T")/(length(Column4)-spaces4) - T5<-sum(Column5 %in% "T")/(length(Column5)-spaces5) - T6<-sum(Column6 %in% "T")/(length(Column6)-spaces6) - T7<-sum(Column7 %in% "T")/(length(Column7)-spaces7) - T8<-sum(Column8 %in% "T")/(length(Column8)-spaces8) - T9<-sum(Column9 %in% "T")/(length(Column9)-spaces9) - T10<-sum(Column10 %in% "T")/(length(Column10)-spaces10) - T11<-sum(Column11 %in% "T")/(length(Column11)-spaces11) - T12<-sum(Column12 %in% "T")/(length(Column12)-spaces12) - T13<-sum(Column13 %in% "T")/(length(Column13)-spaces13) - T14<-sum(Column14 %in% "T")/(length(Column14)-spaces14) - T15<-sum(Column15 %in% "T")/(length(Column15)-spaces15) - TllTs<-cbind(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15) - - - V1<-sum(Column1 %in% "V")/(length(Column1)-spaces1) - V2<-sum(Column2 %in% "V")/(length(Column2)-spaces2) - V3<-sum(Column3 %in% "V")/(length(Column3)-spaces3) - V4<-sum(Column4 %in% "V")/(length(Column4)-spaces4) - V5<-sum(Column5 %in% "V")/(length(Column5)-spaces5) - V6<-sum(Column6 %in% "V")/(length(Column6)-spaces6) - V7<-sum(Column7 %in% "V")/(length(Column7)-spaces7) - V8<-sum(Column8 %in% "V")/(length(Column8)-spaces8) - V9<-sum(Column9 %in% "V")/(length(Column9)-spaces9) - V10<-sum(Column10 %in% "V")/(length(Column10)-spaces10) - V11<-sum(Column11 %in% "V")/(length(Column11)-spaces11) - V12<-sum(Column12 %in% "V")/(length(Column12)-spaces12) - V13<-sum(Column13 %in% "V")/(length(Column13)-spaces13) - V14<-sum(Column14 %in% "V")/(length(Column14)-spaces14) - V15<-sum(Column15 %in% "V")/(length(Column15)-spaces15) - VllVs<-cbind(V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15) - - - W1<-sum(Column1 %in% "W")/(length(Column1)-spaces1) - W2<-sum(Column2 %in% "W")/(length(Column2)-spaces2) - W3<-sum(Column3 %in% "W")/(length(Column3)-spaces3) - W4<-sum(Column4 %in% "W")/(length(Column4)-spaces4) - W5<-sum(Column5 %in% "W")/(length(Column5)-spaces5) - W6<-sum(Column6 %in% "W")/(length(Column6)-spaces6) - W7<-sum(Column7 %in% "W")/(length(Column7)-spaces7) - W8<-sum(Column8 %in% "W")/(length(Column8)-spaces8) - W9<-sum(Column9 %in% "W")/(length(Column9)-spaces9) - W10<-sum(Column10 %in% "W")/(length(Column10)-spaces10) - W11<-sum(Column11 %in% "W")/(length(Column11)-spaces11) - W12<-sum(Column12 %in% "W")/(length(Column12)-spaces12) - W13<-sum(Column13 %in% "W")/(length(Column13)-spaces13) - W14<-sum(Column14 %in% "W")/(length(Column14)-spaces14) - W15<-sum(Column15 %in% "W")/(length(Column15)-spaces15) - WllWs<-cbind(W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14,W15) - - - Y1<-sum(Column1 %in% "Y")/(length(Column1)-spaces1) - Y2<-sum(Column2 %in% "Y")/(length(Column2)-spaces2) - Y3<-sum(Column3 %in% "Y")/(length(Column3)-spaces3) - Y4<-sum(Column4 %in% "Y")/(length(Column4)-spaces4) - Y5<-sum(Column5 %in% "Y")/(length(Column5)-spaces5) - Y6<-sum(Column6 %in% "Y")/(length(Column6)-spaces6) - Y7<-sum(Column7 %in% "Y")/(length(Column7)-spaces7) - Y8<-sum(Column8 %in% "Y")/(length(Column8)-spaces8) - Y9<-sum(Column9 %in% "Y")/(length(Column9)-spaces9) - Y10<-sum(Column10 %in% "Y")/(length(Column10)-spaces10) - Y11<-sum(Column11 %in% "Y")/(length(Column11)-spaces11) - Y12<-sum(Column12 %in% "Y")/(length(Column12)-spaces12) - Y13<-sum(Column13 %in% "Y")/(length(Column13)-spaces13) - Y14<-sum(Column14 %in% "Y")/(length(Column14)-spaces14) - Y15<-sum(Column15 %in% "Y")/(length(Column15)-spaces15) - YllYs<-cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y8,Y9,Y10,Y11,Y12,Y13,Y14,Y15) -} -#this is substrate percents - -#A C D E F G H I K L N P Q R S T V W Y - -PercentTable<-rbind(AllAs,CllCs,DllDs,EllEs,FllFs,GllGs,HllHs,IllIs,KllKs,LllLs,MllMs,NllNs,PllPs,QllQs,RllRs,SllSs,TllTs,VllVs,WllWs,YllYs) -PercentTable<-PercentTable*100 - -#create the SD table -SDtable<-matrix(data = rep(1,times=(nrow(PercentTable)*ncol(PercentTable))),nrow = nrow(PercentTable),ncol = ncol(PercentTable)) -#for every row, a percertage minus the same mean over the same SD -if(1==1){ - SDtable[1,]<-(PercentTable[1,]-Amean)/Asd - SDtable[2,]<-(PercentTable[2,]-Cmean)/Csd - SDtable[3,]<-(PercentTable[3,]-Dmean)/Dsd - SDtable[4,]<-(PercentTable[4,]-Emean)/Esd - SDtable[5,]<-(PercentTable[5,]-Fmean)/Fsd - SDtable[6,]<-(PercentTable[6,]-Gmean)/Gsd - SDtable[7,]<-(PercentTable[7,]-Hmean)/Hsd - SDtable[8,]<-(PercentTable[8,]-Imean)/Isd - SDtable[9,]<-(PercentTable[9,]-Kmean)/Ksd - SDtable[10,]<-(PercentTable[10,]-Lmean)/Lsd - SDtable[11,]<-(PercentTable[11,]-Mmean)/Msd - SDtable[12,]<-(PercentTable[12,]-Nmean)/Nsd - SDtable[13,]<-(PercentTable[13,]-Pmean)/Psd - SDtable[14,]<-(PercentTable[14,]-Qmean)/Qsd - SDtable[15,]<-(PercentTable[15,]-Rmean)/Rsd - SDtable[16,]<-(PercentTable[16,]-Smean)/Ssd - SDtable[17,]<-(PercentTable[17,]-Tmean)/Tsd - SDtable[18,]<-(PercentTable[18,]-Vmean)/Vsd - SDtable[19,]<-(PercentTable[19,]-Wmean)/Wsd - SDtable[20,]<-(PercentTable[20,]-Ymean)/Ysd -} - - -SetOfAAs<-c("Letter","A","C","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y") - - -SumOfSigmaAAs<-c(1:15) - -for (i in 1:15){ - SumOfSigmasValue<-0 - for (j in 1:20){ - value<-0 - if (SDtable[j,i]>2){ - value<-sum(substrates[,i]==SetOfAAs[j]) - } - SumOfSigmasValue<-SumOfSigmasValue+value - } - SumOfSigmaAAs[i]<-SumOfSigmasValue -} - -# AAs1<-length(substrates[,1])-sum(substrates[,1]=="") -# AAs2<-length(substrates[,2])-sum(substrates[,2]=="") -# AAs3<-length(substrates[,3])-sum(substrates[,3]=="") -# AAs4<-length(substrates[,4])-sum(substrates[,4]=="") -# AAs5<-length(substrates[,5])-sum(substrates[,5]=="") -# AAs6<-length(substrates[,6])-sum(substrates[,6]=="") -# AAs7<-length(substrates[,7])-sum(substrates[,7]=="") -# AAs8<-length(substrates[,8])-sum(substrates[,8]=="") -# AAs9<-length(substrates[,9])-sum(substrates[,9]=="") -# -# -# -# #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9) -# AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]), -# length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]), -# length(substrates[,9])) - -SumOfExpectedSigmaAAs<-c(1:15) -for (i in 1:15){ - ExpectedValue<-0 - for (j in 1:20){ - value<-0 - if (SDtable[j,i]>2){ - value<-AllMeans[j] - } - ExpectedValue<-ExpectedValue+value - } - SumOfExpectedSigmaAAs[i]<-ExpectedValue*(length(substrates[,i])-sum(substrates[,i]%in% ""))/100 -} - -SelectivityRow<-SumOfSigmaAAs/SumOfExpectedSigmaAAs -SelectivitySheet<-rbind(SumOfSigmaAAs,SumOfExpectedSigmaAAs,SelectivityRow) - -SetOfAAs<-matrix(data = SetOfAAs,ncol = 1) - -SDtableu<-SDtable -HeaderSD<-c(-7:7) -SDtable<-rbind(HeaderSD,SDtableu) -row.names(SDtable)<-NULL -SDtable<-data.frame(SetOfAAs,SDtable) - -PercentTable<-rbind(HeaderSD,PercentTable) -row.names(PercentTable)<-NULL -PercentTable<-data.frame(SetOfAAs,PercentTable) -numberofY<-as.numeric(SubstrateBackgroundFrequency$Number.of.Y) -numberofY<-numberofY[!is.na(numberofY)] - -numberofPY<-as.numeric(SubstrateBackgroundFrequency$Number.of.pY) -numberofPY<-numberofPY[!is.na(numberofPY)] - -NormalizationScore<-sum(numberofPY)/sum(numberofY) - -# positions<-matrix(data = NA, nrow=20,ncol = 15) -# -# #column1 -# -# for (q in 1:15) { -# sA<-sum(substrates[,i]=="A") -# positions[1,i]<-sA -# sC<-sum(substrates[,i]=="C") -# positions[2,i]<-sC -# sD<-sum(substrates[,i]=="D") -# positions[3,i]<-sD -# sE<-sum(substrates[,i]=="E") -# positions[4,i]<-sE -# sF<-sum(substrates[,i]=="F") -# sG<-sum(substrates[,i]=="G") -# sH<-sum(substrates[,i]=="H") -# sI<-sum(substrates[,i]=="I") -# sK<-sum(substrates[,i]=="K") -# sL<-sum(substrates[,i]=="L") -# sM<-sum(substrates[,i]=="M") -# sN<-sum(substrates[,i]=="N") -# sP<-sum(substrates[,i]=="P") -# sQ<-sum(substrates[,i]=="Q") -# sR<-sum(substrates[,i]=="R") -# sS<-sum(substrates[,i]=="S") -# sT<-sum(substrates[,i]=="T") -# sV<-sum(substrates[,i]=="V") -# sW<-sum(substrates[,i]=="W") -# sY<-sum(substrates[,i]=="Y") -# positions[5,i]<-sF -# positions[6,i]<-sG -# positions[7,i]<-sH -# positions[8,i]<-sI -# positions[9,i]<-sK -# positions[10,i]<-sL -# positions[11,i]<-sM -# positions[12,i]<-sN -# positions[13,i]<-sP -# positions[14,i]<-sQ -# positions[15,i]<-sR -# positions[16,i]<-sS -# positions[17,i]<-sT -# positions[18,i]<-sV -# positions[19,i]<-sW -# positions[20,i]<-sY -# } - -#here I create the positions table which is needed for the endogenous prob matrix and is simply "how many times did an AA show up in this spot? -if (6==6){ - Column1<-substrates[,1] - Column2<-substrates[,2] - Column3<-substrates[,3] - Column4<-substrates[,4] - Column5<-substrates[,5] - Column6<-substrates[,6] - Column7<-substrates[,7] - Column8<-substrates[,8] - Column9<-substrates[,9] - Column10<-substrates[,10] - Column11<-substrates[,11] - Column12<-substrates[,12] - Column13<-substrates[,13] - Column14<-substrates[,14] - Column15<-substrates[,15] - - spaces1<-sum((Column1%in% "")) - spaces2<-sum(Column2%in% "") - spaces3<-sum(Column3%in% "") - spaces4<-sum(Column4%in% "") - spaces5<-sum(Column5%in% "") - spaces6<-sum(Column6%in% "") - spaces7<-sum(Column7%in% "") - spaces8<-sum(Column8%in% "") - spaces9<-sum(Column9%in% "") - spaces10<-sum(Column10%in% "") - spaces11<-sum(Column11%in% "") - spaces12<-sum(Column12%in% "") - spaces13<-sum(Column13%in% "") - spaces14<-sum(Column14%in% "") - spaces15<-sum(Column15%in% "") - - A1<-sum(Column1 %in% "A") - A2<-sum(Column2 %in% "A") - A3<-sum(Column3 %in% "A") - A4<-sum(Column4 %in% "A") - A5<-sum(Column5 %in% "A") - A6<-sum(Column6 %in% "A") - A7<-sum(Column7 %in% "A") - A8<-sum(Column8 %in% "A") - A9<-sum(Column9 %in% "A") - A10<-sum(Column10 %in% "A") - A11<-sum(Column11 %in% "A") - A12<-sum(Column12 %in% "A") - A13<-sum(Column13 %in% "A") - A14<-sum(Column14 %in% "A") - A15<-sum(Column15 %in% "A") - AllAs<-cbind(A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15) - - C1<-sum(Column1 %in% "C") - C2<-sum(Column2 %in% "C") - C3<-sum(Column3 %in% "C") - C4<-sum(Column4 %in% "C") - C5<-sum(Column5 %in% "C") - C6<-sum(Column6 %in% "C") - C7<-sum(Column7 %in% "C") - C8<-sum(Column8 %in% "C") - C9<-sum(Column9 %in% "C") - C10<-sum(Column10 %in% "C") - C11<-sum(Column11 %in% "C") - C12<-sum(Column12 %in% "C") - C13<-sum(Column13 %in% "C") - C14<-sum(Column14 %in% "C") - C15<-sum(Column15 %in% "C") - CllCs<-cbind(C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15) - - D1<-sum(Column1 %in% "D") - D2<-sum(Column2 %in% "D") - D3<-sum(Column3 %in% "D") - D4<-sum(Column4 %in% "D") - D5<-sum(Column5 %in% "D") - D6<-sum(Column6 %in% "D") - D7<-sum(Column7 %in% "D") - D8<-sum(Column8 %in% "D") - D9<-sum(Column9 %in% "D") - D10<-sum(Column10 %in% "D") - D11<-sum(Column11 %in% "D") - D12<-sum(Column12 %in% "D") - D13<-sum(Column13 %in% "D") - D14<-sum(Column14 %in% "D") - D15<-sum(Column15 %in% "D") - DllDs<-cbind(D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15) - - E1<-sum(Column1 %in% "E") - E2<-sum(Column2 %in% "E") - E3<-sum(Column3 %in% "E") - E4<-sum(Column4 %in% "E") - E5<-sum(Column5 %in% "E") - E6<-sum(Column6 %in% "E") - E7<-sum(Column7 %in% "E") - E8<-sum(Column8 %in% "E") - E9<-sum(Column9 %in% "E") - E10<-sum(Column10 %in% "E") - E11<-sum(Column11 %in% "E") - E12<-sum(Column12 %in% "E") - E13<-sum(Column13 %in% "E") - E14<-sum(Column14 %in% "E") - E15<-sum(Column15 %in% "E") - EllEs<-cbind(E1,E2,E3,E4,E5,E6,E7,E8,E9,E10,E11,E12,E13,E14,E15) - - F1<-sum(Column1 %in% "F") - F2<-sum(Column2 %in% "F") - F3<-sum(Column3 %in% "F") - F4<-sum(Column4 %in% "F") - F5<-sum(Column5 %in% "F") - F6<-sum(Column6 %in% "F") - F7<-sum(Column7 %in% "F") - F8<-sum(Column8 %in% "F") - F9<-sum(Column9 %in% "F") - F10<-sum(Column10 %in% "F") - F11<-sum(Column11 %in% "F") - F12<-sum(Column12 %in% "F") - F13<-sum(Column13 %in% "F") - F14<-sum(Column14 %in% "F") - F15<-sum(Column15 %in% "F") - FllFs<-cbind(F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15) - - G1<-sum(Column1 %in% "G") - G2<-sum(Column2 %in% "G") - G3<-sum(Column3 %in% "G") - G4<-sum(Column4 %in% "G") - G5<-sum(Column5 %in% "G") - G6<-sum(Column6 %in% "G") - G7<-sum(Column7 %in% "G") - G8<-sum(Column8 %in% "G") - G9<-sum(Column9 %in% "G") - G10<-sum(Column10 %in% "G") - G11<-sum(Column11 %in% "G") - G12<-sum(Column12 %in% "G") - G13<-sum(Column13 %in% "G") - G14<-sum(Column14 %in% "G") - G15<-sum(Column15 %in% "G") - GllGs<-cbind(G1,G2,G3,G4,G5,G6,G7,G8,G9,G10,G11,G12,G13,G14,G15) - - H1<-sum(Column1 %in% "H") - H2<-sum(Column2 %in% "H") - H3<-sum(Column3 %in% "H") - H4<-sum(Column4 %in% "H") - H5<-sum(Column5 %in% "H") - H6<-sum(Column6 %in% "H") - H7<-sum(Column7 %in% "H") - H8<-sum(Column8 %in% "H") - H9<-sum(Column9 %in% "H") - H10<-sum(Column10 %in% "H") - H11<-sum(Column11 %in% "H") - H12<-sum(Column12 %in% "H") - H13<-sum(Column13 %in% "H") - H14<-sum(Column14 %in% "H") - H15<-sum(Column15 %in% "H") - HllHs<-cbind(H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,H14,H15) - - I1<-sum(Column1 %in% "I") - I2<-sum(Column2 %in% "I") - I3<-sum(Column3 %in% "I") - I4<-sum(Column4 %in% "I") - I5<-sum(Column5 %in% "I") - I6<-sum(Column6 %in% "I") - I7<-sum(Column7 %in% "I") - I8<-sum(Column8 %in% "I") - I9<-sum(Column9 %in% "I") - I10<-sum(Column10 %in% "I") - I11<-sum(Column11 %in% "I") - I12<-sum(Column12 %in% "I") - I13<-sum(Column13 %in% "I") - I14<-sum(Column14 %in% "I") - I15<-sum(Column15 %in% "I") - IllIs<-cbind(I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,I14,I15) - - K1<-sum(Column1 %in% "K") - K2<-sum(Column2 %in% "K") - K3<-sum(Column3 %in% "K") - K4<-sum(Column4 %in% "K") - K5<-sum(Column5 %in% "K") - K6<-sum(Column6 %in% "K") - K7<-sum(Column7 %in% "K") - K8<-sum(Column8 %in% "K") - K9<-sum(Column9 %in% "K") - K10<-sum(Column10 %in% "K") - K11<-sum(Column11 %in% "K") - K12<-sum(Column12 %in% "K") - K13<-sum(Column13 %in% "K") - K14<-sum(Column14 %in% "K") - K15<-sum(Column15 %in% "K") - KllKs<-cbind(K1,K2,K3,K4,K5,K6,K7,K8,K9,K10,K11,K12,K13,K14,K15) - - L1<-sum(Column1 %in% "L") - L2<-sum(Column2 %in% "L") - L3<-sum(Column3 %in% "L") - L4<-sum(Column4 %in% "L") - L5<-sum(Column5 %in% "L") - L6<-sum(Column6 %in% "L") - L7<-sum(Column7 %in% "L") - L8<-sum(Column8 %in% "L") - L9<-sum(Column9 %in% "L") - L10<-sum(Column10 %in% "L") - L11<-sum(Column11 %in% "L") - L12<-sum(Column12 %in% "L") - L13<-sum(Column13 %in% "L") - L14<-sum(Column14 %in% "L") - L15<-sum(Column15 %in% "L") - LllLs<-cbind(L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15) - - M1<-sum(Column1 %in% "M") - M2<-sum(Column2 %in% "M") - M3<-sum(Column3 %in% "M") - M4<-sum(Column4 %in% "M") - M5<-sum(Column5 %in% "M") - M6<-sum(Column6 %in% "M") - M7<-sum(Column7 %in% "M") - M8<-sum(Column8 %in% "M") - M9<-sum(Column9 %in% "M") - M10<-sum(Column10 %in% "M") - M11<-sum(Column11 %in% "M") - M12<-sum(Column12 %in% "M") - M13<-sum(Column13 %in% "M") - M14<-sum(Column14 %in% "M") - M15<-sum(Column15 %in% "M") - MllMs<-cbind(M1,M2,M3,M4,M5,M6,M7,M8,M9,M10,M11,M12,M13,M14,M15) - - N1<-sum(Column1 %in% "N") - N2<-sum(Column2 %in% "N") - N3<-sum(Column3 %in% "N") - N4<-sum(Column4 %in% "N") - N5<-sum(Column5 %in% "N") - N6<-sum(Column6 %in% "N") - N7<-sum(Column7 %in% "N") - N8<-sum(Column8 %in% "N") - N9<-sum(Column9 %in% "N") - N10<-sum(Column10 %in% "N") - N11<-sum(Column11 %in% "N") - N12<-sum(Column12 %in% "N") - N13<-sum(Column13 %in% "N") - N14<-sum(Column14 %in% "N") - N15<-sum(Column15 %in% "N") - NllNs<-cbind(N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12,N13,N14,N15) - - P1<-sum(Column1 %in% "P") - P2<-sum(Column2 %in% "P") - P3<-sum(Column3 %in% "P") - P4<-sum(Column4 %in% "P") - P5<-sum(Column5 %in% "P") - P6<-sum(Column6 %in% "P") - P7<-sum(Column7 %in% "P") - P8<-sum(Column8 %in% "P") - P9<-sum(Column9 %in% "P") - P10<-sum(Column10 %in% "P") - P11<-sum(Column11 %in% "P") - P12<-sum(Column12 %in% "P") - P13<-sum(Column13 %in% "P") - P14<-sum(Column14 %in% "P") - P15<-sum(Column15 %in% "P") - PllPs<-cbind(P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15) - - Q1<-sum(Column1 %in% "Q") - Q2<-sum(Column2 %in% "Q") - Q3<-sum(Column3 %in% "Q") - Q4<-sum(Column4 %in% "Q") - Q5<-sum(Column5 %in% "Q") - Q6<-sum(Column6 %in% "Q") - Q7<-sum(Column7 %in% "Q") - Q8<-sum(Column8 %in% "Q") - Q9<-sum(Column9 %in% "Q") - Q10<-sum(Column10 %in% "Q") - Q11<-sum(Column11 %in% "Q") - Q12<-sum(Column12 %in% "Q") - Q13<-sum(Column13 %in% "Q") - Q14<-sum(Column14 %in% "Q") - Q15<-sum(Column15 %in% "Q") - QllQs<-cbind(Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15) - - R1<-sum(Column1 %in% "R") - R2<-sum(Column2 %in% "R") - R3<-sum(Column3 %in% "R") - R4<-sum(Column4 %in% "R") - R5<-sum(Column5 %in% "R") - R6<-sum(Column6 %in% "R") - R7<-sum(Column7 %in% "R") - R8<-sum(Column8 %in% "R") - R9<-sum(Column9 %in% "R") - R10<-sum(Column10 %in% "R") - R11<-sum(Column11 %in% "R") - R12<-sum(Column12 %in% "R") - R13<-sum(Column13 %in% "R") - R14<-sum(Column14 %in% "R") - R15<-sum(Column15 %in% "R") - RllRs<-cbind(R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,R13,R14,R15) - - S1<-sum(Column1 %in% "S") - S2<-sum(Column2 %in% "S") - S3<-sum(Column3 %in% "S") - S4<-sum(Column4 %in% "S") - S5<-sum(Column5 %in% "S") - S6<-sum(Column6 %in% "S") - S7<-sum(Column7 %in% "S") - S8<-sum(Column8 %in% "S") - S9<-sum(Column9 %in% "S") - S10<-sum(Column10 %in% "S") - S11<-sum(Column11 %in% "S") - S12<-sum(Column12 %in% "S") - S13<-sum(Column13 %in% "S") - S14<-sum(Column14 %in% "S") - S15<-sum(Column15 %in% "S") - SllSs<-cbind(S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15) - - T1<-sum(Column1 %in% "T") - T2<-sum(Column2 %in% "T") - T3<-sum(Column3 %in% "T") - T4<-sum(Column4 %in% "T") - T5<-sum(Column5 %in% "T") - T6<-sum(Column6 %in% "T") - T7<-sum(Column7 %in% "T") - T8<-sum(Column8 %in% "T") - T9<-sum(Column9 %in% "T") - T10<-sum(Column10 %in% "T") - T11<-sum(Column11 %in% "T") - T12<-sum(Column12 %in% "T") - T13<-sum(Column13 %in% "T") - T14<-sum(Column14 %in% "T") - T15<-sum(Column15 %in% "T") - TllTs<-cbind(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15) - - V1<-sum(Column1 %in% "V") - V2<-sum(Column2 %in% "V") - V3<-sum(Column3 %in% "V") - V4<-sum(Column4 %in% "V") - V5<-sum(Column5 %in% "V") - V6<-sum(Column6 %in% "V") - V7<-sum(Column7 %in% "V") - V8<-sum(Column8 %in% "V") - V9<-sum(Column9 %in% "V") - V10<-sum(Column10 %in% "V") - V11<-sum(Column11 %in% "V") - V12<-sum(Column12 %in% "V") - V13<-sum(Column13 %in% "V") - V14<-sum(Column14 %in% "V") - V15<-sum(Column15 %in% "V") - VllVs<-cbind(V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15) - - W1<-sum(Column1 %in% "W") - W2<-sum(Column2 %in% "W") - W3<-sum(Column3 %in% "W") - W4<-sum(Column4 %in% "W") - W5<-sum(Column5 %in% "W") - W6<-sum(Column6 %in% "W") - W7<-sum(Column7 %in% "W") - W8<-sum(Column8 %in% "W") - W9<-sum(Column9 %in% "W") - W10<-sum(Column10 %in% "W") - W11<-sum(Column11 %in% "W") - W12<-sum(Column12 %in% "W") - W13<-sum(Column13 %in% "W") - W14<-sum(Column14 %in% "W") - W15<-sum(Column15 %in% "W") - WllWs<-cbind(W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14,W15) - - Y1<-sum(Column1 %in% "Y") - Y2<-sum(Column2 %in% "Y") - Y3<-sum(Column3 %in% "Y") - Y4<-sum(Column4 %in% "Y") - Y5<-sum(Column5 %in% "Y") - Y6<-sum(Column6 %in% "Y") - Y7<-sum(Column7 %in% "Y") - Y8<-sum(Column8 %in% "Y") - Y9<-sum(Column9 %in% "Y") - Y10<-sum(Column10 %in% "Y") - Y11<-sum(Column11 %in% "Y") - Y12<-sum(Column12 %in% "Y") - Y13<-sum(Column13 %in% "Y") - Y14<-sum(Column14 %in% "Y") - Y15<-sum(Column15 %in% "Y") - YllYs<-cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y8,Y9,Y10,Y11,Y12,Y13,Y14,Y15) - PositionTable<-rbind(AllAs,CllCs,DllDs,EllEs,FllFs,GllGs,HllHs,IllIs,KllKs,LllLs,MllMs,NllNs,PllPs,QllQs,RllRs,SllSs,TllTs,VllVs,WllWs,YllYs) -} -#endogenous prob matrix is AA position over subbackfreqmean -dim(PositionTable) -EPMtable<-PositionTable -# EPMtable[1,]<-(PositionTable[1,]/(PositionTable[1,]*.01*Amean)) -# EPMtable[2,]<-(PositionTable[2,]/(PositionTable[2,]*.01*Cmean)) -# EPMtable[3,]<-(PositionTable[3,]/(PositionTable[3,]*.01*Dmean)) -# EPMtable[4,]<-(PositionTable[4,]/(PositionTable[4,]*.01*Emean)) -# EPMtable[5,]<-(PositionTable[5,]/(PositionTable[5,]*.01*Fmean)) -# EPMtable[6,]<-(PositionTable[6,]/(PositionTable[6,]*.01*Gmean)) -# EPMtable[7,]<-(PositionTable[7,]/(PositionTable[7,]*.01*Hmean)) -# EPMtable[8,]<-(PositionTable[8,]/(PositionTable[8,]*.01*Imean)) -# EPMtable[9,]<-(PositionTable[9,]/(PositionTable[9,]*.01*Kmean)) -# EPMtable[10,]<-(PositionTable[10,]/(PositionTable[10,]*.01*Lmean)) -# EPMtable[11,]<-(PositionTable[11,]/(PositionTable[11,]*.01*Mmean)) -# EPMtable[12,]<-(PositionTable[12,]/(PositionTable[12,]*.01*Nmean)) -# EPMtable[13,]<-(PositionTable[13,]/(PositionTable[13,]*.01*Pmean)) -# EPMtable[14,]<-(PositionTable[14,]/(PositionTable[14,]*.01*Qmean)) -# EPMtable[15,]<-(PositionTable[15,]/(PositionTable[15,]*.01*Rmean)) -# EPMtable[16,]<-(PositionTable[16,]/(PositionTable[16,]*.01*Smean)) -# EPMtable[17,]<-(PositionTable[17,]/(PositionTable[17,]*.01*Tmean)) -# EPMtable[18,]<-(PositionTable[18,]/(PositionTable[18,]*.01*Vmean)) -# EPMtable[19,]<-(PositionTable[19,]/(PositionTable[19,]*.01*Wmean)) -# EPMtable[20,]<-(PositionTable[20,]/(PositionTable[20,]*.01*Ymean)) - -columns<-c(length(Column1)-sum(Column1==""), - length(Column2)-sum(Column2==""), - length(Column3)-sum(Column3==""), - length(Column4)-sum(Column4==""), - length(Column5)-sum(Column5==""), - length(Column6)-sum(Column6==""), - length(Column7)-sum(Column7==""), - length(Column8)-sum(Column8==""), - length(Column9)-sum(Column9==""), - length(Column10)-sum(Column10==""), - length(Column11)-sum(Column11==""), - length(Column12)-sum(Column12==""), - length(Column13)-sum(Column13==""), - length(Column14)-sum(Column14==""), - length(Column15)-sum(Column15=="")) - -for (z in 1:15) { - for (y in 1:20) { - if (PositionTable[y,z]>0){ - EPMtable[y,z]<-PositionTable[y,z]/((columns[z]*.01*AllMeans[y])) - } - if (PositionTable[y,z]==0){ - EPMtable[y,z]<-(1/columns[z])/((columns[z]*.01*AllMeans[y])) - } - } -} -#here I created the endogenous probability matrix -#now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs - - - - - -# write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE) -# write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE) -# write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE) -# write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE) -# write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE) - -NormalizationScore<-c("Normalization Score",NormalizationScore) - -write.table(x=c("SD Table"),file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE) -write.table(SDtable,file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE) -write.table(x=c("Percent Table"),file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE) -write.table(PercentTable,file=FILENAME, append = TRUE,sep=",",row.names = FALSE, col.names = FALSE) - -EPMtableu<-EPMtable -HeaderSD<-c(-7:7) -EPMtableu<-rbind(HeaderSD,EPMtableu) -row.names(EPMtableu)<-NULL -EPMtableu<-data.frame(SetOfAAs,EPMtableu) - -write.table("Site Selectivity Matrix", file = FILENAME2, append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE) -SelectivityHeader=matrix(data = c("Position",-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7),nrow = 1) -head<-matrix(data=rep(" ",times=16),nrow = 1) -SelectivityHeader<-rbind(head,SelectivityHeader) - -write.table(SelectivityHeader, file = FILENAME2, append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE) -#colnames(SelectivitySheet)<-c("-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7") -write.table(SelectivitySheet,file = FILENAME2, append = TRUE,sep = ",",row.names = TRUE, col.names = FALSE) -write.table(x=c("Endogenous Probability Matrix"),file=FILENAME2,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE) -write.table(EPMtableu,file = FILENAME2, append = TRUE,sep = ",",row.names = FALSE, col.names = FALSE) -write.table(NormalizationScore, file = FILENAME2, append = TRUE,sep = ",",row.names = FALSE, col.names = FALSE) -