changeset 10:de59605e960a draft

Uploaded
author jfb
date Thu, 08 Feb 2018 14:51:06 -0500
parents f80306fc5d69
children a36f9cce16a3
files Kinatest-R_part1.R
diffstat 1 files changed, 1114 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Kinatest-R_part1.R	Thu Feb 08 14:51:06 2018 -0500
@@ -0,0 +1,1114 @@
+
+# Input tables. "input1", "input2" and "input3" are passed to read.csv() as
+# literal file names; strings are kept as character, not factor.
+ImportedSubstrateList <- read.csv(file = "input1", stringsAsFactors = FALSE)
+NegativeSubstrateList <- read.csv(file = "input2", stringsAsFactors = FALSE)
+SubstrateBackgroundFrequency <- read.csv(file = "input3", stringsAsFactors = FALSE)
+
+# Name constants. They are only defined here; where they are consumed is not
+# visible in this part of the script.
+ScreenerFilename <- "screener"
+
+FILENAME <- "output1.csv"
+FILENAME2 <- "output2.csv"
+FILENAME3 <- "output3.csv"
+
+OutputMatrix <- "KinaseMatrix.csv"
+CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
+# NOTE(review): the name SDtable is reassigned to a numeric matrix further
+# down, so this string does not survive past that point.
+SDtable <- "SDtableforthisKinase"
+SiteSelectivityTable <- "SiteSelectivityForThisKinase"
+
+
+
+# Build `substrates`: a character matrix with 15 columns (taken from columns
+# 4:18 of ImportedSubstrateList) and one row for every input row after the
+# first, which is skipped. Column 8 is forced to "Y" in every row.
+#
+# seq_len(...)[-1] replaces 2:nrow(...): with fewer than two input rows the
+# old range counted backwards (2, 1) and indexed outside the matrix; now the
+# loop simply does not run and `substrates` is an empty 0 x 15 matrix.
+substrates <- matrix("A", nrow = max(nrow(ImportedSubstrateList) - 1, 0), ncol = 15)
+
+for (i in seq_len(nrow(ImportedSubstrateList))[-1])
+{
+  substratemotif <- ImportedSubstrateList[i, 4:18]
+  substratemotif[8] <- "Y"
+  # input row i lands in matrix row i - 1 because the first row is skipped
+  j <- i - 1
+  substratemotif <- unlist(substratemotif)
+  substrates[j, 1:15] <- substratemotif
+}
+
+# SpacesToOs<-c(""="O",)
+# substrates<-SpacesToOs[substrates]
+
+# Top-level expression kept as in the original: it evaluates the last row of
+# column 2 of the background table without storing the value.
+SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency),2]
+
+# Mean and standard deviation of background-table columns 2..21, one column
+# per amino-acid letter in the order below. Each column is coerced with
+# as.numeric() and NA entries are ignored.
+backgroundLetters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
+                       "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
+
+# Letter k lives in column k + 1 of the background table.
+backgroundColumn <- function(letterIndex) {
+  as.numeric(SubstrateBackgroundFrequency[[letterIndex + 1]])
+}
+
+AllMeans <- vapply(seq_along(backgroundLetters), function(letterIndex) {
+  mean(backgroundColumn(letterIndex), na.rm = TRUE)
+}, numeric(1))
+
+#this is subbackfreq SDs
+AllSDs <- vapply(seq_along(backgroundLetters), function(letterIndex) {
+  sd(backgroundColumn(letterIndex), na.rm = TRUE)
+}, numeric(1))
+
+# The per-letter scalars (Amean, Asd, Cmean, Csd, ...) are still created
+# because code further down refers to them by name.
+for (letterIndex in seq_along(backgroundLetters)) {
+  assign(paste0(backgroundLetters[letterIndex], "mean"), AllMeans[letterIndex])
+  assign(paste0(backgroundLetters[letterIndex], "sd"), AllSDs[letterIndex])
+}
+
+SBF_statisticalvalues <- cbind(AllMeans, AllSDs)
+
+#create the percent table
+# For every amino acid and every one of the 15 columns of `substrates`,
+# compute the share of non-blank entries in that column equal to that amino
+# acid. Blank means the empty string "".
+#
+# All the globals the long-hand version produced are still created, since
+# later code may refer to them by name:
+#   Column1..Column15  - the columns of `substrates`
+#   spaces1..spaces15  - number of "" entries per column
+#   A1..A15, C1..C15, ..., Y1..Y15 - the individual fractions
+#   AllAs, CllCs, ..., YllYs       - 1 x 15 matrices, columns named <letter><position>
+percentLetters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
+                    "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
+motifColumns <- 1:15
+
+for (columnIndex in motifColumns) {
+  assign(paste0("Column", columnIndex), substrates[, columnIndex])
+  assign(paste0("spaces", columnIndex), sum(substrates[, columnIndex] %in% ""))
+}
+
+# Denominator per column: entries that are not blank.
+occupiedPerColumn <- vapply(motifColumns, function(columnIndex) {
+  length(substrates[, columnIndex]) - sum(substrates[, columnIndex] %in% "")
+}, integer(1))
+
+for (letter in percentLetters) {
+  letterFractions <- vapply(motifColumns, function(columnIndex) {
+    sum(substrates[, columnIndex] %in% letter) / occupiedPerColumn[columnIndex]
+  }, numeric(1))
+
+  for (columnIndex in motifColumns) {
+    assign(paste0(letter, columnIndex), letterFractions[columnIndex])
+  }
+
+  # Row objects are called AllAs for "A" and <X>ll<X>s for every other letter.
+  rowObjectName <- if (letter == "A") "AllAs" else paste0(letter, "ll", letter, "s")
+  assign(
+    rowObjectName,
+    matrix(letterFractions, nrow = 1,
+           dimnames = list(NULL, paste0(letter, motifColumns)))
+  )
+}
+#this is substrate percents
+
+# Row order: A C D E F G H I K L M N P Q R S T V W Y
+# Fractions are scaled to percentages.
+PercentTable <- rbind(AllAs, CllCs, DllDs, EllEs, FllFs, GllGs, HllHs, IllIs,
+                      KllKs, LllLs, MllMs, NllNs, PllPs, QllQs, RllRs, SllSs,
+                      TllTs, VllVs, WllWs, YllYs) * 100
+
+#create the SD table
+# For every row: the percentage minus the background mean of the same amino
+# acid, divided by that amino acid's background SD.
+# AllMeans and AllSDs hold one entry per row of PercentTable (20), so the
+# column-major recycling pairs entry j with row j in every column. unname()
+# drops the column names inherited from PercentTable, leaving a bare
+# 20 x 15 numeric matrix.
+SDtable <- unname((PercentTable - AllMeans) / AllSDs)
+
+
+# Labels used further down as the first column of the output tables: a
+# "Letter" header cell followed by the 20 amino acids, in the same order as
+# the rows of SDtable.
+SetOfAAs<-c("Letter","A","C","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y")
+
+
+# For each of the 15 motif columns, count the substrates whose residue in that
+# column is one of the amino acids with an SDtable value above 2 there.
+#
+# SDtable row j belongs to SetOfAAs[j + 1], because entry 1 of SetOfAAs is the
+# "Letter" header. Looking the letter up with j alone compared against the
+# previous amino acid (and against "Letter" for row 1), so the count was taken
+# for the wrong residue.
+# which() skips NA/NaN values in SDtable instead of stopping on them, and
+# %in% keeps the count defined when `substrates` holds NA.
+SumOfSigmaAAs <- vapply(1:15, function(i) {
+  enriched <- which(SDtable[seq_len(20), i] > 2)
+  as.numeric(sum(substrates[, i] %in% SetOfAAs[enriched + 1]))
+}, numeric(1))
+
+# AAs1<-length(substrates[,1])-sum(substrates[,1]=="")
+# AAs2<-length(substrates[,2])-sum(substrates[,2]=="")
+# AAs3<-length(substrates[,3])-sum(substrates[,3]=="")
+# AAs4<-length(substrates[,4])-sum(substrates[,4]=="")
+# AAs5<-length(substrates[,5])-sum(substrates[,5]=="")
+# AAs6<-length(substrates[,6])-sum(substrates[,6]=="")
+# AAs7<-length(substrates[,7])-sum(substrates[,7]=="")
+# AAs8<-length(substrates[,8])-sum(substrates[,8]=="")
+# AAs9<-length(substrates[,9])-sum(substrates[,9]=="")
+# 
+# 
+# 
+# #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9)
+# AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
+#                   length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
+#                   length(substrates[,9]))
+
+# Expected count per motif column: the background means (percent) of the amino
+# acids with an SDtable value above 2 in that column, summed, then scaled by
+# the number of non-blank entries in the column and divided by 100.
+#
+# which() drops NA/NaN values in SDtable; the previous scalar
+# `if (SDtable[j,i] > 2)` stopped the script on the first one.
+SumOfExpectedSigmaAAs <- vapply(1:15, function(i) {
+  enriched <- which(SDtable[, i] > 2)
+  occupied <- length(substrates[, i]) - sum(substrates[, i] %in% "")
+  sum(AllMeans[enriched]) * occupied / 100
+}, numeric(1))
+
+# Observed over expected, per column. A column with no enriched amino acid
+# has an expected count of 0, so its ratio is NaN or Inf.
+SelectivityRow <- SumOfSigmaAAs / SumOfExpectedSigmaAAs
+SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)
+
+# The label vector becomes a one-column matrix so it can lead both tables.
+SetOfAAs <- matrix(SetOfAAs, ncol = 1)
+
+# SDtableu keeps the bare numeric matrix before SDtable is turned into a
+# labelled data frame.
+SDtableu <- SDtable
+
+# Header row -7..7, one value per motif column.
+HeaderSD <- -7:7
+
+# Put the header row on top of a table, clear the row names rbind() added,
+# and prefix the label column.
+withHeaderAndLabels <- function(tableBody) {
+  stacked <- rbind(HeaderSD, tableBody)
+  rownames(stacked) <- NULL
+  data.frame(SetOfAAs, stacked)
+}
+
+SDtable <- withHeaderAndLabels(SDtableu)
+PercentTable <- withHeaderAndLabels(PercentTable)
+
+# Coerce a column to numeric and drop the entries that come out as NA.
+numericWithoutNA <- function(values) {
+  values <- as.numeric(values)
+  values[!is.na(values)]
+}
+
+numberofY <- numericWithoutNA(SubstrateBackgroundFrequency$Number.of.Y)
+
+numberofPY <- numericWithoutNA(SubstrateBackgroundFrequency$Number.of.pY)
+
+# Ratio of the summed Number.of.pY column to the summed Number.of.Y column.
+NormalizationScore <- sum(numberofPY) / sum(numberofY)
+
+# positions<-matrix(data = NA, nrow=20,ncol = 15)
+# 
+# #column1
+# 
+# for (q in 1:15) {
+#   sA<-sum(substrates[,i]=="A")
+#   positions[1,i]<-sA
+#   sC<-sum(substrates[,i]=="C")
+#   positions[2,i]<-sC
+#   sD<-sum(substrates[,i]=="D")
+#   positions[3,i]<-sD
+#   sE<-sum(substrates[,i]=="E")
+#   positions[4,i]<-sE
+#   sF<-sum(substrates[,i]=="F")
+#   sG<-sum(substrates[,i]=="G")
+#   sH<-sum(substrates[,i]=="H")
+#   sI<-sum(substrates[,i]=="I")
+#   sK<-sum(substrates[,i]=="K")
+#   sL<-sum(substrates[,i]=="L")
+#   sM<-sum(substrates[,i]=="M")
+#   sN<-sum(substrates[,i]=="N")
+#   sP<-sum(substrates[,i]=="P")
+#   sQ<-sum(substrates[,i]=="Q")
+#   sR<-sum(substrates[,i]=="R")
+#   sS<-sum(substrates[,i]=="S")
+#   sT<-sum(substrates[,i]=="T")
+#   sV<-sum(substrates[,i]=="V")
+#   sW<-sum(substrates[,i]=="W")
+#   sY<-sum(substrates[,i]=="Y")
+#   positions[5,i]<-sF
+#   positions[6,i]<-sG
+#   positions[7,i]<-sH
+#   positions[8,i]<-sI
+#   positions[9,i]<-sK
+#   positions[10,i]<-sL
+#   positions[11,i]<-sM
+#   positions[12,i]<-sN
+#   positions[13,i]<-sP
+#   positions[14,i]<-sQ
+#   positions[15,i]<-sR
+#   positions[16,i]<-sS
+#   positions[17,i]<-sT
+#   positions[18,i]<-sV
+#   positions[19,i]<-sW
+#   positions[20,i]<-sY
+# }
+
+#here I create the positions table, which is needed for the endogenous prob matrix and is simply "how many times did an AA show up in this spot?"
+if (6==6){
+  Column1<-substrates[,1]
+  Column2<-substrates[,2]
+  Column3<-substrates[,3]
+  Column4<-substrates[,4]
+  Column5<-substrates[,5]
+  Column6<-substrates[,6]
+  Column7<-substrates[,7]
+  Column8<-substrates[,8]
+  Column9<-substrates[,9]
+  Column10<-substrates[,10]
+  Column11<-substrates[,11]
+  Column12<-substrates[,12]
+  Column13<-substrates[,13]
+  Column14<-substrates[,14]
+  Column15<-substrates[,15]
+  
+  spaces1<-sum((Column1%in% ""))
+  spaces2<-sum(Column2%in% "")
+  spaces3<-sum(Column3%in% "")
+  spaces4<-sum(Column4%in% "")
+  spaces5<-sum(Column5%in% "")
+  spaces6<-sum(Column6%in% "")
+  spaces7<-sum(Column7%in% "")
+  spaces8<-sum(Column8%in% "")
+  spaces9<-sum(Column9%in% "")
+  spaces10<-sum(Column10%in% "")
+  spaces11<-sum(Column11%in% "")
+  spaces12<-sum(Column12%in% "")
+  spaces13<-sum(Column13%in% "")
+  spaces14<-sum(Column14%in% "")
+  spaces15<-sum(Column15%in% "")
+  
+  A1<-sum(Column1 %in% "A")
+  A2<-sum(Column2 %in% "A")
+  A3<-sum(Column3 %in% "A")
+  A4<-sum(Column4 %in% "A")
+  A5<-sum(Column5 %in% "A")
+  A6<-sum(Column6 %in% "A")
+  A7<-sum(Column7 %in% "A")
+  A8<-sum(Column8 %in% "A")
+  A9<-sum(Column9 %in% "A")
+  A10<-sum(Column10 %in% "A")
+  A11<-sum(Column11 %in% "A")
+  A12<-sum(Column12 %in% "A")
+  A13<-sum(Column13 %in% "A")
+  A14<-sum(Column14 %in% "A")
+  A15<-sum(Column15 %in% "A")
+  AllAs<-cbind(A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15)
+  
+  C1<-sum(Column1 %in% "C")
+  C2<-sum(Column2 %in% "C")
+  C3<-sum(Column3 %in% "C")
+  C4<-sum(Column4 %in% "C")
+  C5<-sum(Column5 %in% "C")
+  C6<-sum(Column6 %in% "C")
+  C7<-sum(Column7 %in% "C")
+  C8<-sum(Column8 %in% "C")
+  C9<-sum(Column9 %in% "C")  
+  C10<-sum(Column10 %in% "C")
+  C11<-sum(Column11 %in% "C")
+  C12<-sum(Column12 %in% "C")
+  C13<-sum(Column13 %in% "C")
+  C14<-sum(Column14 %in% "C")
+  C15<-sum(Column15 %in% "C")
+  CllCs<-cbind(C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15)
+  
+  D1<-sum(Column1 %in% "D")
+  D2<-sum(Column2 %in% "D")
+  D3<-sum(Column3 %in% "D")
+  D4<-sum(Column4 %in% "D")
+  D5<-sum(Column5 %in% "D")
+  D6<-sum(Column6 %in% "D")
+  D7<-sum(Column7 %in% "D")
+  D8<-sum(Column8 %in% "D")
+  D9<-sum(Column9 %in% "D")
+  D10<-sum(Column10 %in% "D")
+  D11<-sum(Column11 %in% "D")
+  D12<-sum(Column12 %in% "D")
+  D13<-sum(Column13 %in% "D")
+  D14<-sum(Column14 %in% "D")
+  D15<-sum(Column15 %in% "D")
+  DllDs<-cbind(D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15)
+  
+  E1<-sum(Column1 %in% "E")
+  E2<-sum(Column2 %in% "E")
+  E3<-sum(Column3 %in% "E")
+  E4<-sum(Column4 %in% "E")
+  E5<-sum(Column5 %in% "E")
+  E6<-sum(Column6 %in% "E")
+  E7<-sum(Column7 %in% "E")
+  E8<-sum(Column8 %in% "E")
+  E9<-sum(Column9 %in% "E")
+  E10<-sum(Column10 %in% "E")
+  E11<-sum(Column11 %in% "E")
+  E12<-sum(Column12 %in% "E")
+  E13<-sum(Column13 %in% "E")
+  E14<-sum(Column14 %in% "E")
+  E15<-sum(Column15 %in% "E")
+  EllEs<-cbind(E1,E2,E3,E4,E5,E6,E7,E8,E9,E10,E11,E12,E13,E14,E15)
+  
+  F1<-sum(Column1 %in% "F")
+  F2<-sum(Column2 %in% "F")
+  F3<-sum(Column3 %in% "F")
+  F4<-sum(Column4 %in% "F")
+  F5<-sum(Column5 %in% "F")
+  F6<-sum(Column6 %in% "F")
+  F7<-sum(Column7 %in% "F")
+  F8<-sum(Column8 %in% "F")
+  F9<-sum(Column9 %in% "F")
+  F10<-sum(Column10 %in% "F")
+  F11<-sum(Column11 %in% "F")
+  F12<-sum(Column12 %in% "F")
+  F13<-sum(Column13 %in% "F")
+  F14<-sum(Column14 %in% "F")
+  F15<-sum(Column15 %in% "F")
+  FllFs<-cbind(F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15)
+  
+  G1<-sum(Column1 %in% "G")
+  G2<-sum(Column2 %in% "G")
+  G3<-sum(Column3 %in% "G")
+  G4<-sum(Column4 %in% "G")
+  G5<-sum(Column5 %in% "G")
+  G6<-sum(Column6 %in% "G")
+  G7<-sum(Column7 %in% "G")
+  G8<-sum(Column8 %in% "G")
+  G9<-sum(Column9 %in% "G")
+  G10<-sum(Column10 %in% "G")
+  G11<-sum(Column11 %in% "G")
+  G12<-sum(Column12 %in% "G")
+  G13<-sum(Column13 %in% "G")
+  G14<-sum(Column14 %in% "G")
+  G15<-sum(Column15 %in% "G")
+  GllGs<-cbind(G1,G2,G3,G4,G5,G6,G7,G8,G9,G10,G11,G12,G13,G14,G15)
+  
+  H1<-sum(Column1 %in% "H")
+  H2<-sum(Column2 %in% "H")
+  H3<-sum(Column3 %in% "H")
+  H4<-sum(Column4 %in% "H")
+  H5<-sum(Column5 %in% "H")
+  H6<-sum(Column6 %in% "H")
+  H7<-sum(Column7 %in% "H")
+  H8<-sum(Column8 %in% "H")
+  H9<-sum(Column9 %in% "H")
+  H10<-sum(Column10 %in% "H")
+  H11<-sum(Column11 %in% "H")
+  H12<-sum(Column12 %in% "H")
+  H13<-sum(Column13 %in% "H")
+  H14<-sum(Column14 %in% "H")
+  H15<-sum(Column15 %in% "H")
+  HllHs<-cbind(H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,H14,H15)
+  
+  I1<-sum(Column1 %in% "I")
+  I2<-sum(Column2 %in% "I")
+  I3<-sum(Column3 %in% "I")
+  I4<-sum(Column4 %in% "I")
+  I5<-sum(Column5 %in% "I")
+  I6<-sum(Column6 %in% "I")
+  I7<-sum(Column7 %in% "I")
+  I8<-sum(Column8 %in% "I")
+  I9<-sum(Column9 %in% "I")
+  I10<-sum(Column10 %in% "I")
+  I11<-sum(Column11 %in% "I")
+  I12<-sum(Column12 %in% "I")
+  I13<-sum(Column13 %in% "I")
+  I14<-sum(Column14 %in% "I")
+  I15<-sum(Column15 %in% "I")
+  IllIs<-cbind(I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,I14,I15)
+  
+  K1<-sum(Column1 %in% "K")
+  K2<-sum(Column2 %in% "K")
+  K3<-sum(Column3 %in% "K")
+  K4<-sum(Column4 %in% "K")
+  K5<-sum(Column5 %in% "K")
+  K6<-sum(Column6 %in% "K")
+  K7<-sum(Column7 %in% "K")
+  K8<-sum(Column8 %in% "K")
+  K9<-sum(Column9 %in% "K")
+  K10<-sum(Column10 %in% "K")
+  K11<-sum(Column11 %in% "K")
+  K12<-sum(Column12 %in% "K")
+  K13<-sum(Column13 %in% "K")
+  K14<-sum(Column14 %in% "K")
+  K15<-sum(Column15 %in% "K")
+  KllKs<-cbind(K1,K2,K3,K4,K5,K6,K7,K8,K9,K10,K11,K12,K13,K14,K15)
+  
+  L1<-sum(Column1 %in% "L")
+  L2<-sum(Column2 %in% "L")
+  L3<-sum(Column3 %in% "L")
+  L4<-sum(Column4 %in% "L")
+  L5<-sum(Column5 %in% "L")
+  L6<-sum(Column6 %in% "L")
+  L7<-sum(Column7 %in% "L")
+  L8<-sum(Column8 %in% "L")
+  L9<-sum(Column9 %in% "L")
+  L10<-sum(Column10 %in% "L")
+  L11<-sum(Column11 %in% "L")
+  L12<-sum(Column12 %in% "L")
+  L13<-sum(Column13 %in% "L")
+  L14<-sum(Column14 %in% "L")
+  L15<-sum(Column15 %in% "L")
+  LllLs<-cbind(L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15)
+  
+  M1<-sum(Column1 %in% "M")
+  M2<-sum(Column2 %in% "M")
+  M3<-sum(Column3 %in% "M")
+  M4<-sum(Column4 %in% "M")
+  M5<-sum(Column5 %in% "M")
+  M6<-sum(Column6 %in% "M")
+  M7<-sum(Column7 %in% "M")
+  M8<-sum(Column8 %in% "M")
+  M9<-sum(Column9 %in% "M")
+  M10<-sum(Column10 %in% "M")
+  M11<-sum(Column11 %in% "M")
+  M12<-sum(Column12 %in% "M")
+  M13<-sum(Column13 %in% "M")
+  M14<-sum(Column14 %in% "M")
+  M15<-sum(Column15 %in% "M")
+  MllMs<-cbind(M1,M2,M3,M4,M5,M6,M7,M8,M9,M10,M11,M12,M13,M14,M15)
+  
+  N1<-sum(Column1 %in% "N")
+  N2<-sum(Column2 %in% "N")
+  N3<-sum(Column3 %in% "N")
+  N4<-sum(Column4 %in% "N")
+  N5<-sum(Column5 %in% "N")
+  N6<-sum(Column6 %in% "N")
+  N7<-sum(Column7 %in% "N")
+  N8<-sum(Column8 %in% "N")
+  N9<-sum(Column9 %in% "N")
+  N10<-sum(Column10 %in% "N")
+  N11<-sum(Column11 %in% "N")
+  N12<-sum(Column12 %in% "N")
+  N13<-sum(Column13 %in% "N")
+  N14<-sum(Column14 %in% "N")
+  N15<-sum(Column15 %in% "N")
+  NllNs<-cbind(N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12,N13,N14,N15)
+  
+  P1<-sum(Column1 %in% "P")
+  P2<-sum(Column2 %in% "P")
+  P3<-sum(Column3 %in% "P")
+  P4<-sum(Column4 %in% "P")
+  P5<-sum(Column5 %in% "P")
+  P6<-sum(Column6 %in% "P")
+  P7<-sum(Column7 %in% "P")
+  P8<-sum(Column8 %in% "P")
+  P9<-sum(Column9 %in% "P")
+  P10<-sum(Column10 %in% "P")
+  P11<-sum(Column11 %in% "P")
+  P12<-sum(Column12 %in% "P")
+  P13<-sum(Column13 %in% "P")
+  P14<-sum(Column14 %in% "P")
+  P15<-sum(Column15 %in% "P")
+  PllPs<-cbind(P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15)
+  
+  Q1<-sum(Column1 %in% "Q")
+  Q2<-sum(Column2 %in% "Q")
+  Q3<-sum(Column3 %in% "Q")
+  Q4<-sum(Column4 %in% "Q")
+  Q5<-sum(Column5 %in% "Q")
+  Q6<-sum(Column6 %in% "Q")
+  Q7<-sum(Column7 %in% "Q")
+  Q8<-sum(Column8 %in% "Q")
+  Q9<-sum(Column9 %in% "Q")
+  Q10<-sum(Column10 %in% "Q")
+  Q11<-sum(Column11 %in% "Q")
+  Q12<-sum(Column12 %in% "Q")
+  Q13<-sum(Column13 %in% "Q")
+  Q14<-sum(Column14 %in% "Q")
+  Q15<-sum(Column15 %in% "Q")
+  QllQs<-cbind(Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15)
+  
+  R1<-sum(Column1 %in% "R")
+  R2<-sum(Column2 %in% "R")
+  R3<-sum(Column3 %in% "R")
+  R4<-sum(Column4 %in% "R")
+  R5<-sum(Column5 %in% "R")
+  R6<-sum(Column6 %in% "R")
+  R7<-sum(Column7 %in% "R")
+  R8<-sum(Column8 %in% "R")
+  R9<-sum(Column9 %in% "R")
+  R10<-sum(Column10 %in% "R")
+  R11<-sum(Column11 %in% "R")
+  R12<-sum(Column12 %in% "R")
+  R13<-sum(Column13 %in% "R")
+  R14<-sum(Column14 %in% "R")
+  R15<-sum(Column15 %in% "R")
+  RllRs<-cbind(R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,R13,R14,R15)
+  
+  S1<-sum(Column1 %in% "S")
+  S2<-sum(Column2 %in% "S")
+  S3<-sum(Column3 %in% "S")
+  S4<-sum(Column4 %in% "S")
+  S5<-sum(Column5 %in% "S")
+  S6<-sum(Column6 %in% "S")
+  S7<-sum(Column7 %in% "S")
+  S8<-sum(Column8 %in% "S")
+  S9<-sum(Column9 %in% "S")
+  S10<-sum(Column10 %in% "S")
+  S11<-sum(Column11 %in% "S")
+  S12<-sum(Column12 %in% "S")
+  S13<-sum(Column13 %in% "S")
+  S14<-sum(Column14 %in% "S")
+  S15<-sum(Column15 %in% "S")
+  SllSs<-cbind(S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15)
+  
+  T1<-sum(Column1 %in% "T")
+  T2<-sum(Column2 %in% "T")
+  T3<-sum(Column3 %in% "T")
+  T4<-sum(Column4 %in% "T")
+  T5<-sum(Column5 %in% "T")
+  T6<-sum(Column6 %in% "T")
+  T7<-sum(Column7 %in% "T")
+  T8<-sum(Column8 %in% "T")
+  T9<-sum(Column9 %in% "T")
+  T10<-sum(Column10 %in% "T")
+  T11<-sum(Column11 %in% "T")
+  T12<-sum(Column12 %in% "T")
+  T13<-sum(Column13 %in% "T")
+  T14<-sum(Column14 %in% "T")
+  T15<-sum(Column15 %in% "T")
+  TllTs<-cbind(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15)
+  
+  V1<-sum(Column1 %in% "V")
+  V2<-sum(Column2 %in% "V")
+  V3<-sum(Column3 %in% "V")
+  V4<-sum(Column4 %in% "V")
+  V5<-sum(Column5 %in% "V")
+  V6<-sum(Column6 %in% "V")
+  V7<-sum(Column7 %in% "V")
+  V8<-sum(Column8 %in% "V")
+  V9<-sum(Column9 %in% "V")
+  V10<-sum(Column10 %in% "V")
+  V11<-sum(Column11 %in% "V")
+  V12<-sum(Column12 %in% "V")
+  V13<-sum(Column13 %in% "V")
+  V14<-sum(Column14 %in% "V")
+  V15<-sum(Column15 %in% "V")
+  VllVs<-cbind(V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15)
+  
+  W1<-sum(Column1 %in% "W")
+  W2<-sum(Column2 %in% "W")
+  W3<-sum(Column3 %in% "W")
+  W4<-sum(Column4 %in% "W")
+  W5<-sum(Column5 %in% "W")
+  W6<-sum(Column6 %in% "W")
+  W7<-sum(Column7 %in% "W")
+  W8<-sum(Column8 %in% "W")
+  W9<-sum(Column9 %in% "W")
+  W10<-sum(Column10 %in% "W")
+  W11<-sum(Column11 %in% "W")
+  W12<-sum(Column12 %in% "W")
+  W13<-sum(Column13 %in% "W")
+  W14<-sum(Column14 %in% "W")
+  W15<-sum(Column15 %in% "W")
+  WllWs<-cbind(W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14,W15)
+  
+  Y1<-sum(Column1 %in% "Y")
+  Y2<-sum(Column2 %in% "Y")
+  Y3<-sum(Column3 %in% "Y")
+  Y4<-sum(Column4 %in% "Y")
+  Y5<-sum(Column5 %in% "Y")
+  Y6<-sum(Column6 %in% "Y")
+  Y7<-sum(Column7 %in% "Y")
+  Y8<-sum(Column8 %in% "Y")
+  Y9<-sum(Column9 %in% "Y")
+  Y10<-sum(Column10 %in% "Y")
+  Y11<-sum(Column11 %in% "Y")
+  Y12<-sum(Column12 %in% "Y")
+  Y13<-sum(Column13 %in% "Y")
+  Y14<-sum(Column14 %in% "Y")
+  Y15<-sum(Column15 %in% "Y")
+  YllYs<-cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y8,Y9,Y10,Y11,Y12,Y13,Y14,Y15)
+  PositionTable<-rbind(AllAs,CllCs,DllDs,EllEs,FllFs,GllGs,HllHs,IllIs,KllKs,LllLs,MllMs,NllNs,PllPs,QllQs,RllRs,SllSs,TllTs,VllVs,WllWs,YllYs)
+}
+# Endogenous probability matrix (EPM). For amino acid y at motif position z:
+#   EPMtable[y,z] = n / (columns[z] * .01 * AllMeans[y])
+# where n is PositionTable[y,z], or the pseudo-count 1/columns[z] when that
+# amino acid never occurs at the position (so no entry is exactly zero), and
+# columns[z] is the number of substrates that have a residue at position z.
+# NOTE(review): AllMeans is defined earlier in the file; presumably the mean
+# background frequency (in percent) of each amino acid, in the same order as
+# the rows of PositionTable -- confirm.
+
+# Sanity check left from the original: shows the table dimensions when the
+# script is evaluated at top level with auto-printing.
+dim(PositionTable)
+
+# Residues present per position. A cell counts as empty when it is "" or NA;
+# `%in%` keeps an NA cell from turning the whole count (and with it the whole
+# EPM column) into NA, which `sum(x == "")` would do.
+columns <- vapply(
+  seq_len(ncol(PositionTable)),
+  function(z) sum(!(substrates[, z] %in% c("", NA))),
+  integer(1)
+)
+
+EPMtable <- local({
+  epm <- PositionTable
+  background <- AllMeans[seq_len(nrow(PositionTable))]
+  for (z in seq_len(ncol(PositionTable))) {
+    seen <- PositionTable[, z]
+    # Observed count where there is one, otherwise the 1/columns[z] pseudo-count.
+    numerators <- ifelse(seen > 0, seen, 1 / columns[z])
+    # Same operation order as the scalar formula: (columns[z]*.01)*AllMeans[y].
+    epm[, z] <- numerators / (columns[z] * .01 * background)
+  }
+  epm
+})
+# The endogenous probability matrix (EPMtable) is complete at this point.
+# TODO: make the program automatically determine which SDs are > 2, and then run the screener and sorter on those SDs.
+
+
+
+
+
+# write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)
+# write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)
+# write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)
+# write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)
+# write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)
+
+# Write the results as stacked CSV sections. Every section is appended to its
+# file, comma-separated and without a column-header line; a one-element
+# character vector serves as the title line of the section that follows it.
+AppendSection <- function(section, path, row.names = FALSE) {
+  write.table(section, file = path, append = TRUE, sep = ",",
+              row.names = row.names, col.names = FALSE)
+}
+
+# Prefix the score with its label so both end up in one written column.
+NormalizationScore <- c("Normalization Score", NormalizationScore)
+
+# ---- FILENAME: SD table followed by percent table ----
+AppendSection(c("SD Table"), FILENAME)
+AppendSection(SDtable, FILENAME)
+AppendSection(c("Percent Table"), FILENAME)
+AppendSection(PercentTable, FILENAME)
+
+# EPM with a leading row of position labels (-7..7) and a leading column
+# taken from SetOfAAs (defined earlier in the file).
+HeaderSD <- -7:7
+EPMtableu <- rbind(HeaderSD, EPMtable)
+row.names(EPMtableu) <- NULL
+EPMtableu <- data.frame(SetOfAAs, EPMtableu)
+
+# ---- FILENAME2: site selectivity, EPM, normalization score ----
+AppendSection("Site Selectivity Matrix", FILENAME2)
+
+# Two header rows: a row of 16 single-space cells, then "Position", -7 .. 7.
+# NOTE(review): the variable name `head` is the same as the utils::head
+# function; kept unchanged here.
+head <- matrix(" ", nrow = 1, ncol = 16)
+SelectivityHeader <- rbind(head, matrix(c("Position", -7:7), nrow = 1))
+AppendSection(SelectivityHeader, FILENAME2)
+
+# SelectivitySheet is the only section written together with its row names.
+AppendSection(SelectivitySheet, FILENAME2, row.names = TRUE)
+AppendSection(c("Endogenous Probability Matrix"), FILENAME2)
+AppendSection(EPMtableu, FILENAME2)
+AppendSection(NormalizationScore, FILENAME2)
+