view kinatestid_r/Kinatest-R_part1.R @ 13:d71eb1d66a88 draft

Uploaded
author jfb
date Thu, 08 Feb 2018 15:10:42 -0500
parents cf93d3535a31
children
line wrap: on
line source


# ---- Inputs ----------------------------------------------------------------
# Three CSV inputs, read with text columns kept as character (not factor).
# Columns 4:18 of ImportedSubstrateList are used further down as the 15
# motif positions; columns 2:21 of SubstrateBackgroundFrequency are used as
# the per-amino-acid background frequencies.
ImportedSubstrateList <- read.csv(file = "input1", stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv(file = "input2", stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv(file = "input3", stringsAsFactors = FALSE)

# ---- File-name settings ----------------------------------------------------
ScreenerFilename <- "screener"

FILENAME <- "output1"
FILENAME2 <- "output2"
FILENAME3 <- "output3"
























# Names for the result tables.
# NOTE: SDtable holds a name only until the z-score step below, which
# reassigns SDtable to a numeric matrix.
OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
SDtable <- "SDtableforthisKinase"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"



# Build `substrates`: a character matrix with one row per substrate and one
# column per motif position (15 positions, read from columns 4:18 of
# ImportedSubstrateList).
#
# The first data row of the import is skipped, so substrate j comes from
# import row j + 1 (presumably that row is a secondary header -- TODO confirm).
# Position 8, the middle of the 15 positions, is forced to "Y" for every
# substrate.
#
# seq_len() is used instead of 2:nrow(...) so that an import with fewer than
# two rows produces an empty 0 x 15 matrix rather than a backwards-counting
# loop or a negative rep() count.
motif_columns <- 4:18
substrate_count <- max(nrow(ImportedSubstrateList) - 1, 0)
substrates <- matrix("A", nrow = substrate_count, ncol = length(motif_columns))

for (j in seq_len(substrate_count)) {
  substratemotif <- ImportedSubstrateList[j + 1, motif_columns]
  substratemotif[8] <- "Y"
  # Flatten the one-row data frame into a plain vector before storing it.
  substratemotif <- unlist(substratemotif)
  substrates[j, ] <- substratemotif
}

# SpacesToOs<-c(""="O",)
# substrates<-SpacesToOs[substrates]

# Echo the value in the last row, second column of the background table
# (auto-printed when the script runs at top level).
SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency), 2]

# ---- Background mean and standard deviation per amino acid ------------------
# Columns 2:21 of SubstrateBackgroundFrequency are read in the order
#   A C D E F G H I K L M N P Q R S T V W Y
# Each column is coerced to numeric and summarised over all rows, ignoring NA.
background_letters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                        "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
background_columns <- 2:21

AllMeans <- vapply(
  background_columns,
  function(k) mean(as.numeric(SubstrateBackgroundFrequency[, k]), na.rm = TRUE),
  numeric(1)
)

#this is subbackfreq SDs
AllSDs <- vapply(
  background_columns,
  function(k) sd(as.numeric(SubstrateBackgroundFrequency[, k]), na.rm = TRUE),
  numeric(1)
)

# Publish the per-letter scalars Amean, Cmean, ..., Ymean and Asd, Csd, ...,
# Ysd under their usual names; they are the elements of AllMeans / AllSDs.
for (k in seq_along(background_letters)) {
  assign(paste0(background_letters[k], "mean"), AllMeans[k])
  assign(paste0(background_letters[k], "sd"), AllSDs[k])
}

# Two-column summary: one row per amino acid, columns AllMeans and AllSDs.
SBF_statisticalvalues <- cbind(AllMeans, AllSDs)

#create the percent table
#
# PercentTable[r, p] is the percentage of substrates whose residue at motif
# position p is the r-th amino acid, with rows in the order
#   A C D E F G H I K L M N P Q R S T V W Y
# Blank cells ("") are left out of the denominator, so every column is a
# percentage of the non-blank residues at that position. A position with no
# non-blank residues gives NaN (0 / 0).
#
# The whole table is computed in one pass; no per-cell scalars or per-letter
# row matrices are created as a by-product. %in% is used for the comparisons
# because it never returns NA.
percent_letters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                     "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
percent_positions <- seq_len(15)

# One column of fractions per position; vapply binds them into a 20 x 15
# matrix.
PercentTable <- vapply(
  percent_positions,
  function(p) {
    residues <- substrates[, p]
    non_blank <- length(residues) - sum(residues %in% "")
    letter_counts <- vapply(
      percent_letters,
      function(aa) sum(residues %in% aa),
      numeric(1),
      USE.NAMES = FALSE
    )
    letter_counts / non_blank
  },
  numeric(length(percent_letters))
)

# Column names A1..A15 and no row names: the column names become the column
# headers when PercentTable is later wrapped in a data frame.
dimnames(PercentTable) <- list(NULL, paste0("A", percent_positions))

#this is substrate percents
PercentTable <- PercentTable * 100

#create the SD table
#
# For every amino-acid row: (percentage - background mean) / background SD,
# i.e. how many background standard deviations each observed percentage lies
# from the background mean.
#
# AllMeans and AllSDs hold Amean..Ymean and Asd..Ysd in the same order as the
# rows of PercentTable, so both are swept along the row margin. unname()
# drops the dimnames inherited from PercentTable, leaving a bare numeric
# matrix with the same dimensions as PercentTable.
centred_percent <- sweep(PercentTable, 1, AllMeans, "-")
SDtable <- unname(sweep(centred_percent, 1, AllSDs, "/"))


# Row labels for the output tables: "Letter" labels the header row and is
# followed by the 20 amino acids in the same order as the rows of SDtable.
SetOfAAs <- c("Letter", "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
              "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# ---- Observed count of over-represented residues per position ---------------
# For each motif position, count the substrates whose residue is one of the
# amino acids with a z-score above 2 at that position.
#
# SetOfAAs[1] is the "Letter" label, so the amino acid belonging to SDtable
# row r is SetOfAAs[r + 1]. Indexing SetOfAAs[r] directly would pair every
# z-score row with the previous amino acid and count the wrong residue, so
# the label is dropped before matching.
#
# NA / NaN z-scores (e.g. from a position with no non-blank residues) are
# treated as "not over-represented" instead of aborting the script.
sigma_letters <- SetOfAAs[-1]

SumOfSigmaAAs <- vapply(
  seq_len(15),
  function(p) {
    z_scores <- SDtable[seq_along(sigma_letters), p]
    enriched <- !is.na(z_scores) & z_scores > 2
    sum(substrates[, p] %in% sigma_letters[enriched])
  },
  numeric(1)
)

# AAs1<-length(substrates[,1])-sum(substrates[,1]=="")
# AAs2<-length(substrates[,2])-sum(substrates[,2]=="")
# AAs3<-length(substrates[,3])-sum(substrates[,3]=="")
# AAs4<-length(substrates[,4])-sum(substrates[,4]=="")
# AAs5<-length(substrates[,5])-sum(substrates[,5]=="")
# AAs6<-length(substrates[,6])-sum(substrates[,6]=="")
# AAs7<-length(substrates[,7])-sum(substrates[,7]=="")
# AAs8<-length(substrates[,8])-sum(substrates[,8]=="")
# AAs9<-length(substrates[,9])-sum(substrates[,9]=="")
# 
# 
# 
# #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9)
# AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
#                   length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
#                   length(substrates[,9]))

# ---- Expected count of over-represented residues per position ---------------
# For each motif position, add up the background mean percentages (AllMeans)
# of the amino acids with a z-score above 2 there, then scale by the number
# of non-blank residues at that position and divide by 100 to turn the
# percentage into an expected count.
#
# NA / NaN z-scores are treated as "not over-represented", so a position
# with undefined z-scores contributes 0 instead of stopping the script with
# "missing value where TRUE/FALSE needed".
SumOfExpectedSigmaAAs <- vapply(
  seq_len(15),
  function(p) {
    z_scores <- SDtable[seq_along(AllMeans), p]
    enriched <- !is.na(z_scores) & z_scores > 2
    residues <- substrates[, p]
    non_blank <- length(residues) - sum(residues %in% "")
    sum(AllMeans[enriched]) * non_blank / 100
  },
  numeric(1)
)

# Selectivity per position = observed / expected count of over-represented
# residues. The sheet stacks observed, expected and ratio as three rows.
SelectivityRow <- SumOfSigmaAAs / SumOfExpectedSigmaAAs
SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)

# Label column for the tables below, as a one-column matrix.
SetOfAAs <- matrix(SetOfAAs, ncol = 1)

# Header row of position offsets, -7 through 7.
HeaderSD <- -7:7

# Keep the bare z-score matrix in SDtableu, then rebuild SDtable as a data
# frame: header row on top, row names cleared, label column in front.
SDtableu <- SDtable
SDtable <- rbind(HeaderSD, SDtableu)
rownames(SDtable) <- NULL
SDtable <- data.frame(SetOfAAs, SDtable)

# Same treatment for the percent table.
PercentTable <- rbind(HeaderSD, PercentTable)
rownames(PercentTable) <- NULL
PercentTable <- data.frame(SetOfAAs, PercentTable)
# ---- Normalization score ---------------------------------------------------
# Ratio of the summed Number.of.pY column to the summed Number.of.Y column of
# the background table. Both columns are coerced to numeric and entries that
# are NA after coercion are dropped before summing.
numberofY <- as.numeric(SubstrateBackgroundFrequency$Number.of.Y)
numberofY <- numberofY[which(!is.na(numberofY))]

numberofPY <- as.numeric(SubstrateBackgroundFrequency$Number.of.pY)
numberofPY <- numberofPY[which(!is.na(numberofPY))]

total_pY <- sum(numberofPY)
total_Y <- sum(numberofY)
NormalizationScore <- total_pY / total_Y

# positions<-matrix(data = NA, nrow=20,ncol = 15)
# 
# #column1
# 
# for (q in 1:15) {
#   sA<-sum(substrates[,i]=="A")
#   positions[1,i]<-sA
#   sC<-sum(substrates[,i]=="C")
#   positions[2,i]<-sC
#   sD<-sum(substrates[,i]=="D")
#   positions[3,i]<-sD
#   sE<-sum(substrates[,i]=="E")
#   positions[4,i]<-sE
#   sF<-sum(substrates[,i]=="F")
#   sG<-sum(substrates[,i]=="G")
#   sH<-sum(substrates[,i]=="H")
#   sI<-sum(substrates[,i]=="I")
#   sK<-sum(substrates[,i]=="K")
#   sL<-sum(substrates[,i]=="L")
#   sM<-sum(substrates[,i]=="M")
#   sN<-sum(substrates[,i]=="N")
#   sP<-sum(substrates[,i]=="P")
#   sQ<-sum(substrates[,i]=="Q")
#   sR<-sum(substrates[,i]=="R")
#   sS<-sum(substrates[,i]=="S")
#   sT<-sum(substrates[,i]=="T")
#   sV<-sum(substrates[,i]=="V")
#   sW<-sum(substrates[,i]=="W")
#   sY<-sum(substrates[,i]=="Y")
#   positions[5,i]<-sF
#   positions[6,i]<-sG
#   positions[7,i]<-sH
#   positions[8,i]<-sI
#   positions[9,i]<-sK
#   positions[10,i]<-sL
#   positions[11,i]<-sM
#   positions[12,i]<-sN
#   positions[13,i]<-sP
#   positions[14,i]<-sQ
#   positions[15,i]<-sR
#   positions[16,i]<-sS
#   positions[17,i]<-sT
#   positions[18,i]<-sV
#   positions[19,i]<-sW
#   positions[20,i]<-sY
# }

# Build the position table: how many times each amino acid shows up at each of
# the 15 motif positions of `substrates`. PositionTable has one row per residue
# (A, C, D, ... Y) and one column per position; it is the input for the
# endogenous probability matrix computed further down.
residue_letters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                     "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
motif_positions <- 1:15

# Column1..Column15 hold the individual position columns and spaces1..spaces15
# the number of empty ("") cells in each. Code further down reads the Column*
# variables by name, so they are still created as top-level variables.
for (pos_idx in motif_positions) {
  assign(paste0("Column", pos_idx), substrates[, pos_idx])
  assign(paste0("spaces", pos_idx), sum(substrates[, pos_idx] %in% ""))
}

residue_rows <- vector("list", length(residue_letters))
for (res_idx in seq_along(residue_letters)) {
  residue <- residue_letters[res_idx]
  cell_names <- paste0(residue, motif_positions)

  # Occurrences of this residue at every position (%in% never yields NA).
  tally <- vapply(
    motif_positions,
    function(pos_idx) sum(substrates[, pos_idx] %in% residue),
    integer(1)
  )

  # Per-cell scalars (A1 .. A15, C1 .. C15, ...) and the 1 x 15 per-residue
  # row matrices (AllAs, CllCs, DllDs, ...) are kept under their old names.
  for (pos_idx in motif_positions) {
    assign(cell_names[pos_idx], tally[pos_idx])
  }
  row_counts <- matrix(tally, nrow = 1, dimnames = list(NULL, cell_names))
  assign(paste0(residue, "ll", residue, "s"), row_counts)

  residue_rows[[res_idx]] <- row_counts
}

# Stack the 20 residue rows; column names come from the first (A) row.
PositionTable <- do.call(rbind, residue_rows)

# Drop the scratch variables used only to build the table.
rm(residue_letters, motif_positions, pos_idx, res_idx, residue, cell_names,
   tally, row_counts, residue_rows)
# The endogenous probability matrix (EPMtable) starts as a copy of the position
# counts; its cells are overwritten further down once the per-position totals
# are known. The dim() call just echoes the count table's dimensions.
dim(PositionTable)
EPMtable <- PositionTable
# EPMtable[1,]<-(PositionTable[1,]/(PositionTable[1,]*.01*Amean))
# EPMtable[2,]<-(PositionTable[2,]/(PositionTable[2,]*.01*Cmean))
# EPMtable[3,]<-(PositionTable[3,]/(PositionTable[3,]*.01*Dmean))
# EPMtable[4,]<-(PositionTable[4,]/(PositionTable[4,]*.01*Emean))
# EPMtable[5,]<-(PositionTable[5,]/(PositionTable[5,]*.01*Fmean))
# EPMtable[6,]<-(PositionTable[6,]/(PositionTable[6,]*.01*Gmean))
# EPMtable[7,]<-(PositionTable[7,]/(PositionTable[7,]*.01*Hmean))
# EPMtable[8,]<-(PositionTable[8,]/(PositionTable[8,]*.01*Imean))
# EPMtable[9,]<-(PositionTable[9,]/(PositionTable[9,]*.01*Kmean))
# EPMtable[10,]<-(PositionTable[10,]/(PositionTable[10,]*.01*Lmean))
# EPMtable[11,]<-(PositionTable[11,]/(PositionTable[11,]*.01*Mmean))
# EPMtable[12,]<-(PositionTable[12,]/(PositionTable[12,]*.01*Nmean))
# EPMtable[13,]<-(PositionTable[13,]/(PositionTable[13,]*.01*Pmean))
# EPMtable[14,]<-(PositionTable[14,]/(PositionTable[14,]*.01*Qmean))
# EPMtable[15,]<-(PositionTable[15,]/(PositionTable[15,]*.01*Rmean))
# EPMtable[16,]<-(PositionTable[16,]/(PositionTable[16,]*.01*Smean))
# EPMtable[17,]<-(PositionTable[17,]/(PositionTable[17,]*.01*Tmean))
# EPMtable[18,]<-(PositionTable[18,]/(PositionTable[18,]*.01*Vmean))
# EPMtable[19,]<-(PositionTable[19,]/(PositionTable[19,]*.01*Wmean))
# EPMtable[20,]<-(PositionTable[20,]/(PositionTable[20,]*.01*Ymean))

# Number of non-empty cells at each of the 15 motif positions: the column
# length minus the number of "" entries in that column of `substrates`
# (Column1..Column15 are exactly these columns).
columns <- vapply(
  seq_len(15),
  function(pos) {
    residues <- substrates[, pos]
    length(residues) - sum(residues == "")
  },
  integer(1)
)

# Fill the endogenous probability matrix for all 20 residues x 15 positions.
# For residue row y and position column z the background term is
#   columns[z] * .01 * AllMeans[y]
# Cells with a positive count become count / background; cells with a zero
# count use 1 / columns[z] in place of the count.
epm_background <- matrix(columns * .01, nrow = 20, ncol = 15, byrow = TRUE) *
  unname(AllMeans[1:20])
epm_zero_numerator <- matrix(1 / columns, nrow = 20, ncol = 15, byrow = TRUE)

epm_seen <- PositionTable > 0
epm_unseen <- PositionTable == 0

EPMtable[epm_seen] <- PositionTable[epm_seen] / epm_background[epm_seen]
EPMtable[epm_unseen] <-
  epm_zero_numerator[epm_unseen] / epm_background[epm_unseen]

# Drop the scratch matrices used only for this fill.
rm(epm_background, epm_zero_numerator, epm_seen, epm_unseen)
#here I created the endogenous probability matrix
#now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs





# write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)
# write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)
# write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)
# write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)
# write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)

# Append `x` to `file` as comma-separated rows with no column header line.
append_csv_rows <- function(x, file, row.names = FALSE) {
  write.table(x, file = file, append = TRUE, sep = ",",
              row.names = row.names, col.names = FALSE)
}

# ---- First output file: SD table followed by the percent table ----
append_csv_rows("SD Table", FILENAME)
append_csv_rows(SDtable, FILENAME)
append_csv_rows("Percent Table", FILENAME)
append_csv_rows(PercentTable, FILENAME)

# ---- Second output file: selectivity, EPM and normalization score ----

# Endogenous probability matrix laid out like the other report tables:
# position header on top, row names cleared, label column in front.
HeaderSD <- -7:7
EPMtableu <- rbind(HeaderSD, EPMtable)
row.names(EPMtableu) <- NULL
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

# Two header rows for the selectivity section: a row of single blanks, then
# "Position" followed by the offsets -7 .. 7.
# (`head` is a plain variable here, kept under its existing name.)
head <- matrix(rep(" ", times = 16), nrow = 1)
SelectivityHeader <- matrix(c("Position", -7:7), nrow = 1)
SelectivityHeader <- rbind(head, SelectivityHeader)

# The score is written as a label entry followed by the value.
NormalizationScore <- c("Normalization Score", NormalizationScore)

append_csv_rows("Site Selectivity Matrix", FILENAME2)
append_csv_rows(SelectivityHeader, FILENAME2)
# Row names are written for this table only.
append_csv_rows(SelectivitySheet, FILENAME2, row.names = TRUE)
append_csv_rows("Endogenous Probability Matrix", FILENAME2)
append_csv_rows(EPMtableu, FILENAME2)
append_csv_rows(NormalizationScore, FILENAME2)