changeset 11:a36f9cce16a3 draft

Deleted selected files
author jfb
date Thu, 08 Feb 2018 14:51:12 -0500
parents de59605e960a
children cf93d3535a31
files Kinatest-R_part1.R
diffstat 1 files changed, 0 insertions(+), 1114 deletions(-) [+]
line wrap: on
line diff
--- a/Kinatest-R_part1.R	Thu Feb 08 14:51:06 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1114 +0,0 @@
-
-ImportedSubstrateList<- read.csv("input1", stringsAsFactors=FALSE)
-NegativeSubstrateList<- read.csv("input2", stringsAsFactors=FALSE)
-SubstrateBackgroundFrequency<- read.csv("input3", stringsAsFactors=FALSE)
-
-ScreenerFilename<-"screener"
-
-
-
-FILENAME<-"output1.csv"
-FILENAME2<-"output2.csv"
-FILENAME3<-"output3.csv"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-OutputMatrix<-"KinaseMatrix.csv"
-CharacterizationTable<-"CharacterizationTableForThisKinase.csv"
-SDtable<-"SDtableforthisKinase"
-SiteSelectivityTable<-"SiteSelectivityForThisKinase"
-
-
-
-substrates<-matrix(rep("A",times=((nrow(ImportedSubstrateList)-1)*15)),ncol = 15)
-#SeqsToBeScored<-"asdasd"
-  
-for (i in 2:nrow(ImportedSubstrateList))
-{
-  substratemotif<-ImportedSubstrateList[i,4:18]
-  substratemotif[8]<-"Y"
-  #substratemotif<-paste(substratemotif,sep = "",collapse = "")
-  j=i-1
-  substratemotif<-unlist(substratemotif)
-  substrates[j,1:15]<-substratemotif
-}
-
-# SpacesToOs<-c(""="O",)
-# substrates<-SpacesToOs[substrates]
-
-SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency),2]
-
-if(2==2){
-Amean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE)
-Cmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE)
-Dmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE)
-Emean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE)
-Fmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),6]), na.rm=TRUE)
-Gmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),7]), na.rm=TRUE)
-Hmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),8]), na.rm=TRUE)
-Imean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),9]), na.rm=TRUE)
-Kmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),10]), na.rm=TRUE)
-Lmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),11]), na.rm=TRUE)
-Mmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),12]), na.rm=TRUE)
-Nmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),13]), na.rm=TRUE)
-Pmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),14]), na.rm=TRUE)
-Qmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),15]), na.rm=TRUE)
-Rmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),16]), na.rm=TRUE)
-Smean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),17]), na.rm=TRUE)
-Tmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),18]), na.rm=TRUE)
-Vmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),19]), na.rm=TRUE)
-Wmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),20]), na.rm=TRUE)
-Ymean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),21]), na.rm=TRUE)
-
-AllMeans<-c(Amean,Cmean,Dmean,Emean,Fmean,Gmean,Hmean,Imean,Kmean,Lmean,Mmean,Nmean,Pmean,Qmean,Rmean,Smean,Tmean,Vmean,Wmean,Ymean)
-
-Asd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE)
-Csd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE)
-Dsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE)
-Esd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE)
-Fsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),6]), na.rm=TRUE)
-Gsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),7]), na.rm=TRUE)
-Hsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),8]), na.rm=TRUE)
-Isd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),9]), na.rm=TRUE)
-Ksd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),10]), na.rm=TRUE)
-Lsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),11]), na.rm=TRUE)
-Msd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),12]), na.rm=TRUE)
-Nsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),13]), na.rm=TRUE)
-Psd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),14]), na.rm=TRUE)
-Qsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),15]), na.rm=TRUE)
-Rsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),16]), na.rm=TRUE)
-Ssd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),17]), na.rm=TRUE)
-Tsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),18]), na.rm=TRUE)
-Vsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),19]), na.rm=TRUE)
-Wsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),20]), na.rm=TRUE)
-Ysd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),21]), na.rm=TRUE)
-}
-AllSDs<-c(Asd,Csd,Dsd,Esd,Fsd,Gsd,Hsd,Isd,Ksd,Lsd,Msd,Nsd,Psd,Qsd,Rsd,Ssd,Tsd,Vsd,Wsd,Ysd)
-#this is subbackfreq SDs
-
-SBF_statisticalvalues<-cbind(AllMeans,AllSDs)
-
-#create the percent table
-if (1==1){
-  Column1<-substrates[,1]
-  Column2<-substrates[,2]
-  Column3<-substrates[,3]
-  Column4<-substrates[,4]
-  Column5<-substrates[,5]
-  Column6<-substrates[,6]
-  Column7<-substrates[,7]
-  Column8<-substrates[,8]
-  Column9<-substrates[,9]
-  Column10<-substrates[,10]
-  Column11<-substrates[,11]
-  Column12<-substrates[,12]
-  Column13<-substrates[,13]
-  Column14<-substrates[,14]
-  Column15<-substrates[,15]
-  
-  spaces1<-sum((Column1%in% ""))
-  spaces2<-sum(Column2%in% "")
-  spaces3<-sum(Column3%in% "")
-  spaces4<-sum(Column4%in% "")
-  spaces5<-sum(Column5%in% "")
-  spaces6<-sum(Column6%in% "")
-  spaces7<-sum(Column7%in% "")
-  spaces8<-sum(Column8%in% "")
-  spaces9<-sum(Column9%in% "")
-  spaces10<-sum(Column10%in% "")
-  spaces11<-sum(Column11%in% "")
-  spaces12<-sum(Column12%in% "")
-  spaces13<-sum(Column13%in% "")
-  spaces14<-sum(Column14%in% "")
-  spaces15<-sum(Column15%in% "")
-  
-  A1<-sum(Column1 %in% "A")/(length(Column1)-spaces1)
-  A2<-sum(Column2 %in% "A")/(length(Column2)-spaces2)
-  A3<-sum(Column3 %in% "A")/(length(Column3)-spaces3)
-  A4<-sum(Column4 %in% "A")/(length(Column4)-spaces4)
-  A5<-sum(Column5 %in% "A")/(length(Column5)-spaces5)
-  A6<-sum(Column6 %in% "A")/(length(Column6)-spaces6)
-  A7<-sum(Column7 %in% "A")/(length(Column7)-spaces7)
-  A8<-sum(Column8 %in% "A")/(length(Column8)-spaces8)
-  A9<-sum(Column9 %in% "A")/(length(Column9)-spaces9)
-  A10<-sum(Column10 %in% "A")/(length(Column10)-spaces10)
-  A11<-sum(Column11 %in% "A")/(length(Column11)-spaces11)
-  A12<-sum(Column12 %in% "A")/(length(Column12)-spaces12)
-  A13<-sum(Column13 %in% "A")/(length(Column13)-spaces13)
-  A14<-sum(Column14 %in% "A")/(length(Column14)-spaces14)
-  A15<-sum(Column15 %in% "A")/(length(Column15)-spaces15)
-  AllAs<-cbind(A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15)
-  
-  C1<-sum(Column1 %in% "C")/(length(Column1)-spaces1)
-  C2<-sum(Column2 %in% "C")/(length(Column2)-spaces2)
-  C3<-sum(Column3 %in% "C")/(length(Column3)-spaces3)
-  C4<-sum(Column4 %in% "C")/(length(Column4)-spaces4)
-  C5<-sum(Column5 %in% "C")/(length(Column5)-spaces5)
-  C6<-sum(Column6 %in% "C")/(length(Column6)-spaces6)
-  C7<-sum(Column7 %in% "C")/(length(Column7)-spaces7)
-  C8<-sum(Column8 %in% "C")/(length(Column8)-spaces8)
-  C9<-sum(Column9 %in% "C")/(length(Column9)-spaces9)
-  C10<-sum(Column10 %in% "C")/(length(Column10)-spaces10)
-  C11<-sum(Column11 %in% "C")/(length(Column11)-spaces11)
-  C12<-sum(Column12 %in% "C")/(length(Column12)-spaces12)
-  C13<-sum(Column13 %in% "C")/(length(Column13)-spaces13)
-  C14<-sum(Column14 %in% "C")/(length(Column14)-spaces14)
-  C15<-sum(Column15 %in% "C")/(length(Column15)-spaces15)
-  CllCs<-cbind(C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15)
-  
-  D1<-sum(Column1 %in% "D")/(length(Column1)-spaces1)
-  D2<-sum(Column2 %in% "D")/(length(Column2)-spaces2)
-  D3<-sum(Column3 %in% "D")/(length(Column3)-spaces3)
-  D4<-sum(Column4 %in% "D")/(length(Column4)-spaces4)
-  D5<-sum(Column5 %in% "D")/(length(Column5)-spaces5)
-  D6<-sum(Column6 %in% "D")/(length(Column6)-spaces6)
-  D7<-sum(Column7 %in% "D")/(length(Column7)-spaces7)
-  D8<-sum(Column8 %in% "D")/(length(Column8)-spaces8)
-  D9<-sum(Column9 %in% "D")/(length(Column9)-spaces9)
-  D10<-sum(Column10 %in% "D")/(length(Column10)-spaces10)
-  D11<-sum(Column11 %in% "D")/(length(Column11)-spaces11)
-  D12<-sum(Column12 %in% "D")/(length(Column12)-spaces12)
-  D13<-sum(Column13 %in% "D")/(length(Column13)-spaces13)
-  D14<-sum(Column14 %in% "D")/(length(Column14)-spaces14)
-  D15<-sum(Column15 %in% "D")/(length(Column15)-spaces15)
-  DllDs<-cbind(D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15)
-  
-  E1<-sum(Column1 %in% "E")/(length(Column1)-spaces1)
-  E2<-sum(Column2 %in% "E")/(length(Column2)-spaces2)
-  E3<-sum(Column3 %in% "E")/(length(Column3)-spaces3)
-  E4<-sum(Column4 %in% "E")/(length(Column4)-spaces4)
-  E5<-sum(Column5 %in% "E")/(length(Column5)-spaces5)
-  E6<-sum(Column6 %in% "E")/(length(Column6)-spaces6)
-  E7<-sum(Column7 %in% "E")/(length(Column7)-spaces7)
-  E8<-sum(Column8 %in% "E")/(length(Column8)-spaces8)
-  E9<-sum(Column9 %in% "E")/(length(Column9)-spaces9)
-  E10<-sum(Column10 %in% "E")/(length(Column10)-spaces10)
-  E11<-sum(Column11 %in% "E")/(length(Column11)-spaces11)
-  E12<-sum(Column12 %in% "E")/(length(Column12)-spaces12)
-  E13<-sum(Column13 %in% "E")/(length(Column13)-spaces13)
-  E14<-sum(Column14 %in% "E")/(length(Column14)-spaces14)
-  E15<-sum(Column15 %in% "E")/(length(Column15)-spaces15)
-  EllEs<-cbind(E1,E2,E3,E4,E5,E6,E7,E8,E9,E10,E11,E12,E13,E14,E15)
-  
-  
-  F1<-sum(Column1 %in% "F")/(length(Column1)-spaces1)
-  F2<-sum(Column2 %in% "F")/(length(Column2)-spaces2)
-  F3<-sum(Column3 %in% "F")/(length(Column3)-spaces3)
-  F4<-sum(Column4 %in% "F")/(length(Column4)-spaces4)
-  F5<-sum(Column5 %in% "F")/(length(Column5)-spaces5)
-  F6<-sum(Column6 %in% "F")/(length(Column6)-spaces6)
-  F7<-sum(Column7 %in% "F")/(length(Column7)-spaces7)
-  F8<-sum(Column8 %in% "F")/(length(Column8)-spaces8)
-  F9<-sum(Column9 %in% "F")/(length(Column9)-spaces9)
-  F10<-sum(Column10 %in% "F")/(length(Column10)-spaces10)
-  F11<-sum(Column11 %in% "F")/(length(Column11)-spaces11)
-  F12<-sum(Column12 %in% "F")/(length(Column12)-spaces12)
-  F13<-sum(Column13 %in% "F")/(length(Column13)-spaces13)
-  F14<-sum(Column14 %in% "F")/(length(Column14)-spaces14)
-  F15<-sum(Column15 %in% "F")/(length(Column15)-spaces15)
-  FllFs<-cbind(F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15)
-  
-  
-  G1<-sum(Column1 %in% "G")/(length(Column1)-spaces1)
-  G2<-sum(Column2 %in% "G")/(length(Column2)-spaces2)
-  G3<-sum(Column3 %in% "G")/(length(Column3)-spaces3)
-  G4<-sum(Column4 %in% "G")/(length(Column4)-spaces4)
-  G5<-sum(Column5 %in% "G")/(length(Column5)-spaces5)
-  G6<-sum(Column6 %in% "G")/(length(Column6)-spaces6)
-  G7<-sum(Column7 %in% "G")/(length(Column7)-spaces7)
-  G8<-sum(Column8 %in% "G")/(length(Column8)-spaces8)
-  G9<-sum(Column9 %in% "G")/(length(Column9)-spaces9)
-  G10<-sum(Column10 %in% "G")/(length(Column10)-spaces10)
-  G11<-sum(Column11 %in% "G")/(length(Column11)-spaces11)
-  G12<-sum(Column12 %in% "G")/(length(Column12)-spaces12)
-  G13<-sum(Column13 %in% "G")/(length(Column13)-spaces13)
-  G14<-sum(Column14 %in% "G")/(length(Column14)-spaces14)
-  G15<-sum(Column15 %in% "G")/(length(Column15)-spaces15)
-  GllGs<-cbind(G1,G2,G3,G4,G5,G6,G7,G8,G9,G10,G11,G12,G13,G14,G15)
-  
-  
-  H1<-sum(Column1 %in% "H")/(length(Column1)-spaces1)
-  H2<-sum(Column2 %in% "H")/(length(Column2)-spaces2)
-  H3<-sum(Column3 %in% "H")/(length(Column3)-spaces3)
-  H4<-sum(Column4 %in% "H")/(length(Column4)-spaces4)
-  H5<-sum(Column5 %in% "H")/(length(Column5)-spaces5)
-  H6<-sum(Column6 %in% "H")/(length(Column6)-spaces6)
-  H7<-sum(Column7 %in% "H")/(length(Column7)-spaces7)
-  H8<-sum(Column8 %in% "H")/(length(Column8)-spaces8)
-  H9<-sum(Column9 %in% "H")/(length(Column9)-spaces9)
-  H10<-sum(Column10 %in% "H")/(length(Column10)-spaces10)
-  H11<-sum(Column11 %in% "H")/(length(Column11)-spaces11)
-  H12<-sum(Column12 %in% "H")/(length(Column12)-spaces12)
-  H13<-sum(Column13 %in% "H")/(length(Column13)-spaces13)
-  H14<-sum(Column14 %in% "H")/(length(Column14)-spaces14)
-  H15<-sum(Column15 %in% "H")/(length(Column15)-spaces15)
-  HllHs<-cbind(H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,H14,H15)
-  
-  
-  I1<-sum(Column1 %in% "I")/(length(Column1)-spaces1)
-  I2<-sum(Column2 %in% "I")/(length(Column2)-spaces2)
-  I3<-sum(Column3 %in% "I")/(length(Column3)-spaces3)
-  I4<-sum(Column4 %in% "I")/(length(Column4)-spaces4)
-  I5<-sum(Column5 %in% "I")/(length(Column5)-spaces5)
-  I6<-sum(Column6 %in% "I")/(length(Column6)-spaces6)
-  I7<-sum(Column7 %in% "I")/(length(Column7)-spaces7)
-  I8<-sum(Column8 %in% "I")/(length(Column8)-spaces8)
-  I9<-sum(Column9 %in% "I")/(length(Column9)-spaces9)
-  I10<-sum(Column10 %in% "I")/(length(Column10)-spaces10)
-  I11<-sum(Column11 %in% "I")/(length(Column11)-spaces11)
-  I12<-sum(Column12 %in% "I")/(length(Column12)-spaces12)
-  I13<-sum(Column13 %in% "I")/(length(Column13)-spaces13)
-  I14<-sum(Column14 %in% "I")/(length(Column14)-spaces14)
-  I15<-sum(Column15 %in% "I")/(length(Column15)-spaces15)
-  IllIs<-cbind(I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,I14,I15)
-  
-  
-  K1<-sum(Column1 %in% "K")/(length(Column1)-spaces1)
-  K2<-sum(Column2 %in% "K")/(length(Column2)-spaces2)
-  K3<-sum(Column3 %in% "K")/(length(Column3)-spaces3)
-  K4<-sum(Column4 %in% "K")/(length(Column4)-spaces4)
-  K5<-sum(Column5 %in% "K")/(length(Column5)-spaces5)
-  K6<-sum(Column6 %in% "K")/(length(Column6)-spaces6)
-  K7<-sum(Column7 %in% "K")/(length(Column7)-spaces7)
-  K8<-sum(Column8 %in% "K")/(length(Column8)-spaces8)
-  K9<-sum(Column9 %in% "K")/(length(Column9)-spaces9)
-  K10<-sum(Column10 %in% "K")/(length(Column10)-spaces10)
-  K11<-sum(Column11 %in% "K")/(length(Column11)-spaces11)
-  K12<-sum(Column12 %in% "K")/(length(Column12)-spaces12)
-  K13<-sum(Column13 %in% "K")/(length(Column13)-spaces13)
-  K14<-sum(Column14 %in% "K")/(length(Column14)-spaces14)
-  K15<-sum(Column15 %in% "K")/(length(Column15)-spaces15)
-  KllKs<-cbind(K1,K2,K3,K4,K5,K6,K7,K8,K9,K10,K11,K12,K13,K14,K15)
-  
-  
-  L1<-sum(Column1 %in% "L")/(length(Column1)-spaces1)
-  L2<-sum(Column2 %in% "L")/(length(Column2)-spaces2)
-  L3<-sum(Column3 %in% "L")/(length(Column3)-spaces3)
-  L4<-sum(Column4 %in% "L")/(length(Column4)-spaces4)
-  L5<-sum(Column5 %in% "L")/(length(Column5)-spaces5)
-  L6<-sum(Column6 %in% "L")/(length(Column6)-spaces6)
-  L7<-sum(Column7 %in% "L")/(length(Column7)-spaces7)
-  L8<-sum(Column8 %in% "L")/(length(Column8)-spaces8)
-  L9<-sum(Column9 %in% "L")/(length(Column9)-spaces9)
-  L10<-sum(Column10 %in% "L")/(length(Column10)-spaces10)
-  L11<-sum(Column11 %in% "L")/(length(Column11)-spaces11)
-  L12<-sum(Column12 %in% "L")/(length(Column12)-spaces12)
-  L13<-sum(Column13 %in% "L")/(length(Column13)-spaces13)
-  L14<-sum(Column14 %in% "L")/(length(Column14)-spaces14)
-  L15<-sum(Column15 %in% "L")/(length(Column15)-spaces15)
-  LllLs<-cbind(L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15)
-  
-  
-  M1<-sum(Column1 %in% "M")/(length(Column1)-spaces1)
-  M2<-sum(Column2 %in% "M")/(length(Column2)-spaces2)
-  M3<-sum(Column3 %in% "M")/(length(Column3)-spaces3)
-  M4<-sum(Column4 %in% "M")/(length(Column4)-spaces4)
-  M5<-sum(Column5 %in% "M")/(length(Column5)-spaces5)
-  M6<-sum(Column6 %in% "M")/(length(Column6)-spaces6)
-  M7<-sum(Column7 %in% "M")/(length(Column7)-spaces7)
-  M8<-sum(Column8 %in% "M")/(length(Column8)-spaces8)
-  M9<-sum(Column9 %in% "M")/(length(Column9)-spaces9)
-  M10<-sum(Column10 %in% "M")/(length(Column10)-spaces10)
-  M11<-sum(Column11 %in% "M")/(length(Column11)-spaces11)
-  M12<-sum(Column12 %in% "M")/(length(Column12)-spaces12)
-  M13<-sum(Column13 %in% "M")/(length(Column13)-spaces13)
-  M14<-sum(Column14 %in% "M")/(length(Column14)-spaces14)
-  M15<-sum(Column15 %in% "M")/(length(Column15)-spaces15)
-  MllMs<-cbind(M1,M2,M3,M4,M5,M6,M7,M8,M9,M10,M11,M12,M13,M14,M15)
-  
-  
-  N1<-sum(Column1 %in% "N")/(length(Column1)-spaces1)
-  N2<-sum(Column2 %in% "N")/(length(Column2)-spaces2)
-  N3<-sum(Column3 %in% "N")/(length(Column3)-spaces3)
-  N4<-sum(Column4 %in% "N")/(length(Column4)-spaces4)
-  N5<-sum(Column5 %in% "N")/(length(Column5)-spaces5)
-  N6<-sum(Column6 %in% "N")/(length(Column6)-spaces6)
-  N7<-sum(Column7 %in% "N")/(length(Column7)-spaces7)
-  N8<-sum(Column8 %in% "N")/(length(Column8)-spaces8)
-  N9<-sum(Column9 %in% "N")/(length(Column9)-spaces9)
-  N10<-sum(Column10 %in% "N")/(length(Column10)-spaces10)
-  N11<-sum(Column11 %in% "N")/(length(Column11)-spaces11)
-  N12<-sum(Column12 %in% "N")/(length(Column12)-spaces12)
-  N13<-sum(Column13 %in% "N")/(length(Column13)-spaces13)
-  N14<-sum(Column14 %in% "N")/(length(Column14)-spaces14)
-  N15<-sum(Column15 %in% "N")/(length(Column15)-spaces15)
-  NllNs<-cbind(N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12,N13,N14,N15)
-  
-  
-  P1<-sum(Column1 %in% "P")/(length(Column1)-spaces1)
-  P2<-sum(Column2 %in% "P")/(length(Column2)-spaces2)
-  P3<-sum(Column3 %in% "P")/(length(Column3)-spaces3)
-  P4<-sum(Column4 %in% "P")/(length(Column4)-spaces4)
-  P5<-sum(Column5 %in% "P")/(length(Column5)-spaces5)
-  P6<-sum(Column6 %in% "P")/(length(Column6)-spaces6)
-  P7<-sum(Column7 %in% "P")/(length(Column7)-spaces7)
-  P8<-sum(Column8 %in% "P")/(length(Column8)-spaces8)
-  P9<-sum(Column9 %in% "P")/(length(Column9)-spaces9)
-  P10<-sum(Column10 %in% "P")/(length(Column10)-spaces10)
-  P11<-sum(Column11 %in% "P")/(length(Column11)-spaces11)
-  P12<-sum(Column12 %in% "P")/(length(Column12)-spaces12)
-  P13<-sum(Column13 %in% "P")/(length(Column13)-spaces13)
-  P14<-sum(Column14 %in% "P")/(length(Column14)-spaces14)
-  P15<-sum(Column15 %in% "P")/(length(Column15)-spaces15)
-  PllPs<-cbind(P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15)
-  
-  
-  Q1<-sum(Column1 %in% "Q")/(length(Column1)-spaces1)
-  Q2<-sum(Column2 %in% "Q")/(length(Column2)-spaces2)
-  Q3<-sum(Column3 %in% "Q")/(length(Column3)-spaces3)
-  Q4<-sum(Column4 %in% "Q")/(length(Column4)-spaces4)
-  Q5<-sum(Column5 %in% "Q")/(length(Column5)-spaces5)
-  Q6<-sum(Column6 %in% "Q")/(length(Column6)-spaces6)
-  Q7<-sum(Column7 %in% "Q")/(length(Column7)-spaces7)
-  Q8<-sum(Column8 %in% "Q")/(length(Column8)-spaces8)
-  Q9<-sum(Column9 %in% "Q")/(length(Column9)-spaces9)
-  Q10<-sum(Column10 %in% "Q")/(length(Column10)-spaces10)
-  Q11<-sum(Column11 %in% "Q")/(length(Column11)-spaces11)
-  Q12<-sum(Column12 %in% "Q")/(length(Column12)-spaces12)
-  Q13<-sum(Column13 %in% "Q")/(length(Column13)-spaces13)
-  Q14<-sum(Column14 %in% "Q")/(length(Column14)-spaces14)
-  Q15<-sum(Column15 %in% "Q")/(length(Column15)-spaces15)
-  QllQs<-cbind(Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15)
-  
-  
-  R1<-sum(Column1 %in% "R")/(length(Column1)-spaces1)
-  R2<-sum(Column2 %in% "R")/(length(Column2)-spaces2)
-  R3<-sum(Column3 %in% "R")/(length(Column3)-spaces3)
-  R4<-sum(Column4 %in% "R")/(length(Column4)-spaces4)
-  R5<-sum(Column5 %in% "R")/(length(Column5)-spaces5)
-  R6<-sum(Column6 %in% "R")/(length(Column6)-spaces6)
-  R7<-sum(Column7 %in% "R")/(length(Column7)-spaces7)
-  R8<-sum(Column8 %in% "R")/(length(Column8)-spaces8)
-  R9<-sum(Column9 %in% "R")/(length(Column9)-spaces9)
-  R10<-sum(Column10 %in% "R")/(length(Column10)-spaces10)
-  R11<-sum(Column11 %in% "R")/(length(Column11)-spaces11)
-  R12<-sum(Column12 %in% "R")/(length(Column12)-spaces12)
-  R13<-sum(Column13 %in% "R")/(length(Column13)-spaces13)
-  R14<-sum(Column14 %in% "R")/(length(Column14)-spaces14)
-  R15<-sum(Column15 %in% "R")/(length(Column15)-spaces15)
-  RllRs<-cbind(R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,R13,R14,R15)
-  
-  
-  S1<-sum(Column1 %in% "S")/(length(Column1)-spaces1)
-  S2<-sum(Column2 %in% "S")/(length(Column2)-spaces2)
-  S3<-sum(Column3 %in% "S")/(length(Column3)-spaces3)
-  S4<-sum(Column4 %in% "S")/(length(Column4)-spaces4)
-  S5<-sum(Column5 %in% "S")/(length(Column5)-spaces5)
-  S6<-sum(Column6 %in% "S")/(length(Column6)-spaces6)
-  S7<-sum(Column7 %in% "S")/(length(Column7)-spaces7)
-  S8<-sum(Column8 %in% "S")/(length(Column8)-spaces8)
-  S9<-sum(Column9 %in% "S")/(length(Column9)-spaces9)
-  S10<-sum(Column10 %in% "S")/(length(Column10)-spaces10)
-  S11<-sum(Column11 %in% "S")/(length(Column11)-spaces11)
-  S12<-sum(Column12 %in% "S")/(length(Column12)-spaces12)
-  S13<-sum(Column13 %in% "S")/(length(Column13)-spaces13)
-  S14<-sum(Column14 %in% "S")/(length(Column14)-spaces14)
-  S15<-sum(Column15 %in% "S")/(length(Column15)-spaces15)
-  SllSs<-cbind(S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15)
-  
-  
-  T1<-sum(Column1 %in% "T")/(length(Column1)-spaces1)
-  T2<-sum(Column2 %in% "T")/(length(Column2)-spaces2)
-  T3<-sum(Column3 %in% "T")/(length(Column3)-spaces3)
-  T4<-sum(Column4 %in% "T")/(length(Column4)-spaces4)
-  T5<-sum(Column5 %in% "T")/(length(Column5)-spaces5)
-  T6<-sum(Column6 %in% "T")/(length(Column6)-spaces6)
-  T7<-sum(Column7 %in% "T")/(length(Column7)-spaces7)
-  T8<-sum(Column8 %in% "T")/(length(Column8)-spaces8)
-  T9<-sum(Column9 %in% "T")/(length(Column9)-spaces9)
-  T10<-sum(Column10 %in% "T")/(length(Column10)-spaces10)
-  T11<-sum(Column11 %in% "T")/(length(Column11)-spaces11)
-  T12<-sum(Column12 %in% "T")/(length(Column12)-spaces12)
-  T13<-sum(Column13 %in% "T")/(length(Column13)-spaces13)
-  T14<-sum(Column14 %in% "T")/(length(Column14)-spaces14)
-  T15<-sum(Column15 %in% "T")/(length(Column15)-spaces15)
-  TllTs<-cbind(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15)
-  
-  
-  V1<-sum(Column1 %in% "V")/(length(Column1)-spaces1)
-  V2<-sum(Column2 %in% "V")/(length(Column2)-spaces2)
-  V3<-sum(Column3 %in% "V")/(length(Column3)-spaces3)
-  V4<-sum(Column4 %in% "V")/(length(Column4)-spaces4)
-  V5<-sum(Column5 %in% "V")/(length(Column5)-spaces5)
-  V6<-sum(Column6 %in% "V")/(length(Column6)-spaces6)
-  V7<-sum(Column7 %in% "V")/(length(Column7)-spaces7)
-  V8<-sum(Column8 %in% "V")/(length(Column8)-spaces8)
-  V9<-sum(Column9 %in% "V")/(length(Column9)-spaces9)
-  V10<-sum(Column10 %in% "V")/(length(Column10)-spaces10)
-  V11<-sum(Column11 %in% "V")/(length(Column11)-spaces11)
-  V12<-sum(Column12 %in% "V")/(length(Column12)-spaces12)
-  V13<-sum(Column13 %in% "V")/(length(Column13)-spaces13)
-  V14<-sum(Column14 %in% "V")/(length(Column14)-spaces14)
-  V15<-sum(Column15 %in% "V")/(length(Column15)-spaces15)
-  VllVs<-cbind(V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15)
-  
-  
-  W1<-sum(Column1 %in% "W")/(length(Column1)-spaces1)
-  W2<-sum(Column2 %in% "W")/(length(Column2)-spaces2)
-  W3<-sum(Column3 %in% "W")/(length(Column3)-spaces3)
-  W4<-sum(Column4 %in% "W")/(length(Column4)-spaces4)
-  W5<-sum(Column5 %in% "W")/(length(Column5)-spaces5)
-  W6<-sum(Column6 %in% "W")/(length(Column6)-spaces6)
-  W7<-sum(Column7 %in% "W")/(length(Column7)-spaces7)
-  W8<-sum(Column8 %in% "W")/(length(Column8)-spaces8)
-  W9<-sum(Column9 %in% "W")/(length(Column9)-spaces9)
-  W10<-sum(Column10 %in% "W")/(length(Column10)-spaces10)
-  W11<-sum(Column11 %in% "W")/(length(Column11)-spaces11)
-  W12<-sum(Column12 %in% "W")/(length(Column12)-spaces12)
-  W13<-sum(Column13 %in% "W")/(length(Column13)-spaces13)
-  W14<-sum(Column14 %in% "W")/(length(Column14)-spaces14)
-  W15<-sum(Column15 %in% "W")/(length(Column15)-spaces15)
-  WllWs<-cbind(W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14,W15)
-  
-  
-  Y1<-sum(Column1 %in% "Y")/(length(Column1)-spaces1)
-  Y2<-sum(Column2 %in% "Y")/(length(Column2)-spaces2)
-  Y3<-sum(Column3 %in% "Y")/(length(Column3)-spaces3)
-  Y4<-sum(Column4 %in% "Y")/(length(Column4)-spaces4)
-  Y5<-sum(Column5 %in% "Y")/(length(Column5)-spaces5)
-  Y6<-sum(Column6 %in% "Y")/(length(Column6)-spaces6)
-  Y7<-sum(Column7 %in% "Y")/(length(Column7)-spaces7)
-  Y8<-sum(Column8 %in% "Y")/(length(Column8)-spaces8)
-  Y9<-sum(Column9 %in% "Y")/(length(Column9)-spaces9)
-  Y10<-sum(Column10 %in% "Y")/(length(Column10)-spaces10)
-  Y11<-sum(Column11 %in% "Y")/(length(Column11)-spaces11)
-  Y12<-sum(Column12 %in% "Y")/(length(Column12)-spaces12)
-  Y13<-sum(Column13 %in% "Y")/(length(Column13)-spaces13)
-  Y14<-sum(Column14 %in% "Y")/(length(Column14)-spaces14)
-  Y15<-sum(Column15 %in% "Y")/(length(Column15)-spaces15)
-  YllYs<-cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y8,Y9,Y10,Y11,Y12,Y13,Y14,Y15)
-}
-#this is substrate percents
-
-#A C D E F G H I K L N P Q R S T V W Y
-
-PercentTable<-rbind(AllAs,CllCs,DllDs,EllEs,FllFs,GllGs,HllHs,IllIs,KllKs,LllLs,MllMs,NllNs,PllPs,QllQs,RllRs,SllSs,TllTs,VllVs,WllWs,YllYs)
-PercentTable<-PercentTable*100
-
-#create the SD table
-SDtable<-matrix(data = rep(1,times=(nrow(PercentTable)*ncol(PercentTable))),nrow = nrow(PercentTable),ncol = ncol(PercentTable))
-#for every row, a percertage minus the same mean over the same SD
-if(1==1){
-  SDtable[1,]<-(PercentTable[1,]-Amean)/Asd
-  SDtable[2,]<-(PercentTable[2,]-Cmean)/Csd
-  SDtable[3,]<-(PercentTable[3,]-Dmean)/Dsd
-  SDtable[4,]<-(PercentTable[4,]-Emean)/Esd
-  SDtable[5,]<-(PercentTable[5,]-Fmean)/Fsd
-  SDtable[6,]<-(PercentTable[6,]-Gmean)/Gsd
-  SDtable[7,]<-(PercentTable[7,]-Hmean)/Hsd
-  SDtable[8,]<-(PercentTable[8,]-Imean)/Isd
-  SDtable[9,]<-(PercentTable[9,]-Kmean)/Ksd
-  SDtable[10,]<-(PercentTable[10,]-Lmean)/Lsd
-  SDtable[11,]<-(PercentTable[11,]-Mmean)/Msd
-  SDtable[12,]<-(PercentTable[12,]-Nmean)/Nsd
-  SDtable[13,]<-(PercentTable[13,]-Pmean)/Psd
-  SDtable[14,]<-(PercentTable[14,]-Qmean)/Qsd
-  SDtable[15,]<-(PercentTable[15,]-Rmean)/Rsd
-  SDtable[16,]<-(PercentTable[16,]-Smean)/Ssd
-  SDtable[17,]<-(PercentTable[17,]-Tmean)/Tsd
-  SDtable[18,]<-(PercentTable[18,]-Vmean)/Vsd
-  SDtable[19,]<-(PercentTable[19,]-Wmean)/Wsd
-  SDtable[20,]<-(PercentTable[20,]-Ymean)/Ysd
-}
-
-
-SetOfAAs<-c("Letter","A","C","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y")
-
-
-SumOfSigmaAAs<-c(1:15)
-
-for (i in 1:15){
-  SumOfSigmasValue<-0
-  for (j in 1:20){
-        value<-0
-    if (SDtable[j,i]>2){
-      value<-sum(substrates[,i]==SetOfAAs[j])
-    }
-    SumOfSigmasValue<-SumOfSigmasValue+value
-  }
-  SumOfSigmaAAs[i]<-SumOfSigmasValue
-}
-
-# AAs1<-length(substrates[,1])-sum(substrates[,1]=="")
-# AAs2<-length(substrates[,2])-sum(substrates[,2]=="")
-# AAs3<-length(substrates[,3])-sum(substrates[,3]=="")
-# AAs4<-length(substrates[,4])-sum(substrates[,4]=="")
-# AAs5<-length(substrates[,5])-sum(substrates[,5]=="")
-# AAs6<-length(substrates[,6])-sum(substrates[,6]=="")
-# AAs7<-length(substrates[,7])-sum(substrates[,7]=="")
-# AAs8<-length(substrates[,8])-sum(substrates[,8]=="")
-# AAs9<-length(substrates[,9])-sum(substrates[,9]=="")
-# 
-# 
-# 
-# #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9)
-# AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
-#                   length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
-#                   length(substrates[,9]))
-
-SumOfExpectedSigmaAAs<-c(1:15)
-for (i in 1:15){
-  ExpectedValue<-0
-  for (j in 1:20){
-        value<-0
-    if (SDtable[j,i]>2){
-      value<-AllMeans[j]
-    }
-    ExpectedValue<-ExpectedValue+value
-  }
-  SumOfExpectedSigmaAAs[i]<-ExpectedValue*(length(substrates[,i])-sum(substrates[,i]%in% ""))/100
-}
-
-SelectivityRow<-SumOfSigmaAAs/SumOfExpectedSigmaAAs
-SelectivitySheet<-rbind(SumOfSigmaAAs,SumOfExpectedSigmaAAs,SelectivityRow)
-
-SetOfAAs<-matrix(data = SetOfAAs,ncol = 1)
-
-SDtableu<-SDtable
-HeaderSD<-c(-7:7)
-SDtable<-rbind(HeaderSD,SDtableu)
-row.names(SDtable)<-NULL
-SDtable<-data.frame(SetOfAAs,SDtable)
-
-PercentTable<-rbind(HeaderSD,PercentTable)
-row.names(PercentTable)<-NULL
-PercentTable<-data.frame(SetOfAAs,PercentTable)
-numberofY<-as.numeric(SubstrateBackgroundFrequency$Number.of.Y)
-numberofY<-numberofY[!is.na(numberofY)]
-
-numberofPY<-as.numeric(SubstrateBackgroundFrequency$Number.of.pY)
-numberofPY<-numberofPY[!is.na(numberofPY)]
-
-NormalizationScore<-sum(numberofPY)/sum(numberofY)
-
-# positions<-matrix(data = NA, nrow=20,ncol = 15)
-# 
-# #column1
-# 
-# for (q in 1:15) {
-#   sA<-sum(substrates[,i]=="A")
-#   positions[1,i]<-sA
-#   sC<-sum(substrates[,i]=="C")
-#   positions[2,i]<-sC
-#   sD<-sum(substrates[,i]=="D")
-#   positions[3,i]<-sD
-#   sE<-sum(substrates[,i]=="E")
-#   positions[4,i]<-sE
-#   sF<-sum(substrates[,i]=="F")
-#   sG<-sum(substrates[,i]=="G")
-#   sH<-sum(substrates[,i]=="H")
-#   sI<-sum(substrates[,i]=="I")
-#   sK<-sum(substrates[,i]=="K")
-#   sL<-sum(substrates[,i]=="L")
-#   sM<-sum(substrates[,i]=="M")
-#   sN<-sum(substrates[,i]=="N")
-#   sP<-sum(substrates[,i]=="P")
-#   sQ<-sum(substrates[,i]=="Q")
-#   sR<-sum(substrates[,i]=="R")
-#   sS<-sum(substrates[,i]=="S")
-#   sT<-sum(substrates[,i]=="T")
-#   sV<-sum(substrates[,i]=="V")
-#   sW<-sum(substrates[,i]=="W")
-#   sY<-sum(substrates[,i]=="Y")
-#   positions[5,i]<-sF
-#   positions[6,i]<-sG
-#   positions[7,i]<-sH
-#   positions[8,i]<-sI
-#   positions[9,i]<-sK
-#   positions[10,i]<-sL
-#   positions[11,i]<-sM
-#   positions[12,i]<-sN
-#   positions[13,i]<-sP
-#   positions[14,i]<-sQ
-#   positions[15,i]<-sR
-#   positions[16,i]<-sS
-#   positions[17,i]<-sT
-#   positions[18,i]<-sV
-#   positions[19,i]<-sW
-#   positions[20,i]<-sY
-# }
-
-# Here I create the positions table, which is needed for the endogenous
-# probability matrix. PositionTable[aa, pos] is simply "how many times did
-# amino acid `aa` show up at motif position `pos`?" (20 letters x 15 positions).
-# The guard below is always TRUE; presumably it only serves as a foldable region.
-if (6==6){
-  # Per-position residue vectors. They stay as separate variables because the
-  # `columns` computation further down reads Column1..Column15 by name.
-  Column1<-substrates[,1]
-  Column2<-substrates[,2]
-  Column3<-substrates[,3]
-  Column4<-substrates[,4]
-  Column5<-substrates[,5]
-  Column6<-substrates[,6]
-  Column7<-substrates[,7]
-  Column8<-substrates[,8]
-  Column9<-substrates[,9]
-  Column10<-substrates[,10]
-  Column11<-substrates[,11]
-  Column12<-substrates[,12]
-  Column13<-substrates[,13]
-  Column14<-substrates[,14]
-  Column15<-substrates[,15]
-
-  # Row order of PositionTable: the 20 one-letter codes that are counted,
-  # in the order the rows are stacked.
-  AminoAcidLetters<-c("A","C","D","E","F","G","H","I","K","L",
-                      "M","N","P","Q","R","S","T","V","W","Y")
-
-  # One column of 20 counts per motif position. `%in%` (not `==`) is used so
-  # that an NA residue counts as "no match" instead of turning the sum into NA.
-  PositionTable<-vapply(
-    seq_len(15),
-    function(pos) {
-      vapply(AminoAcidLetters,
-             function(aa) sum(substrates[,pos] %in% aa),
-             integer(1))
-    },
-    integer(20)
-  )
-
-  # Keep the dimnames the hand-written rbind/cbind version produced
-  # (rows "AllAs", "CllCs", ..., "YllYs"; columns "A1".."A15") so that anything
-  # inspecting them sees the same labels as before.
-  dimnames(PositionTable)<-list(
-    paste0(AminoAcidLetters,"ll",AminoAcidLetters,"s"),
-    paste0("A",seq_len(15))
-  )
-}
-# Endogenous probability matrix (EPMtable): each amino-acid position count over
-# the substrate background frequency mean. It starts out as a plain copy of
-# PositionTable so it has the same shape; the cells are overwritten further down.
-# The dim() call below does not store its result; at top level R only prints it.
-dim(PositionTable)
-EPMtable<-PositionTable
-# EPMtable[1,]<-(PositionTable[1,]/(PositionTable[1,]*.01*Amean))
-# EPMtable[2,]<-(PositionTable[2,]/(PositionTable[2,]*.01*Cmean))
-# EPMtable[3,]<-(PositionTable[3,]/(PositionTable[3,]*.01*Dmean))
-# EPMtable[4,]<-(PositionTable[4,]/(PositionTable[4,]*.01*Emean))
-# EPMtable[5,]<-(PositionTable[5,]/(PositionTable[5,]*.01*Fmean))
-# EPMtable[6,]<-(PositionTable[6,]/(PositionTable[6,]*.01*Gmean))
-# EPMtable[7,]<-(PositionTable[7,]/(PositionTable[7,]*.01*Hmean))
-# EPMtable[8,]<-(PositionTable[8,]/(PositionTable[8,]*.01*Imean))
-# EPMtable[9,]<-(PositionTable[9,]/(PositionTable[9,]*.01*Kmean))
-# EPMtable[10,]<-(PositionTable[10,]/(PositionTable[10,]*.01*Lmean))
-# EPMtable[11,]<-(PositionTable[11,]/(PositionTable[11,]*.01*Mmean))
-# EPMtable[12,]<-(PositionTable[12,]/(PositionTable[12,]*.01*Nmean))
-# EPMtable[13,]<-(PositionTable[13,]/(PositionTable[13,]*.01*Pmean))
-# EPMtable[14,]<-(PositionTable[14,]/(PositionTable[14,]*.01*Qmean))
-# EPMtable[15,]<-(PositionTable[15,]/(PositionTable[15,]*.01*Rmean))
-# EPMtable[16,]<-(PositionTable[16,]/(PositionTable[16,]*.01*Smean))
-# EPMtable[17,]<-(PositionTable[17,]/(PositionTable[17,]*.01*Tmean))
-# EPMtable[18,]<-(PositionTable[18,]/(PositionTable[18,]*.01*Vmean))
-# EPMtable[19,]<-(PositionTable[19,]/(PositionTable[19,]*.01*Wmean))
-# EPMtable[20,]<-(PositionTable[20,]/(PositionTable[20,]*.01*Ymean))
-
-# Number of non-blank residues at each of the 15 motif positions: the length of
-# the column minus the number of "" entries. Blanks are counted with `%in%`
-# rather than `==`, because `sum(x == "")` becomes NA as soon as the column
-# holds a single NA, and that NA would then spread to every value computed
-# from `columns`. An NA residue is therefore treated as "not blank".
-columns<-vapply(
-  list(Column1,Column2,Column3,Column4,Column5,
-       Column6,Column7,Column8,Column9,Column10,
-       Column11,Column12,Column13,Column14,Column15),
-  function(residues) length(residues)-sum(residues %in% ""),
-  integer(1)
-)
-
-# Fill EPMtable cell by cell from the raw counts in PositionTable.
-# Every cell is divided by (non-blank residues at that position * .01 * the
-# AllMeans entry for that row). A zero count is first replaced by
-# 1/columns[pos], so the cell never ends up as exactly zero.
-for (aa in 1:20) {
-  for (pos in 1:15) {
-    observed<-PositionTable[aa,pos]
-    denominator<-(columns[pos]*.01*AllMeans[aa])
-    if (observed>0){
-      EPMtable[aa,pos]<-observed/denominator
-    } else if (observed==0){
-      EPMtable[aa,pos]<-(1/columns[pos])/denominator
-    }
-  }
-}
-#here I created the endogenous probability matrix
-#now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs
-
-
-
-
-
-# write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)
-# write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)
-# write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)
-# write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)
-# write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)
-
-# Append one object to a comma-separated report file without column names.
-# Row names are written only when `row.names` is TRUE.
-AppendToReport<-function(x,file,row.names=FALSE){
-  write.table(x,file=file,append=TRUE,sep=",",row.names=row.names,col.names=FALSE)
-}
-
-# Put a text label in front of the score so the label is written first.
-NormalizationScore<-c("Normalization Score",NormalizationScore)
-
-# FILENAME: the SD table and the percent table, each preceded by a title row.
-AppendToReport(c("SD Table"),FILENAME)
-AppendToReport(SDtable,FILENAME)
-AppendToReport(c("Percent Table"),FILENAME)
-AppendToReport(PercentTable,FILENAME)
-
-# EPMtableu: EPMtable with a leading row of position labels (-7..7) and a
-# leading column holding SetOfAAs; row names are cleared before the bind.
-HeaderSD<-c(-7:7)
-EPMtableu<-rbind(HeaderSD,EPMtable)
-row.names(EPMtableu)<-NULL
-EPMtableu<-data.frame(SetOfAAs,EPMtableu)
-
-# FILENAME2: title row, a blank row plus the position header, the selectivity
-# sheet (with its row names), then the EPM and the normalization score.
-AppendToReport("Site Selectivity Matrix",FILENAME2)
-SelectivityHeader<-matrix(data=c("Position",-7:7),nrow=1)
-# NOTE: this variable masks base::head() for the remainder of the script.
-head<-matrix(data=rep(" ",times=16),nrow=1)
-SelectivityHeader<-rbind(head,SelectivityHeader)
-
-AppendToReport(SelectivityHeader,FILENAME2)
-#colnames(SelectivitySheet)<-c("-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7")
-AppendToReport(SelectivitySheet,FILENAME2,row.names=TRUE)
-AppendToReport(c("Endogenous Probability Matrix"),FILENAME2)
-AppendToReport(EPMtableu,FILENAME2)
-AppendToReport(NormalizationScore,FILENAME2)
-