# Provenance (original Mercurial patch header, kept for reference):
#   HG changeset patch
#   User jfb
#   Date 1517955209 18000
#   Node ID 2323b6c5511f673cc74354808d03e408b096c6be
#   Parent f1bbd121dfb7d227baa9985681d8446b0119bfde
#   Uploaded
#   diff -r f1bbd121dfb7 -r 2323b6c5511f Kinatest-R_part2.R
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/Kinatest-R_part2.R Tue Feb 06 17:13:29 2018 -0500
#   @@ -0,0 +1,782 @@

# Purpose: pick the amino acids to use at each of the 15 motif positions
# (-7 .. +7; "m" = minus, "d0" = centre, "p" = plus) and later emit every
# combination of them.
#
# Reads from the calling environment: SDtable, ScreenerFilename.
#
# TODO: keep the accession numbers attached to their substrates.
# TODO: the negative (false) set is the totally unphosphorylated Ys found by
#       the FASTA-2-CSV system / UniProt.
#
# How many peptides: the total is the product of the number of residues chosen
# at every position, e.g. two residues at each of the 15 positions gives 2^15.

# Previously considered dependencies (unused):
# require(rJava)
# require(xlsxjars)
# require(xlsx)
# require(readxl)

# The data viewer is only useful interactively and can fail in batch runs.
if (interactive()) {
  View(SDtable)
}

# 20 rows x 15 columns of scores: rows follow the residue order of aa_props2
# below, columns are the 15 motif positions.
bareSDs <- SDtable[2:21, 2:16]
goodones <- bareSDs > 2

# Row index (1..20) -> one-letter amino-acid code.
aa_props2 <- c(
  "1" = "A", "2" = "C", "3" = "D", "4" = "E", "5" = "F",
  "6" = "G", "7" = "H", "8" = "I", "9" = "K", "10" = "L",
  "11" = "M", "12" = "N", "13" = "P", "14" = "Q", "15" = "R",
  "16" = "S", "17" = "T", "18" = "V", "19" = "W", "20" = "Y"
)

# Residues selected for one motif position (one column of bareSDs).
# Every residue scoring above 2 is kept; if none qualifies, the residue(s)
# holding the column maximum are used instead.
select_position_residues <- function(column) {
  chosen <- which(goodones[, column] %in% TRUE)
  if (length(chosen) < 1) {
    chosen <- which(bareSDs[, column] == max(bareSDs[, column]))
  }
  aa_props2[chosen]
}

Positionm7 <- select_position_residues(1)
Positionm6 <- select_position_residues(2)
Positionm5 <- select_position_residues(3)
Positionm4 <- select_position_residues(4)
Positionm3 <- select_position_residues(5)
Positionm2 <- select_position_residues(6)
Positionm1 <- select_position_residues(7)
Positiond0 <- select_position_residues(8)
Positionp1 <- select_position_residues(9)
Positionp2 <- select_position_residues(10)
Positionp3 <- select_position_residues(11)
Positionp4 <- select_position_residues(12)
Positionp5 <- select_position_residues(13)
Positionp6 <- select_position_residues(14)
Positionp7 <- select_position_residues(15)

# Manual override example: the residues for each position can be given by hand.
# Positionm7<-c("D","H","N","V")
# Positionm6<-c("E","V")
# Positionm5<-c("D","H")
# Positionm4<-c("D","N")
# Positionm3<-c("D","E","F","Q")
# Positionm2<-c("D","N","Q","S")
# Positionm1<-c("F","I","L")
# Positiond0<-c("Y")
# Positionp1<-c("A","E")
# Positionp2<-c("T","S","Q","E")
# Positionp3<-c("V")
# Positionp4<-c("K")
# Positionp5<-c("K")
# Positionp6<-c("K")
# Positionp7<-c("R")

########################################
# ScreenerFilename<-"C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls"

# The screener file stacks one 24-row table per kinase, 25 rows apart.
screaner <- read.csv(ScreenerFilename, header = FALSE, stringsAsFactors = FALSE)

Abl <- screaner[2:25, ]
Arg <- screaner[27:50, ]
Btk <- screaner[52:75, ]
Csk <- screaner[77:100, ]
Fyn <- screaner[102:125, ]
Hck <- screaner[127:150, ]
JAK2 <- screaner[152:175, ]
Lck <- screaner[177:200, ]
Lyn <- screaner[202:225, ]
Pyk2 <- screaner[227:250, ]
Src <- screaner[252:275, ]
Syk <- screaner[277:300, ]
Yes <- screaner[302:325, ]

# Open question 1: why are we doing BTK when we already have a bioinformatics page about it?
# Open question 2: after rerunning everything only 96 positions of interest
# show up in the SD table.

# Do_You_want_An_Excel_Output_Questionmark<-"NO"
GeneratedPeptidesFile <- "GeneratedPeptidesFile.csv"

# Historical alternative (disabled): each kinase table used to be read with
# read.xlsx() from "C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/
# Kinatest ID/Screener.xls", sheetIndex 4 (Abl) through 16 (Yes), in the order
# Abl, Arg, Btk, Csk, Fyn, Hck, JAK2, Lck, Lyn, Pyk2, Src, Syk, Yes.

# One-letter amino-acid code -> numeric code (1..20).
amino_acid_codes <- c(
  A = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7, I = 8, K = 9, L = 10,
  M = 11, N = 12, P = 13, Q = 14, R = 15, S = 16, T = 17, V = 18, W = 19, Y = 20
)

# The script has always exposed each code as a single-letter global (A, C, ...).
# NOTE(review): this masks R's `T` and `F` shorthands (T becomes 17, F becomes
# 5) for everything that runs afterwards; kept only because later code may read
# these globals. Always spell TRUE/FALSE in full.
for (residue in names(amino_acid_codes)) {
  assign(residue, amino_acid_codes[[residue]])
}

# Lookup used for scoring: "xY" shares the code of Y, "O" (blank) is 21.
aa_props <- c(amino_acid_codes, "xY" = 20, "O" = 21)

# Translate one position's residues into their numeric codes.
encode_residues <- function(residues) {
  aa_props[as.character(unlist(residues))]
}

number15 <- encode_residues(Positionm7)
number14 <- encode_residues(Positionm6)
number13 <- encode_residues(Positionm5)
number1 <- encode_residues(Positionm4)
number2 <- encode_residues(Positionm3)
number3 <- encode_residues(Positionm2)
number4 <- encode_residues(Positionm1)
number5 <- encode_residues(Positiond0)
number6 <- encode_residues(Positionp1)
number7 <- encode_residues(Positionp2)
number8 <- encode_residues(Positionp3)
number9 <- encode_residues(Positionp4)
number10 <- encode_residues(Positionp5)
number11 <- encode_residues(Positionp6)
number12 <- encode_residues(Positionp7)

# (A disabled `if (1==0)` block that gsub()-ed residue letters into numbers for
# undefined Ablnumber1..9 variables was unreachable and has been dropped.)

########################################
# Enumerate every combination of the chosen residues, position -7 first.

position_residues <- list(
  Positionm7, Positionm6, Positionm5, Positionm4, Positionm3,
  Positionm2, Positionm1, Positiond0, Positionp1, Positionp2,
  Positionp3, Positionp4, Positionp5, Positionp6, Positionp7
)
position_codes <- list(
  number15, number14, number13, number1, number2,
  number3, number4, number5, number6, number7,
  number8, number9, number10, number11, number12
)
position_sizes <- vapply(position_residues, length, integer(1))

# Number of generated peptides: product of the choices at every position.
total <- prod(position_sizes)

# Column `column` of the combination table. Row order matches 15 nested loops
# with position -7 outermost and +7 innermost: each value is repeated once per
# combination of the later positions, and the whole run is cycled once per
# combination of the earlier positions.
expand_column <- function(values, column) {
  later <- prod(position_sizes[-seq_len(column)])
  earlier <- prod(position_sizes[seq_len(column - 1)])
  rep(rep(values, each = later), times = earlier)
}

GeneratedPeptides <- matrix(
  unlist(lapply(seq_along(position_residues), function(k) {
    expand_column(as.character(unlist(position_residues[[k]])), k)
  })),
  ncol = 15
)
NumeratedPeptides <- matrix(
  unlist(lapply(seq_along(position_codes), function(k) {
    expand_column(as.numeric(unlist(position_codes[[k]])), k)
  })),
  ncol = 15
)
count <- total

####################################################################
# Score the generated peptides with the endogenous probability matrix from the
# previous script (EPMtableu).
ThisKinTable <- EPMtableu # [1:nrow(SDtable),]
TKTcolumn <- c(data = rep(1, times = 21))
TKTcolumn <- as.matrix(TKTcolumn, ncol = 1)
ThisKinTable <- cbind(TKTcolumn, ThisKinTable)

# Position 8 (the central d0 residue) is deliberately left out of every product.
scored_positions <- c(1:7, 9:15)

# Residue code c sits in row c + 1 of ThisKinTable; position p is column p + 2.
table_rows <- NumeratedPeptides + 1
ThisKinGeneratedScores <- as.numeric(ThisKinTable[, 3][table_rows[, 1]])
for (pos in scored_positions[-1]) {
  ThisKinGeneratedScores <- ThisKinGeneratedScores *
    unname(ThisKinTable[, pos + 2][table_rows[, pos]])
}
ThisKinGenWeirdScore <- ThisKinGeneratedScores /
  (ThisKinGeneratedScores + 1 / as.numeric(NormalizationScore[2]))

# Product of one kinase table's entries for every peptide: residue code c is
# row c of the table, position p is column p + 1 (position 8 skipped).
score_against_kinase <- function(kinase_table, peptide_codes) {
  score <- kinase_table[, 2][peptide_codes[, 1]]
  for (pos in scored_positions[-1]) {
    score <- score * kinase_table[, pos + 1][peptide_codes[, pos]]
  }
  score
}

AblGeneratedScores <- score_against_kinase(Abl, NumeratedPeptides)
ArgGeneratedScores <- score_against_kinase(Arg, NumeratedPeptides)
BtkGeneratedScores <- score_against_kinase(Btk, NumeratedPeptides)
CskGeneratedScores <- score_against_kinase(Csk, NumeratedPeptides)
FynGeneratedScores <- score_against_kinase(Fyn, NumeratedPeptides)
HckGeneratedScores <- score_against_kinase(Hck, NumeratedPeptides)
JAK2GeneratedScores <- score_against_kinase(JAK2, NumeratedPeptides)
LckGeneratedScores <- score_against_kinase(Lck, NumeratedPeptides)
LynGeneratedScores <- score_against_kinase(Lyn, NumeratedPeptides)
Pyk2GeneratedScores <- score_against_kinase(Pyk2, NumeratedPeptides)
SrcGeneratedScores <- score_against_kinase(Src, NumeratedPeptides)
SykGeneratedScores <- score_against_kinase(Syk, NumeratedPeptides)
YesGeneratedScores <- score_against_kinase(Yes, NumeratedPeptides)

# Per-kinase activity call. Column 1 of each kinase table holds the
# normalisation value in row 22 and the percent threshold in row 24; the
# threshold is converted back to raw-score scale before comparing.
AblNorm <- 1 / as.numeric(Abl[22, 1])
AblThresh <- as.numeric(Abl[24, 1])
AblTrueThresh <- (AblThresh * AblNorm) / (100 - AblThresh)
AblActive <- unlist(AblGeneratedScores) > AblTrueThresh

ArgNorm <- 1 / as.numeric(Arg[22, 1])
ArgThresh <- as.numeric(Arg[24, 1])
ArgTrueThresh <- (ArgThresh * ArgNorm) / (100 - ArgThresh)
ArgActive <- unlist(ArgGeneratedScores) > ArgTrueThresh

BtkNorm <- 1 / as.numeric(Btk[22, 1])
BtkThresh <- as.numeric(Btk[24, 1])
BtkTrueThresh <- (BtkThresh * BtkNorm) / (100 - BtkThresh)
BtkActive <- unlist(BtkGeneratedScores) > BtkTrueThresh

CskNorm <- 1 / as.numeric(Csk[22, 1])
CskThresh <- as.numeric(Csk[24, 1])
CskTrueThresh <- (CskThresh * CskNorm) / (100 - CskThresh)
CskActive <- CskGeneratedScores > CskTrueThresh

FynNorm <- 1 / as.numeric(Fyn[22, 1])
FynThresh <- as.numeric(Fyn[24, 1])
FynTrueThresh <- (FynThresh * FynNorm) / (100 - FynThresh)
FynActive <- unlist(FynGeneratedScores) > FynTrueThresh

HckNorm <- 1 / as.numeric(Hck[22, 1])
HckThresh <- as.numeric(Hck[24, 1])
HckTrueThresh <- (HckThresh * HckNorm) / (100 - HckThresh)
HckActive <- unlist(HckGeneratedScores) > HckTrueThresh

# Same activity call as for the kinases above: column 1 of each kinase table
# holds the normalisation value in row 22 and the percent threshold in row 24.
JAK2Norm <- 1 / as.numeric(JAK2[22, 1])
JAK2Thresh <- as.numeric(JAK2[24, 1])
JAK2TrueThresh <- (JAK2Thresh * JAK2Norm) / (100 - JAK2Thresh)
# The lower-case "k" in JAk2Active is historical; code further down refers to
# this exact name, so it is kept.
JAk2Active <- unlist(JAK2GeneratedScores) > JAK2TrueThresh

LckNorm <- 1 / as.numeric(Lck[22, 1])
LckThresh <- as.numeric(Lck[24, 1])
LckTrueThresh <- (LckThresh * LckNorm) / (100 - LckThresh)
LckActive <- unlist(LckGeneratedScores) > LckTrueThresh

LynNorm <- 1 / as.numeric(Lyn[22, 1])
LynThresh <- as.numeric(Lyn[24, 1])
LynTrueThresh <- (LynThresh * LynNorm) / (100 - LynThresh)
LynActive <- unlist(LynGeneratedScores) > LynTrueThresh

Pyk2Norm <- 1 / as.numeric(Pyk2[22, 1])
Pyk2Thresh <- as.numeric(Pyk2[24, 1])
Pyk2TrueThresh <- (Pyk2Thresh * Pyk2Norm) / (100 - Pyk2Thresh)
Pyk2Active <- unlist(Pyk2GeneratedScores) > Pyk2TrueThresh

SrcNorm <- 1 / as.numeric(Src[22, 1])
SrcThresh <- as.numeric(Src[24, 1])
SrcTrueThresh <- (SrcThresh * SrcNorm) / (100 - SrcThresh)
SrcActive <- unlist(SrcGeneratedScores) > SrcTrueThresh

SykNorm <- 1 / as.numeric(Syk[22, 1])
SykThresh <- as.numeric(Syk[24, 1])
SykTrueThresh <- (SykThresh * SykNorm) / (100 - SykThresh)
SykActive <- unlist(SykGeneratedScores) > SykTrueThresh

YesNorm <- 1 / as.numeric(Yes[22, 1])
YesThresh <- as.numeric(Yes[24, 1])
YesTrueThresh <- (YesThresh * YesNorm) / (100 - YesThresh)
YesActive <- unlist(YesGeneratedScores) > YesTrueThresh

# Number of screened kinases (0..13) whose threshold each peptide exceeds.
AllActive <- AblActive + ArgActive + BtkActive + CskActive + FynActive +
  HckActive + JAk2Active + LckActive + LynActive + Pyk2Active + SrcActive +
  SykActive + YesActive

Scores <- ThisKinGeneratedScores
ThresholdValues <- ThisKinGenWeirdScore

# Collapse the 15 residue columns of every generated peptide into one string.
# Pasting whole columns at once also copes with a zero-row peptide table,
# which an index loop over 1:nrow() does not.
FullMotifs <- do.call(
  paste0,
  lapply(seq_len(15), function(column) GeneratedPeptides[, column])
)

PeptidesWithRanks <- cbind.data.frame(FullMotifs, GeneratedPeptides, Scores, ThresholdValues)
PeptidesWithRanks <- cbind.data.frame(
  PeptidesWithRanks, AllActive, AblActive, ArgActive, BtkActive, CskActive,
  FynActive, HckActive, JAk2Active, LckActive, LynActive, Pyk2Active,
  SrcActive, SykActive, YesActive
)
# Peptides that exceed the fewest screened kinases' thresholds come first.
RanksPeptides <- PeptidesWithRanks[order(PeptidesWithRanks$AllActive, decreasing = FALSE), ]
# PepRankHead<-c(1:9,"Sequence","RPMS","PMS")
# RanksPeptides<-rbind.data.frame(PepRankHead,PeptidesWithRanks)
head(RanksPeptides)

# Next: score the negative sequences.
# Lab notes (unrelated to this script): write up how we transfect with
# lipofectamine; questions 3, 4, 5.
# TODO: paused here. Check whether mapping blank (left-padding) positions to "O"
# fixes the scoring; the MCC table below was modified and still needs finishing.

# Extra all-ones row so that the blank code "O" (21, i.e. table row 22)
# contributes a neutral factor of 1 at any position.
ThisKinBlanks <- rep(1, times = 17)
ThisKinTable <- rbind(ThisKinTable, ThisKinBlanks)

# Raw score of one 15-residue motif given as numeric residue codes: the product
# of the ThisKinTable entries (row = code + 1, column = position + 2) over all
# positions except the central position 8.
score_coded_motif <- function(residue_codes) {
  table_rows <- residue_codes + 1
  score <- as.numeric(ThisKinTable[table_rows[1], 3])
  for (pos in c(2:7, 9:15)) {
    score <- score * ThisKinTable[as.numeric(table_rows[pos]), pos + 2]
  }
  score
}

# Raw score -> normalised percentage (0..100).
to_percent_score <- function(raw_score) {
  (raw_score / (raw_score + 1 / as.numeric(NormalizationScore[2]))) * 100
}

# Negative substrates: column 2 holds the motif as one string; spaces mark
# blank positions.
negative_count <- nrow(NegativeSubstrateList)
NegativeScores <- rep(NA, times = negative_count)
NegativeWeirdScores <- rep(NA, times = negative_count)
for (v in seq_len(negative_count)) {
  residues <- unlist(strsplit(NegativeSubstrateList[v, 2], ""))
  residues <- gsub(" ", "O", residues)
  raw_score <- score_coded_motif(unname(aa_props[residues]))
  NegativeScores[v] <- raw_score
  NegativeWeirdScores[v] <- to_percent_score(raw_score)
}

negativesubstrates <- NegativeSubstrateList[, 2]
NegativeWithScores <- cbind(
  negativesubstrates,
  as.character(NegativeScores),
  as.character(NegativeWeirdScores)
)

# TODO: the negative substrates still need to be written to an output file.

# Positive substrates: columns 4:18 hold one residue per position; empty cells
# mark blank positions.
positive_count <- nrow(ImportedSubstrateList)
PositiveScores <- rep(NA, times = positive_count)
PositiveWeirdScores <- rep(NA, times = positive_count)
for (v in seq_len(positive_count)) {
  residues <- unlist(ImportedSubstrateList[v, 4:18])
  residues <- gsub("^$", "O", residues)
  raw_score <- score_coded_motif(unname(aa_props[residues]))
  PositiveScores[v] <- raw_score
  PositiveWeirdScores[v] <- to_percent_score(raw_score)
}

positivesubstrates <- ImportedSubstrateList[, 4:18]
positivewithscores <- cbind.data.frame(positivesubstrates, PositiveScores, PositiveWeirdScores)

# Lab notes (unrelated to this script): write down the transient transfection
# SOP and what we will be doing with it, the vector names to be used, and how
# (and with what) the bacteria are transformed.

# Earlier single-threshold check at 91%:
# TPninetyone<-length(PositiveWeirdScores[PositiveWeirdScores>=0.91])
# Senseninetyone<-TPninetyone/nrow(positivesubstrates)
#
# TNninetyone<-length(NegativeWeirdScores[NegativeWeirdScores<91])
# Specninetyone<-TNninetyone/100

# Create the MCC table: one entry per integer threshold, from 100 down to 1.
threshold <- rev(1:100)

Truepositives <- c(1:100)
Falsenegatives <- c(1:100)
Sensitivity <- c(1:100)
TrueNegatives <- c(1:100)
FalsePositives <- c(1:100)
Specificity <- c(1:100)
Accuracy <- c(1:100)
MCC <- c(1:100)
EER <- c(1:100)

# TODO: make sure that the accession numbers follow the motifs.

for (z in 1:100) {
  thres<-101-z
  Truepositives[z]<-length(PositiveWeirdScores[PositiveWeirdScores>=(thres)])
  Falsenegatives[z]<-nrow(positivesubstrates)-Truepositives[z]
  Sensitivity[z]<-Truepositives[z]/(Falsenegatives[z]+Truepositives[z])
  TrueNegatives[z]<-length(NegativeWeirdScores[NegativeWeirdScores<(thres)])
# at thresh 100 this should be 0, because it is total minus true negatives
  FalsePositives[z]<-nrow(NegativeSubstrateList)-TrueNegatives[z]
  Specificity[z]<-1-(TrueNegatives[z]/(FalsePositives[z]+TrueNegatives[z]))
Accuracy[z]<-100*(Truepositives[z]+TrueNegatives[z])/(Falsenegatives[z]+FalsePositives[z]+TrueNegatives[z]+Truepositives[z]) + MCC[z]<-((Truepositives[z]+TrueNegatives[z])-(Falsenegatives[z]+FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z]))) + EER[z]<-.01*(((1-(Sensitivity[z]))*(Truepositives[z]+Falsenegatives[z]))+(Specificity[z]*(1-(Truepositives[z]+Falsenegatives[z])))) +} +Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,Specificity,Accuracy,MCC,EER) + +positiveheader<-c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,"RPMS","PMS") +positivewithscores<-rbind.data.frame(positiveheader,positivewithscores) + +negativeheader<-c("Substrate","RPMS","PMS") +colnames(NegativeWithScores)<-negativeheader + +# write.xlsx(NegativeWithScores,file = FILENAME, sheetName = "Negative Sequences Scored",col.names = TRUE,row.names = FALSE,append = TRUE) +# write.xlsx(Characterization,file = FILENAME,sheetName = "Characterization Table",col.names = TRUE,row.names = FALSE,append = TRUE) +# write.xlsx(RanksPeptides,file = FILENAME,sheetName = "Ranked Generated Peptides",col.names = FALSE,row.names = FALSE,append = TRUE) +# write.xlsx(positivewithscores,file = FILENAME, sheetName = "Positive Sequences Scored",col.names = FALSE,row.names = FALSE,append = TRUE) +write.table(x=c("Characterzation Table"),file = FILENAME2, col.names = FALSE,row.names = FALSE, append = TRUE,sep = ",") +write.table(Characterization,file = FILENAME2, col.names = TRUE,row.names = FALSE, append = TRUE,sep = ",") + + +write.table(RanksPeptides,file = FILENAME3,append = TRUE,row.names = FALSE,col.names = TRUE,sep = ",") + +