diff kinatestid_r/Kinatest-R.R @ 17:26ef4add9f7b draft

Uploaded
author jfb
date Wed, 28 Feb 2018 14:09:19 -0500
parents 15b5d4ae4480
children e16ca3e9fc49
line wrap: on
line diff
--- a/kinatestid_r/Kinatest-R.R	Thu Feb 08 15:49:05 2018 -0500
+++ b/kinatestid_r/Kinatest-R.R	Wed Feb 28 14:09:19 2018 -0500
@@ -1,7 +1,6 @@
-
 ImportedSubstrateList<- read.csv("input1", stringsAsFactors=FALSE)
 NegativeSubstrateList<- read.csv("input2", stringsAsFactors=FALSE)
-SubstrateBackgroundFrequency<- read.csv("input3", stringsAsFactors=FALSE)
+SubstrateBackgroundFrequency<- read.csv("input3", stringsAsFactors=FALSE, header = FALSE)
 
 ScreenerFilename<-"screener"
 
@@ -13,21 +12,14 @@
 
 
 
-
-
-
-
-
-
-
+SubstrateBackgroundFrequency<-t(SubstrateBackgroundFrequency)
+# number<-nrow(SubstrateBackgroundFrequency)-1
+SubstrateBackgroundFrequency<-SubstrateBackgroundFrequency[2:nrow(SubstrateBackgroundFrequency),]
+Sub<-na.omit(SubstrateBackgroundFrequency)
+SubstrateBackgroundFrequency<-Sub
 
-
-
-
-
-
-
-
+args = commandArgs(trailingOnly=TRUE)
+TodaysKinase<-args[1]
 
 
 
@@ -594,10 +586,10 @@
 PercentTable<-rbind(HeaderSD,PercentTable)
 row.names(PercentTable)<-NULL
 PercentTable<-data.frame(SetOfAAs,PercentTable)
-numberofY<-as.numeric(SubstrateBackgroundFrequency$Number.of.Y)
+numberofY<-as.numeric(SubstrateBackgroundFrequency[,34])
 numberofY<-numberofY[!is.na(numberofY)]
 
-numberofPY<-as.numeric(SubstrateBackgroundFrequency$Number.of.pY)
+numberofPY<-as.numeric(SubstrateBackgroundFrequency[,35])
 numberofPY<-numberofPY[!is.na(numberofPY)]
 
 NormalizationScore<-sum(numberofPY)/sum(numberofY)
@@ -1161,38 +1153,110 @@
 bareSDs<-SDtable[2:21,2:16]
 goodones<-bareSDs>2
 
+# Positionm7<-which(goodones[,1] %in% TRUE)
+# if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))}
+# Positionm6<-which(goodones[,2] %in% TRUE)
+# if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))}
+# Positionm5<-which(goodones[,3] %in% TRUE)
+# if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))}
+# Positionm4<-which(goodones[,4] %in% TRUE)
+# if (length(Positionm4)<2){Positionm4<-bareSDs[,4][order(bareSDs[,4])[1:2]]}
+# Positionm3<-which(goodones[,5] %in% TRUE)
+# if (length(Positionm3)<2){Positionm3<-bareSDs[,5][order(bareSDs[,5])[1:2]]}
+# Positionm2<-which(goodones[,6] %in% TRUE)
+# if (length(Positionm2)<2){Positionm2<-bareSDs[,6][order(bareSDs[,6])[1:2]]}
+# Positionm1<-which(goodones[,7] %in% TRUE)
+# if (length(Positionm1)<2){Positionm1<-bareSDs[,7][order(bareSDs[,7])[1:2]]}
+# 
+# Positiond0<-which(goodones[,8] %in% TRUE)
+# if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))}
+# 
+# Positionp1<-which(goodones[,9] %in% TRUE)
+# if (length(Positionp1)<2){Positionp1<-bareSDs[,9][order(bareSDs[,9])[1:2]]}
+# Positionp2<-which(goodones[,10] %in% TRUE)
+# if (length(Positionp2)<2){Positionp2<-bareSDs[,10][order(bareSDs[,10])[1:2]]}
+# Positionp3<-which(goodones[,11] %in% TRUE)
+# if (length(Positionp3)<2){Positionp3<-bareSDs[,11][order(bareSDs[,11])[1:2]]}
+# Positionp4<-which(goodones[,12] %in% TRUE)
+# if (length(Positionp4)<2){Positionp4<-bareSDs[,12][order(bareSDs[,12])[1:2]]}
+# Positionp5<-which(goodones[,13] %in% TRUE)
+# if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))}
+# Positionp6<-which(goodones[,14] %in% TRUE)
+# if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))}
+# Positionp7<-which(goodones[,15] %in% TRUE)
+# if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))}
+
+
+
+
+# Positionm7<-which(goodones[,1] %in% TRUE)
+# if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))}
+# Positionm6<-which(goodones[,2] %in% TRUE)
+# if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))}
+# Positionm5<-which(goodones[,3] %in% TRUE)
+# if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))}
+# Positionm4<-which(goodones[,4] %in% TRUE)
+# if (length(Positionm4)<1){Positionm4<-which(bareSDs[,4]==max(bareSDs[,4]))}
+# Positionm3<-which(goodones[,5] %in% TRUE)
+# if (length(Positionm3)<1){Positionm3<-which(bareSDs[,5]==max(bareSDs[,5]))}
+# Positionm2<-which(goodones[,6] %in% TRUE)
+# if (length(Positionm2)<1){Positionm2<-which(bareSDs[,6]==max(bareSDs[,6]))}
+# Positionm1<-which(goodones[,7] %in% TRUE)
+# if (length(Positionm1)<1){Positionm1<-which(bareSDs[,7]==max(bareSDs[,7]))}
+# 
+# Positiond0<-which(goodones[,8] %in% TRUE)
+# if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))}
+# 
+# Positionp1<-which(goodones[,9] %in% TRUE)
+# if (length(Positionp1)<1){Positionp1<-which(bareSDs[,9]==max(bareSDs[,9]))}
+# Positionp2<-which(goodones[,10] %in% TRUE)
+# if (length(Positionp2)<1){Positionp2<-which(bareSDs[,10]==max(bareSDs[,10]))}
+# Positionp3<-which(goodones[,11] %in% TRUE)
+# if (length(Positionp3)<1){Positionp3<-which(bareSDs[,11]==max(bareSDs[,11]))}
+# Positionp4<-which(goodones[,12] %in% TRUE)
+# if (length(Positionp4)<1){Positionp4<-which(bareSDs[,12]==max(bareSDs[,12]))}
+# Positionp5<-which(goodones[,13] %in% TRUE)
+# if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))}
+# Positionp6<-which(goodones[,14] %in% TRUE)
+# if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))}
+# Positionp7<-which(goodones[,15] %in% TRUE)
+# if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))}
+
+match(c(bareSDs[,2][order(bareSDs[,2])[1:2]]),bareSDs[,2])
+
 Positionm7<-which(goodones[,1] %in% TRUE)
-if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))}
+if (length(Positionm7)<3){Positionm7<-match(c(bareSDs[,1][order(bareSDs[,1])[19:20]]),bareSDs[,1])}
 Positionm6<-which(goodones[,2] %in% TRUE)
-if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))}
+if (length(Positionm6)<3){Positionm6<-match(c(bareSDs[,2][order(bareSDs[,2])[19:20]]),bareSDs[,2])}
 Positionm5<-which(goodones[,3] %in% TRUE)
-if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))}
+if (length(Positionm5)<3){Positionm5<-match(c(bareSDs[,3][order(bareSDs[,3])[19:20]]),bareSDs[,3])}
 Positionm4<-which(goodones[,4] %in% TRUE)
-if (length(Positionm4)<1){Positionm4<-which(bareSDs[,4]==max(bareSDs[,4]))}
+if (length(Positionm4)<3){Positionm4<-match(c(bareSDs[,4][order(bareSDs[,4])[19:20]]),bareSDs[,4])}
 Positionm3<-which(goodones[,5] %in% TRUE)
-if (length(Positionm3)<1){Positionm3<-which(bareSDs[,5]==max(bareSDs[,5]))}
+if (length(Positionm3)<3){Positionm3<-match(c(bareSDs[,5][order(bareSDs[,5])[19:20]]),bareSDs[,5])}
 Positionm2<-which(goodones[,6] %in% TRUE)
-if (length(Positionm2)<1){Positionm2<-which(bareSDs[,6]==max(bareSDs[,6]))}
+if (length(Positionm2)<3){Positionm2<-match(c(bareSDs[,6][order(bareSDs[,6])[19:20]]),bareSDs[,6])}
 Positionm1<-which(goodones[,7] %in% TRUE)
-if (length(Positionm1)<1){Positionm1<-which(bareSDs[,7]==max(bareSDs[,7]))}
+if (length(Positionm1)<3){Positionm1<-match(c(bareSDs[,7][order(bareSDs[,7])[19:20]]),bareSDs[,7])}
 
 Positiond0<-which(goodones[,8] %in% TRUE)
-if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))}
+#if (length(Positiond0)<3){Positiond0<-bareSDs[,8][order(bareSDs[,8])[1:2]]}
 
 Positionp1<-which(goodones[,9] %in% TRUE)
-if (length(Positionp1)<1){Positionp1<-which(bareSDs[,9]==max(bareSDs[,9]))}
+if (length(Positionp1)<3){Positionp1<-match(c(bareSDs[,9][order(bareSDs[,9])[19:20]]),bareSDs[,9])}
 Positionp2<-which(goodones[,10] %in% TRUE)
-if (length(Positionp2)<1){Positionp2<-which(bareSDs[,10]==max(bareSDs[,10]))}
+if (length(Positionp2)<3){Positionp2<-match(c(bareSDs[,10][order(bareSDs[,10])[19:20]]),bareSDs[,10])}
 Positionp3<-which(goodones[,11] %in% TRUE)
-if (length(Positionp3)<1){Positionp3<-which(bareSDs[,11]==max(bareSDs[,11]))}
+if (length(Positionp3)<3){Positionp3<-match(c(bareSDs[,11][order(bareSDs[,11])[19:20]]),bareSDs[,11])}
 Positionp4<-which(goodones[,12] %in% TRUE)
-if (length(Positionp4)<1){Positionp4<-which(bareSDs[,12]==max(bareSDs[,12]))}
+if (length(Positionp4)<3){Positionp4<-match(c(bareSDs[,12][order(bareSDs[,12])[19:20]]),bareSDs[,12])}
 Positionp5<-which(goodones[,13] %in% TRUE)
-if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))}
+if (length(Positionp5)<3){Positionp5<-match(c(bareSDs[,13][order(bareSDs[,13])[19:20]]),bareSDs[,13])}
 Positionp6<-which(goodones[,14] %in% TRUE)
-if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))}
+if (length(Positionp6)<3){Positionp6<-match(c(bareSDs[,14][order(bareSDs[,14])[19:20]]),bareSDs[,14])}
 Positionp7<-which(goodones[,15] %in% TRUE)
-if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))}
+if (length(Positionp7)<3){Positionp7<-match(c(bareSDs[,15][order(bareSDs[,15])[19:20]]),bareSDs[,15])}
+
 
 aa_props2 <- c("1"="A", "2"="C", "3"="D", "4"="E", "5"="F", "6"="G", "7"="H", "8"="I", "9"="K", "10"="L", "11"="M", "12"="N",
                "13"="P", "14"="Q", "15"="R", "16"="S", "17"="T", "18"="V", "19"="W", "20"="Y")
@@ -1714,66 +1778,79 @@
 AblThresh<-as.numeric(Abl[24,1])
 AblTrueThresh<-((AblThresh*AblNorm)/(100-AblThresh))
 AblActive<-unlist(AblGeneratedScores)>AblTrueThresh
+if (TodaysKinase=="ABL"){AblActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 ArgNorm<-1/as.numeric(Arg[22,1])
 ArgThresh<-as.numeric(Arg[24,1])
 ArgTrueThresh<-((ArgThresh*ArgNorm)/(100-ArgThresh))
 ArgActive<-unlist(ArgGeneratedScores)>ArgTrueThresh
+if (TodaysKinase=="ARG"){ArgActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 BtkNorm<-1/as.numeric(Btk[22,1])
 BtkThresh<-as.numeric(Btk[24,1])
 BtkTrueThresh<-((BtkThresh*BtkNorm)/(100-BtkThresh))
 BtkActive<-unlist(BtkGeneratedScores)>BtkTrueThresh
+if (TodaysKinase=="BTK"){BtkActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 CskNorm<-1/as.numeric(Csk[22,1])
 CskThresh<-as.numeric(Csk[24,1])
 CskTrueThresh<-((CskThresh*CskNorm)/(100-CskThresh))
 CskActive<-(CskGeneratedScores)>CskTrueThresh
+if (TodaysKinase=="CSK"){CskActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 FynNorm<-1/as.numeric(Fyn[22,1])
 FynThresh<-as.numeric(Fyn[24,1])
 FynTrueThresh<-((FynThresh*FynNorm)/(100-FynThresh))
 FynActive<-unlist(FynGeneratedScores)>FynTrueThresh
+if (TodaysKinase=="FYN"){FynActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 HckNorm<-1/as.numeric(Hck[22,1])
 HckThresh<-as.numeric(Hck[24,1])
 HckTrueThresh<-((HckThresh*HckNorm)/(100-HckThresh))
 HckActive<-unlist(HckGeneratedScores)>HckTrueThresh
+if (TodaysKinase=="HCK"){HckActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 JAK2Norm<-1/as.numeric(JAK2[22,1])
 JAK2Thresh<-as.numeric(JAK2[24,1])
 JAK2TrueThresh<-((JAK2Thresh*JAK2Norm)/(100-JAK2Thresh))
 JAk2Active<-unlist(JAK2GeneratedScores)>JAK2TrueThresh
+if (TodaysKinase=="JAK2"){JAk2Active<-rep(0,times=nrow(GeneratedPeptides))}
 
 LckNorm<-1/as.numeric(Lck[22,1])
 LckThresh<-as.numeric(Lck[24,1])
 LckTrueThresh<-((LckThresh*LckNorm)/(100-LckThresh))
 LckActive<-unlist(LckGeneratedScores)>LckTrueThresh
+if (TodaysKinase=="LCK"){LckActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 LynNorm<-1/as.numeric(Lyn[22,1])
 LynThresh<-as.numeric(Lyn[24,1])
 LynTrueThresh<-((LynThresh*LynNorm)/(100-LynThresh))
 LynActive<-unlist(LynGeneratedScores)>LynTrueThresh
+if (TodaysKinase=="LYN"){LynActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 Pyk2Norm<-1/as.numeric(Pyk2[22,1])
 Pyk2Thresh<-as.numeric(Pyk2[24,1])
 Pyk2TrueThresh<-((Pyk2Thresh*Pyk2Norm)/(100-Pyk2Thresh))
 Pyk2Active<-unlist(Pyk2GeneratedScores)>Pyk2TrueThresh
+if (TodaysKinase=="PYK2"){Pyk2Active<-rep(0,times=nrow(GeneratedPeptides))}
 
 SrcNorm<-1/as.numeric(Src[22,1])
 SrcThresh<-as.numeric(Src[24,1])
 SrcTrueThresh<-((SrcThresh*SrcNorm)/(100-SrcThresh))
 SrcActive<-unlist(SrcGeneratedScores)>SrcTrueThresh
+if (TodaysKinase=="SRC"){SrcActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 SykNorm<-1/as.numeric(Syk[22,1])
 SykThresh<-as.numeric(Syk[24,1])
 SykTrueThresh<-((SykThresh*SykNorm)/(100-SykThresh))
 SykActive<-unlist(SykGeneratedScores)>SykTrueThresh
+if (TodaysKinase=="SYK"){SykActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 YesNorm<-1/as.numeric(Yes[22,1])
 YesThresh<-as.numeric(Yes[24,1])
 YesTrueThresh<-((YesThresh*YesNorm)/(100-YesThresh))
 YesActive<-unlist(YesGeneratedScores)>YesTrueThresh
+if (TodaysKinase=="YES"){YesActive<-rep(0,times=nrow(GeneratedPeptides))}
 
 AllActive<-AblActive+ArgActive+BtkActive+CskActive+FynActive+HckActive+JAk2Active+LckActive+LynActive+Pyk2Active+SrcActive+SykActive+YesActive
 #Btkactive+
@@ -1884,23 +1961,24 @@
 
 #create the MCC table
 
-threshold<-c(1:100)
-threshold<-order(threshold,decreasing = TRUE)
+threshold<-c(1:100,(1:9)/10,(1:9)/100,0,-.1)
+threshold<-threshold[order(threshold,decreasing = TRUE)]
+threshold
 
-Truepositives<-c(1:100)
-Falsenegatives<-c(1:100)
-Sensitivity<-c(1:100)
-TrueNegatives<-c(1:100)
-FalsePositives<-c(1:100)
-Specificity<-c(1:100)
-Accuracy<-c(1:100)
-MCC<-c(1:100)
-EER<-c(1:100)
+Truepositives<-c(1:120)
+Falsenegatives<-c(1:120)
+Sensitivity<-c(1:120)
+TrueNegatives<-c(1:120)
+FalsePositives<-c(1:120)
+Specificity<-c(1:120)
+Accuracy<-c(1:120)
+MCC<-c(1:120)
+EER<-c(1:120)
 
 #MAKE DAMN SURE THAT THE ACCESSION NUMBERS FOLLOW THE MOTIFS
 
-for (z in 1:100) {
-  thres<-101-z
+for (z in 1:120) {
+  thres<-threshold[z]
   Truepositives[z]<-length(PositiveWeirdScores[PositiveWeirdScores>=(thres)])
   Falsenegatives[z]<-nrow(positivesubstrates)-Truepositives[z]
   Sensitivity[z]<-Truepositives[z]/(Falsenegatives[z]+Truepositives[z])
@@ -1909,10 +1987,10 @@
   FalsePositives[z]<-nrow(NegativeSubstrateList)-TrueNegatives[z]
   Specificity[z]<-1-(TrueNegatives[z]/(FalsePositives[z]+TrueNegatives[z]))
   Accuracy[z]<-100*(Truepositives[z]+TrueNegatives[z])/(Falsenegatives[z]+FalsePositives[z]+TrueNegatives[z]+Truepositives[z])
-  MCC[z]<-((Truepositives[z]+TrueNegatives[z])-(Falsenegatives[z]+FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z])))
+  MCC[z]<-((Truepositives[z]*TrueNegatives[z])-(Falsenegatives[z]*FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z])))
   EER[z]<-.01*(((1-(Sensitivity[z]))*(Truepositives[z]+Falsenegatives[z]))+(Specificity[z]*(1-(Truepositives[z]+Falsenegatives[z]))))
 }
-Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,Specificity,Accuracy,MCC,EER)
+Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,Specificity,MCC,EER)
 
 positiveheader<-c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,"RPMS","PMS")
 positivewithscores<-rbind.data.frame(positiveheader,positivewithscores)
@@ -1931,4 +2009,5 @@
 
 # header<-colnames(RanksPeptides)
 # RanksPeptides<-rbind.data.frame(header,RanksPeptides)
+write.table(x="Off Target Kinase activity (your kinase of interest should have zeros here because it is ON-target)",file = FILENAME3,append = FALSE,row.names = FALSE,col.names = TRUE,sep = ",")
 write.table(RanksPeptides,file = FILENAME3,append = FALSE,row.names = FALSE,col.names = TRUE,sep = ",")