Repository 'kinatest_r_7_7testing'
hg clone https://toolshed.g2.bx.psu.edu/repos/jfb/kinatest_r_7_7testing

Changeset 14:da1012f014bd (2018-02-08)
Previous changeset 13:d71eb1d66a88 (2018-02-08) Next changeset 15:36ef4ff2d130 (2018-02-08)
Commit message:
Uploaded
added:
kinatestid_r/Kinatest-R.R
removed:
kinatestid_r/Kinatest-R_part1.R
kinatestid_r/Kinatest-R_part2.R
b
diff -r d71eb1d66a88 -r da1012f014bd kinatestid_r/Kinatest-R.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kinatestid_r/Kinatest-R.R Thu Feb 08 15:29:56 2018 -0500
[
b'@@ -0,0 +1,1934 @@\n+\r\n+ImportedSubstrateList<- read.csv("input1", stringsAsFactors=FALSE)\r\n+NegativeSubstrateList<- read.csv("input2", stringsAsFactors=FALSE)\r\n+SubstrateBackgroundFrequency<- read.csv("input3", stringsAsFactors=FALSE)\r\n+\r\n+ScreenerFilename<-"screener"\r\n+\r\n+\r\n+\r\n+FILENAME<-"output1.csv"\r\n+FILENAME2<-"output2.csv"\r\n+FILENAME3<-"output3.csv"\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+OutputMatrix<-"KinaseMatrix.csv"\r\n+CharacterizationTable<-"CharacterizationTableForThisKinase.csv"\r\n+SDtable<-"SDtableforthisKinase"\r\n+SiteSelectivityTable<-"SiteSelectivityForThisKinase"\r\n+\r\n+\r\n+\r\n+substrates<-matrix(rep("A",times=((nrow(ImportedSubstrateList)-1)*15)),ncol = 15)\r\n+#SeqsToBeScored<-"asdasd"\r\n+  \r\n+for (i in 2:nrow(ImportedSubstrateList))\r\n+{\r\n+  substratemotif<-ImportedSubstrateList[i,4:18]\r\n+  substratemotif[8]<-"Y"\r\n+  #substratemotif<-paste(substratemotif,sep = "",collapse = "")\r\n+  j=i-1\r\n+  substratemotif<-unlist(substratemotif)\r\n+  substrates[j,1:15]<-substratemotif\r\n+}\r\n+\r\n+# SpacesToOs<-c(""="O",)\r\n+# substrates<-SpacesToOs[substrates]\r\n+\r\n+SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency),2]\r\n+\r\n+if(2==2){\r\n+Amean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE)\r\n+Cmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE)\r\n+Dmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE)\r\n+Emean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE)\r\n+Fmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),6]), na.rm=TRUE)\r\n+Gmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),7]), na.rm=TRUE)\r\n+Hmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),8]), na.rm=TRUE)\r\n+Imean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),9]), na.rm=TRUE)\r\n+Kmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),10]), na.rm=TRUE)\r\n+Lmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),11]), na.rm=TRUE)\r\n+Mmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),12]), na.rm=TRUE)\r\n+Nmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),13]), na.rm=TRUE)\r\n+Pmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),14]), na.rm=TRUE)\r\n+Qmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),15]), na.rm=TRUE)\r\n+Rmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),16]), na.rm=TRUE)\r\n+Smean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),17]), na.rm=TRUE)\r\n+Tmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),18]), na.rm=TRUE)\r\n+Vmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),19]), na.rm=TRUE)\r\n+Wmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),20]), na.rm=TRUE)\r\n+Ymean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),21]), na.rm=TRUE)\r\n+\r\n+AllMeans<-c(Amean,Cmean,Dmean,Emean,Fmean,Gmean,Hmean,Imean,Kmean,Lmean,Mmean,Nmean,Pmean,Qmean,Rmean,Smean,Tmean,Vmean,Wmean,Ymean)\r\n+\r\n+Asd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE)\r\n+Csd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE)\r\n+Dsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE)\r\n+Esd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE)\r\n+Fsd<-sd(as.numeric(SubstrateBack'..b'eric(Scoringpeptide[11]),13]*\r\n+    ThisKinTable[as.numeric(Scoringpeptide[12]),14]*ThisKinTable[as.numeric(Scoringpeptide[13]),15]*ThisKinTable[as.numeric(Scoringpeptide[14]),16]*ThisKinTable[as.numeric(Scoringpeptide[15]),17]\r\n+  \r\n+  PositiveScores[v]<-ThisKinTableScore\r\n+  ThisKinTableScore<-(ThisKinTableScore/(ThisKinTableScore+1/as.numeric(NormalizationScore[2])))\r\n+  PositiveWeirdScores[v]<-ThisKinTableScore*100\r\n+}\r\n+\r\n+positivesubstrates<-ImportedSubstrateList[,4:18]\r\n+positivewithscores<-cbind.data.frame(positivesubstrates,PositiveScores,PositiveWeirdScores)\r\n+\r\n+\r\n+#write down the transient transfection SOP and what we will be doing with them\r\n+#write down the vector names I will be using\r\n+#write down something about transforming bacteria and with what\r\n+\r\n+#90% whatevernness\r\n+# TPninetyone<-length(PositiveWeirdScores[PositiveWeirdScores>=0.91])\r\n+# Senseninetyone<-TPninetyone/nrow(positivesubstrates)\r\n+# \r\n+# TNninetyone<-length(NegativeWeirdScores[NegativeWeirdScores<91])\r\n+# Specninetyone<-TNninetyone/100\r\n+\r\n+#create the MCC table\r\n+\r\n+threshold<-c(1:100)\r\n+threshold<-order(threshold,decreasing = TRUE)\r\n+\r\n+Truepositives<-c(1:100)\r\n+Falsenegatives<-c(1:100)\r\n+Sensitivity<-c(1:100)\r\n+TrueNegatives<-c(1:100)\r\n+FalsePositives<-c(1:100)\r\n+Specificity<-c(1:100)\r\n+Accuracy<-c(1:100)\r\n+MCC<-c(1:100)\r\n+EER<-c(1:100)\r\n+\r\n+#MAKE DAMN SURE THAT THE ACCESSION NUMBERS FOLLOW THE MOTIFS\r\n+\r\n+for (z in 1:100) {\r\n+  thres<-101-z\r\n+  Truepositives[z]<-length(PositiveWeirdScores[PositiveWeirdScores>=(thres)])\r\n+  Falsenegatives[z]<-nrow(positivesubstrates)-Truepositives[z]\r\n+  Sensitivity[z]<-Truepositives[z]/(Falsenegatives[z]+Truepositives[z])\r\n+  TrueNegatives[z]<-length(NegativeWeirdScores[NegativeWeirdScores<(thres)])\r\n+  # at thresh 100 this should be 0, because it is total minus true negatives\r\n+  FalsePositives[z]<-nrow(NegativeSubstrateList)-TrueNegatives[z]\r\n+  Specificity[z]<-1-(TrueNegatives[z]/(FalsePositives[z]+TrueNegatives[z]))\r\n+  Accuracy[z]<-100*(Truepositives[z]+TrueNegatives[z])/(Falsenegatives[z]+FalsePositives[z]+TrueNegatives[z]+Truepositives[z])\r\n+  MCC[z]<-((Truepositives[z]+TrueNegatives[z])-(Falsenegatives[z]+FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z])))\r\n+  EER[z]<-.01*(((1-(Sensitivity[z]))*(Truepositives[z]+Falsenegatives[z]))+(Specificity[z]*(1-(Truepositives[z]+Falsenegatives[z]))))\r\n+}\r\n+Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,Specificity,Accuracy,MCC,EER)\r\n+\r\n+positiveheader<-c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,"RPMS","PMS")\r\n+positivewithscores<-rbind.data.frame(positiveheader,positivewithscores)\r\n+\r\n+negativeheader<-c("Substrate","RPMS","PMS")\r\n+colnames(NegativeWithScores)<-negativeheader\r\n+\r\n+# write.xlsx(NegativeWithScores,file = FILENAME, sheetName = "Negative Sequences Scored",col.names = TRUE,row.names = FALSE,append = TRUE)\r\n+# write.xlsx(Characterization,file = FILENAME,sheetName = "Characterization Table",col.names = TRUE,row.names = FALSE,append = TRUE)\r\n+# write.xlsx(RanksPeptides,file = FILENAME,sheetName = "Ranked Generated Peptides",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n+# write.xlsx(positivewithscores,file = FILENAME, sheetName = "Positive Sequences Scored",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n+write.table(x=c("Characterzation Table"),file = FILENAME2, col.names = FALSE,row.names = FALSE, append = TRUE,sep = ",")\r\n+header<-colnames(Characterization)\r\n+Characterization<-rbind.data.frame(header,Characterization)\r\n+write.table(Characterization,file = FILENAME2, col.names = FALSE,row.names = FALSE, append = TRUE,sep = ",")\r\n+\r\n+# header<-colnames(RanksPeptides)\r\n+# RanksPeptides<-rbind.data.frame(header,RanksPeptides)\r\n+write.table(RanksPeptides,file = FILENAME3,append = FALSE,row.names = FALSE,col.names = TRUE,sep = ",")\r\n'
b
diff -r d71eb1d66a88 -r da1012f014bd kinatestid_r/Kinatest-R_part1.R
--- a/kinatestid_r/Kinatest-R_part1.R Thu Feb 08 15:10:42 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,1114 +0,0 @@\n-\r\n-ImportedSubstrateList<- read.csv("input1", stringsAsFactors=FALSE)\r\n-NegativeSubstrateList<- read.csv("input2", stringsAsFactors=FALSE)\r\n-SubstrateBackgroundFrequency<- read.csv("input3", stringsAsFactors=FALSE)\r\n-\r\n-ScreenerFilename<-"screener"\r\n-\r\n-\r\n-\r\n-FILENAME<-"output1"\r\n-FILENAME2<-"output2"\r\n-FILENAME3<-"output3"\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-OutputMatrix<-"KinaseMatrix.csv"\r\n-CharacterizationTable<-"CharacterizationTableForThisKinase.csv"\r\n-SDtable<-"SDtableforthisKinase"\r\n-SiteSelectivityTable<-"SiteSelectivityForThisKinase"\r\n-\r\n-\r\n-\r\n-substrates<-matrix(rep("A",times=((nrow(ImportedSubstrateList)-1)*15)),ncol = 15)\r\n-#SeqsToBeScored<-"asdasd"\r\n-  \r\n-for (i in 2:nrow(ImportedSubstrateList))\r\n-{\r\n-  substratemotif<-ImportedSubstrateList[i,4:18]\r\n-  substratemotif[8]<-"Y"\r\n-  #substratemotif<-paste(substratemotif,sep = "",collapse = "")\r\n-  j=i-1\r\n-  substratemotif<-unlist(substratemotif)\r\n-  substrates[j,1:15]<-substratemotif\r\n-}\r\n-\r\n-# SpacesToOs<-c(""="O",)\r\n-# substrates<-SpacesToOs[substrates]\r\n-\r\n-SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency),2]\r\n-\r\n-if(2==2){\r\n-Amean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE)\r\n-Cmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE)\r\n-Dmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE)\r\n-Emean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE)\r\n-Fmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),6]), na.rm=TRUE)\r\n-Gmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),7]), na.rm=TRUE)\r\n-Hmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),8]), na.rm=TRUE)\r\n-Imean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),9]), na.rm=TRUE)\r\n-Kmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),10]), na.rm=TRUE)\r\n-Lmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),11]), na.rm=TRUE)\r\n-Mmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),12]), na.rm=TRUE)\r\n-Nmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),13]), na.rm=TRUE)\r\n-Pmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),14]), na.rm=TRUE)\r\n-Qmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),15]), na.rm=TRUE)\r\n-Rmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),16]), na.rm=TRUE)\r\n-Smean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),17]), na.rm=TRUE)\r\n-Tmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),18]), na.rm=TRUE)\r\n-Vmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),19]), na.rm=TRUE)\r\n-Wmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),20]), na.rm=TRUE)\r\n-Ymean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),21]), na.rm=TRUE)\r\n-\r\n-AllMeans<-c(Amean,Cmean,Dmean,Emean,Fmean,Gmean,Hmean,Imean,Kmean,Lmean,Mmean,Nmean,Pmean,Qmean,Rmean,Smean,Tmean,Vmean,Wmean,Ymean)\r\n-\r\n-Asd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE)\r\n-Csd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE)\r\n-Dsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE)\r\n-Esd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE)\r\n-Fsd<-sd(as.numeric(SubstrateBackgroundFreque'..b'\r\n-# EPMtable[14,]<-(PositionTable[14,]/(PositionTable[14,]*.01*Qmean))\r\n-# EPMtable[15,]<-(PositionTable[15,]/(PositionTable[15,]*.01*Rmean))\r\n-# EPMtable[16,]<-(PositionTable[16,]/(PositionTable[16,]*.01*Smean))\r\n-# EPMtable[17,]<-(PositionTable[17,]/(PositionTable[17,]*.01*Tmean))\r\n-# EPMtable[18,]<-(PositionTable[18,]/(PositionTable[18,]*.01*Vmean))\r\n-# EPMtable[19,]<-(PositionTable[19,]/(PositionTable[19,]*.01*Wmean))\r\n-# EPMtable[20,]<-(PositionTable[20,]/(PositionTable[20,]*.01*Ymean))\r\n-\r\n-columns<-c(length(Column1)-sum(Column1==""),\r\n-           length(Column2)-sum(Column2==""),\r\n-           length(Column3)-sum(Column3==""),\r\n-           length(Column4)-sum(Column4==""),\r\n-           length(Column5)-sum(Column5==""),\r\n-           length(Column6)-sum(Column6==""),\r\n-           length(Column7)-sum(Column7==""),\r\n-           length(Column8)-sum(Column8==""),\r\n-           length(Column9)-sum(Column9==""),\r\n-           length(Column10)-sum(Column10==""),\r\n-           length(Column11)-sum(Column11==""),\r\n-           length(Column12)-sum(Column12==""),\r\n-           length(Column13)-sum(Column13==""),\r\n-           length(Column14)-sum(Column14==""),\r\n-           length(Column15)-sum(Column15==""))\r\n-\r\n-for (z in 1:15) {\r\n-  for (y in 1:20) {\r\n-    if (PositionTable[y,z]>0){\r\n-      EPMtable[y,z]<-PositionTable[y,z]/((columns[z]*.01*AllMeans[y]))\r\n-    }\r\n-    if (PositionTable[y,z]==0){\r\n-      EPMtable[y,z]<-(1/columns[z])/((columns[z]*.01*AllMeans[y]))\r\n-    }\r\n-  }\r\n-}\r\n-#here I created the endogenous probability matrix\r\n-#now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-# write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n-# write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n-# write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n-# write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n-# write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n-\r\n-NormalizationScore<-c("Normalization Score",NormalizationScore)\r\n-\r\n-write.table(x=c("SD Table"),file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE)\r\n-write.table(SDtable,file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE)\r\n-write.table(x=c("Percent Table"),file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE)\r\n-write.table(PercentTable,file=FILENAME, append = TRUE,sep=",",row.names = FALSE, col.names = FALSE)\r\n-\r\n-EPMtableu<-EPMtable\r\n-HeaderSD<-c(-7:7)\r\n-EPMtableu<-rbind(HeaderSD,EPMtableu)\r\n-row.names(EPMtableu)<-NULL\r\n-EPMtableu<-data.frame(SetOfAAs,EPMtableu)\r\n-\r\n-write.table("Site Selectivity Matrix", file = FILENAME2, append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE)\r\n-SelectivityHeader=matrix(data = c("Position",-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7),nrow = 1)\r\n-head<-matrix(data=rep(" ",times=16),nrow = 1)\r\n-SelectivityHeader<-rbind(head,SelectivityHeader)\r\n-\r\n-write.table(SelectivityHeader, file = FILENAME2, append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE)\r\n-#colnames(SelectivitySheet)<-c("-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7")\r\n-write.table(SelectivitySheet,file = FILENAME2, append = TRUE,sep = ",",row.names = TRUE, col.names = FALSE)\r\n-write.table(x=c("Endogenous Probability Matrix"),file=FILENAME2,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE)\r\n-write.table(EPMtableu,file = FILENAME2, append = TRUE,sep = ",",row.names = FALSE, col.names = FALSE)\r\n-write.table(NormalizationScore, file = FILENAME2, append = TRUE,sep = ",",row.names = FALSE, col.names = FALSE)\r\n-\r\n'
b
diff -r d71eb1d66a88 -r da1012f014bd kinatestid_r/Kinatest-R_part2.R
--- a/kinatestid_r/Kinatest-R_part2.R Thu Feb 08 15:10:42 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,790 +0,0 @@\n-#test myself: this script should take in  amino acids for each of the 9 positions and give out every single combination of those AAs\r\n-\r\n-#need to do following: fix it so that the accession numbers stay with the substrates,\r\n-#also the neg false constant is totaly unphos\'d Ys found by FASTA-2-CSV system# uniprot\r\n-\r\n-#HOW MANY: IF THERE\'S two aas in each position you get 2^9, so I assume the numbers are:\r\n-#(number in position-4)*(number in position -3)*(number in position -2)...=total\r\n-# require(rJava)\r\n-# require(xlsxjars)\r\n-# require(xlsx)\r\n-# # require(readxl)\r\n-\r\n-#View(SDtable)\r\n-bareSDs<-SDtable[2:21,2:16]\r\n-goodones<-bareSDs>2\r\n-\r\n-Positionm7<-which(goodones[,1] %in% TRUE)\r\n-if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))}\r\n-Positionm6<-which(goodones[,2] %in% TRUE)\r\n-if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))}\r\n-Positionm5<-which(goodones[,3] %in% TRUE)\r\n-if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))}\r\n-Positionm4<-which(goodones[,4] %in% TRUE)\r\n-if (length(Positionm4)<1){Positionm4<-which(bareSDs[,4]==max(bareSDs[,4]))}\r\n-Positionm3<-which(goodones[,5] %in% TRUE)\r\n-if (length(Positionm3)<1){Positionm3<-which(bareSDs[,5]==max(bareSDs[,5]))}\r\n-Positionm2<-which(goodones[,6] %in% TRUE)\r\n-if (length(Positionm2)<1){Positionm2<-which(bareSDs[,6]==max(bareSDs[,6]))}\r\n-Positionm1<-which(goodones[,7] %in% TRUE)\r\n-if (length(Positionm1)<1){Positionm1<-which(bareSDs[,7]==max(bareSDs[,7]))}\r\n-\r\n-Positiond0<-which(goodones[,8] %in% TRUE)\r\n-if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))}\r\n-\r\n-Positionp1<-which(goodones[,9] %in% TRUE)\r\n-if (length(Positionp1)<1){Positionp1<-which(bareSDs[,9]==max(bareSDs[,9]))}\r\n-Positionp2<-which(goodones[,10] %in% TRUE)\r\n-if (length(Positionp2)<1){Positionp2<-which(bareSDs[,10]==max(bareSDs[,10]))}\r\n-Positionp3<-which(goodones[,11] %in% TRUE)\r\n-if (length(Positionp3)<1){Positionp3<-which(bareSDs[,11]==max(bareSDs[,11]))}\r\n-Positionp4<-which(goodones[,12] %in% TRUE)\r\n-if (length(Positionp4)<1){Positionp4<-which(bareSDs[,12]==max(bareSDs[,12]))}\r\n-Positionp5<-which(goodones[,13] %in% TRUE)\r\n-if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))}\r\n-Positionp6<-which(goodones[,14] %in% TRUE)\r\n-if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))}\r\n-Positionp7<-which(goodones[,15] %in% TRUE)\r\n-if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))}\r\n-\r\n-aa_props2 <- c("1"="A", "2"="C", "3"="D", "4"="E", "5"="F", "6"="G", "7"="H", "8"="I", "9"="K", "10"="L", "11"="M", "12"="N",\r\n-               "13"="P", "14"="Q", "15"="R", "16"="S", "17"="T", "18"="V", "19"="W", "20"="Y")\r\n-\r\n-Positionm7<-sapply(Positionm7, function (x) aa_props2[x])\r\n-Positionm6<-sapply(Positionm6, function (x) aa_props2[x])\r\n-Positionm5<-sapply(Positionm5, function (x) aa_props2[x])\r\n-Positionm4<-sapply(Positionm4, function (x) aa_props2[x])\r\n-Positionm3<-sapply(Positionm3, function (x) aa_props2[x])\r\n-Positionm2<-sapply(Positionm2, function (x) aa_props2[x])\r\n-Positionm1<-sapply(Positionm1, function (x) aa_props2[x])\r\n-Positiond0<-sapply(Positiond0, function (x) aa_props2[x])\r\n-Positionp1<-sapply(Positionp1, function (x) aa_props2[x])\r\n-Positionp2<-sapply(Positionp2, function (x) aa_props2[x])\r\n-Positionp3<-sapply(Positionp3, function (x) aa_props2[x])\r\n-Positionp4<-sapply(Positionp4, function (x) aa_props2[x])\r\n-Positionp5<-sapply(Positionp5, function (x) aa_props2[x])\r\n-Positionp6<-sapply(Positionp6, function (x) aa_props2[x])\r\n-Positionp7<-sapply(Positionp7, function (x) aa_props2[x])\r\n-\r\n-\r\n-# Positionm7<-c("D","H","N","V")\r\n-# Positionm6<-c("E","V")\r\n-# Positionm5<-c("D","H")\r\n-# Positionm4<-c("D","N")\r\n-# Positionm3<-c("D","E","F","Q")\r\n-# Positionm2<-c("D","N","Q","S")\r\n-# Positionm1<-c("F","I","L")\r\n-# Positiond0<-c("Y")\r\n-# Positionp1<-c("A","E")\r\n-# Positionp2<-c("T","S","Q","E")\r\n-# Positionp3<-c("V")\r\n-# Positionp4<-c'..b'oringpeptide[11]),13]*\r\n-    ThisKinTable[as.numeric(Scoringpeptide[12]),14]*ThisKinTable[as.numeric(Scoringpeptide[13]),15]*ThisKinTable[as.numeric(Scoringpeptide[14]),16]*ThisKinTable[as.numeric(Scoringpeptide[15]),17]\r\n-  \r\n-  PositiveScores[v]<-ThisKinTableScore\r\n-  ThisKinTableScore<-(ThisKinTableScore/(ThisKinTableScore+1/as.numeric(NormalizationScore[2])))\r\n-  PositiveWeirdScores[v]<-ThisKinTableScore*100\r\n-}\r\n-\r\n-positivesubstrates<-ImportedSubstrateList[,4:18]\r\n-positivewithscores<-cbind.data.frame(positivesubstrates,PositiveScores,PositiveWeirdScores)\r\n-\r\n-\r\n-#write down the transient transfection SOP and what we will be doing with them\r\n-#write down the vector names I will be using\r\n-#write down something about transforming bacteria and with what\r\n-\r\n-#90% whatevernness\r\n-# TPninetyone<-length(PositiveWeirdScores[PositiveWeirdScores>=0.91])\r\n-# Senseninetyone<-TPninetyone/nrow(positivesubstrates)\r\n-# \r\n-# TNninetyone<-length(NegativeWeirdScores[NegativeWeirdScores<91])\r\n-# Specninetyone<-TNninetyone/100\r\n-\r\n-#create the MCC table\r\n-\r\n-threshold<-c(1:100)\r\n-threshold<-order(threshold,decreasing = TRUE)\r\n-\r\n-Truepositives<-c(1:100)\r\n-Falsenegatives<-c(1:100)\r\n-Sensitivity<-c(1:100)\r\n-TrueNegatives<-c(1:100)\r\n-FalsePositives<-c(1:100)\r\n-Specificity<-c(1:100)\r\n-Accuracy<-c(1:100)\r\n-MCC<-c(1:100)\r\n-EER<-c(1:100)\r\n-\r\n-#MAKE DAMN SURE THAT THE ACCESSION NUMBERS FOLLOW THE MOTIFS\r\n-\r\n-for (z in 1:100) {\r\n-  thres<-101-z\r\n-  Truepositives[z]<-length(PositiveWeirdScores[PositiveWeirdScores>=(thres)])\r\n-  Falsenegatives[z]<-nrow(positivesubstrates)-Truepositives[z]\r\n-  Sensitivity[z]<-Truepositives[z]/(Falsenegatives[z]+Truepositives[z])\r\n-  TrueNegatives[z]<-length(NegativeWeirdScores[NegativeWeirdScores<(thres)])\r\n-# at thresh 100 this should be 0, because it is total minus true negatives\r\n-  FalsePositives[z]<-nrow(NegativeSubstrateList)-TrueNegatives[z]\r\n-  Specificity[z]<-1-(TrueNegatives[z]/(FalsePositives[z]+TrueNegatives[z]))\r\n-  Accuracy[z]<-100*(Truepositives[z]+TrueNegatives[z])/(Falsenegatives[z]+FalsePositives[z]+TrueNegatives[z]+Truepositives[z])\r\n-  MCC[z]<-((Truepositives[z]+TrueNegatives[z])-(Falsenegatives[z]+FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z])))\r\n-  EER[z]<-.01*(((1-(Sensitivity[z]))*(Truepositives[z]+Falsenegatives[z]))+(Specificity[z]*(1-(Truepositives[z]+Falsenegatives[z]))))\r\n-}\r\n-Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,Specificity,Accuracy,MCC,EER)\r\n-\r\n-positiveheader<-c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,"RPMS","PMS")\r\n-positivewithscores<-rbind.data.frame(positiveheader,positivewithscores)\r\n-\r\n-negativeheader<-c("Substrate","RPMS","PMS")\r\n-colnames(NegativeWithScores)<-negativeheader\r\n-\r\n-# write.xlsx(NegativeWithScores,file = FILENAME, sheetName = "Negative Sequences Scored",col.names = TRUE,row.names = FALSE,append = TRUE)\r\n-# write.xlsx(Characterization,file = FILENAME,sheetName = "Characterization Table",col.names = TRUE,row.names = FALSE,append = TRUE)\r\n-# write.xlsx(RanksPeptides,file = FILENAME,sheetName = "Ranked Generated Peptides",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n-# write.xlsx(positivewithscores,file = FILENAME, sheetName = "Positive Sequences Scored",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n-write.table(x=c("Characterzation Table"),file = FILENAME2, col.names = FALSE,row.names = FALSE, append = TRUE,sep = ",")\r\n-header<-colnames(Characterization)\r\n-Characterization<-rbind.data.frame(header,Characterization)\r\n-write.table(Characterization,file = FILENAME2, col.names = FALSE,row.names = FALSE, append = TRUE,sep = ",")\r\n-\r\n-# header<-colnames(RanksPeptides)\r\n-# RanksPeptides<-rbind.data.frame(header,RanksPeptides)\r\n-write.table(RanksPeptides,file = FILENAME3,append = FALSE,row.names = FALSE,col.names = TRUE,sep = ",")\r\n-\r\n-\r\n-\r\n'