Repository 'kinatest_r_7_7testing'
hg clone https://toolshed.g2.bx.psu.edu/repos/jfb/kinatest_r_7_7testing

Changeset 10:de59605e960a (2018-02-08)
Previous changeset 9:f80306fc5d69 (2018-02-08) Next changeset 11:a36f9cce16a3 (2018-02-08)
Commit message:
Uploaded
added:
Kinatest-R_part1.R
b
diff -r f80306fc5d69 -r de59605e960a Kinatest-R_part1.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Kinatest-R_part1.R Thu Feb 08 14:51:06 2018 -0500
[
b'@@ -0,0 +1,1114 @@\n+\r\n+ImportedSubstrateList<- read.csv("input1", stringsAsFactors=FALSE)\r\n+NegativeSubstrateList<- read.csv("input2", stringsAsFactors=FALSE)\r\n+SubstrateBackgroundFrequency<- read.csv("input3", stringsAsFactors=FALSE)\r\n+\r\n+ScreenerFilename<-"screener"\r\n+\r\n+\r\n+\r\n+FILENAME<-"output1.csv"\r\n+FILENAME2<-"output2.csv"\r\n+FILENAME3<-"output3.csv"\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+OutputMatrix<-"KinaseMatrix.csv"\r\n+CharacterizationTable<-"CharacterizationTableForThisKinase.csv"\r\n+SDtable<-"SDtableforthisKinase"\r\n+SiteSelectivityTable<-"SiteSelectivityForThisKinase"\r\n+\r\n+\r\n+\r\n+substrates<-matrix(rep("A",times=((nrow(ImportedSubstrateList)-1)*15)),ncol = 15)\r\n+#SeqsToBeScored<-"asdasd"\r\n+  \r\n+for (i in 2:nrow(ImportedSubstrateList))\r\n+{\r\n+  substratemotif<-ImportedSubstrateList[i,4:18]\r\n+  substratemotif[8]<-"Y"\r\n+  #substratemotif<-paste(substratemotif,sep = "",collapse = "")\r\n+  j=i-1\r\n+  substratemotif<-unlist(substratemotif)\r\n+  substrates[j,1:15]<-substratemotif\r\n+}\r\n+\r\n+# SpacesToOs<-c(""="O",)\r\n+# substrates<-SpacesToOs[substrates]\r\n+\r\n+SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency),2]\r\n+\r\n+if(2==2){\r\n+Amean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE)\r\n+Cmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE)\r\n+Dmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE)\r\n+Emean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE)\r\n+Fmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),6]), na.rm=TRUE)\r\n+Gmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),7]), na.rm=TRUE)\r\n+Hmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),8]), na.rm=TRUE)\r\n+Imean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),9]), na.rm=TRUE)\r\n+Kmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),10]), na.rm=TRUE)\r\n+Lmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),11]), na.rm=TRUE)\r\n+Mmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),12]), na.rm=TRUE)\r\n+Nmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),13]), na.rm=TRUE)\r\n+Pmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),14]), na.rm=TRUE)\r\n+Qmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),15]), na.rm=TRUE)\r\n+Rmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),16]), na.rm=TRUE)\r\n+Smean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),17]), na.rm=TRUE)\r\n+Tmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),18]), na.rm=TRUE)\r\n+Vmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),19]), na.rm=TRUE)\r\n+Wmean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),20]), na.rm=TRUE)\r\n+Ymean<-mean(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),21]), na.rm=TRUE)\r\n+\r\n+AllMeans<-c(Amean,Cmean,Dmean,Emean,Fmean,Gmean,Hmean,Imean,Kmean,Lmean,Mmean,Nmean,Pmean,Qmean,Rmean,Smean,Tmean,Vmean,Wmean,Ymean)\r\n+\r\n+Asd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),2]), na.rm=TRUE)\r\n+Csd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),3]), na.rm=TRUE)\r\n+Dsd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),4]), na.rm=TRUE)\r\n+Esd<-sd(as.numeric(SubstrateBackgroundFrequency[1:(nrow(SubstrateBackgroundFrequency)),5]), na.rm=TRUE)\r\n+Fsd<-sd(as.numeric(SubstrateBack'..b'\r\n+# EPMtable[14,]<-(PositionTable[14,]/(PositionTable[14,]*.01*Qmean))\r\n+# EPMtable[15,]<-(PositionTable[15,]/(PositionTable[15,]*.01*Rmean))\r\n+# EPMtable[16,]<-(PositionTable[16,]/(PositionTable[16,]*.01*Smean))\r\n+# EPMtable[17,]<-(PositionTable[17,]/(PositionTable[17,]*.01*Tmean))\r\n+# EPMtable[18,]<-(PositionTable[18,]/(PositionTable[18,]*.01*Vmean))\r\n+# EPMtable[19,]<-(PositionTable[19,]/(PositionTable[19,]*.01*Wmean))\r\n+# EPMtable[20,]<-(PositionTable[20,]/(PositionTable[20,]*.01*Ymean))\r\n+\r\n+columns<-c(length(Column1)-sum(Column1==""),\r\n+           length(Column2)-sum(Column2==""),\r\n+           length(Column3)-sum(Column3==""),\r\n+           length(Column4)-sum(Column4==""),\r\n+           length(Column5)-sum(Column5==""),\r\n+           length(Column6)-sum(Column6==""),\r\n+           length(Column7)-sum(Column7==""),\r\n+           length(Column8)-sum(Column8==""),\r\n+           length(Column9)-sum(Column9==""),\r\n+           length(Column10)-sum(Column10==""),\r\n+           length(Column11)-sum(Column11==""),\r\n+           length(Column12)-sum(Column12==""),\r\n+           length(Column13)-sum(Column13==""),\r\n+           length(Column14)-sum(Column14==""),\r\n+           length(Column15)-sum(Column15==""))\r\n+\r\n+for (z in 1:15) {\r\n+  for (y in 1:20) {\r\n+    if (PositionTable[y,z]>0){\r\n+      EPMtable[y,z]<-PositionTable[y,z]/((columns[z]*.01*AllMeans[y]))\r\n+    }\r\n+    if (PositionTable[y,z]==0){\r\n+      EPMtable[y,z]<-(1/columns[z])/((columns[z]*.01*AllMeans[y]))\r\n+    }\r\n+  }\r\n+}\r\n+#here I created the endogenous probability matrix\r\n+#now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+# write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n+# write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n+# write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n+# write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n+# write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)\r\n+\r\n+NormalizationScore<-c("Normalization Score",NormalizationScore)\r\n+\r\n+write.table(x=c("SD Table"),file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE)\r\n+write.table(SDtable,file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE)\r\n+write.table(x=c("Percent Table"),file=FILENAME,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE)\r\n+write.table(PercentTable,file=FILENAME, append = TRUE,sep=",",row.names = FALSE, col.names = FALSE)\r\n+\r\n+EPMtableu<-EPMtable\r\n+HeaderSD<-c(-7:7)\r\n+EPMtableu<-rbind(HeaderSD,EPMtableu)\r\n+row.names(EPMtableu)<-NULL\r\n+EPMtableu<-data.frame(SetOfAAs,EPMtableu)\r\n+\r\n+write.table("Site Selectivity Matrix", file = FILENAME2, append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE)\r\n+SelectivityHeader=matrix(data = c("Position",-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7),nrow = 1)\r\n+head<-matrix(data=rep(" ",times=16),nrow = 1)\r\n+SelectivityHeader<-rbind(head,SelectivityHeader)\r\n+\r\n+write.table(SelectivityHeader, file = FILENAME2, append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE)\r\n+#colnames(SelectivitySheet)<-c("-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7")\r\n+write.table(SelectivitySheet,file = FILENAME2, append = TRUE,sep = ",",row.names = TRUE, col.names = FALSE)\r\n+write.table(x=c("Endogenous Probability Matrix"),file=FILENAME2,append = TRUE,sep=",", row.names = FALSE, col.names = FALSE)\r\n+write.table(EPMtableu,file = FILENAME2, append = TRUE,sep = ",",row.names = FALSE, col.names = FALSE)\r\n+write.table(NormalizationScore, file = FILENAME2, append = TRUE,sep = ",",row.names = FALSE, col.names = FALSE)\r\n+\r\n'