Repository 'difference_finder'
hg clone https://toolshed.g2.bx.psu.edu/repos/jfb/difference_finder

Changeset 6:8fa6b79a2f19 (2019-04-19)
Previous changeset 5:f375051b0fcd (2019-04-19) Next changeset 7:f2dd4121a4f1 (2019-04-19)
Commit message:
Uploaded
added:
all stuff/Difference finder for GalaxyP 4-18-2019.R
b
diff -r f375051b0fcd -r 8fa6b79a2f19 all stuff/Difference finder for GalaxyP 4-18-2019.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/Difference finder for GalaxyP 4-18-2019.R Fri Apr 19 16:41:00 2019 -0400
[
b'@@ -0,0 +1,351 @@\n+#I should make an SOP for this.  Problems we encountered: no x in the xY motif, and the kilodemon\r\n+#the output files have both Y and xY, they shouldn\'t why is that happening?  make it not happen\r\n+#make sure that accession numbers stay locked to each motif, somehow\r\n+#output should look just like the KALIP input\r\n+\r\n+#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps\r\n+FullMotifsOnly_questionmark<-"NO"\r\n+#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps\r\n+TruncatedMotifsOnly_questionmark<-"NO"\r\n+#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps)\r\n+Are_You_Looking_For_Commonality<-"NO"\r\n+\r\n+\r\n+#put the names of your input files here\r\n+FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE)\r\n+Firstsubbackfreq<- read.csv("SBF1.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+SecondSubstrateSet<- read.csv("S2.csv", stringsAsFactors=FALSE)\r\n+Secondsubbackfreq<- read.csv("SBF2.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+# ThirdSubstrateSet<- read.csv("Galaxy69-BTK_PLUS_R3_Substrates.csv", stringsAsFactors=FALSE)\r\n+# Thirdsubbackfreq<- read.csv("Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+#then put the names of your output files here\r\n+# Shared_motifs_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv"\r\n+# Shared_subbackfreq_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv"\r\n+\r\n+# Shared_motifs_table<-"Shared motifs 7-27-17.csv"\r\n+# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv"\r\n+\r\n+First_unshared_motifs_table<-"1RS.csv"\r\n+First_unshared_subbackfreq<-"1RSBF.csv"\r\n+\r\n+Second_unshared_motifs_table<-"2SR.csv"\r\n+Second_unshared_subbackfreq<-"2RSBF.csv"\r\n+\r\n+# Third_unshared_motifs_table<-"R3 subs.csv"\r\n+# Third_unshared_subbackfreq<-"R3 SBF.csv"\r\n+\r\n+#final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles.  I think I\'ll poke around\r\n+#other languages to see if any of them can do it.\r\n+####################################################################################################################################\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+# grepl(pattern = "S", x=asdf, ignore.case = TRUE)\r\n+\r\n+FirstCentralLetters<-FirstSubstrateSet[,11]\r\n+SecondCentralLetters<-SecondSubstrateSet[,11]\r\n+\r\n+FirstEsses<-sapply(FirstCentralLetters, grepl, pattern="S", ignore.case=TRUE)\r\n+FirstTees<-sapply(FirstCentralLetters, grepl, pattern="T", ignore.case=TRUE)\r\n+FirstWys<-sapply(FirstCentralLetters, grepl, pattern="Y", ignore.case=TRUE)\r\n+\r\n+SecondEsses<-sapply(SecondCentralLetters, grepl, pattern="S", ignore.case=TRUE)\r\n+SecondTees<-sapply(SecondCentralLetters, grepl, pattern="T", ignore.case=TRUE)\r\n+SecondWys<-sapply(SecondCentralLetters, grepl, pattern="Y", ignore.case=TRUE)\r\n+\r\n+FirstCentralLetters<-replace(FirstCentralLetters,FirstEsses,"xS")\r\n+FirstCentralLetters<-replace(FirstCentralLetters,FirstTees,"xT")\r\n+FirstCentralLetters<-replace(FirstCentralLetters,FirstWys,"xY")\r\n+\r\n+SecondCentralLetters<-replace(SecondCentralLetters,SecondEsses,"xS")\r\n+SecondCentralLetters<-replace(SecondCentralLetters,SecondTees,"xT")\r\n+SecondCentralLetters<-replace(SecondCentralLetters,SecondWys,"xY")\r\n+\r\n+FirstCentralLetters->FirstSubstrateSet[,11]\r\n+SecondCentralLetters->SecondSubstrateSet[,11]\r\n+\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+# better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two \r\n+# separate proteins thus two separate accession numbers?\r\n+# It should actually output the shared motif and BOTH accession numbers.  Right now it does not, it only maps out the second\r\n+# '..b'es(FTLoutputmatrix2)<-NULL\r\n+  rownames(FTLoutputmatrix2)<-NULL\r\n+  colnames(FLTheader)<-NULL\r\n+  rownames(FLTheader)<-NULL\r\n+  \r\n+  \r\n+  FirstCentralLettersAGAIN<-FTLoutputmatrix2[,11]\r\n+  \r\n+  FirstEsses<-sapply(FirstCentralLettersAGAIN, grepl, pattern="S", ignore.case=TRUE)\r\n+  FirstTees<-sapply(FirstCentralLettersAGAIN, grepl, pattern="T", ignore.case=TRUE)\r\n+  FirstWys<-sapply(FirstCentralLettersAGAIN, grepl, pattern="Y", ignore.case=TRUE)\r\n+  \r\n+  FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstEsses,"xS")\r\n+  FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstTees,"xT")\r\n+  FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstWys,"xY")\r\n+  \r\n+  FirstCentralLettersAGAIN->FTLoutputmatrix2[,11]\r\n+  \r\n+  FTLoutputmatrix2<-rbind(FLTheader,FTLoutputmatrix2)\r\n+  \r\n+  write.table(x=FTLoutputmatrix2,\r\n+              file=First_unshared_motifs_table,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  columnalheader<-c("Accession Numbers",as.character(Firstsubbackfreq[1:35,1]))\r\n+  columnalheader<-matrix(columnalheader,nrow = 1)\r\n+  write.table(x=columnalheader,\r\n+              file=First_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  write.table(x=FTLFinalMatrix,\r\n+              file=First_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  ############################################################################################################\r\n+  \r\n+  D835Youtputmatrix<-matrix(data=c(D835YmotifsFINAL,names(D835YmotifsFINAL)),ncol = 2)\r\n+  \r\n+  D835Yheader<-c("Substrate","Species","Reference","-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7","Phosphite")\r\n+  # D835Yheader<-unlist(D835Yheader)\r\n+  lefthandD835<-matrix(data = rep(NA,times=2*nrow(D835Youtputmatrix)),nrow=nrow(D835Youtputmatrix))\r\n+  righthandD835<-matrix(data = rep(NA,times=1*nrow(D835Youtputmatrix)),nrow=nrow(D835Youtputmatrix))\r\n+  D835Yaset<-D835Youtputmatrix[,2]\r\n+  D835meat<-sapply(D835Youtputmatrix[,1], strsplit, "")\r\n+  D835meat<-sapply(D835meat, unlist)\r\n+  colnames(D835meat)<-NULL\r\n+  D835meat<-t(D835meat)\r\n+  \r\n+  D835Youtputmatrix2<-cbind(lefthandD835,D835Yaset,D835meat,righthandD835)\r\n+  colnames(D835Youtputmatrix2)<-NULL\r\n+  rownames(D835Youtputmatrix2)<-NULL\r\n+  colnames(D835Yheader)<-NULL\r\n+  rownames(D835Yheader)<-NULL\r\n+  \r\n+  \r\n+  SecondCentralLettersAGAIN<-D835Youtputmatrix2[,11]\r\n+  \r\n+  SecondEsses<-sapply(SecondCentralLettersAGAIN, grepl, pattern="S", ignore.case=TRUE)\r\n+  SecondTees<-sapply(SecondCentralLettersAGAIN, grepl, pattern="T", ignore.case=TRUE)\r\n+  SecondWys<-sapply(SecondCentralLettersAGAIN, grepl, pattern="Y", ignore.case=TRUE)\r\n+  \r\n+  SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,SecondEsses,"xS")\r\n+  SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,SecondTees,"xT")\r\n+  SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,SecondWys,"xY")\r\n+  \r\n+  SecondCentralLettersAGAIN->D835Youtputmatrix2[,11]\r\n+  \r\n+  D835Youtputmatrix2<-rbind(D835Yheader,D835Youtputmatrix2)\r\n+  \r\n+  write.table(x=D835Youtputmatrix2,\r\n+              file=Second_unshared_motifs_table,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  columnalheader<-c("Accession Numbers",as.character(Firstsubbackfreq[1:35,1]))\r\n+  columnalheader<-matrix(columnalheader,nrow = 1)\r\n+  write.table(x=columnalheader,\r\n+              file=Second_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  write.table(x=D835YFinalMatrix,\r\n+              file=Second_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+}\r\n+\r\n'