Previous changeset 1:cf513786a758 (2016-03-15) Next changeset 3:8496e237d5f8 (2016-03-15) |
Commit message:
Uploaded |
added:
bubbles_v9_NSAF_natural_log.R |
b |
diff -r cf513786a758 -r e6e356e89fc2 bubbles_v9_NSAF_natural_log.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bubbles_v9_NSAF_natural_log.R Tue Mar 15 15:11:07 2016 -0400 |
[ |
b'@@ -0,0 +1,233 @@\n+rm(list=ls())\r\n+###################################################################################################\r\n+# R-code: Multi-bubble graph generation from SAINTexpress output\r\n+# Author: Brent Kuenzi\r\n+###################################################################################################\r\n+library(dplyr); library(tidyr); library(ggplot2)\r\n+###################################################################################################\r\n+### Run program ###\r\n+\r\n+## REQUIRED INPUT ##\r\n+# 1) listfile: SAINTexpress generated "list.txt" file\r\n+# 2) preyfile: SAINT pre-processing generated "prey.txt" file used to run SAINTexpress\r\n+## OPTIONAL INPUT ##\r\n+# 3) crapome: raw output from crapome Workflow 1 query (http://www.crapome.org)\r\n+# 4) color: bubble color (default = "red")\r\n+# - color= "crapome": color bubbles based on Crapome(%)\r\n+# - Also recognizes any color within R\'s built-in colors() vector\r\n+# 5) label: Adds gene name labels to bubbles within the "zoomed in" graphs (default = FALSE)\r\n+# 6) cutoff: Saintscore cutoff to be assigned for filtering the "zoomed in" graphs (default = 0.8)\r\n+###################################################################################################\r\n+main <- function(listfile, preyfile , crapome=FALSE, color="red", label=FALSE, cutoff=0.8, type="SC", inc_file = "None", exc_file = "None" ) {\r\n+ cutoff_check(cutoff)\r\n+ listfile <- list_type(listfile, inc_file, exc_file)\r\n+ if(type == "SC") {\r\n+ df <- merge_files_sc(listfile, preyfile, crapome)\r\n+ }\r\n+ if(type == "MQ") {\r\n+ df <- merge_files_mq(listfile, preyfile, crapome)\r\n+ }\r\n+ bubble_NSAF(df,color)\r\n+ bubble_SAINT(df,color)\r\n+ bubble_zoom_SAINT(df, color, label, cutoff)\r\n+ bubble_zoom_NSAF(df, color, label, cutoff)\r\n+ write.table(df,"output.txt",sep="\\t",quote=FALSE, row.names=FALSE)\r\n+}\r\n+\r\n+list_type <- function(df, inc_file, exc_file) {\r\n+ Saint <- read.delim(df, stringsAsFactors=FALSE)\r\n+ if (inc_file != "None") {\r\n+ if (exc_file == "None"){\r\n+ inc_prots <- read.delim(inc_file, sep=\'\\t\', header=FALSE, stringsAsFactors=FALSE)\r\n+ filtered_df = subset(Saint, Saint$Prey == inc_prots[,1])\r\n+ }\r\n+ else {\r\n+ inc_prots <- read.delim(inc_file, sep=\'\\t\', header=FALSE, stringsAsFactors=FALSE)\r\n+ exc_prots <- read.delim(exc_file, sep=\'\\t\', header=FALSE, stringsAsFactors=FALSE)\r\n+ filtered_df = subset(Saint, Saint$Prey == inc_prots[,1])\r\n+ filtered_df = subset(filtered_df, filtered_df$Prey != exc_prots[,1])\r\n+ }\r\n+ }\r\n+ else if (exc_file != "None") {\r\n+ exc_prots <- read.delim(exc_file, sep=\'\\t\', header=FALSE, stringsAsFactors=FALSE)\r\n+ filtered_df = subset(Saint, Saint$Prey != exc_prots[,1])\r\n+ }\r\n+ else {\r\n+ filtered_df = Saint\r\n+ }\r\n+ return(filtered_df)\r\n+ \r\n+}\r\n+###################################################################################################\r\n+# Merge input files and caculate Crapome(%) and NSAF for each protein for each bait\r\n+###################################################################################################\r\n+merge_files_mq <- function(SAINT, prey_DF, crapome=FALSE) {\r\n+ #SAINT <- read.table(SAINT_DF, sep=\'\\t\', header=TRUE)\r\n+ prey <- read.table(prey_DF, sep=\'\\t\', header=FALSE); colnames(prey) <- c("Prey", "Length", "PreyGene")\r\n+ DF <- merge(SAINT,prey)\r\n+ DF$SpecSum <- log2(DF$SpecSum)\r\n+ \r\n+ if(crapome!=FALSE) {\r\n+ crapome <- read.table(crapome, sep=\'\\t\', header=TRUE)\r\n+ colnames(crapome) <- c("Prey", "Symbol", "Num.of.Exp", "Ave.SC", "Max.SC")\r\n+ DF1 <- merge(DF, crapome); as.character(DF1$Num.of.Exp); DF1$Symbol <- NULL;\r\n+ DF1$Ave.SC <- NULL; DF1$Max.SC <- NULL #remove unnecessary columns\r\n+ DF1$Num.of.Exp <- sub("^$", "0 / 1", DF1$Num.of.Exp ) #replace blank values with 0 / 1\r\n+ DF <- DF1 %>% separate(Num.of.Exp, c("NumExp", "TotalExp"), " / ") #split into 2 columns\r\n+ DF$CrapomePCT <- 100 - (as.integer(DF$NumExp) / as.integer(D'..b'ecSum) +\r\n+ scale_size(range=c(1,10)) + ggtitle("Filtered on SAINT score") + \r\n+ geom_point(aes(x=SaintScore,y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=a)\r\n+ if(label==TRUE & length(a$NSAF!=0)) {\r\n+ p <- p + geom_text(data=a, aes(label=PreyGene, size=10, vjust=0, hjust=0),colour="black", show_guide=FALSE)\r\n+ }\r\n+ if(length(levels(data$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")}\r\n+ return(ggsave(p, width=8,height=4,filename = "bubble_zoom_SAINT.png"))\r\n+ }\r\n+}\r\n+###################################################################################################\r\n+# Filter proteins on Saintscore cutoff and plot for each bait x=log(NSAF), y=Log2(FoldChange)\r\n+###################################################################################################\r\n+bubble_zoom_NSAF <- function(data, color, label=FALSE, cutoff=0.8) {\r\n+ if(color=="crapome") {\r\n+ a <- subset(data, CrapomePCT <80 & SaintScore>=cutoff, select = c(NSAF,SpecSum, CrapomePCT, FoldChange, SaintScore, Bait, PreyGene))\r\n+ b <- subset(data, CrapomePCT >=80 & SaintScore >=cutoff, select = c(NSAF,SpecSum, CrapomePCT, FoldChange, SaintScore, Bait, PreyGene))\r\n+ p <- qplot(x=log(NSAF), y=log2(FoldChange), data=a, colour=I("tan"),size=SpecSum) + \r\n+ scale_size(range=c(1,10)) + ggtitle("Filtered on SAINT score") + \r\n+ geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=a)\r\n+ if(label==TRUE & length(a$NSAF!=0)) {\r\n+ p <- p + geom_text(data=a, aes(label=PreyGene, size=10, vjust=0, hjust=0),colour="black")\r\n+ }\r\n+ if(length(levels(a$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")}\r\n+ p <- p + geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum, color=CrapomePCT), data=b) + \r\n+ scale_colour_gradient(limits=c(80, 100), low="tan", high="red") + \r\n+ labs(colour="CRAPome Probability \\nof Specific Interaction (%)", x="ln(NSAF)") + \r\n+ geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=b)\r\n+ if(label==TRUE & length(b$NSAF!=0)) {\r\n+ p <- p + geom_text(data=b, aes(label=PreyGene, size=10, vjust=0, hjust=0),colour="black", show_guide=FALSE)\r\n+ }\r\n+ return(ggsave(p, width=8,height=4,filename = "bubble_zoom_NSAF.png"))\r\n+ }\r\n+ if(color != "crapome") {\r\n+ a <- subset(data, SaintScore>=cutoff, select = c(NSAF,SpecSum, FoldChange, SaintScore, Bait, PreyGene))\r\n+ p <- qplot(x=log(NSAF), y=log2(FoldChange), data=a, colour=I(color), size=SpecSum) +\r\n+ scale_size(range=c(1,10)) + ggtitle("Filtered on SAINT score") + \r\n+ geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=a) + \r\n+ labs(x="ln(NSAF)")\r\n+ if(label==TRUE & length(a$NSAF!=0)) {\r\n+ p <- p + geom_text(data=a, aes(label=PreyGene, size=10, vjust=0, hjust=0),colour="black", show_guide=FALSE)\r\n+ }\r\n+ if(length(levels(data$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")}\r\n+ return(ggsave(p, width=8,height=4,filename = "bubble_zoom_NSAF.png"))\r\n+ }\r\n+}\r\n+###################################################################################################\r\n+# Check Saintscore cutoff and stop program if not between 0 and 1\r\n+###################################################################################################\r\n+cutoff_check <- function(cutoff){\r\n+ if( any(cutoff < 0 | cutoff > 1) ) stop(\'SAINT score cutoff not between 0 and 1. Please correct and try again\')\r\n+}\r\n+\r\n+args <- commandArgs(trailingOnly = TRUE)\r\n+main(args[1],args[2],args[3],args[4],args[5],args[6],args[7],args[8],args[9])\r\n+\r\n+#main("test_list.txt", "preytest.txt", crapome="craptest.txt", color="crapome", label=TRUE)\r\n+#main("Crizo_list.txt", "prey_cr.txt", crapome = "crizo_crap.txt", color="crapome", label=TRUE, cutoff=0.7)\r\n+#main("test_list.txt", "preytest.txt", crapome=FALSE, color="magenta", label=FALSE, cutoff=1.1)\n\\ No newline at end of file\n' |