Mercurial > repos > bornea > saint_bubblebeam
comparison bubbles_v9_NSAF_natural_log.R @ 3:463a0ca33d7f draft
Uploaded
author | bornea |
---|---|
date | Tue, 17 Nov 2015 10:57:45 -0500 |
parents | |
children | 8eb1dd926f6e |
comparison
equal
deleted
inserted
replaced
2:d3be2b91e8d4 | 3:463a0ca33d7f |
---|---|
1 rm(list=ls()) | |
2 ################################################################################################### | |
3 # R-code: Multi-bubble graph generation from SAINTexpress output | |
4 # Author: Brent Kuenzi | |
5 ################################################################################################### | |
6 library(dplyr); library(tidyr); library(ggplot2) | |
7 ################################################################################################### | |
8 ### Run program ### | |
9 | |
10 ## REQUIRED INPUT ## | |
11 # 1) listfile: SAINTexpress generated "list.txt" file | |
12 # 2) preyfile: SAINT pre-processing generated "prey.txt" file used to run SAINTexpress | |
13 ## OPTIONAL INPUT ## | |
14 # 3) crapome: raw output from crapome Workflow 1 query (http://www.crapome.org) | |
15 # 4) color: bubble color (default = "red") | |
16 # - color= "crapome": color bubbles based on Crapome(%) | |
17 # - Also recognizes any color within R's built-in colors() vector | |
18 # 5) label: Adds gene name labels to bubbles within the "zoomed in" graphs (default = FALSE) | |
19 # 6) cutoff: Saintscore cutoff to be assigned for filtering the "zoomed in" graphs (default = 0.8) | |
20 ################################################################################################### | |
21 main <- function(listfile, preyfile , crapome=FALSE, color="red", label=FALSE, cutoff=0.8, type="SC", inc_file = "None", exc_file = "None" ) { | |
22 cutoff_check(cutoff) | |
23 listfile <- list_type(listfile, inc_file, exc_file) | |
24 if(type == "SC") { | |
25 df <- merge_files_sc(listfile, preyfile, crapome) | |
26 } | |
27 if(type == "MQ") { | |
28 df <- merge_files_mq(listfile, preyfile, crapome) | |
29 } | |
30 bubble_NSAF(df,color) | |
31 bubble_SAINT(df,color) | |
32 bubble_zoom_SAINT(df, color, label, cutoff) | |
33 bubble_zoom_NSAF(df, color, label, cutoff) | |
34 write.table(df,"output.txt",sep="\t",quote=FALSE, row.names=FALSE) | |
35 } | |
36 | |
37 list_type <- function(df, inc_file, exc_file) { | |
38 Saint <- read.delim(df, stringsAsFactors=FALSE) | |
39 if (inc_file != "None") { | |
40 if (exc_file == "None"){ | |
41 inc_prots <- read.delim(inc_file, sep='\t', header=FALSE, stringsAsFactors=FALSE) | |
42 print(inc_prots[,1]) | |
43 print(Saint$Prey) | |
44 filtered_df = subset(Saint, Saint$Prey == inc_prots[,1]) | |
45 } | |
46 else { | |
47 inc_prots <- read.delim(inc_file, sep='\t', header=FALSE, stringsAsFactors=FALSE) | |
48 exc_prots <- read.delim(exc_file, sep='\t', header=FALSE, stringsAsFactors=FALSE) | |
49 filtered_df = subset(Saint, Saint$Prey == inc_prots[,1]) | |
50 filtered_df = subset(filtered_df, filtered_df$Prey != exc_prots[,1]) | |
51 } | |
52 } | |
53 else if (exc_file != "None") { | |
54 exc_prots <- read.delim(exc_file, sep='\t', header=FALSE, stringsAsFactors=FALSE) | |
55 filtered_df = subset(Saint, Saint$Prey != exc_prots[,1]) | |
56 } | |
57 else { | |
58 filtered_df = Saint | |
59 } | |
60 return(filtered_df) | |
61 | |
62 } | |
63 ################################################################################################### | |
64 # Merge input files and caculate Crapome(%) and NSAF for each protein for each bait | |
65 ################################################################################################### | |
66 merge_files_mq <- function(SAINT, prey_DF, crapome=FALSE) { | |
67 #SAINT <- read.table(SAINT_DF, sep='\t', header=TRUE) | |
68 prey <- read.table(prey_DF, sep='\t', header=FALSE); colnames(prey) <- c("Prey", "Length", "PreyGene") | |
69 DF <- merge(SAINT,prey) | |
70 DF$SpecSum <- log2(DF$SpecSum) | |
71 | |
72 if(crapome!=FALSE) { | |
73 crapome <- read.table(crapome, sep='\t', header=TRUE) | |
74 colnames(crapome) <- c("Prey", "Symbol", "Num.of.Exp", "Ave.SC", "Max.SC") | |
75 DF1 <- merge(DF, crapome); as.character(DF1$Num.of.Exp); DF1$Symbol <- NULL; | |
76 DF1$Ave.SC <- NULL; DF1$Max.SC <- NULL #remove unnecessary columns | |
77 DF1$Num.of.Exp <- sub("^$", "0 / 1", DF1$Num.of.Exp ) #replace blank values with 0 / 1 | |
78 DF <- DF1 %>% separate(Num.of.Exp, c("NumExp", "TotalExp"), " / ") #split into 2 columns | |
79 DF$CrapomePCT <- 100 - (as.integer(DF$NumExp) / as.integer(DF$TotalExp) * 100) #calculate crapome % | |
80 } | |
81 DF$SAF <- DF$AvgSpec / DF$Length | |
82 DF2 = DF %>% group_by(Bait) %>% mutate(NSAF = SAF/sum(SAF)) | |
83 DF$NSAF = DF2$NSAF | |
84 return(DF) | |
85 } | |
86 | |
87 merge_files_sc <- function(SAINT, prey_DF, crapome=FALSE) { | |
88 #SAINT <- read.table(SAINT_DF, sep='\t', header=TRUE) | |
89 prey <- read.table(prey_DF, sep='\t', header=FALSE); colnames(prey) <- c("Prey", "Length", "PreyGene") | |
90 DF <- merge(SAINT,prey) | |
91 | |
92 if(crapome!=FALSE) { | |
93 crapome <- read.table(crapome, sep='\t', header=TRUE) | |
94 colnames(crapome) <- c("Prey", "Symbol", "Num.of.Exp", "Ave.SC", "Max.SC") | |
95 DF1 <- merge(DF, crapome); as.character(DF1$Num.of.Exp); DF1$Symbol <- NULL; | |
96 DF1$Ave.SC <- NULL; DF1$Max.SC <- NULL #remove unnecessary columns | |
97 DF1$Num.of.Exp <- sub("^$", "0 / 1", DF1$Num.of.Exp ) #replace blank values with 0 / 1 | |
98 DF <- DF1 %>% separate(Num.of.Exp, c("NumExp", "TotalExp"), " / ") #split into 2 columns | |
99 DF$CrapomePCT <- 100 - (as.integer(DF$NumExp) / as.integer(DF$TotalExp) * 100) #calculate crapome % | |
100 } | |
101 DF$SAF <- DF$AvgSpec / DF$Length | |
102 DF2 = DF %>% group_by(Bait) %>% mutate(NSAF = SAF/sum(SAF)) | |
103 DF$NSAF = DF2$NSAF | |
104 return(DF) | |
105 } | |
106 ################################################################################################### | |
107 # Plot all proteins for each bait by x=ln(NSAF), y=Log2(FoldChange) | |
108 ################################################################################################### | |
109 bubble_NSAF <- function(data, color) { | |
110 if(color=="crapome") { | |
111 a <- subset(data, CrapomePCT <80, select = c(NSAF,SpecSum, CrapomePCT, FoldChange, SaintScore, Bait)) | |
112 b <- subset(data, CrapomePCT>=80, select = c(NSAF,SpecSum, CrapomePCT, FoldChange, SaintScore, Bait)) | |
113 p <- qplot(x=log(NSAF), y=log2(FoldChange), data=a, colour=I("tan"),size=SpecSum) + scale_size(range=c(1,10)) + | |
114 geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=a) | |
115 if(length(levels(a$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")} # multiple graphs if multiple baits | |
116 p <- p + geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum, color=CrapomePCT), data=b) + | |
117 scale_colour_gradient(limits=c(80, 100), low="tan", high="red") + | |
118 labs(colour="CRAPome Probability \nof Specific Interaction (%)", x="ln(NSAF)") + | |
119 geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=b) | |
120 return(ggsave(p, width=8,height=4,filename = "bubble_NSAF.png")) | |
121 } | |
122 if(color != "crapome") { | |
123 p <- qplot(x=log(NSAF), y=log2(FoldChange), data=data, colour=I(color),size=SpecSum) + scale_size(range=c(1,10)) + | |
124 geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=data) + # add bubble outlines | |
125 labs(x="ln(NSAF)") | |
126 if(length(levels(data$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")} | |
127 return(ggsave(p, width=8,height=4,filename = "bubble_NSAF.png")) | |
128 } | |
129 } | |
130 ################################################################################################### | |
131 # Plot all proteins for each bait by x=Saintscore, y=Log2(FoldChange) | |
132 ################################################################################################### | |
133 bubble_SAINT <- function(data, color) { | |
134 if(color=="crapome") { | |
135 a <- subset(data, CrapomePCT <80, select = c(NSAF,SpecSum, CrapomePCT, FoldChange, SaintScore, Bait)) #filter on CRAPome | |
136 b <- subset(data, CrapomePCT >=80, select = c(NSAF,SpecSum, CrapomePCT, FoldChange, SaintScore, Bait)) | |
137 p <- qplot(x=SaintScore, y=log2(FoldChange), data=a, colour=I("tan"),size=SpecSum) + | |
138 scale_size(range=c(1,10)) + geom_point(aes(x=SaintScore,y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=a) | |
139 if(length(levels(a$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")} | |
140 p <- p + geom_point(aes(x=SaintScore,y=log2(FoldChange), size=SpecSum, color=CrapomePCT), data=b) + | |
141 scale_colour_gradient(limits=c(80, 100), low="tan", high="red") + | |
142 labs(colour="CRAPome Probability \nof Specific Interaction (%)") + | |
143 geom_point(aes(x=SaintScore,y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=b) | |
144 return(ggsave(p, width=8,height=4,filename = "bubble_SAINT.png")) | |
145 } | |
146 if(color != "crapome") { | |
147 p <- qplot(x=SaintScore, y=log2(FoldChange), data=data, colour=I(color),size=SpecSum) + | |
148 scale_size(range=c(1,10)) + geom_point(aes(x=SaintScore,y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=data) | |
149 if(length(levels(data$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")} | |
150 return(ggsave(p, width=8,height=4,filename = "bubble_SAINT.png")) | |
151 } | |
152 } | |
153 ################################################################################################### | |
154 # Filter proteins on Saintscore cutoff and plot for each bait x=Saintscore, y=Log2(FoldChange) | |
155 ################################################################################################### | |
156 bubble_zoom_SAINT <- function(data, color, label=FALSE, cutoff=0.8) { | |
157 if(color=="crapome") { | |
158 a <- subset(data, CrapomePCT <80 & SaintScore>=cutoff, select = c(NSAF,SpecSum, CrapomePCT, FoldChange, SaintScore, Bait, PreyGene)) | |
159 b <- subset(data, CrapomePCT >=80 & SaintScore >=cutoff, select = c(NSAF,SpecSum, CrapomePCT, FoldChange, SaintScore, Bait, PreyGene)) | |
160 p <- qplot(x=SaintScore, y=log2(FoldChange), data=a, colour=I("tan"),size=SpecSum) + | |
161 scale_size(range=c(1,10)) + ggtitle("Filtered on SAINT score")+geom_point(aes(x=SaintScore,y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=a) | |
162 if(label==TRUE & length(a$NSAF!=0)) { | |
163 p <- p + geom_text(data=a, aes(label=PreyGene, size=10, vjust=0, hjust=0),colour="black") | |
164 } | |
165 if(length(levels(a$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")} | |
166 p <- p + geom_point(aes(x=SaintScore,y=log2(FoldChange), size=SpecSum, color=CrapomePCT), data=b) + | |
167 scale_colour_gradient(limits=c(80, 100), low="tan", high="red") + | |
168 labs(colour="CRAPome Probability \nof Specific Interaction (%)") + | |
169 geom_point(aes(x=SaintScore,y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=b) | |
170 if(label==TRUE & length(b$NSAF!=0)) { | |
171 p <- p + geom_text(data=b, aes(label=PreyGene, size=10, vjust=0, hjust=0),colour="black", show_guide=FALSE) | |
172 } | |
173 return(ggsave(p, width=8,height=4,filename = "bubble_zoom_SAINT.png")) | |
174 } | |
175 if(color != "crapome") { | |
176 a <- subset(data, SaintScore>=cutoff, select = c(NSAF,SpecSum, FoldChange, SaintScore, Bait, PreyGene)) | |
177 p <- qplot(x=SaintScore, y=log2(FoldChange), data=a, colour=I(color),size=SpecSum) + | |
178 scale_size(range=c(1,10)) + ggtitle("Filtered on SAINT score") + | |
179 geom_point(aes(x=SaintScore,y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=a) | |
180 if(label==TRUE & length(a$NSAF!=0)) { | |
181 p <- p + geom_text(data=a, aes(label=PreyGene, size=10, vjust=0, hjust=0),colour="black", show_guide=FALSE) | |
182 } | |
183 if(length(levels(data$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")} | |
184 return(ggsave(p, width=8,height=4,filename = "bubble_zoom_SAINT.png")) | |
185 } | |
186 } | |
187 ################################################################################################### | |
188 # Filter proteins on Saintscore cutoff and plot for each bait x=log(NSAF), y=Log2(FoldChange) | |
189 ################################################################################################### | |
190 bubble_zoom_NSAF <- function(data, color, label=FALSE, cutoff=0.8) { | |
191 if(color=="crapome") { | |
192 a <- subset(data, CrapomePCT <80 & SaintScore>=cutoff, select = c(NSAF,SpecSum, CrapomePCT, FoldChange, SaintScore, Bait, PreyGene)) | |
193 b <- subset(data, CrapomePCT >=80 & SaintScore >=cutoff, select = c(NSAF,SpecSum, CrapomePCT, FoldChange, SaintScore, Bait, PreyGene)) | |
194 p <- qplot(x=log(NSAF), y=log2(FoldChange), data=a, colour=I("tan"),size=SpecSum) + | |
195 scale_size(range=c(1,10)) + ggtitle("Filtered on SAINT score") + | |
196 geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=a) | |
197 if(label==TRUE & length(a$NSAF!=0)) { | |
198 p <- p + geom_text(data=a, aes(label=PreyGene, size=10, vjust=0, hjust=0),colour="black") | |
199 } | |
200 if(length(levels(a$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")} | |
201 p <- p + geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum, color=CrapomePCT), data=b) + | |
202 scale_colour_gradient(limits=c(80, 100), low="tan", high="red") + | |
203 labs(colour="CRAPome Probability \nof Specific Interaction (%)", x="ln(NSAF)") + | |
204 geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=b) | |
205 if(label==TRUE & length(b$NSAF!=0)) { | |
206 p <- p + geom_text(data=b, aes(label=PreyGene, size=10, vjust=0, hjust=0),colour="black", show_guide=FALSE) | |
207 } | |
208 return(ggsave(p, width=8,height=4,filename = "bubble_zoom_NSAF.png")) | |
209 } | |
210 if(color != "crapome") { | |
211 a <- subset(data, SaintScore>=cutoff, select = c(NSAF,SpecSum, FoldChange, SaintScore, Bait, PreyGene)) | |
212 p <- qplot(x=log(NSAF), y=log2(FoldChange), data=a, colour=I(color), size=SpecSum) + | |
213 scale_size(range=c(1,10)) + ggtitle("Filtered on SAINT score") + | |
214 geom_point(aes(x=log(NSAF),y=log2(FoldChange), size=SpecSum), colour="black", shape=21, data=a) + | |
215 labs(x="ln(NSAF)") | |
216 if(label==TRUE & length(a$NSAF!=0)) { | |
217 p <- p + geom_text(data=a, aes(label=PreyGene, size=10, vjust=0, hjust=0),colour="black", show_guide=FALSE) | |
218 } | |
219 if(length(levels(data$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales="free_y")} | |
220 return(ggsave(p, width=8,height=4,filename = "bubble_zoom_NSAF.png")) | |
221 } | |
222 } | |
223 ################################################################################################### | |
224 # Check Saintscore cutoff and stop program if not between 0 and 1 | |
225 ################################################################################################### | |
226 cutoff_check <- function(cutoff){ | |
227 if( any(cutoff < 0 | cutoff > 1) ) stop('SAINT score cutoff not between 0 and 1. Please correct and try again') | |
228 } | |
229 | |
230 #args <- commandArgs(trailingOnly = TRUE) | |
231 #main(args[1],args[2],args[3],args[4],args[5],args[6],args[7],args[8],args[9]) | |
232 | |
233 #main("test_list.txt", "preytest.txt", crapome="craptest.txt", color="crapome", label=TRUE) | |
234 #main("Crizo_list.txt", "prey_cr.txt", crapome = "crizo_crap.txt", color="crapome", label=TRUE, cutoff=0.7) | |
235 #main("test_list.txt", "preytest.txt", crapome=FALSE, color="magenta", label=FALSE, cutoff=1.1) |