annotate bubbles_v9_NSAF_natural_log.R @ 27:ab9ace68e3b5 draft

Uploaded
author bornea
date Fri, 29 Jan 2016 09:34:17 -0500
parents ab602bbf4ac5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
1 rm(list = ls())
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
2 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
3 # R-code: Multi-bubble graph generation from SAINTexpress output
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
4 # Author: Brent Kuenzi
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
5 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
6 # This Script generates the bubble graphs based upon Saint output.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
7 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
8 # Copyright (C) Brent Kuenzi.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
9 # Permission is granted to copy, distribute and/or modify this document
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
10 # under the terms of the GNU Free Documentation License, Version 1.3
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
11 # or any later version published by the Free Software Foundation;
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
12 # with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
13 # A copy of the license is included in the section entitled "GNU
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
14 # Free Documentation License".
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
15 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
16 ## REQUIRED INPUT ##
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
17
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
18 # 1) listfile: SAINTexpress generated "list.txt" file
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
19 # 2) preyfile: SAINT pre-processing generated "prey.txt" file used to run SAINTexpress
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
20
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
21 ## OPTIONAL INPUT ##
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
22
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
23 # 3) crapome: raw output from crapome Workflow 1 query (http://www.crapome.org)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
24 # 4) color: bubble color (default = "red")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
25 # - color = "crapome": color bubbles based on Crapome(%)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
26 # - Also recognizes any color within R's built-in colors() vector
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
27 # 5) label: Adds gene name labels to bubbles within the "zoomed in" graphs (default = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
28 # 6) cutoff: Saintscore cutoff to be assigned for filtering the "zoomed in" graphs (default = 0.8)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
29 # 7) type: Specifies if the data is MaxQuant (MQ) or Scaffold (SC) data (default = "SC")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
30 # 8) inc_file: Selects only the uniprot ids in the provided list (default ="None")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
31 # 9) exc_file: Removes the proteins in the list (default = "None")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
32 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
33
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
34
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
35 ins_check_run <- function(){
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
36 if ('dplyr' %in% rownames(installed.packages())){}
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
37 else {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
38 install.packages('dplyr', repos = 'http://cran.us.r-project.org')
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
39 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
40 if ('tidyr' %in% rownames(installed.packages())){}
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
41 else {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
42 install.packages('tidyr', repos = 'http://cran.us.r-project.org')
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
43 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
44 if ('ggplot2' %in% rownames(installed.packages())){}
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
45 else {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
46 install.packages('ggplot2', repos = 'http://cran.us.r-project.org')
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
47 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
48 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
49
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
50 ins_check_run()
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
51 library(dplyr); library(tidyr); library(ggplot2)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
52
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
53
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
54 main <- function(listfile, preyfile, crapome = FALSE, color = "red", label = FALSE, cutoff = 0.8, type = "SC", inc_file = "None", exc_file = "None" ) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
55 cutoff_check(cutoff)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
56 listfile <- list_type(listfile, inc_file, exc_file)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
57 if(type == "SC") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
58 df <- merge_files_sc(listfile, preyfile, crapome)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
59 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
60 if(type == "MQ") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
61 df <- merge_files_mq(listfile, preyfile, crapome)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
62 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
63 bubble_NSAF(df, color)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
64 bubble_SAINT(df, color)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
65 bubble_zoom_SAINT(df, color, label, cutoff)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
66 bubble_zoom_NSAF(df, color, label, cutoff)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
67 write.table(df, "output.txt", sep = "\t", quote = FALSE, row.names = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
68 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
69 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
70 # Include and Exclude list filtering
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
71 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
72 list_type <- function(df, inc_file, exc_file) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
73 Saint <- read.delim(df, stringsAsFactors = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
74 if (inc_file != "None") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
75 if (exc_file == "None") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
76 inc_prots <- read.delim(inc_file, sep = '\t', header = FALSE, stringsAsFactors = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
77 print(inc_prots[, 1])
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
78 print(Saint$Prey)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
79 filtered_df = subset(Saint, Saint$Prey == inc_prots[, 1])
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
80 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
81 else {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
82 inc_prots <- read.delim(inc_file, sep = '\t', header = FALSE, stringsAsFactors = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
83 exc_prots <- read.delim(exc_file, sep = '\t', header = FALSE, stringsAsFactors = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
84 filtered_df = subset(Saint, Saint$Prey == inc_prots[, 1])
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
85 filtered_df = subset(filtered_df, filtered_df$Prey != exc_prots[, 1])
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
86 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
87 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
88 else if (exc_file != "None") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
89 exc_prots <- read.delim(exc_file, sep = '\t', header = FALSE, stringsAsFactors = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
90 filtered_df = subset(Saint, Saint$Prey != exc_prots[, 1])
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
91 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
92 else {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
93 filtered_df = Saint
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
94 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
95 return(filtered_df)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
96
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
97 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
98 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
99 # Merge input files and caculate Crapome(%) and NSAF for each protein for each bait
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
100 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
101 merge_files_mq <- function(SAINT, prey_DF, crapome = FALSE) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
102 #SAINT <- read.table(SAINT_DF, sep = '\t', header = TRUE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
103 #Some of these read.table()'s don't use stringsAsFactors = FALSE. Is this on purpose? Factors give rise to some really weird and unpredictable behavior; suggest always using stringsAsFactors = FALSE
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
104 prey <- read.table(prey_DF, sep = '\t', header = FALSE); colnames(prey) <- c("Prey", "Length", "PreyGene")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
105 DF <- merge(SAINT, prey)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
106 DF$SpecSum <- log2(DF$SpecSum)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
107
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
108 if(crapome != FALSE) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
109 crapome <- read.table(crapome, sep = '\t', header = TRUE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
110 colnames(crapome) <- c("Prey", "Symbol", "Num.of.Exp", "Ave.SC", "Max.SC")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
111 DF1 <- merge(DF, crapome); as.character(DF1$Num.of.Exp); DF1$Symbol <- NULL;
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
112 DF1$Ave.SC <- NULL; DF1$Max.SC <- NULL # Removes unnecessary columns.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
113 DF1$Num.of.Exp <- sub("^$", "0 / 1", DF1$Num.of.Exp ) # Replace blank values with 0 / 1.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
114 DF <- DF1 %>% separate(Num.of.Exp, c("NumExp", "TotalExp"), " / ") # Split into 2 columns.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
115 DF$CrapomePCT <- 100 - (as.integer(DF$NumExp) / as.integer(DF$TotalExp) * 100) # Calculate the crapome %.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
116 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
117 DF$SAF <- DF$AvgSpec / DF$Length
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
118 DF2 = DF %>% group_by(Bait) %>% mutate(NSAF = SAF/sum(SAF))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
119 DF$NSAF = DF2$NSAF
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
120 return(DF)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
121 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
122
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
123 merge_files_sc <- function(SAINT, prey_DF, crapome = FALSE) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
124 #SAINT <- read.table(SAINT_DF, sep = '\t', header = TRUE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
125 prey <- read.table(prey_DF, sep = '\t', header = FALSE); colnames(prey) <- c("Prey", "Length", "PreyGene")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
126 DF <- merge(SAINT, prey)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
127
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
128 if(crapome != FALSE) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
129 crapome <- read.table(crapome, sep = '\t', header = TRUE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
130 colnames(crapome) <- c("Prey", "Symbol", "Num.of.Exp", "Ave.SC", "Max.SC")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
131 DF1 <- merge(DF, crapome); as.character(DF1$Num.of.Exp); DF1$Symbol <- NULL;
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
132 DF1$Ave.SC <- NULL; DF1$Max.SC <- NULL # Removes unnecessary columns.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
133 DF1$Num.of.Exp <- sub("^$", "0 / 1", DF1$Num.of.Exp ) # Replace blank values with 0 / 1.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
134 DF <- DF1 %>% separate(Num.of.Exp, c("NumExp", "TotalExp"), " / ") # Split into 2 columns.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
135 DF$CrapomePCT <- 100 - (as.integer(DF$NumExp) / as.integer(DF$TotalExp) * 100) # Calculate the crapome %.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
136 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
137 DF$SAF <- DF$AvgSpec / DF$Length
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
138 DF2 = DF %>% group_by(Bait) %>% mutate(NSAF = SAF/sum(SAF))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
139 DF$NSAF = DF2$NSAF
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
140 return(DF)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
141 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
142 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
143 # Plot all proteins for each bait by x = ln(NSAF), y = Log2(FoldChange)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
144 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
145 bubble_NSAF <- function(data, color) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
146 if(color == "crapome") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
147 a <- subset(data, CrapomePCT < 80, select = c(NSAF, SpecSum, CrapomePCT, FoldChange, SaintScore, Bait))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
148 b <- subset(data, CrapomePCT >= 80, select = c(NSAF, SpecSum, CrapomePCT, FoldChange, SaintScore, Bait))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
149 p <- qplot(x = log(NSAF), y = log2(FoldChange), data = a, colour = I("tan"), size = SpecSum) + scale_size(range = c(1, 10)) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
150 geom_point(aes(x = log(NSAF), y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = a)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
151 if(length(levels(a$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales = "free_y")} # multiple graphs if multiple baits
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
152 #The text says ln() which is log base e, but the code uses log base 10. Fix code or the axis label.
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
153 p <- p + geom_point(aes(x = log(NSAF), y = log2(FoldChange), size = SpecSum, color = CrapomePCT), data = b) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
154 scale_colour_gradient(limits = c(80, 100), low = "tan", high = "red") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
155 labs(colour = "CRAPome Probability \nof Specific Interaction (%)", x = "ln(NSAF)") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
156 geom_point(aes(x = log(NSAF), y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = b)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
157 return(ggsave(p, width = 8, height = 4, filename = "bubble_NSAF.png"))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
158 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
159 if(color != "crapome") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
160 p <- qplot(x = log(NSAF), y = log2(FoldChange), data = data, colour = I(color), size = SpecSum) + scale_size(range = c(1, 10)) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
161 geom_point(aes(x = log(NSAF), y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = data) + # add bubble outlines
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
162 labs(x = "ln(NSAF)")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
163 if(length(levels(data$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales = "free_y")}
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
164 return(ggsave(p, width = 8, height = 4, filename = "bubble_NSAF.png"))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
165 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
166 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
167 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
168 # Plot all proteins for each bait by x = Saintscore, y = Log2(FoldChange)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
169 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
170 bubble_SAINT <- function(data, color) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
171 if(color == "crapome") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
172 a <- subset(data, CrapomePCT < 80, select = c(NSAF, SpecSum, CrapomePCT, FoldChange, SaintScore, Bait)) #filter on CRAPome
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
173 b <- subset(data, CrapomePCT >= 80, select = c(NSAF, SpecSum, CrapomePCT, FoldChange, SaintScore, Bait))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
174 p <- qplot(x = SaintScore, y = log2(FoldChange), data = a, colour = I("tan"), size = SpecSum) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
175 scale_size(range = c(1, 10)) + geom_point(aes(x = SaintScore, y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = a)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
176 if(length(levels(a$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales = "free_y")}
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
177 p <- p + geom_point(aes(x = SaintScore, y = log2(FoldChange), size = SpecSum, color = CrapomePCT), data = b) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
178 scale_colour_gradient(limits = c(80, 100), low = "tan", high = "red") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
179 labs(colour = "CRAPome Probability \nof Specific Interaction (%)") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
180 geom_point(aes(x = SaintScore, y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = b)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
181 return(ggsave(p, width = 8, height = 4, filename = "bubble_SAINT.png"))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
182 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
183 if(color != "crapome") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
184 p <- qplot(x = SaintScore, y = log2(FoldChange), data = data, colour = I(color), size = SpecSum) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
185 scale_size(range = c(1, 10)) + geom_point(aes(x = SaintScore, y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = data)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
186 if(length(levels(data$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales = "free_y")}
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
187 return(ggsave(p, width = 8, height = 4, filename = "bubble_SAINT.png"))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
188 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
189 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
190 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
191 # Filter proteins on Saintscore cutoff and plot for each bait x = Saintscore, y = Log2(FoldChange)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
192 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
193 bubble_zoom_SAINT <- function(data, color, label = FALSE, cutoff = 0.8) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
194 if(color == "crapome") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
195 a <- subset(data, CrapomePCT < 80 & SaintScore >= cutoff, select = c(NSAF, SpecSum, CrapomePCT, FoldChange, SaintScore, Bait, PreyGene))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
196 b <- subset(data, CrapomePCT >= 80 & SaintScore >= cutoff, select = c(NSAF, SpecSum, CrapomePCT, FoldChange, SaintScore, Bait, PreyGene))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
197 p <- qplot(x = SaintScore, y = log2(FoldChange), data = a, colour = I("tan"), size = SpecSum) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
198 scale_size(range = c(1, 10)) + ggtitle("Filtered on SAINT score")+geom_point(aes(x = SaintScore, y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = a)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
199 if(label == TRUE & length(a$NSAF != 0)) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
200 p <- p + geom_text(data = a, aes(label = PreyGene, size = 10, vjust = 0, hjust = 0), colour = "black")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
201 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
202 if(length(levels(a$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales = "free_y")}
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
203 p <- p + geom_point(aes(x = SaintScore, y = log2(FoldChange), size = SpecSum, color = CrapomePCT), data = b) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
204 scale_colour_gradient(limits = c(80, 100), low = "tan", high = "red") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
205 labs(colour = "CRAPome Probability \nof Specific Interaction (%)") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
206 geom_point(aes(x = SaintScore, y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = b)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
207 if(label == TRUE & length(b$NSAF != 0)) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
208 p <- p + geom_text(data = b, aes(label = PreyGene, size = 10, vjust = 0, hjust = 0), colour = "black", show_guide = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
209 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
210 return(ggsave(p, width = 8, height = 4, filename = "bubble_zoom_SAINT.png"))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
211 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
212 if(color != "crapome") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
213 a <- subset(data, SaintScore >= cutoff, select = c(NSAF, SpecSum, FoldChange, SaintScore, Bait, PreyGene))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
214 p <- qplot(x = SaintScore, y = log2(FoldChange), data = a, colour = I(color), size = SpecSum) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
215 scale_size(range = c(1, 10)) + ggtitle("Filtered on SAINT score") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
216 geom_point(aes(x = SaintScore, y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = a)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
217 if(label == TRUE & length(a$NSAF != 0)) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
218 p <- p + geom_text(data = a, aes(label = PreyGene, size = 10, vjust = 0, hjust = 0), colour = "black", show_guide = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
219 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
220 if(length(levels(data$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales = "free_y")}
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
221 return(ggsave(p, width = 8, height = 4, filename = "bubble_zoom_SAINT.png"))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
222 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
223 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
224 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
225 # Filter proteins on Saintscore cutoff and plot for each bait x = log(NSAF), y = Log2(FoldChange)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
226 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
227 bubble_zoom_NSAF <- function(data, color, label = FALSE, cutoff = 0.8) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
228 if(color == "crapome") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
229 a <- subset(data, CrapomePCT < 80 & SaintScore >= cutoff, select = c(NSAF, SpecSum, CrapomePCT, FoldChange, SaintScore, Bait, PreyGene))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
230 b <- subset(data, CrapomePCT >= 80 & SaintScore >= cutoff, select = c(NSAF, SpecSum, CrapomePCT, FoldChange, SaintScore, Bait, PreyGene))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
231 p <- qplot(x = log(NSAF), y = log2(FoldChange), data = a, colour = I("tan"), size = SpecSum) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
232 scale_size(range = c(1, 10)) + ggtitle("Filtered on SAINT score") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
233 geom_point(aes(x = log(NSAF), y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = a)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
234 if(label == TRUE & length(a$NSAF != 0)) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
235 p <- p + geom_text(data = a, aes(label = PreyGene, size = 10, vjust = 0, hjust = 0), colour = "black")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
236 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
237 if(length(levels(a$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales = "free_y")}
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
238 p <- p + geom_point(aes(x = log(NSAF), y = log2(FoldChange), size = SpecSum, color = CrapomePCT), data = b) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
239 scale_colour_gradient(limits = c(80, 100), low = "tan", high = "red") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
240 labs(colour = "CRAPome Probability \nof Specific Interaction (%)", x = "ln(NSAF)") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
241 geom_point(aes(x = log(NSAF), y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = b)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
242 if(label == TRUE & length(b$NSAF != 0)) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
243 p <- p + geom_text(data = b, aes(label = PreyGene, size = 10, vjust = 0, hjust = 0), colour = "black", show_guide = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
244 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
245 return(ggsave(p, width = 8, height = 4, filename = "bubble_zoom_NSAF.png"))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
246 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
247 if(color != "crapome") {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
248 a <- subset(data, SaintScore >= cutoff, select = c(NSAF, SpecSum, FoldChange, SaintScore, Bait, PreyGene))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
249 p <- qplot(x = log(NSAF), y = log2(FoldChange), data = a, colour = I(color), size = SpecSum) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
250 scale_size(range = c(1, 10)) + ggtitle("Filtered on SAINT score") +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
251 geom_point(aes(x = log(NSAF), y = log2(FoldChange), size = SpecSum), colour = "black", shape = 21, data = a) +
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
252 labs(x = "ln(NSAF)")
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
253 if(label == TRUE & length(a$NSAF != 0)) {
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
254 p <- p + geom_text(data = a, aes(label = PreyGene, size = 10, vjust = 0, hjust = 0), colour = "black", show_guide = FALSE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
255 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
256 if(length(levels(data$Bait) > 1)) {p <- p + facet_wrap(~Bait, scales = "free_y")}
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
257 return(ggsave(p, width = 8, height = 4, filename = "bubble_zoom_NSAF.png"))
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
258 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
259 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
260 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
261 # Check Saintscore cutoff and stop program if not between 0 and 1
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
262 ###################################################################################################
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
263 cutoff_check <- function(cutoff){
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
264 if( any(cutoff < 0 | cutoff > 1) ) stop('SAINT score cutoff not between 0 and 1. Please correct and try again')
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
265 }
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
266
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
267 args <- commandArgs(trailingOnly = TRUE)
ab602bbf4ac5 Uploaded
bornea
parents:
diff changeset
268 main(args[1], args[2], args[3], args[4], args[5], args[6], args[7], args[8], args[9])