annotate nsaf_scoring.R @ 7:71feabcbe3d2 draft

Uploaded
author bornea
date Tue, 17 May 2016 13:01:01 -0400
parents 2843d0da7f20
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
1 ###################################################################################################
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
2 # R-code: APOSTL Global Variables
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
3 # Author: Brent Kuenzi
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
4
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
5 ###################################################################################################
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
6 # This program performs the file merging as well as a series of calculations
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
7 # After merging, the following parameters will be calculated:
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
8 # 1) CRAPomePCT
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
9 # 2) NSAF
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
10 # 3) NSAFscore
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
11 # The resulting table will be exported. This is performed as its own tool and should not be used
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
12 # for input into the interactive analysis tool or the standalone bubble graph tool
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
13 ################################## Dependencies ###################################################
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
14 library(dplyr); library(tidyr)
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
15 ################################# Read in Data ####################################################
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
16 ## REQUIRED INPUTS ##
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
17 ## 1) listfile (filename)
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
18 #listfile <- "EGFR_list.txt"
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
19 ## 2) Prey File (filename)
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
20 #preyfile <- "EGFR_prey.txt"
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
21 ## 3) crapome File (filename or FALSE)
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
22 #crapfile <- "EGFR_crap.txt"
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
23 ## 4) Inter File (filename)
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
24 #interfile <- "inter.txt"
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
25 ################################# Create Table ####################################################
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
# Merge the SAINT list file with the prey file (and, optionally, a CRAPome
# file) and derive a per-bait NSAF value for every row.
#
# Args:
#   SAINT_DF: path to the tab-delimited SAINT list file (read with a header).
#             The code below relies on its Bait, AvgSpec and FoldChange columns.
#   prey_DF:  path to the headerless, tab-delimited prey file; its three
#             columns are named Prey, Length and PreyGene after reading.
#   crapome:  path to a tab-delimited CRAPome file (header, five columns), or
#             FALSE to skip the CRAPome step. The string "FALSE" (as received
#             from the command line), NULL and NA are treated like FALSE.
#
# Returns:
#   The merged table, still grouped by Bait, in which FoldChange has been
#   replaced by "log2(FoldChange)", an NSAF column has been added, and fully
#   duplicated rows have been removed. When a CRAPome file is supplied the
#   table additionally carries NumExp, TotalExp and CrapomePCT.
merge_files <- function(SAINT_DF, prey_DF, crapome=FALSE) {
  SAINT <- read.table(SAINT_DF, sep = "\t", header = TRUE)
  prey <- read.table(prey_DF, sep = "\t", header = FALSE)
  colnames(prey) <- c("Prey", "Length", "PreyGene")
  # merge() joins on every column name the two tables share.
  DF <- merge(SAINT, prey)

  # Explicit sentinel test: the previous `crapome != FALSE` only worked through
  # implicit logical-to-character coercion and failed outright on NA/NULL.
  skip_crapome <- is.null(crapome) ||
    (length(crapome) == 1L &&
       (is.na(crapome) || identical(as.character(crapome), "FALSE")))

  if (!skip_crapome) {
    crap <- read.table(crapome, sep = "\t", header = TRUE)
    colnames(crap) <- c("Prey", "Symbol", "Num.of.Exp", "Ave.SC", "Max.SC")
    DF1 <- merge(DF, crap)
    # Remove unnecessary columns.
    DF1$Symbol <- NULL
    DF1$Ave.SC <- NULL
    DF1$Max.SC <- NULL
    # Replace blank values with "0 / 1" (sub() coerces its input to character).
    DF1$Num.of.Exp <- sub("^$", "0 / 1", DF1$Num.of.Exp)
    # Split "<found> / <total>" into two columns.
    DF <- DF1 %>%
      separate(Num.of.Exp, into = c("NumExp", "TotalExp"), sep = " / ")
    # CRAPome percentage: 100 minus the percentage of experiments reporting
    # the prey.
    DF$CrapomePCT <- round(
      100 - (as.integer(DF$NumExp) / as.integer(DF$TotalExp) * 100),
      digits = 2
    )
  }

  DF$FoldChange <- round(log2(DF$FoldChange), digits = 2)
  colnames(DF)[colnames(DF) == "FoldChange"] <- "log2(FoldChange)"

  # SAF = average spectral count / Length; NSAF normalises SAF within a bait.
  DF$SAF <- DF$AvgSpec / DF$Length
  by_bait <- DF %>%
    group_by(Bait) %>%
    mutate(NSAF = SAF / sum(SAF))
  by_bait$SAF <- NULL
  by_bait[!duplicated(by_bait), ]
}
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
49
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
# Build the merged SAINT table, compute control-based NSAF scores and write the
# result to "SaintTable.txt" (tab-delimited, no row names, no quoting) in the
# current working directory.
#
# Args:
#   listfile:  path to the SAINT list file (forwarded to merge_files()).
#   preyfile:  path to the prey file (forwarded to merge_files()).
#   crapfile:  path to the CRAPome file, or FALSE / "FALSE" to skip it
#              (forwarded to merge_files()).
#   interfile: accepted for interface compatibility only. Earlier revisions
#              read this file into a variable that was never used; it is no
#              longer read.
#
# The table returned by merge_files() must provide the ctrlCounts, Length,
# Bait and NSAF columns used below.
main <- function(listfile, preyfile, crapfile, interfile) {
  working <- as.data.frame(merge_files(listfile, preyfile, crapfile))

  # ctrlCounts holds "|"-separated control counts; derive their mean and how
  # many there are for every row.
  ctrl_counts <- strsplit(as.character(working$ctrlCounts), "[|]")
  working$ctrl_mean <- vapply(
    ctrl_counts,
    function(counts) mean(as.numeric(counts)),
    numeric(1)
  )
  working$ctrl_number <- vapply(ctrl_counts, length, integer(1))

  working$ctrl_SAF <- working$ctrl_mean / working$Length
  main.data <- working %>%
    group_by(Bait) %>%
    mutate(control_NSAF = ctrl_SAF / sum(ctrl_SAF))

  # ctrl_SAF_constant is added to numerator and denominator to prevent
  # dividing by 0.
  ctrl_SAF_constant <- 1 / mean(main.data$ctrl_SAF)
  main.data$nsafScore <- (main.data$NSAF + ctrl_SAF_constant) /
    (main.data$control_NSAF / main.data$ctrl_number + ctrl_SAF_constant)

  main.data$NSAF <- log(main.data$NSAF)
  main.data$nsafScore <- log(main.data$nsafScore)
  # Rows whose NSAF was 0 become -Inf after the log and are dropped here.
  main.data <- filter(main.data, NSAF > -Inf)

  colnames(main.data)[colnames(main.data) == "NSAF"] <- "ln(NSAF)"
  colnames(main.data)[colnames(main.data) == "nsafScore"] <- "NSAFScore"

  # Drop intermediate columns; ctrl_number is intentionally kept in the output.
  main.data$ctrl_SAF <- NULL
  main.data$control_NSAF <- NULL
  main.data$ctrl_mean <- NULL

  write.table(main.data, file = "SaintTable.txt", sep = "\t",
              row.names = FALSE, quote = FALSE)
}
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
78
2843d0da7f20 Uploaded
bornea
parents:
diff changeset
# Script entry point: expects four trailing command-line arguments, in order:
# list file, prey file, CRAPome file (or FALSE) and inter file.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 4) {
  # Fail early with a usage hint instead of passing NA file names downstream.
  stop(
    "Usage: Rscript nsaf_scoring.R <listfile> <preyfile> <crapfile|FALSE> <interfile>",
    call. = FALSE
  )
}
main(args[1], args[2], args[3], args[4])