annotate R/chi2test_MutSpecStat_Galaxy.r @ 7:eda59b985b1c draft default tip

Uploaded
author iarc
date Mon, 13 Mar 2017 08:21:19 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
eda59b985b1c Uploaded
iarc
parents:
diff changeset
1 #!/usr/bin/Rscript
eda59b985b1c Uploaded
iarc
parents:
diff changeset
2
eda59b985b1c Uploaded
iarc
parents:
diff changeset
3 #---------------------------------------#
eda59b985b1c Uploaded
iarc
parents:
diff changeset
4 # Author: Maude #
eda59b985b1c Uploaded
iarc
parents:
diff changeset
5 # Script: chi2test_MutSpecStat_Galaxy.r #
eda59b985b1c Uploaded
iarc
parents:
diff changeset
6 # Last update: 18/10/16 #
eda59b985b1c Uploaded
iarc
parents:
diff changeset
7 #---------------------------------------#
eda59b985b1c Uploaded
iarc
parents:
diff changeset
8
eda59b985b1c Uploaded
iarc
parents:
diff changeset
9
eda59b985b1c Uploaded
iarc
parents:
diff changeset
10 #########################################################################################################################################
eda59b985b1c Uploaded
iarc
parents:
diff changeset
11 # Calculate the chi2 test for the strand bias #
eda59b985b1c Uploaded
iarc
parents:
diff changeset
12 #########################################################################################################################################
eda59b985b1c Uploaded
iarc
parents:
diff changeset
13
eda59b985b1c Uploaded
iarc
parents:
diff changeset
14 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents:
diff changeset
15 # Load library for recovering the arguments
eda59b985b1c Uploaded
iarc
parents:
diff changeset
16 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents:
diff changeset
17 suppressMessages(suppressWarnings(require("getopt")))
eda59b985b1c Uploaded
iarc
parents:
diff changeset
18
eda59b985b1c Uploaded
iarc
parents:
diff changeset
19
eda59b985b1c Uploaded
iarc
parents:
diff changeset
20 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents:
diff changeset
21 # Recover the arguments
eda59b985b1c Uploaded
iarc
parents:
diff changeset
22 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents:
diff changeset
23 spec = matrix(c(
eda59b985b1c Uploaded
iarc
parents:
diff changeset
24 "folderChi2", "folderChi2", 1, "character",
eda59b985b1c Uploaded
iarc
parents:
diff changeset
25 "help", "h", 0, "logical"
eda59b985b1c Uploaded
iarc
parents:
diff changeset
26 ),
eda59b985b1c Uploaded
iarc
parents:
diff changeset
27 byrow=TRUE, ncol=4
eda59b985b1c Uploaded
iarc
parents:
diff changeset
28 )
eda59b985b1c Uploaded
iarc
parents:
diff changeset
29
eda59b985b1c Uploaded
iarc
parents:
diff changeset
30 opt = getopt(spec)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
31
eda59b985b1c Uploaded
iarc
parents:
diff changeset
32 # No argument is pass to the command line
eda59b985b1c Uploaded
iarc
parents:
diff changeset
33 if(length(opt) == 1)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
34 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
35 cat(paste("Usage:\n chi2test_MutSpecStat_Galaxy.r --folderChi2 <path_to_folder> \n",sep=""))
eda59b985b1c Uploaded
iarc
parents:
diff changeset
36 q(status=1)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
37 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
38
eda59b985b1c Uploaded
iarc
parents:
diff changeset
39 # Help was asked for.
eda59b985b1c Uploaded
iarc
parents:
diff changeset
40 if ( !is.null(opt$help) )
eda59b985b1c Uploaded
iarc
parents:
diff changeset
41 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
42 # print a friendly message and exit with a non-zero error code
eda59b985b1c Uploaded
iarc
parents:
diff changeset
43 cat(paste("Usage:\n chi2test_MutSpecStat_Galaxy.r --folderChi2 <path_to_folder> \n",sep=""))
eda59b985b1c Uploaded
iarc
parents:
diff changeset
44 q(status=1)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
45 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
46
eda59b985b1c Uploaded
iarc
parents:
diff changeset
47
eda59b985b1c Uploaded
iarc
parents:
diff changeset
48
eda59b985b1c Uploaded
iarc
parents:
diff changeset
49
eda59b985b1c Uploaded
iarc
parents:
diff changeset
50
eda59b985b1c Uploaded
iarc
parents:
diff changeset
51 ## Load the data. There is one column with the mutation type and the sample name but it's just for knowing what is corresponding to each line. The two columns with the number of variant per strand would be sufficient.
eda59b985b1c Uploaded
iarc
parents:
diff changeset
52 inputChi2 <- paste0(opt$folderChi2, "/Input_chi2_strandBias.txt")
eda59b985b1c Uploaded
iarc
parents:
diff changeset
53 strBias<-read.delim(inputChi2, dec=".")
eda59b985b1c Uploaded
iarc
parents:
diff changeset
54
eda59b985b1c Uploaded
iarc
parents:
diff changeset
55 # Chi2
eda59b985b1c Uploaded
iarc
parents:
diff changeset
56 pValChi2 <- c() # First I create an empty vector and then I apply a for on the data load
eda59b985b1c Uploaded
iarc
parents:
diff changeset
57 pValChi2_round <- c() # Empty vector with the rounded p-values
eda59b985b1c Uploaded
iarc
parents:
diff changeset
58 confInt <- c() # Empty vector for the confident interval
eda59b985b1c Uploaded
iarc
parents:
diff changeset
59 proportion <- c() # Empty vector for the proportion of NonTr compared to the (NonTr+Tr)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
60 sampleSize <- c() # Empty vector for the count of samples in NonTr and Tr
eda59b985b1c Uploaded
iarc
parents:
diff changeset
61 # For Pool_Data save the p-values in a different vector for not having them for the FDR
eda59b985b1c Uploaded
iarc
parents:
diff changeset
62 pValChi2_PoolData <- c()
eda59b985b1c Uploaded
iarc
parents:
diff changeset
63 pValChi2_PoolData_Round <- c()
eda59b985b1c Uploaded
iarc
parents:
diff changeset
64
eda59b985b1c Uploaded
iarc
parents:
diff changeset
65 j = 1 # Timer for pValChi2_PoolData vector
eda59b985b1c Uploaded
iarc
parents:
diff changeset
66 k = 1 # Timer for pValChi2
eda59b985b1c Uploaded
iarc
parents:
diff changeset
67
eda59b985b1c Uploaded
iarc
parents:
diff changeset
68 for(i in 1:nrow(strBias))
eda59b985b1c Uploaded
iarc
parents:
diff changeset
69 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
70 if(! sum(strBias[i,2:3]) == 0)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
71 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
72 # For Pool_Data
eda59b985b1c Uploaded
iarc
parents:
diff changeset
73 if(strBias[i,1] == "Pool_Data")
eda59b985b1c Uploaded
iarc
parents:
diff changeset
74 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
75 pValChi2_PoolData[j] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value
eda59b985b1c Uploaded
iarc
parents:
diff changeset
76 j <- j+1
eda59b985b1c Uploaded
iarc
parents:
diff changeset
77 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
78 # For the other sample(s)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
79 else
eda59b985b1c Uploaded
iarc
parents:
diff changeset
80 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
81 # Calculate the p-value
eda59b985b1c Uploaded
iarc
parents:
diff changeset
82 pValChi2[k] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value
eda59b985b1c Uploaded
iarc
parents:
diff changeset
83 k <- k+1
eda59b985b1c Uploaded
iarc
parents:
diff changeset
84 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
85
eda59b985b1c Uploaded
iarc
parents:
diff changeset
86 # Calculate the confidence interval
eda59b985b1c Uploaded
iarc
parents:
diff changeset
87 temp <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$conf.int
eda59b985b1c Uploaded
iarc
parents:
diff changeset
88 confInt[i] <- paste0("[", round(temp[1],2), "-", round(temp[2],2), "]") # Same as paste(sep="")
eda59b985b1c Uploaded
iarc
parents:
diff changeset
89
eda59b985b1c Uploaded
iarc
parents:
diff changeset
90 # Save the proportion
eda59b985b1c Uploaded
iarc
parents:
diff changeset
91 proportion[i] <- strBias[i,2] / sum(strBias[i,2:3])
eda59b985b1c Uploaded
iarc
parents:
diff changeset
92
eda59b985b1c Uploaded
iarc
parents:
diff changeset
93 # Save the sample size (count on NonTr and Tr)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
94 sampleSize[i] <- paste(strBias[i,2], strBias[i,3], sep="-")
eda59b985b1c Uploaded
iarc
parents:
diff changeset
95 } else
eda59b985b1c Uploaded
iarc
parents:
diff changeset
96 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
97 if(strBias[i,1] == "Pool_Data")
eda59b985b1c Uploaded
iarc
parents:
diff changeset
98 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
99 pValChi2_PoolData[j] <- NA
eda59b985b1c Uploaded
iarc
parents:
diff changeset
100 pValChi2_PoolData_Round[j] <- NA
eda59b985b1c Uploaded
iarc
parents:
diff changeset
101 j <- j+1
eda59b985b1c Uploaded
iarc
parents:
diff changeset
102 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
103 else
eda59b985b1c Uploaded
iarc
parents:
diff changeset
104 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
105 # Not enough effective for the test
eda59b985b1c Uploaded
iarc
parents:
diff changeset
106 pValChi2[k] <- NA
eda59b985b1c Uploaded
iarc
parents:
diff changeset
107 pValChi2_round[k] <- NA
eda59b985b1c Uploaded
iarc
parents:
diff changeset
108 k <- k+1
eda59b985b1c Uploaded
iarc
parents:
diff changeset
109 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
110
eda59b985b1c Uploaded
iarc
parents:
diff changeset
111 confInt[i] <- NA
eda59b985b1c Uploaded
iarc
parents:
diff changeset
112 proportion[i] <- NA
eda59b985b1c Uploaded
iarc
parents:
diff changeset
113 sampleSize[i] <- NA
eda59b985b1c Uploaded
iarc
parents:
diff changeset
114 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
115 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
116
eda59b985b1c Uploaded
iarc
parents:
diff changeset
117 # Adjust with FDR
eda59b985b1c Uploaded
iarc
parents:
diff changeset
118 FDR<-p.adjust(pValChi2, method="BH")
eda59b985b1c Uploaded
iarc
parents:
diff changeset
119
eda59b985b1c Uploaded
iarc
parents:
diff changeset
120 # Rount the p-value
eda59b985b1c Uploaded
iarc
parents:
diff changeset
121 for(i in 1:nrow(strBias))
eda59b985b1c Uploaded
iarc
parents:
diff changeset
122 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
123 pValChi2_round[i] <- format(pValChi2[i], scientific=T, digits=3)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
124 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
125
eda59b985b1c Uploaded
iarc
parents:
diff changeset
126 # The option for the pool is specified
eda59b985b1c Uploaded
iarc
parents:
diff changeset
127 if(!is.null(pValChi2_PoolData))
eda59b985b1c Uploaded
iarc
parents:
diff changeset
128 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
129 # Round the p-value for Pool_Data
eda59b985b1c Uploaded
iarc
parents:
diff changeset
130 for(i in 1:6)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
131 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
132 pValChi2_PoolData_Round[i] <- format(pValChi2_PoolData[i], scientific=T, digits=3)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
133 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
134 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
135
eda59b985b1c Uploaded
iarc
parents:
diff changeset
136
eda59b985b1c Uploaded
iarc
parents:
diff changeset
137 # I create a dataframe for add what I want
eda59b985b1c Uploaded
iarc
parents:
diff changeset
138 outputChi2 <- data.frame(round(strBias[,2]/strBias[,3], digits=2), sampleSize, round(proportion, 3), confInt)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
139 outputChi2$Mut.type <- strBias$Alteration
eda59b985b1c Uploaded
iarc
parents:
diff changeset
140 outputChi2$SampleName <- strBias$SampleName
eda59b985b1c Uploaded
iarc
parents:
diff changeset
141 colnames(outputChi2)[1:6]<-c("Strand_Bias", "NonTr-Tr", "Proportion", "Confidence Interval", "Mutation_Type", "SampleName")
eda59b985b1c Uploaded
iarc
parents:
diff changeset
142
eda59b985b1c Uploaded
iarc
parents:
diff changeset
143 # Transform the data frame into a matrix for adding the p-value for the samples and Pool_Data
eda59b985b1c Uploaded
iarc
parents:
diff changeset
144 matrix <- as.matrix(outputChi2)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
145 tempColPValFDR <- matrix(, nrow=length(sampleSize), ncol = 2) # Create an empty matrix with 2 columns for adding the p-value and the FDR
eda59b985b1c Uploaded
iarc
parents:
diff changeset
146 matrix <- cbind(matrix, tempColPValFDR)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
147 j = 1 # Timer for all the sample
eda59b985b1c Uploaded
iarc
parents:
diff changeset
148 k = 1 # Timer for Pool_Data
eda59b985b1c Uploaded
iarc
parents:
diff changeset
149 for(i in 1:nrow(matrix))
eda59b985b1c Uploaded
iarc
parents:
diff changeset
150 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
151 if(matrix[i,6] == "Pool_Data")
eda59b985b1c Uploaded
iarc
parents:
diff changeset
152 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
153 matrix[i,7] <- pValChi2_PoolData_Round[k]
eda59b985b1c Uploaded
iarc
parents:
diff changeset
154 matrix[i,8] <- "NA" # No FDR for Pool_Data
eda59b985b1c Uploaded
iarc
parents:
diff changeset
155 k = k+1
eda59b985b1c Uploaded
iarc
parents:
diff changeset
156 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
157 else
eda59b985b1c Uploaded
iarc
parents:
diff changeset
158 {
eda59b985b1c Uploaded
iarc
parents:
diff changeset
159 matrix[i,7] <- pValChi2_round[j]
eda59b985b1c Uploaded
iarc
parents:
diff changeset
160 matrix[i,8] <- round(FDR[j], 3)
eda59b985b1c Uploaded
iarc
parents:
diff changeset
161 j = j+1
eda59b985b1c Uploaded
iarc
parents:
diff changeset
162 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
163 }
eda59b985b1c Uploaded
iarc
parents:
diff changeset
164
eda59b985b1c Uploaded
iarc
parents:
diff changeset
165 # Reorder the columns
eda59b985b1c Uploaded
iarc
parents:
diff changeset
166 matrix <- cbind(matrix[,1:3], matrix[,7], matrix[,8], matrix[,4:6])
eda59b985b1c Uploaded
iarc
parents:
diff changeset
167 colnames(matrix)[4] <- "P-val-Chi2"
eda59b985b1c Uploaded
iarc
parents:
diff changeset
168 colnames(matrix)[5] <- "FDR"
eda59b985b1c Uploaded
iarc
parents:
diff changeset
169
eda59b985b1c Uploaded
iarc
parents:
diff changeset
170 # Export the file
eda59b985b1c Uploaded
iarc
parents:
diff changeset
171 # dec=".": Set the separator for the decimal by "."
eda59b985b1c Uploaded
iarc
parents:
diff changeset
172 outputFileChi2 <- paste0(opt$folderChi2, "/Output_chi2_strandBias.txt")
eda59b985b1c Uploaded
iarc
parents:
diff changeset
173 write.table(matrix,file=outputFileChi2,quote = FALSE,sep="\t",row.names = FALSE,dec=".")