7
|
1 #!/usr/bin/Rscript
|
|
2
|
|
3 #---------------------------------------#
|
|
4 # Author: Maude #
|
|
5 # Script: chi2test_MutSpecStat_Galaxy.r #
|
|
6 # Last update: 18/10/16 #
|
|
7 #---------------------------------------#
|
|
8
|
|
9
|
|
10 #########################################################################################################################################
|
|
11 # Calculate the chi2 test for the strand bias #
|
|
12 #########################################################################################################################################
|
|
13
|
|
14 #-------------------------------------------------------------------------------
|
|
15 # Load library for recovering the arguments
|
|
16 #-------------------------------------------------------------------------------
|
|
17 suppressMessages(suppressWarnings(require("getopt")))
|
|
18
|
|
19
|
|
20 #-------------------------------------------------------------------------------
|
|
21 # Recover the arguments
|
|
22 #-------------------------------------------------------------------------------
|
|
23 spec = matrix(c(
|
|
24 "folderChi2", "folderChi2", 1, "character",
|
|
25 "help", "h", 0, "logical"
|
|
26 ),
|
|
27 byrow=TRUE, ncol=4
|
|
28 )
|
|
29
|
|
30 opt = getopt(spec)
|
|
31
|
|
32 # No argument is pass to the command line
|
|
33 if(length(opt) == 1)
|
|
34 {
|
|
35 cat(paste("Usage:\n chi2test_MutSpecStat_Galaxy.r --folderChi2 <path_to_folder> \n",sep=""))
|
|
36 q(status=1)
|
|
37 }
|
|
38
|
|
39 # Help was asked for.
|
|
40 if ( !is.null(opt$help) )
|
|
41 {
|
|
42 # print a friendly message and exit with a non-zero error code
|
|
43 cat(paste("Usage:\n chi2test_MutSpecStat_Galaxy.r --folderChi2 <path_to_folder> \n",sep=""))
|
|
44 q(status=1)
|
|
45 }
|
|
46
|
|
47
|
|
48
|
|
49
|
|
50
|
|
51 ## Load the data. There is one column with the mutation type and the sample name but it's just for knowing what is corresponding to each line. The two columns with the number of variant per strand would be sufficient.
|
|
52 inputChi2 <- paste0(opt$folderChi2, "/Input_chi2_strandBias.txt")
|
|
53 strBias<-read.delim(inputChi2, dec=".")
|
|
54
|
|
55 # Chi2
|
|
56 pValChi2 <- c() # First I create an empty vector and then I apply a for on the data load
|
|
57 pValChi2_round <- c() # Empty vector with the rounded p-values
|
|
58 confInt <- c() # Empty vector for the confident interval
|
|
59 proportion <- c() # Empty vector for the proportion of NonTr compared to the (NonTr+Tr)
|
|
60 sampleSize <- c() # Empty vector for the count of samples in NonTr and Tr
|
|
61 # For Pool_Data save the p-values in a different vector for not having them for the FDR
|
|
62 pValChi2_PoolData <- c()
|
|
63 pValChi2_PoolData_Round <- c()
|
|
64
|
|
65 j = 1 # Timer for pValChi2_PoolData vector
|
|
66 k = 1 # Timer for pValChi2
|
|
67
|
|
68 for(i in 1:nrow(strBias))
|
|
69 {
|
|
70 if(! sum(strBias[i,2:3]) == 0)
|
|
71 {
|
|
72 # For Pool_Data
|
|
73 if(strBias[i,1] == "Pool_Data")
|
|
74 {
|
|
75 pValChi2_PoolData[j] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value
|
|
76 j <- j+1
|
|
77 }
|
|
78 # For the other sample(s)
|
|
79 else
|
|
80 {
|
|
81 # Calculate the p-value
|
|
82 pValChi2[k] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value
|
|
83 k <- k+1
|
|
84 }
|
|
85
|
|
86 # Calculate the confidence interval
|
|
87 temp <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$conf.int
|
|
88 confInt[i] <- paste0("[", round(temp[1],2), "-", round(temp[2],2), "]") # Same as paste(sep="")
|
|
89
|
|
90 # Save the proportion
|
|
91 proportion[i] <- strBias[i,2] / sum(strBias[i,2:3])
|
|
92
|
|
93 # Save the sample size (count on NonTr and Tr)
|
|
94 sampleSize[i] <- paste(strBias[i,2], strBias[i,3], sep="-")
|
|
95 } else
|
|
96 {
|
|
97 if(strBias[i,1] == "Pool_Data")
|
|
98 {
|
|
99 pValChi2_PoolData[j] <- NA
|
|
100 pValChi2_PoolData_Round[j] <- NA
|
|
101 j <- j+1
|
|
102 }
|
|
103 else
|
|
104 {
|
|
105 # Not enough effective for the test
|
|
106 pValChi2[k] <- NA
|
|
107 pValChi2_round[k] <- NA
|
|
108 k <- k+1
|
|
109 }
|
|
110
|
|
111 confInt[i] <- NA
|
|
112 proportion[i] <- NA
|
|
113 sampleSize[i] <- NA
|
|
114 }
|
|
115 }
|
|
116
|
|
117 # Adjust with FDR
|
|
118 FDR<-p.adjust(pValChi2, method="BH")
|
|
119
|
|
120 # Rount the p-value
|
|
121 for(i in 1:nrow(strBias))
|
|
122 {
|
|
123 pValChi2_round[i] <- format(pValChi2[i], scientific=T, digits=3)
|
|
124 }
|
|
125
|
|
126 # The option for the pool is specified
|
|
127 if(!is.null(pValChi2_PoolData))
|
|
128 {
|
|
129 # Round the p-value for Pool_Data
|
|
130 for(i in 1:6)
|
|
131 {
|
|
132 pValChi2_PoolData_Round[i] <- format(pValChi2_PoolData[i], scientific=T, digits=3)
|
|
133 }
|
|
134 }
|
|
135
|
|
136
|
|
137 # I create a dataframe for add what I want
|
|
138 outputChi2 <- data.frame(round(strBias[,2]/strBias[,3], digits=2), sampleSize, round(proportion, 3), confInt)
|
|
139 outputChi2$Mut.type <- strBias$Alteration
|
|
140 outputChi2$SampleName <- strBias$SampleName
|
|
141 colnames(outputChi2)[1:6]<-c("Strand_Bias", "NonTr-Tr", "Proportion", "Confidence Interval", "Mutation_Type", "SampleName")
|
|
142
|
|
143 # Transform the data frame into a matrix for adding the p-value for the samples and Pool_Data
|
|
144 matrix <- as.matrix(outputChi2)
|
|
145 tempColPValFDR <- matrix(, nrow=length(sampleSize), ncol = 2) # Create an empty matrix with 2 columns for adding the p-value and the FDR
|
|
146 matrix <- cbind(matrix, tempColPValFDR)
|
|
147 j = 1 # Timer for all the sample
|
|
148 k = 1 # Timer for Pool_Data
|
|
149 for(i in 1:nrow(matrix))
|
|
150 {
|
|
151 if(matrix[i,6] == "Pool_Data")
|
|
152 {
|
|
153 matrix[i,7] <- pValChi2_PoolData_Round[k]
|
|
154 matrix[i,8] <- "NA" # No FDR for Pool_Data
|
|
155 k = k+1
|
|
156 }
|
|
157 else
|
|
158 {
|
|
159 matrix[i,7] <- pValChi2_round[j]
|
|
160 matrix[i,8] <- round(FDR[j], 3)
|
|
161 j = j+1
|
|
162 }
|
|
163 }
|
|
164
|
|
165 # Reorder the columns
|
|
166 matrix <- cbind(matrix[,1:3], matrix[,7], matrix[,8], matrix[,4:6])
|
|
167 colnames(matrix)[4] <- "P-val-Chi2"
|
|
168 colnames(matrix)[5] <- "FDR"
|
|
169
|
|
170 # Export the file
|
|
171 # dec=".": Set the separator for the decimal by "."
|
|
172 outputFileChi2 <- paste0(opt$folderChi2, "/Output_chi2_strandBias.txt")
|
|
173 write.table(matrix,file=outputFileChi2,quote = FALSE,sep="\t",row.names = FALSE,dec=".")
|