comparison R/chi2test_MutSpecStat_Galaxy.r @ 7:eda59b985b1c draft default tip

Uploaded
author iarc
date Mon, 13 Mar 2017 08:21:19 -0400
parents
children
comparison
equal deleted inserted replaced
6:46a10309dfe2 7:eda59b985b1c
1 #!/usr/bin/Rscript
2
3 #---------------------------------------#
4 # Author: Maude #
5 # Script: chi2test_MutSpecStat_Galaxy.r #
6 # Last update: 18/10/16 #
7 #---------------------------------------#
8
9
10 #########################################################################################################################################
11 # Calculate the chi2 test for the strand bias #
12 #########################################################################################################################################
13
14 #-------------------------------------------------------------------------------
15 # Load library for recovering the arguments
16 #-------------------------------------------------------------------------------
17 suppressMessages(suppressWarnings(require("getopt")))
18
19
20 #-------------------------------------------------------------------------------
21 # Recover the arguments
22 #-------------------------------------------------------------------------------
23 spec = matrix(c(
24 "folderChi2", "folderChi2", 1, "character",
25 "help", "h", 0, "logical"
26 ),
27 byrow=TRUE, ncol=4
28 )
29
30 opt = getopt(spec)
31
32 # No argument is pass to the command line
33 if(length(opt) == 1)
34 {
35 cat(paste("Usage:\n chi2test_MutSpecStat_Galaxy.r --folderChi2 <path_to_folder> \n",sep=""))
36 q(status=1)
37 }
38
39 # Help was asked for.
40 if ( !is.null(opt$help) )
41 {
42 # print a friendly message and exit with a non-zero error code
43 cat(paste("Usage:\n chi2test_MutSpecStat_Galaxy.r --folderChi2 <path_to_folder> \n",sep=""))
44 q(status=1)
45 }
46
47
48
49
50
51 ## Load the data. There is one column with the mutation type and the sample name but it's just for knowing what is corresponding to each line. The two columns with the number of variant per strand would be sufficient.
52 inputChi2 <- paste0(opt$folderChi2, "/Input_chi2_strandBias.txt")
53 strBias<-read.delim(inputChi2, dec=".")
54
55 # Chi2
56 pValChi2 <- c() # First I create an empty vector and then I apply a for on the data load
57 pValChi2_round <- c() # Empty vector with the rounded p-values
58 confInt <- c() # Empty vector for the confident interval
59 proportion <- c() # Empty vector for the proportion of NonTr compared to the (NonTr+Tr)
60 sampleSize <- c() # Empty vector for the count of samples in NonTr and Tr
61 # For Pool_Data save the p-values in a different vector for not having them for the FDR
62 pValChi2_PoolData <- c()
63 pValChi2_PoolData_Round <- c()
64
65 j = 1 # Timer for pValChi2_PoolData vector
66 k = 1 # Timer for pValChi2
67
68 for(i in 1:nrow(strBias))
69 {
70 if(! sum(strBias[i,2:3]) == 0)
71 {
72 # For Pool_Data
73 if(strBias[i,1] == "Pool_Data")
74 {
75 pValChi2_PoolData[j] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value
76 j <- j+1
77 }
78 # For the other sample(s)
79 else
80 {
81 # Calculate the p-value
82 pValChi2[k] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value
83 k <- k+1
84 }
85
86 # Calculate the confidence interval
87 temp <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$conf.int
88 confInt[i] <- paste0("[", round(temp[1],2), "-", round(temp[2],2), "]") # Same as paste(sep="")
89
90 # Save the proportion
91 proportion[i] <- strBias[i,2] / sum(strBias[i,2:3])
92
93 # Save the sample size (count on NonTr and Tr)
94 sampleSize[i] <- paste(strBias[i,2], strBias[i,3], sep="-")
95 } else
96 {
97 if(strBias[i,1] == "Pool_Data")
98 {
99 pValChi2_PoolData[j] <- NA
100 pValChi2_PoolData_Round[j] <- NA
101 j <- j+1
102 }
103 else
104 {
105 # Not enough effective for the test
106 pValChi2[k] <- NA
107 pValChi2_round[k] <- NA
108 k <- k+1
109 }
110
111 confInt[i] <- NA
112 proportion[i] <- NA
113 sampleSize[i] <- NA
114 }
115 }
116
117 # Adjust with FDR
118 FDR<-p.adjust(pValChi2, method="BH")
119
120 # Rount the p-value
121 for(i in 1:nrow(strBias))
122 {
123 pValChi2_round[i] <- format(pValChi2[i], scientific=T, digits=3)
124 }
125
126 # The option for the pool is specified
127 if(!is.null(pValChi2_PoolData))
128 {
129 # Round the p-value for Pool_Data
130 for(i in 1:6)
131 {
132 pValChi2_PoolData_Round[i] <- format(pValChi2_PoolData[i], scientific=T, digits=3)
133 }
134 }
135
136
137 # I create a dataframe for add what I want
138 outputChi2 <- data.frame(round(strBias[,2]/strBias[,3], digits=2), sampleSize, round(proportion, 3), confInt)
139 outputChi2$Mut.type <- strBias$Alteration
140 outputChi2$SampleName <- strBias$SampleName
141 colnames(outputChi2)[1:6]<-c("Strand_Bias", "NonTr-Tr", "Proportion", "Confidence Interval", "Mutation_Type", "SampleName")
142
143 # Transform the data frame into a matrix for adding the p-value for the samples and Pool_Data
144 matrix <- as.matrix(outputChi2)
145 tempColPValFDR <- matrix(, nrow=length(sampleSize), ncol = 2) # Create an empty matrix with 2 columns for adding the p-value and the FDR
146 matrix <- cbind(matrix, tempColPValFDR)
147 j = 1 # Timer for all the sample
148 k = 1 # Timer for Pool_Data
149 for(i in 1:nrow(matrix))
150 {
151 if(matrix[i,6] == "Pool_Data")
152 {
153 matrix[i,7] <- pValChi2_PoolData_Round[k]
154 matrix[i,8] <- "NA" # No FDR for Pool_Data
155 k = k+1
156 }
157 else
158 {
159 matrix[i,7] <- pValChi2_round[j]
160 matrix[i,8] <- round(FDR[j], 3)
161 j = j+1
162 }
163 }
164
165 # Reorder the columns
166 matrix <- cbind(matrix[,1:3], matrix[,7], matrix[,8], matrix[,4:6])
167 colnames(matrix)[4] <- "P-val-Chi2"
168 colnames(matrix)[5] <- "FDR"
169
170 # Export the file
171 # dec=".": Set the separator for the decimal by "."
172 outputFileChi2 <- paste0(opt$folderChi2, "/Output_chi2_strandBias.txt")
173 write.table(matrix,file=outputFileChi2,quote = FALSE,sep="\t",row.names = FALSE,dec=".")