annotate Intchecks/Script_intensity_check.R @ 3:bdee2c2c484b draft

Uploaded
author melpetera
date Fri, 08 Mar 2019 09:07:12 -0500
parents 4973a2104cfd
children a31f3f802b2b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
1 #########################################################################
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
2 # SCRIPT INTENSITY CHECK #
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
3 # #
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
4 # Input: Data Matrix, VariableMetadata, SampleMetadata #
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
5 # Output: VariableMetadata, Graphics #
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
6 # #
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
7 # Dependencies: RcheckLibrary.R #
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
8 # #
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
9 #########################################################################
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
10
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
11
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
12 # Parameters (for dev)
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
13 if(FALSE){
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
14
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
15 rm(list = ls())
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
16 setwd("Y:\\Developpement\\Intensity check\\Pour tests\\Tests_global")
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
17
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
18 DM.name <- "DM_NA.tabular"
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
19 SM.name <- "SM_NA.tabular"
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
20 VM.name <- "vM_NA.tabular"
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
21 method <- "one_class"
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
22 chosen.stat <- "mean,sd,quartile,decile,NA"
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
23 class.col <- "2"
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
24 test.fold <- "Yes"
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
25 class1 <- "Pools"
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
26 fold.frac <- "Top"
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
27 logarithm <- "log10"
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
28 VM.output <- "new_VM.txt"
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
29 graphs.output <- "Barplots_and_Boxplots.pdf"
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
30 }
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
31
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
32
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
33
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
34
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
35 intens_check <- function(DM.name, SM.name, VM.name, method, chosen.stat, class.col, test.fold, class1, fold.frac,
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
36 logarithm, VM.output, graphs.output){
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
37
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
38 # This function checks intensities by computing various statistics, the number of missing values and the mean fold change
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
39 #
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
40 # Three methods proposed:
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
41 # - global: tests for each variable without distinction between samples
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
42 # - one class: one class versus all the remaining samples
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
43 # - each class: if the class column contains at least three classes and you want to test each of them
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
44 #
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
45 # Parameters:
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
46 # DM.name, SM.name, VM.name: dataMatrix, sampleMetadata, variableMetadata files access
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
47 # method: "no_class" (global), "one_class", "each_class"
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
48 # chosen.stat: character listing the chosen analysis (comma-separated)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
49 # class.col: number of the sampleMetadata's column with classes (if method = one_class or each_class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
50 # test.fold: "Yes" or "No" (if method = one_class or each_class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
51 # class1: name of the class (if method = one_class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
52 # fold.frac: "Top" -> class1/other or "Bottom" -> other/class1 (if method = one_class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
53 # logarithm: "log2", "log10" or "none" (if method = one_class or each_class)
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
54 # VM.output: output file's access (VM with new columns)
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
55 # graphs.output: pdf file's access with barplots for the proportion of NA and boxplots with the folds values
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
56
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
57
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
58
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
59
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
60 # Input ---------------------------------------------------------------------------------------------------
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
61
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
62 DM <- read.table(DM.name, header=TRUE, sep="\t", check.names=FALSE)
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
63 SM <- read.table(SM.name, header=TRUE, sep="\t", check.names=FALSE)
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
64 VM <- read.table(VM.name, header=TRUE, sep="\t", check.names=FALSE)
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
65
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
66
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
67
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
68 # Table match check with RcheckLibrary
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
69 table.check <- match3(DM, SM, VM)
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
70 check.err(table.check)
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
71
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
72
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
73 rownames(DM) <- DM[,1]
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
74 var_names <- DM[,1]
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
75 DM <- DM[,-1]
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
76 DM <- data.frame(t(DM))
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
77
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
78 stat.list <- strsplit(chosen.stat,",")[[1]]
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
79
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
80
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
81 # check class.col, class1 and the number of classes ---------------------------------------------------------
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
82
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
83 #set 1 class for all samples in case of method = no_class
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
84 if(method=="no_class"){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
85 c_class <- rep("global", length=nrow(DM))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
86 classnames <- "global"
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
87 nb_class=1
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
88 test.fold <- "No"
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
89 }
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
90
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
91
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
92 if(method != "no_class"){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
93
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
94 class.col <- colnames(SM)[as.numeric(class.col)]
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
95
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
96 if(!(class.col %in% colnames(SM))){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
97 stop("\n- - - - - - - - -\n", "The column ",class.col, " is not a part of the specify sample Metadata","\n- - - - - - - - -\n")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
98 }
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
99
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
100 c_class <- SM[,class.col]
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
101 c_class <- as.factor(c_class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
102 nb_class <- nlevels(c_class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
103 classnames <- levels(c_class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
104
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
105 if((nb_class < 2)&&(test.fold=="Yes")){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
106 err.1class <- c("\n The column",class.col, "contains only one class, fold calculation could not be executed \n")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
107 cat(err.1class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
108 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
109
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
110 if((nb_class > (nrow(SM))/3)&&(method == "each_class")){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
111 class.err <- c("\n There are too many classes, think about reducing the number of classes and excluding those
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
112 with few samples \n")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
113 cat(class.err)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
114 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
115
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
116
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
117 if(method == "one_class"){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
118 if(!(class1 %in% classnames)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
119 list.class1 <- c("\n Classes:",classnames,"\n")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
120 cat(list.class1)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
121 err.class1 <- c("The class ",class1, " does not appear in the column ", class.col)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
122 stop("\n- - - - - - - - -\n", err.class1,"\n- - - - - - - - -\n")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
123 }
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
124
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
125 # If method is "one_class", relabel every class other than class1 as "Other"
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
126 for(i in 1:length(c_class)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
127 if(c_class[i]!=class1){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
128 c_class <- as.character(c_class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
129 c_class[i] <- "Other"
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
130 c_class <- as.factor(c_class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
131 nb_class <- nlevels(c_class)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
132 classnames <- c(class1,"Other")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
133 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
134 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
135 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
136
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
137 }
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
138
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
139
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
140 # Statistics ------------------------------------------------------------------------------------------------
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
141
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
142
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
143 ### Initialization
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
144
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
145 DM <- cbind(c_class,DM)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
146
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
147 stat.res <- t(DM[0,-1,drop=FALSE])
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
148 names <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
149
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
150 mean.res <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
151 mean.names <- NULL
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
152
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
153 sd.res <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
154 sd.names <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
155
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
156 med.res <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
157 med.names <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
158
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
159 quart.res <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
160 quart.names <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
161
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
162 dec.res <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
163 dec.names <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
164
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
165 NA.res <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
166 NA.names <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
167 pct_NA.res <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
168 pct_NA.names <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
169
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
170 fold.res <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
171 fold.names <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
172
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
173 if(("NA" %in% stat.list)||(test.fold=="Yes")){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
174 graphs <- 1
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
175 }else{
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
176 graphs=0
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
177 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
178
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
179 data_bp <- data.frame() #table for NA barplot
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
180
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
181
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
182
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
183 ### Computation
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
184
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
185
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
186 for(j in 1:nb_class){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
187
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
188 # Mean ---------
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
189
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
190 if("mean" %in% stat.list){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
191 mean.res <- cbind(mean.res, colMeans(DM[which(DM$c_class==classnames[j]),-1],na.rm=TRUE))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
192 mean.names <- cbind(mean.names, paste("Mean",classnames[j], sep="_"))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
193 if(j == nb_class){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
194 stat.res <- cbind(stat.res, mean.res)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
195 names <- cbind(names, mean.names)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
196 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
197 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
198
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
199 # Standard deviation -----
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
200
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
201 if("sd" %in% stat.list){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
202 sd.res <- cbind(sd.res, apply(DM[which(DM$c_class==classnames[j]),-1],2,sd,na.rm=TRUE))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
203 sd.names <- cbind(sd.names, paste("Sd",classnames[j], sep="_"))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
204 if(j == nb_class){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
205 stat.res <- cbind(stat.res, sd.res)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
206 names <- cbind(names, sd.names)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
207 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
208 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
209
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
210 # Median ---------
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
211
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
212 if(("median" %in% stat.list)&&(!("quartile" %in% stat.list))){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
213 med.res <- cbind(med.res, apply(DM[which(DM$c_class==classnames[j]),-1],2,median,na.rm=TRUE))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
214 med.names <- cbind(med.names, paste("Median",classnames[j], sep="_"))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
215 if(j == nb_class){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
216 stat.res <- cbind(stat.res, med.res)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
217 names <- cbind(names, med.names)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
218 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
219 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
220
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
221 # Quartiles ------
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
222
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
223 if("quartile" %in% stat.list){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
224 quart.res <- cbind(quart.res,t(apply(DM[which(DM$c_class==classnames[j]),-1],2,quantile,na.rm=TRUE)))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
225 quart.names <- cbind(quart.names, paste("Min",classnames[j], sep="_"),paste("Q1",classnames[j], sep="_"),
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
226 paste("Median",classnames[j],sep="_"),paste("Q3",classnames[j],sep="_"),
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
227 paste("Max",classnames[j],sep="_"))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
228 if(j == nb_class){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
229 stat.res <- cbind(stat.res, quart.res)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
230 names <- cbind(names, quart.names)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
231 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
232 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
233
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
234 # Deciles ------
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
235
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
236 if("decile" %in% stat.list){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
237 dec.res <- cbind(dec.res,t(apply(DM[which(DM$c_class==classnames[j]),-1],2,quantile,na.rm=TRUE,seq(0,1,0.1))))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
238 dec.names <- cbind(dec.names, t(matrix(paste((paste("D",seq(0,10,1),sep="")),classnames[j],sep="_"))))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
239 if(j == nb_class){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
240 stat.res <- cbind(stat.res, dec.res)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
241 names <- cbind(names, dec.names)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
242 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
243 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
244
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
245 # Missing values ------------
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
246
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
247 if("NA" %in% stat.list){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
248
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
249 nb_NA <- apply(DM[which(DM$c_class==classnames[j]),-1],2,function(x) sum(is.na(x)))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
250 pct_NA <- round(nb_NA/nrow(DM[which(DM$c_class==classnames[j]),-1])*100,digits=4)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
251 NA.res <- cbind(NA.res,nb_NA)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
252 pct_NA.res <- cbind(pct_NA.res,pct_NA)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
253 NA.names <- cbind(NA.names, paste("NA",classnames[j], sep="_"))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
254 pct_NA.names <- cbind(pct_NA.names,paste("Pct_NA", classnames[j], sep="_"))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
255 if(j == nb_class){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
256 stat.res <- cbind(stat.res, NA.res,pct_NA.res)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
257 names <- cbind(names, NA.names,pct_NA.names)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
258 }
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
259
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
260 #for barplots
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
261 Nb_NA_0_20 <- 0
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
262 Nb_NA_20_40 <- 0
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
263 Nb_NA_40_60 <- 0
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
264 Nb_NA_60_80 <- 0
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
265 Nb_NA_80_100 <- 0
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
266
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
267 for (i in 1:length(pct_NA)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
268
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
269 if ((0<=pct_NA[i])&(pct_NA[i]<20)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
270 Nb_NA_0_20=Nb_NA_0_20+1}
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
271
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
272 if ((20<=pct_NA[i])&(pct_NA[i]<40)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
273 Nb_NA_20_40=Nb_NA_20_40+1}
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
274
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
275 if ((40<=pct_NA[i])&(pct_NA[i]<60)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
276 Nb_NA_40_60=Nb_NA_40_60+1}
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
277
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
278 if ((60<=pct_NA[i])&(pct_NA[i]<80)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
279 Nb_NA_60_80=Nb_NA_60_80+1}
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
280
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
281 if ((80<=pct_NA[i])&(pct_NA[i]<=100)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
282 Nb_NA_80_100=Nb_NA_80_100+1}
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
283 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
284 data_bp[1,j] <- Nb_NA_0_20
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
285 data_bp[2,j] <- Nb_NA_20_40
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
286 data_bp[3,j] <- Nb_NA_40_60
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
287 data_bp[4,j] <- Nb_NA_60_80
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
288 data_bp[5,j] <- Nb_NA_80_100
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
289 rownames(data_bp) <- c("0%-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
290
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
291 if(j == nb_class){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
292
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
293 # Alert message if there is no missing value in data matrix
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
294 sum_total <- sum(NA.res)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
295 alerte <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
296 if(sum_total==0){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
297 alerte <- c(alerte, "Data Matrix contains no NA.\n")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
298 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
299 if(length(alerte) != 0){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
300 cat(alerte,"\n")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
301 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
302
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
303
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
304 colnames(data_bp) <- classnames
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
305 data_bp <- as.matrix(data_bp)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
306 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
307 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
308
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
309
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
310 # Mean fold change ------------
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
311
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
312 if(test.fold=="Yes"){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
313 if(nb_class >= 2){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
314 if(j!=nb_class){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
315 ratio1 <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
316 ratio2 <- NULL
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
317 if(method=="each_class"){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
318 fold.frac <- "Top"
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
319 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
320 for(k in (j+1):nb_class) {
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
321 if(fold.frac=="Bottom"){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
322 ratio1 <- classnames[k]
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
323 ratio2 <- classnames[j]
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
324 }else{
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
325 ratio1 <- classnames[j]
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
326 ratio2 <- classnames[k]
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
327 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
328 fold <- colMeans(DM[which(DM$c_class==ratio1),-1],na.rm=TRUE)/
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
329 colMeans(DM[which(DM$c_class==ratio2),-1],na.rm=TRUE)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
330 if(logarithm=="log2"){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
331 fold.res <- cbind(fold.res,log2(fold))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
332 }else if(logarithm=="log10"){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
333 fold.res <- cbind(fold.res,log10(fold))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
334 }else{
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
335 fold.res <- cbind(fold.res, fold)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
336 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
337 if(logarithm == "none"){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
338 fold.names <- cbind(fold.names,paste("fold",ratio1,"VS", ratio2, sep="_"))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
339 }else{
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
340 fold.names <- cbind(fold.names,paste(logarithm, "fold", ratio1, "VS", ratio2, sep="_"))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
341 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
342 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
343
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
344 }else{
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
345 stat.res <- cbind(stat.res,fold.res)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
346 names <- cbind(names, fold.names)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
347 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
348 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
349 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
350
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
351 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
352
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
353 ############
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
354
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
355
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
356 # Check the new column names against variableMetadata: any name that already exists there gets a "_2" suffix
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
357
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
358 VM.names <- colnames(VM)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
359 for (i in 1:length(VM.names)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
360 for (j in 1:length(names)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
361 if (VM.names[i]==names[j]){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
362 names[j] <- paste(names[j], "2", sep="_")
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
363 }
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
364 }
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
365 }
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
366
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
367 colnames(stat.res) <- names
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
368
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
369
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
370
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
371
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
372
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
373
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
374 # Output ---------------------------------------------------------------------------------------------------
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
375
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
376 VM <-cbind(VM,stat.res)
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
377
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
378 write.table(VM, VM.output,sep="\t", quote=FALSE, row.names=FALSE)
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
379
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
380
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
381 ### graphics pdf
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
382
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
383 if(graphs == 1){
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
384
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
385 pdf(graphs.output)
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
386
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
387
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
388 #Barplots for NA
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
389 if("NA" %in% stat.list){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
390 graph.colors <- c("green3","palegreen3","lightblue","orangered","red")
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
391 par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE)
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
392
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
393 bp=barplot(data_bp, col=graph.colors, main="Proportion of NA", xlab="Classes", ylab="Variables")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
394 legend("topright", fill=graph.colors,rownames(data_bp), inset=c(-0.3,0))
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
395
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
396 stock=0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
397 for (i in 1:nrow(data_bp)){
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
398 text(bp, stock+data_bp[i,]/2, data_bp[i,], col="white", cex=0.7)
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
399 stock <- stock+data_bp[i,]
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
400 }
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
401
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
402 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
403
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
404 # Boxplots for fold test
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
405
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
406 if((test.fold=="Yes")&&(nb_class >= 2)){
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
407
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
408 clean_fold <- fold.res
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
409 for(i in 1:nrow(clean_fold)){
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
410 for(j in 1:ncol(clean_fold)){
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
411 if(is.infinite(clean_fold[i,j])){
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
412 clean_fold[i,j] <- NA
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
413 }
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
414 }
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
415 }
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
416 for (j in 1:ncol(clean_fold)){
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
417 title <- paste(fold.names[j])
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
418 boxplot(clean_fold[,j], main=title)
1
4973a2104cfd Uploaded
melpetera
parents: 0
diff changeset
419 }
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
420 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
421
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
422 dev.off()
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
423
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
424 }else{
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
425 pdf(graphs.output)
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
426 plot.new()
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
427 legend("center","You did not select any option with graphical output.")
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
428 dev.off()
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
429 }
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
430
3
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
431 }
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
432
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
433
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
434
bdee2c2c484b Uploaded
melpetera
parents: 1
diff changeset
435
0
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
436
c2c2e1be904a Uploaded
melpetera
parents:
diff changeset
437