Mercurial > repos > melpetera > intensity_checks
comparison Intchecks/Script_intensity_check.R @ 3:bdee2c2c484b draft
Uploaded
author | melpetera |
---|---|
date | Fri, 08 Mar 2019 09:07:12 -0500 |
parents | 4973a2104cfd |
children | a31f3f802b2b |
comparison
equal
deleted
inserted
replaced
2:a7553caa2572 | 3:bdee2c2c484b |
---|---|
1 ######################################################################### | 1 ######################################################################### |
2 # SCRIPT INTENSITY CHECK # | 2 # SCRIPT INTENSITY CHECK # |
3 # # | 3 # # |
4 # Input: Data Matrix, VariableMetadata, SampleMetadata # | 4 # Input: Data Matrix, VariableMetadata, SampleMetadata # |
5 # Output: VariableMetadata, Graphics (barplots and boxplots) # | 5 # Output: VariableMetadata, Graphics # |
6 # # | 6 # # |
7 # Dependencies: RcheckLibrary.R # | 7 # Dependencies: RcheckLibrary.R # |
8 # # | 8 # # |
9 ######################################################################### | 9 ######################################################################### |
10 | 10 |
11 | 11 |
12 # Parameters (for dev) | 12 # Parameters (for dev) |
13 if(FALSE){ | 13 if(FALSE){ |
14 | 14 |
15 rm(list = ls()) | 15 rm(list = ls()) |
16 setwd("Y:\\Developpement\\Intensity check\\Pour tests") | 16 setwd("Y:\\Developpement\\Intensity check\\Pour tests\\Tests_global") |
17 | 17 |
18 DM.name <- "DM_NA.tabular" | 18 DM.name <- "DM_NA.tabular" |
19 SM.name <- "SM_NA.tabular" | 19 SM.name <- "SM_NA.tabular" |
20 VM.name <- "vM_NA.tabular" | 20 VM.name <- "vM_NA.tabular" |
21 class.col <- "2" | 21 method <- "one_class" |
22 type <- "One_class" | 22 chosen.stat <- "mean,sd,quartile,decile,NA" |
23 class1 <- "Blanks" | 23 class.col <- "2" |
24 test.fold <- "Yes" | |
25 class1 <- "Pools" | |
24 fold.frac <- "Top" | 26 fold.frac <- "Top" |
25 logarithm <- "log2" | 27 logarithm <- "log10" |
26 VM.output <- "new_VM.txt" | 28 VM.output <- "new_VM.txt" |
27 graphs.output <- "Barplots_and_Boxplots.pdf" | 29 graphs.output <- "Barplots_and_Boxplots.pdf" |
28 } | 30 } |
29 | 31 |
30 | 32 |
31 | 33 |
32 | 34 |
33 intens_check <- function(DM.name, SM.name, VM.name, class.col, type, class1, fold.frac, logarithm, | 35 intens_check <- function(DM.name, SM.name, VM.name, method, chosen.stat, class.col, test.fold, class1, fold.frac, |
34 VM.output, graphs.output){ | 36 logarithm, VM.output, graphs.output){ |
35 | 37 |
36 | 38 # This function allows to check the intensities with various statistics, number of missing values and mean fold change |
37 # This function allows to check the intensities considering classes with a mean fold change calculation, | |
38 # the number and the proportion of missing values (NA) in dataMatrix | |
39 # | 39 # |
40 # Two options: | 40 # Three methods proposed: |
41 # - one class (selected by the user) against all the remaining samples ("One_class") | 41 # - global: tests for each variable without distinction between samples |
42 # - tests on each class ("Each_class") | 42 # - one class: one class versus all the remaining samples |
43 # - each class: if the class columns contains at least three classes and you want to test each of them | |
43 # | 44 # |
44 # Parameters: | 45 # Parameters: |
45 # DM.name, SM.name, VM.name: dataMatrix, sampleMetadata, variableMetadata files access | 46 # DM.name, SM.name, VM.name: dataMatrix, sampleMetadata, variableMetadata files access |
46 # class.col: number of the sampleMetadata's column with classes | 47 # method: "global", "one_class", "each_class" |
47 # type: "One_class" or "Each_class" | 48 # chosen.stat: character listing the chosen analysis (comma-separated) |
48 # class1: name of the class, only if type="One_class" | 49 # class.col: number of the sampleMetadata's column with classes (if method = one_class or each_class) |
49 # fold.frac: if type="One class": class1/other ("Top") or other/class1 ("Bottom") | 50 # test.fold: "yes" or "no" (if method = one_class or each_class) |
50 # logarithm: "log2", "log10" or "none" for log mean fold change | 51 # class1: name of the class (if method = one_class) |
52 # fold.frac: "Top" -> class1/other or "Bottom" -> other/class1 (if method = one_class) | |
53 # logarithm: "log2", "log10" or "none" (if method = one_class or each_class) | |
51 # VM.output: output file's access (VM with new columns) | 54 # VM.output: output file's access (VM with new columns) |
52 # graphs.output: pdf file's access with barplots for the proportion of NA and boxplots with the folds values | 55 # graphs.output: pdf file's access with barplots for the proportion of NA and boxplots with the folds values |
53 | 56 |
54 | |
55 | 57 |
56 | 58 |
57 | 59 |
58 # Input --------------------------------------------------------------------------------------------------- | 60 # Input --------------------------------------------------------------------------------------------------- |
59 | 61 |
64 | 66 |
65 | 67 |
66 # Table match check with Rchecklibrary | 68 # Table match check with Rchecklibrary |
67 table.check <- match3(DM, SM, VM) | 69 table.check <- match3(DM, SM, VM) |
68 check.err(table.check) | 70 check.err(table.check) |
69 | 71 |
70 | |
71 | 72 |
72 rownames(DM) <- DM[,1] | 73 rownames(DM) <- DM[,1] |
73 var_names <- DM[,1] | 74 var_names <- DM[,1] |
74 DM <- DM[,-1] | 75 DM <- DM[,-1] |
75 DM <- data.frame(t(DM)) | 76 DM <- data.frame(t(DM)) |
76 | 77 |
77 class.col <- colnames(SM)[as.numeric(class.col)] | 78 stat.list <- strsplit(chosen.stat,",")[[1]] |
78 | 79 |
79 | 80 |
80 # check class.col, class1 and the number of classes --------------------------------------------------------- | 81 # check class.col, class1 and the number of classes --------------------------------------------------------- |
81 | 82 |
82 if(!(class.col %in% colnames(SM))){ | 83 #set 1 class for all samples in case of method = no_class |
83 stop("\n- - - - - - - - -\n", "The column ",class.col, " is not a part of the specify sample Metadata","\n- - - - - - - - -\n") | 84 if(method=="no_class"){ |
84 } | 85 c_class <- rep("global", length=nrow(DM)) |
85 | 86 classnames <- "global" |
86 c_class <- SM[,class.col] | 87 nb_class=1 |
87 c_class <- as.factor(c_class) | 88 test.fold <- "No" |
88 nb_class <- nlevels(c_class) | 89 } |
89 classnames <- levels(c_class) | 90 |
90 | 91 |
91 if(nb_class < 2){ | 92 if(method != "no_class"){ |
92 err.1class <- c("\n The column",class.col, "contains only one class, fold calculation could not be executed \n") | 93 |
93 cat(err.1class) | 94 class.col <- colnames(SM)[as.numeric(class.col)] |
94 } | 95 |
95 | 96 if(!(class.col %in% colnames(SM))){ |
96 if((nb_class > (nrow(SM))/3)){ | 97 stop("\n- - - - - - - - -\n", "The column ",class.col, " is not a part of the specify sample Metadata","\n- - - - - - - - -\n") |
97 class.err <- c("\n There are too many classes, think about reducing the number of classes and excluding those | 98 } |
98 with few samples \n") | 99 |
99 cat(class.err) | 100 c_class <- SM[,class.col] |
100 } | 101 c_class <- as.factor(c_class) |
101 | 102 nb_class <- nlevels(c_class) |
102 | 103 classnames <- levels(c_class) |
103 if(type == "One_class"){ | 104 |
104 if(!(class1 %in% classnames)){ | 105 if((nb_class < 2)&&(test.fold=="Yes")){ |
105 list.class1 <- c("\n Classes:",classnames,"\n") | 106 err.1class <- c("\n The column",class.col, "contains only one class, fold calculation could not be executed \n") |
106 cat(list.class1) | 107 cat(err.1class) |
107 err.class1 <- c("The class ",class1, " does not appear in the column ", class.col) | 108 } |
108 stop("\n- - - - - - - - -\n", err.class1,"\n- - - - - - - - -\n") | 109 |
109 } | 110 if((nb_class > (nrow(SM))/3)&&(method == "each_class")){ |
110 } | 111 class.err <- c("\n There are too many classes, think about reducing the number of classes and excluding those |
111 | 112 with few samples \n") |
112 | 113 cat(class.err) |
113 #If type is "one_class", change others classes in "other" | 114 } |
114 if(type == "One_class"){ | 115 |
115 for(i in 1:length(c_class)){ | 116 |
116 if(c_class[i]!=class1){ | 117 if(method == "one_class"){ |
117 c_class <- as.character(c_class) | 118 if(!(class1 %in% classnames)){ |
118 c_class[i] <- "Other" | 119 list.class1 <- c("\n Classes:",classnames,"\n") |
119 c_class <- as.factor(c_class) | 120 cat(list.class1) |
120 nb_class <- nlevels(c_class) | 121 err.class1 <- c("The class ",class1, " does not appear in the column ", class.col) |
121 classnames <- c(class1,"Other") | 122 stop("\n- - - - - - - - -\n", err.class1,"\n- - - - - - - - -\n") |
123 } | |
124 | |
125 #If method is "one_class", change others classes in "other" | |
126 for(i in 1:length(c_class)){ | |
127 if(c_class[i]!=class1){ | |
128 c_class <- as.character(c_class) | |
129 c_class[i] <- "Other" | |
130 c_class <- as.factor(c_class) | |
131 nb_class <- nlevels(c_class) | |
132 classnames <- c(class1,"Other") | |
133 } | |
134 } | |
135 } | |
136 | |
137 } | |
138 | |
139 | |
140 # Statistics ------------------------------------------------------------------------------------------------ | |
141 | |
142 | |
143 ### Initialization | |
144 | |
145 DM <- cbind(c_class,DM) | |
146 | |
147 stat.res <- t(DM[0,-1,drop=FALSE]) | |
148 names <- NULL | |
149 | |
150 mean.res <- NULL | |
151 mean.names <- NULL | |
152 | |
153 sd.res <- NULL | |
154 sd.names <- NULL | |
155 | |
156 med.res <- NULL | |
157 med.names <- NULL | |
158 | |
159 quart.res <- NULL | |
160 quart.names <- NULL | |
161 | |
162 dec.res <- NULL | |
163 dec.names <- NULL | |
164 | |
165 NA.res <- NULL | |
166 NA.names <- NULL | |
167 pct_NA.res <- NULL | |
168 pct_NA.names <- NULL | |
169 | |
170 fold.res <- NULL | |
171 fold.names <- NULL | |
172 | |
173 if(("NA" %in% stat.list)||(test.fold=="Yes")){ | |
174 graphs <- 1 | |
175 }else{ | |
176 graphs=0 | |
177 } | |
178 | |
179 data_bp <- data.frame() #table for NA barplot | |
180 | |
181 | |
182 | |
183 ### Computation | |
184 | |
185 | |
186 for(j in 1:nb_class){ | |
187 | |
188 # Mean --------- | |
189 | |
190 if("mean" %in% stat.list){ | |
191 mean.res <- cbind(mean.res, colMeans(DM[which(DM$c_class==classnames[j]),-1],na.rm=TRUE)) | |
192 mean.names <- cbind(mean.names, paste("Mean",classnames[j], sep="_")) | |
193 if(j == nb_class){ | |
194 stat.res <- cbind(stat.res, mean.res) | |
195 names <- cbind(names, mean.names) | |
196 } | |
197 } | |
198 | |
199 # Standard deviation ----- | |
200 | |
201 if("sd" %in% stat.list){ | |
202 sd.res <- cbind(sd.res, apply(DM[which(DM$c_class==classnames[j]),-1],2,sd,na.rm=TRUE)) | |
203 sd.names <- cbind(sd.names, paste("Sd",classnames[j], sep="_")) | |
204 if(j == nb_class){ | |
205 stat.res <- cbind(stat.res, sd.res) | |
206 names <- cbind(names, sd.names) | |
207 } | |
208 } | |
209 | |
210 # Median --------- | |
211 | |
212 if(("median" %in% stat.list)&&(!("quartile" %in% stat.list))){ | |
213 med.res <- cbind(med.res, apply(DM[which(DM$c_class==classnames[j]),-1],2,median,na.rm=TRUE)) | |
214 med.names <- cbind(med.names, paste("Median",classnames[j], sep="_")) | |
215 if(j == nb_class){ | |
216 stat.res <- cbind(stat.res, med.res) | |
217 names <- cbind(names, med.names) | |
218 } | |
219 } | |
220 | |
221 # Quartiles ------ | |
222 | |
223 if("quartile" %in% stat.list){ | |
224 quart.res <- cbind(quart.res,t(apply(DM[which(DM$c_class==classnames[j]),-1],2,quantile,na.rm=TRUE))) | |
225 quart.names <- cbind(quart.names, paste("Min",classnames[j], sep="_"),paste("Q1",classnames[j], sep="_"), | |
226 paste("Median",classnames[j],sep="_"),paste("Q3",classnames[j],sep="_"), | |
227 paste("Max",classnames[j],sep="_")) | |
228 if(j == nb_class){ | |
229 stat.res <- cbind(stat.res, quart.res) | |
230 names <- cbind(names, quart.names) | |
231 } | |
232 } | |
233 | |
234 # Deciles ------ | |
235 | |
236 if("decile" %in% stat.list){ | |
237 dec.res <- cbind(dec.res,t(apply(DM[which(DM$c_class==classnames[j]),-1],2,quantile,na.rm=TRUE,seq(0,1,0.1)))) | |
238 dec.names <- cbind(dec.names, t(matrix(paste((paste("D",seq(0,10,1),sep="")),classnames[j],sep="_")))) | |
239 if(j == nb_class){ | |
240 stat.res <- cbind(stat.res, dec.res) | |
241 names <- cbind(names, dec.names) | |
242 } | |
243 } | |
244 | |
245 # Missing values ------------ | |
246 | |
247 if("NA" %in% stat.list){ | |
248 | |
249 nb_NA <- apply(DM[which(DM$c_class==classnames[j]),-1],2,function(x) sum(is.na(x))) | |
250 pct_NA <- round(nb_NA/nrow(DM[which(DM$c_class==classnames[j]),-1])*100,digits=4) | |
251 NA.res <- cbind(NA.res,nb_NA) | |
252 pct_NA.res <- cbind(pct_NA.res,pct_NA) | |
253 NA.names <- cbind(NA.names, paste("NA",classnames[j], sep="_")) | |
254 pct_NA.names <- cbind(pct_NA.names,paste("Pct_NA", classnames[j], sep="_")) | |
255 if(j == nb_class){ | |
256 stat.res <- cbind(stat.res, NA.res,pct_NA.res) | |
257 names <- cbind(names, NA.names,pct_NA.names) | |
258 } | |
122 | 259 |
123 } | 260 #for barplots |
124 } | 261 Nb_NA_0_20 <- 0 |
125 } | 262 Nb_NA_20_40 <- 0 |
126 | 263 Nb_NA_40_60 <- 0 |
127 DM <- cbind(DM,c_class) | 264 Nb_NA_60_80 <- 0 |
128 | 265 Nb_NA_80_100 <- 0 |
129 | 266 |
130 | 267 for (i in 1:length(pct_NA)){ |
131 # fold calculation ------------------------------------------------------------------------------------------- | 268 |
132 | 269 if ((0<=pct_NA[i])&(pct_NA[i]<20)){ |
133 if(nb_class >= 2){ | 270 Nb_NA_0_20=Nb_NA_0_20+1} |
134 | 271 |
135 | 272 if ((20<=pct_NA[i])&(pct_NA[i]<40)){ |
136 fold <- data.frame() | 273 Nb_NA_20_40=Nb_NA_20_40+1} |
137 n <- 1 | 274 |
138 ratio1 <- NULL | 275 if ((40<=pct_NA[i])&(pct_NA[i]<60)){ |
139 ratio2 <- NULL | 276 Nb_NA_40_60=Nb_NA_40_60+1} |
140 | 277 |
141 if(type=="Each_class"){ | 278 if ((60<=pct_NA[i])&(pct_NA[i]<80)){ |
142 fold.frac <- "Top" | 279 Nb_NA_60_80=Nb_NA_60_80+1} |
143 } | 280 |
144 | 281 if ((80<=pct_NA[i])&(pct_NA[i]<=100)){ |
145 for(j in 1:(nb_class-1)){ | 282 Nb_NA_80_100=Nb_NA_80_100+1} |
146 for(k in (j+1):nb_class) { | 283 } |
284 data_bp[1,j] <- Nb_NA_0_20 | |
285 data_bp[2,j] <- Nb_NA_20_40 | |
286 data_bp[3,j] <- Nb_NA_40_60 | |
287 data_bp[4,j] <- Nb_NA_60_80 | |
288 data_bp[5,j] <- Nb_NA_80_100 | |
289 rownames(data_bp) <- c("0%-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%") | |
290 | |
291 if(j == nb_class){ | |
147 | 292 |
148 if(fold.frac=="Bottom"){ | 293 # Alert message if there is no missing value in data matrix |
149 ratio1 <- classnames[k] | 294 sum_total <- sum(NA.res) |
150 ratio2 <- classnames[j] | 295 alerte <- NULL |
151 }else{ | 296 if(sum_total==0){ |
152 ratio1 <- classnames[j] | 297 alerte <- c(alerte, "Data Matrix contains no NA.\n") |
153 ratio2 <- classnames[k] | 298 } |
299 if(length(alerte) != 0){ | |
300 cat(alerte,"\n") | |
154 } | 301 } |
155 | 302 |
156 for (i in 1:(length(DM)-1)){ | 303 |
157 fold[i,n] <- mean(DM[which(DM$c_class==ratio1),i], na.rm=TRUE)/ | 304 colnames(data_bp) <- classnames |
158 mean(DM[which(DM$c_class==ratio2),i], na.rm=TRUE) | 305 data_bp <- as.matrix(data_bp) |
159 if(logarithm=="log2"){ | 306 } |
160 fold[i,n] <- log2(fold[i,n]) | 307 } |
161 }else if(logarithm=="log10"){ | 308 |
162 fold[i,n] <- log10(fold[i,n]) | 309 |
310 # Mean fold change ------------ | |
311 | |
312 if(test.fold=="Yes"){ | |
313 if(nb_class >= 2){ | |
314 if(j!=nb_class){ | |
315 ratio1 <- NULL | |
316 ratio2 <- NULL | |
317 if(method=="each_class"){ | |
318 fold.frac <- "Top" | |
163 } | 319 } |
320 for(k in (j+1):nb_class) { | |
321 if(fold.frac=="Bottom"){ | |
322 ratio1 <- classnames[k] | |
323 ratio2 <- classnames[j] | |
324 }else{ | |
325 ratio1 <- classnames[j] | |
326 ratio2 <- classnames[k] | |
327 } | |
328 fold <- colMeans(DM[which(DM$c_class==ratio1),-1],na.rm=TRUE)/ | |
329 colMeans(DM[which(DM$c_class==ratio2),-1],na.rm=TRUE) | |
330 if(logarithm=="log2"){ | |
331 fold.res <- cbind(fold.res,log2(fold)) | |
332 }else if(logarithm=="log10"){ | |
333 fold.res <- cbind(fold.res,log10(fold)) | |
334 }else{ | |
335 fold.res <- cbind(fold.res, fold) | |
336 } | |
337 if(logarithm == "none"){ | |
338 fold.names <- cbind(fold.names,paste("fold",ratio1,"VS", ratio2, sep="_")) | |
339 }else{ | |
340 fold.names <- cbind(fold.names,paste(logarithm, "fold", ratio1, "VS", ratio2, sep="_")) | |
341 } | |
342 } | |
343 | |
344 }else{ | |
345 stat.res <- cbind(stat.res,fold.res) | |
346 names <- cbind(names, fold.names) | |
164 } | 347 } |
165 names(fold)[n] <- paste("fold",ratio1,"VS", ratio2, sep="_") | 348 } |
166 if(logarithm != "none"){ | 349 } |
167 names(fold)[n] <- paste(logarithm,names(fold)[n], sep="_") | 350 |
168 } | 351 } |
169 n <- n + 1} | 352 |
170 } | 353 ############ |
171 | 354 |
172 } | 355 |
173 | 356 # check columns names in variableMetadata |
174 # number and proportion of NA --------------------------------------------------------------------------------- | 357 |
175 | |
176 calcul_NA <- data.frame() | |
177 pct_NA <- data.frame() | |
178 for (i in 1:(length(DM)-1)){ | |
179 for (j in 1:nb_class){ | |
180 n <- 0 | |
181 new_DM <- DM[which(DM$c_class==classnames[j]),i] | |
182 for(k in 1:length(new_DM)){ | |
183 if (is.na(new_DM[k])){ | |
184 n <- n + 1} | |
185 calcul_NA[i,j] <- n | |
186 pct_NA[i,j] <- (calcul_NA[i,j]/length(new_DM))*100} | |
187 } | |
188 } | |
189 names(calcul_NA) <- paste("NA",classnames, sep="_") | |
190 names(pct_NA) <- paste("Pct_NA", classnames, sep="_") | |
191 | |
192 # Alert message if there is no NA in data matrix | |
193 | |
194 sumNA <- colSums(calcul_NA) | |
195 sum_total <- sum(sumNA) | |
196 alerte <- NULL | |
197 if(sum_total==0){ | |
198 alerte <- c(alerte, "Data Matrix contains no NA.\n") | |
199 } | |
200 | |
201 if(length(alerte) != 0){ | |
202 cat(alerte,"\n") | |
203 } | |
204 table_NA <- cbind(calcul_NA, pct_NA) | |
205 | |
206 | |
207 | |
208 # check columns names --------------------------------------------------------------------------------------- | |
209 | |
210 | |
211 VM.names <- colnames(VM) | 358 VM.names <- colnames(VM) |
212 | |
213 # Fold | |
214 | |
215 if(nb_class >=2){ | |
216 fold.names <- colnames(fold) | |
217 | |
218 for (i in 1:length(VM.names)){ | |
219 for (j in 1:length(fold.names)){ | |
220 if (VM.names[i]==fold.names[j]){ | |
221 fold.names[j] <- paste(fold.names[j],"2", sep="_") | |
222 } | |
223 } | |
224 } | |
225 colnames(fold) <- fold.names | |
226 | |
227 VM <- cbind(VM,fold) | |
228 } | |
229 | |
230 # NA | |
231 NA.names <- colnames(table_NA) | |
232 | |
233 for (i in 1:length(VM.names)){ | 359 for (i in 1:length(VM.names)){ |
234 for (j in 1:length(NA.names)){ | 360 for (j in 1:length(names)){ |
235 if (VM.names[i]==NA.names[j]){ | 361 if (VM.names[i]==names[j]){ |
236 NA.names[j] <- paste(NA.names[j],"2", sep="_") | 362 names[j] <- paste(names[j], "2", sep="_") |
237 } | 363 } |
238 } | 364 } |
239 } | 365 } |
240 colnames(table_NA) <- NA.names | 366 |
241 VM <- cbind(VM,table_NA) | 367 colnames(stat.res) <- names |
242 | 368 |
243 | 369 |
244 #for NA barplots ------------------------------------------------------------------------------------------- | 370 |
245 | 371 |
246 data_bp <- data.frame() | 372 |
247 | |
248 for (j in 1:ncol(pct_NA)){ | |
249 Nb_NA_0_20 <- 0 | |
250 Nb_NA_20_40 <- 0 | |
251 Nb_NA_40_60 <- 0 | |
252 Nb_NA_60_80 <- 0 | |
253 Nb_NA_80_100 <- 0 | |
254 for (i in 1:nrow(pct_NA)){ | |
255 | |
256 if ((0<=pct_NA[i,j])&(pct_NA[i,j]<20)){ | |
257 Nb_NA_0_20=Nb_NA_0_20+1 | |
258 } | |
259 | |
260 if ((20<=pct_NA[i,j])&(pct_NA[i,j]<40)){ | |
261 Nb_NA_20_40=Nb_NA_20_40+1} | |
262 | |
263 if ((40<=pct_NA[i,j])&(pct_NA[i,j]<60)){ | |
264 Nb_NA_40_60=Nb_NA_40_60+1} | |
265 | |
266 if ((60<=pct_NA[i,j])&(pct_NA[i,j]<80)){ | |
267 Nb_NA_60_80=Nb_NA_60_80+1} | |
268 | |
269 if ((80<=pct_NA[i,j])&(pct_NA[i,j]<=100)){ | |
270 Nb_NA_80_100=Nb_NA_80_100+1} | |
271 } | |
272 data_bp[1,j] <- Nb_NA_0_20 | |
273 data_bp[2,j] <- Nb_NA_20_40 | |
274 data_bp[3,j] <- Nb_NA_40_60 | |
275 data_bp[4,j] <- Nb_NA_60_80 | |
276 data_bp[5,j] <- Nb_NA_80_100 | |
277 } | |
278 rownames(data_bp) <- c("0%-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%") | |
279 colnames(data_bp) <- classnames | |
280 data_bp <- as.matrix(data_bp) | |
281 | |
282 | 373 |
283 # Output --------------------------------------------------------------------------------------------------- | 374 # Output --------------------------------------------------------------------------------------------------- |
284 | 375 |
376 VM <-cbind(VM,stat.res) | |
285 | 377 |
286 write.table(VM, VM.output,sep="\t", quote=FALSE, row.names=FALSE) | 378 write.table(VM, VM.output,sep="\t", quote=FALSE, row.names=FALSE) |
287 | 379 |
288 #graphics pdf | 380 |
381 ### graphics pdf | |
382 | |
383 if(graphs == 1){ | |
289 | 384 |
290 pdf(graphs.output) | 385 pdf(graphs.output) |
386 | |
291 | 387 |
292 #Barplots for NA | 388 #Barplots for NA |
389 if("NA" %in% stat.list){ | |
390 graph.colors <- c("green3","palegreen3","lightblue","orangered","red") | |
293 par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) | 391 par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) |
294 | 392 |
295 bp=barplot(data_bp, col=rainbow(nrow(data_bp)), main="Proportion of NA", xlab="Classes", ylab="Variables") | 393 bp=barplot(data_bp, col=graph.colors, main="Proportion of NA", xlab="Classes", ylab="Variables") |
296 legend("topright", fill=rainbow(nrow(data_bp)),rownames(data_bp), inset=c(-0.3,0)) | 394 legend("topright", fill=graph.colors,rownames(data_bp), inset=c(-0.3,0)) |
297 | 395 |
298 stock=0 | 396 stock=0 |
299 for (i in 1:nrow(data_bp)){ | 397 for (i in 1:nrow(data_bp)){ |
300 text(bp, stock+data_bp[i,]/2, data_bp[i,], col="white", cex=0.7) | 398 text(bp, stock+data_bp[i,]/2, data_bp[i,], col="white", cex=0.7) |
301 stock <- stock+data_bp[i,] | 399 stock <- stock+data_bp[i,] |
302 } | 400 } |
303 | 401 |
304 | 402 } |
305 #Boxplots for fold test | 403 |
306 | 404 # Boxplots for fold test |
307 if(nb_class >= 2){ | 405 |
308 | 406 if((test.fold=="Yes")&&(nb_class >= 2)){ |
309 clean_fold <- fold | 407 |
408 clean_fold <- fold.res | |
310 for(i in 1:nrow(clean_fold)){ | 409 for(i in 1:nrow(clean_fold)){ |
311 for(j in 1:ncol(clean_fold)){ | 410 for(j in 1:ncol(clean_fold)){ |
312 if(is.infinite(clean_fold[i,j])){ | 411 if(is.infinite(clean_fold[i,j])){ |
313 clean_fold[i,j] <- NA | 412 clean_fold[i,j] <- NA |
314 } | 413 } |
315 } | 414 } |
316 } | 415 } |
317 for (j in 1:ncol(clean_fold)){ | 416 for (j in 1:ncol(clean_fold)){ |
318 title <- paste(fold.names[j]) | 417 title <- paste(fold.names[j]) |
319 boxplot(clean_fold[j], main=title) | 418 boxplot(clean_fold[,j], main=title) |
320 } | 419 } |
321 } | 420 } |
322 | 421 |
323 dev.off() | 422 dev.off() |
324 | 423 |
325 } | 424 }else{ |
326 | 425 pdf(graphs.output) |
327 | 426 plot.new() |
427 legend("center","You did not select any option with graphical output.") | |
428 dev.off() | |
429 } | |
430 | |
431 } | |
432 | |
433 | |
434 | |
435 | |
436 | |
437 |