# Source: Intchecks/Script_intensity_check.R -- changeset 1:4973a2104cfd (draft)
# Author: melpetera -- Wed, 05 Dec 2018 10:27:45 -0500
# Parent changeset: c2c2e1be904a -- child changeset: bdee2c2c484b
#########################################################################
# SCRIPT INTENSITY CHECK                                                #
#                                                                       #
# Input: Data Matrix, VariableMetadata, SampleMetadata                  #
# Output: VariableMetadata, Graphics (barplots and boxplots)            #
#                                                                       #
# Dependencies: RcheckLibrary.R                                         #
#                                                                       #
#########################################################################
# Parameters (for dev)
# This block is guarded by if(FALSE) and therefore never runs when the file is
# sourced. It only records the argument values used for manual testing: run the
# assignments by hand before stepping through the body of intens_check().
# The rm()/setwd() calls are developer conveniences tied to one workstation and
# must stay inside this guard.
if(FALSE){

  rm(list = ls())
  setwd("Y:\\Developpement\\Intensity check\\Pour tests")

  DM.name <- "DM_NA.tabular"
  SM.name <- "SM_NA.tabular"
  VM.name <- "vM_NA.tabular"
  class.col <- "2"
  type <- "One_class"
  class1 <- "Blanks"
  fold.frac <- "Top"
  logarithm <- "log2"
  VM.output <- "new_VM.txt"
  graphs.output <- "Barplots_and_Boxplots.pdf"
}
# Private helper: positions of `target_ids` inside `source_ids`.
# Returns an integer vector `idx` such that source_ids[idx] equals target_ids,
# or NULL when the two identifier sets cannot be matched one-to-one (different
# lengths, duplicated identifiers, or identifiers missing from `source_ids`).
.intens_align_ids <- function(target_ids, source_ids){
  target_ids <- as.character(target_ids)
  source_ids <- as.character(source_ids)
  if(length(target_ids) != length(source_ids) ||
     anyDuplicated(target_ids) > 0 ||
     anyDuplicated(source_ids) > 0 ||
     !all(target_ids %in% source_ids)){
    return(NULL)
  }
  match(target_ids, source_ids)
}


# intens_check: checks the intensities considering classes, with a mean fold
# change calculation and the number and proportion of missing values (NA) in
# the dataMatrix.
#
# Two options:
# - one class (selected by the user) against all the remaining samples ("One_class")
# - tests on each pair of classes ("Each_class")
#
# Parameters:
# DM.name, SM.name, VM.name: dataMatrix, sampleMetadata, variableMetadata files access
#                            (tab-separated, with a header line)
# class.col: number of the sampleMetadata's column with classes
# type: "One_class" or "Each_class"
# class1: name of the class, only if type="One_class"
# fold.frac: if type="One_class": class1/other ("Top") or other/class1 ("Bottom");
#            forced to "Top" when type="Each_class"
# logarithm: "log2", "log10" or "none" for log mean fold change
# VM.output: output file's access (VM with new columns)
# graphs.output: pdf file's access with barplots for the proportion of NA and
#                boxplots with the folds values
#
# Side effects: writes VM.output and graphs.output, prints alert messages with cat().
# Returns: the value of dev.off() once the pdf device has been closed.
# Stops: when the class column or class1 cannot be found, when the data matrix is
#        not numeric, or when the tables do not describe the same number of samples.
# Requires match3() and check.err() from RcheckLibrary.R.
intens_check <- function(DM.name, SM.name, VM.name, class.col, type, class1, fold.frac, logarithm,
                         VM.output, graphs.output){

  # Input ---------------------------------------------------------------------------------------------------

  DM <- read.table(DM.name, header=TRUE, sep="\t", check.names=FALSE)
  SM <- read.table(SM.name, header=TRUE, sep="\t", check.names=FALSE)
  VM <- read.table(VM.name, header=TRUE, sep="\t", check.names=FALSE)

  # Table match check with Rchecklibrary
  table.check <- match3(DM, SM, VM)
  check.err(table.check)

  var_names <- DM[,1]
  sample_ids <- colnames(DM)[-1]

  # Intensities as a matrix: one row per sample, one column per variable
  values <- t(as.matrix(DM[, -1, drop=FALSE]))
  if(!is.numeric(values) && !is.logical(values)){
    stop("\n- - - - - - - - -\n", "The data matrix contains non-numeric values, intensities could not be checked",
         "\n- - - - - - - - -\n")
  }

  # Put the variables in the variableMetadata row order, so that the columns
  # computed below can be bound to VM row by row.
  # NOTE(review): assumes the first column of VM holds the variable identifiers
  # (the same ones as the first column of DM) -- confirm against RcheckLibrary.R.
  # When no one-to-one match exists the original positional order is kept.
  var_index <- .intens_align_ids(VM[,1], var_names)
  if(!is.null(var_index)){
    values <- values[, var_index, drop=FALSE]
  }
  n_var <- ncol(values)

  class.col <- colnames(SM)[as.numeric(class.col)]


  # check class.col, class1 and the number of classes ---------------------------------------------------------

  if(!(class.col %in% colnames(SM))){
    stop("\n- - - - - - - - -\n", "The column ",class.col, " is not a part of the specify sample Metadata","\n- - - - - - - - -\n")
  }

  c_class <- SM[,class.col]

  # Put the classes in the data matrix sample order.
  # NOTE(review): assumes the first column of SM holds the sample identifiers used
  # as column names of DM -- confirm. Falls back to positional order otherwise.
  sample_index <- .intens_align_ids(sample_ids, SM[,1])
  if(!is.null(sample_index)){
    c_class <- c_class[sample_index]
  }
  if(length(c_class) != nrow(values)){
    stop("\n- - - - - - - - -\n", "The number of samples differs between the data matrix and the sample metadata",
         "\n- - - - - - - - -\n")
  }

  c_class <- as.factor(c_class)
  nb_class <- nlevels(c_class)
  classnames <- levels(c_class)

  if(nb_class < 2){
    err.1class <- c("\n The column",class.col, "contains only one class, fold calculation could not be executed \n")
    cat(err.1class)
  }

  if((nb_class > (nrow(SM))/3)){
    class.err <- c("\n There are too many classes, think about reducing the number of classes and excluding those with few samples \n")
    cat(class.err)
  }

  if(type == "One_class"){
    if(!(class1 %in% classnames)){
      list.class1 <- c("\n Classes:",classnames,"\n")
      cat(list.class1)
      err.class1 <- c("The class ",class1, " does not appear in the column ", class.col)
      stop("\n- - - - - - - - -\n", err.class1,"\n- - - - - - - - -\n")
    }

    # If type is "One_class", change the other classes to "Other".
    # which() drops samples with a missing class label: they stay NA and are
    # left out of both groups instead of breaking the comparison.
    class_labels <- as.character(c_class)
    not_class1 <- which(class_labels != class1)
    if(length(not_class1) > 0){
      class_labels[not_class1] <- "Other"
      c_class <- as.factor(class_labels)
      nb_class <- nlevels(c_class)
      classnames <- c(class1,"Other")
    }
  }

  # Only the first nb_class names are used everywhere below
  used_classes <- classnames[seq_len(nb_class)]

  # Row indices (samples) of each class in the intensity matrix
  class_rows <- lapply(used_classes, function(one_class) which(c_class == one_class))


  # fold calculation -------------------------------------------------------------------------------------------

  fold <- NULL

  if(nb_class >= 2){

    if(type=="Each_class"){
      fold.frac <- "Top"
    }

    # Mean intensity of every variable within every class (variables x classes).
    # matrix() restores the shape when there is a single variable.
    class_means <- vapply(class_rows,
                          function(rows) colMeans(values[rows, , drop=FALSE], na.rm=TRUE),
                          numeric(n_var))
    class_means <- matrix(class_means, nrow=n_var, ncol=nb_class)

    n_pairs <- nb_class * (nb_class - 1) / 2
    fold_values <- matrix(NA_real_, nrow=n_var, ncol=n_pairs)
    fold_names <- character(n_pairs)
    n <- 1

    for(j in seq_len(nb_class - 1)){
      for(k in (j+1):nb_class){

        # "Bottom" inverts the ratio: later class over earlier class
        if(fold.frac=="Bottom"){
          top <- k
          bottom <- j
        }else{
          top <- j
          bottom <- k
        }

        ratio <- class_means[, top] / class_means[, bottom]
        fold_values[, n] <- switch(logarithm,
                                   log2 = log2(ratio),
                                   log10 = log10(ratio),
                                   ratio)

        fold_names[n] <- paste("fold", used_classes[top], "VS", used_classes[bottom], sep="_")
        if(logarithm != "none"){
          fold_names[n] <- paste(logarithm, fold_names[n], sep="_")
        }
        n <- n + 1
      }
    }

    colnames(fold_values) <- fold_names
    fold <- as.data.frame(fold_values)
  }


  # number and proportion of NA ---------------------------------------------------------------------------------

  na_count <- matrix(0, nrow=n_var, ncol=nb_class)
  na_pct <- matrix(0, nrow=n_var, ncol=nb_class)
  for(j in seq_len(nb_class)){
    rows <- class_rows[[j]]
    na_count[, j] <- colSums(is.na(values[rows, , drop=FALSE]))
    na_pct[, j] <- (na_count[, j] / length(rows)) * 100
  }
  colnames(na_count) <- paste("NA", used_classes, sep="_")
  colnames(na_pct) <- paste("Pct_NA", used_classes, sep="_")
  calcul_NA <- as.data.frame(na_count)
  pct_NA <- as.data.frame(na_pct)

  # Alert message if there is no NA in data matrix
  alerte <- NULL
  if(sum(na_count) == 0){
    alerte <- c(alerte, "Data Matrix contains no NA.\n")
  }
  if(length(alerte) != 0){
    cat(alerte,"\n")
  }

  table_NA <- cbind(calcul_NA, pct_NA)


  # check columns names ---------------------------------------------------------------------------------------

  VM.names <- colnames(VM)

  # A new column whose name already exists in VM gets a "_2" suffix, repeated
  # until the name is free (e.g. when the tool is run again on its own output).
  rename_clashes <- function(new_names){
    while(any(clash <- new_names %in% VM.names)){
      new_names[clash] <- paste(new_names[clash], "2", sep="_")
    }
    new_names
  }

  # Fold
  if(nb_class >= 2){
    colnames(fold) <- rename_clashes(colnames(fold))
    VM <- cbind(VM,fold)
  }

  # NA
  colnames(table_NA) <- rename_clashes(colnames(table_NA))
  VM <- cbind(VM,table_NA)


  # for NA barplots -------------------------------------------------------------------------------------------

  # Number of variables per class falling in each NA-proportion bin:
  # [0,20) [20,40) [40,60) [60,80) [80,100]
  bin_lower_bounds <- c(0, 20, 40, 60, 80)
  data_bp <- vapply(seq_len(nb_class),
                    function(j) tabulate(findInterval(na_pct[, j], bin_lower_bounds), nbins=5L),
                    integer(5))
  rownames(data_bp) <- c("0%-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%")
  colnames(data_bp) <- used_classes


  # Output ---------------------------------------------------------------------------------------------------

  write.table(VM, VM.output,sep="\t", quote=FALSE, row.names=FALSE)

  # graphics pdf
  pdf(graphs.output)
  pdf_dev <- dev.cur()
  # Close the device even if a plotting call fails, so the file is not left open
  device_open <- TRUE
  on.exit(if(device_open) dev.off(pdf_dev), add=TRUE)

  # Barplots for NA
  # Wider right margin and xpd=TRUE so the legend can sit outside the plot area
  par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE)

  bp <- barplot(data_bp, col=rainbow(nrow(data_bp)), main="Proportion of NA", xlab="Classes", ylab="Variables")
  legend("topright", fill=rainbow(nrow(data_bp)),rownames(data_bp), inset=c(-0.3,0))

  # Write each count in the middle of its stacked segment
  stock <- 0
  for(i in seq_len(nrow(data_bp))){
    text(bp, stock+data_bp[i,]/2, data_bp[i,], col="white", cex=0.7)
    stock <- stock+data_bp[i,]
  }

  # Boxplots for fold test
  if(nb_class >= 2){

    # Infinite folds (a zero mean in one class) are hidden from the boxplots
    clean_fold <- fold
    clean_fold[] <- lapply(clean_fold, function(one_fold){
      one_fold[is.infinite(one_fold)] <- NA
      one_fold
    })

    for(j in seq_len(ncol(clean_fold))){
      title <- paste(colnames(fold)[j])
      if(all(is.na(clean_fold[[j]]))){
        # boxplot() fails when nothing is finite: draw an empty titled page instead
        plot.new()
        title(main=title, sub="No finite fold value to display")
      }else{
        boxplot(clean_fold[j], main=title)
      }
    }
  }

  device_open <- FALSE
  dev.off(pdf_dev)

}