# HG changeset patch # User melpetera # Date 1552054032 18000 # Node ID bdee2c2c484b892d3af8469cb14633b216bd614e # Parent a7553caa25729f6c30e2575f5881d83fb2e576cd Uploaded diff -r a7553caa2572 -r bdee2c2c484b Intchecks/Script_intensity_check.R --- a/Intchecks/Script_intensity_check.R Mon Jan 14 08:17:26 2019 -0500 +++ b/Intchecks/Script_intensity_check.R Fri Mar 08 09:07:12 2019 -0500 @@ -2,7 +2,7 @@ # SCRIPT INTENSITY CHECK # # # # Input: Data Matrix, VariableMetadata, SampleMetadata # -# Output: VariableMetadata, Graphics (barplots and boxplots) # +# Output: VariableMetadata, Graphics # # # # Dependencies: RcheckLibrary.R # # # @@ -13,16 +13,18 @@ if(FALSE){ rm(list = ls()) - setwd("Y:\\Developpement\\Intensity check\\Pour tests") + setwd("Y:\\Developpement\\Intensity check\\Pour tests\\Tests_global") DM.name <- "DM_NA.tabular" SM.name <- "SM_NA.tabular" VM.name <- "vM_NA.tabular" - class.col <- "2" - type <- "One_class" - class1 <- "Blanks" + method <- "one_class" + chosen.stat <- "mean,sd,quartile,decile,NA" + class.col <- "2" + test.fold <- "Yes" + class1 <- "Pools" fold.frac <- "Top" - logarithm <- "log2" + logarithm <- "log10" VM.output <- "new_VM.txt" graphs.output <- "Barplots_and_Boxplots.pdf" } @@ -30,31 +32,31 @@ -intens_check <- function(DM.name, SM.name, VM.name, class.col, type, class1, fold.frac, logarithm, - VM.output, graphs.output){ - +intens_check <- function(DM.name, SM.name, VM.name, method, chosen.stat, class.col, test.fold, class1, fold.frac, + logarithm, VM.output, graphs.output){ - # This function allows to check the intensities considering classes with a mean fold change calculation, - # the number and the proportion of missing values (NA) in dataMatrix + # This function allows to check the intensities with various statistics, number of missing values and mean fold change # - # Two options: - # - one class (selected by the user) against all the remaining samples ("One_class") - # - tests on each class ("Each_class") + # Three methods proposed: + # - global: tests for each variable without distinction between samples + # - one class: one class versus all the remaining samples + # - each class: if the class columns contains at least three classes and you want to test each of them # # Parameters: # DM.name, SM.name, VM.name: dataMatrix, sampleMetadata, variableMetadata files access - # class.col: number of the sampleMetadata's column with classes - # type: "One_class" or "Each_class" - # class1: name of the class, only if type="One_class" - # fold.frac: if type="One class": class1/other ("Top") or other/class1 ("Bottom") - # logarithm: "log2", "log10" or "none" for log mean fold change + # method: "global", "one_class", "each_class" + # chosen.stat: character listing the chosen analysis (comma-separated) + # class.col: number of the sampleMetadata's column with classes (if method = one_class or each_class) + # test.fold: "yes" or "no" (if method = one_class or each_class) + # class1: name of the class (if method = one_class) + # fold.frac: "Top" -> class1/other or "Bottom" -> other/class1 (if method = one_class) + # logarithm: "log2", "log10" or "none" (if method = one_class or each_class) # VM.output: output file's access (VM with new columns) # graphs.output: pdf file's access with barplots for the proportion of NA and boxplots with the folds values - # Input --------------------------------------------------------------------------------------------------- DM <- read.table(DM.name, header=TRUE, sep="\t", check.names=FALSE) @@ -66,247 +68,344 @@ # Table match check with Rchecklibrary table.check <- match3(DM, SM, VM) check.err(table.check) - - + rownames(DM) <- DM[,1] var_names <- DM[,1] DM <- DM[,-1] DM <- data.frame(t(DM)) - - class.col <- colnames(SM)[as.numeric(class.col)] + + stat.list <- strsplit(chosen.stat,",")[[1]] # check class.col, class1 and the number of classes --------------------------------------------------------- - if(!(class.col %in% colnames(SM))){ - stop("\n- - - - - - - - -\n", "The column ",class.col, " is not a part of the specify sample Metadata","\n- - - - - - - - -\n") + #set 1 class for all samples in case of method = no_class + if(method=="no_class"){ + c_class <- rep("global", length=nrow(DM)) + classnames <- "global" + nb_class=1 + test.fold <- "No" } + + + if(method != "no_class"){ + + class.col <- colnames(SM)[as.numeric(class.col)] + + if(!(class.col %in% colnames(SM))){ + stop("\n- - - - - - - - -\n", "The column ",class.col, " is not a part of the specify sample Metadata","\n- - - - - - - - -\n") + } - c_class <- SM[,class.col] - c_class <- as.factor(c_class) - nb_class <- nlevels(c_class) - classnames <- levels(c_class) + c_class <- SM[,class.col] + c_class <- as.factor(c_class) + nb_class <- nlevels(c_class) + classnames <- levels(c_class) + + if((nb_class < 2)&&(test.fold=="Yes")){ + err.1class <- c("\n The column",class.col, "contains only one class, fold calculation could not be executed \n") + cat(err.1class) + } + + if((nb_class > (nrow(SM))/3)&&(method == "each_class")){ + class.err <- c("\n There are too many classes, think about reducing the number of classes and excluding those + with few samples \n") + cat(class.err) + } + + + if(method == "one_class"){ + if(!(class1 %in% classnames)){ + list.class1 <- c("\n Classes:",classnames,"\n") + cat(list.class1) + err.class1 <- c("The class ",class1, " does not appear in the column ", class.col) + stop("\n- - - - - - - - -\n", err.class1,"\n- - - - - - - - -\n") + } - if(nb_class < 2){ - err.1class <- c("\n The column",class.col, "contains only one class, fold calculation could not be executed \n") - cat(err.1class) + #If method is "one_class", change others classes in "other" + for(i in 1:length(c_class)){ + if(c_class[i]!=class1){ + c_class <- as.character(c_class) + c_class[i] <- "Other" + c_class <- as.factor(c_class) + nb_class <- nlevels(c_class) + classnames <- c(class1,"Other") + } + } + } + } + + + # Statistics ------------------------------------------------------------------------------------------------ + + + ### Initialization + + DM <- cbind(c_class,DM) + + stat.res <- t(DM[0,-1,drop=FALSE]) + names <- NULL + + mean.res <- NULL + mean.names <- NULL - if((nb_class > (nrow(SM))/3)){ - class.err <- c("\n There are too many classes, think about reducing the number of classes and excluding those - with few samples \n") - cat(class.err) - } + sd.res <- NULL + sd.names <- NULL + + med.res <- NULL + med.names <- NULL + + quart.res <- NULL + quart.names <- NULL + + dec.res <- NULL + dec.names <- NULL + + NA.res <- NULL + NA.names <- NULL + pct_NA.res <- NULL + pct_NA.names <- NULL + + fold.res <- NULL + fold.names <- NULL + + if(("NA" %in% stat.list)||(test.fold=="Yes")){ + graphs <- 1 + }else{ + graphs=0 + } + + data_bp <- data.frame() #table for NA barplot + - if(type == "One_class"){ - if(!(class1 %in% classnames)){ - list.class1 <- c("\n Classes:",classnames,"\n") - cat(list.class1) - err.class1 <- c("The class ",class1, " does not appear in the column ", class.col) - stop("\n- - - - - - - - -\n", err.class1,"\n- - - - - - - - -\n") - } - } + ### Computation - #If type is "one_class", change others classes in "other" - if(type == "One_class"){ - for(i in 1:length(c_class)){ - if(c_class[i]!=class1){ - c_class <- as.character(c_class) - c_class[i] <- "Other" - c_class <- as.factor(c_class) - nb_class <- nlevels(c_class) - classnames <- c(class1,"Other") + for(j in 1:nb_class){ + + # Mean --------- + + if("mean" %in% stat.list){ + mean.res <- cbind(mean.res, colMeans(DM[which(DM$c_class==classnames[j]),-1],na.rm=TRUE)) + mean.names <- cbind(mean.names, paste("Mean",classnames[j], sep="_")) + if(j == nb_class){ + stat.res <- cbind(stat.res, mean.res) + names <- cbind(names, mean.names) + } + } + + # Standard deviation ----- + + if("sd" %in% stat.list){ + sd.res <- cbind(sd.res, apply(DM[which(DM$c_class==classnames[j]),-1],2,sd,na.rm=TRUE)) + sd.names <- cbind(sd.names, paste("Sd",classnames[j], sep="_")) + if(j == nb_class){ + stat.res <- cbind(stat.res, sd.res) + names <- cbind(names, sd.names) + } + } + + # Median --------- + + if(("median" %in% stat.list)&&(!("quartile" %in% stat.list))){ + med.res <- cbind(med.res, apply(DM[which(DM$c_class==classnames[j]),-1],2,median,na.rm=TRUE)) + med.names <- cbind(med.names, paste("Median",classnames[j], sep="_")) + if(j == nb_class){ + stat.res <- cbind(stat.res, med.res) + names <- cbind(names, med.names) + } + } + + # Quartiles ------ + + if("quartile" %in% stat.list){ + quart.res <- cbind(quart.res,t(apply(DM[which(DM$c_class==classnames[j]),-1],2,quantile,na.rm=TRUE))) + quart.names <- cbind(quart.names, paste("Min",classnames[j], sep="_"),paste("Q1",classnames[j], sep="_"), + paste("Median",classnames[j],sep="_"),paste("Q3",classnames[j],sep="_"), + paste("Max",classnames[j],sep="_")) + if(j == nb_class){ + stat.res <- cbind(stat.res, quart.res) + names <- cbind(names, quart.names) + } + } + + # Deciles ------ + + if("decile" %in% stat.list){ + dec.res <- cbind(dec.res,t(apply(DM[which(DM$c_class==classnames[j]),-1],2,quantile,na.rm=TRUE,seq(0,1,0.1)))) + dec.names <- cbind(dec.names, t(matrix(paste((paste("D",seq(0,10,1),sep="")),classnames[j],sep="_")))) + if(j == nb_class){ + stat.res <- cbind(stat.res, dec.res) + names <- cbind(names, dec.names) + } + } + + # Missing values ------------ + + if("NA" %in% stat.list){ + + nb_NA <- apply(DM[which(DM$c_class==classnames[j]),-1],2,function(x) sum(is.na(x))) + pct_NA <- round(nb_NA/nrow(DM[which(DM$c_class==classnames[j]),-1])*100,digits=4) + NA.res <- cbind(NA.res,nb_NA) + pct_NA.res <- cbind(pct_NA.res,pct_NA) + NA.names <- cbind(NA.names, paste("NA",classnames[j], sep="_")) + pct_NA.names <- cbind(pct_NA.names,paste("Pct_NA", classnames[j], sep="_")) + if(j == nb_class){ + stat.res <- cbind(stat.res, NA.res,pct_NA.res) + names <- cbind(names, NA.names,pct_NA.names) + } + #for barplots + Nb_NA_0_20 <- 0 + Nb_NA_20_40 <- 0 + Nb_NA_40_60 <- 0 + Nb_NA_60_80 <- 0 + Nb_NA_80_100 <- 0 + + for (i in 1:length(pct_NA)){ + + if ((0<=pct_NA[i])&(pct_NA[i]<20)){ + Nb_NA_0_20=Nb_NA_0_20+1} + + if ((20<=pct_NA[i])&(pct_NA[i]<40)){ + Nb_NA_20_40=Nb_NA_20_40+1} + + if ((40<=pct_NA[i])&(pct_NA[i]<60)){ + Nb_NA_40_60=Nb_NA_40_60+1} + + if ((60<=pct_NA[i])&(pct_NA[i]<80)){ + Nb_NA_60_80=Nb_NA_60_80+1} + + if ((80<=pct_NA[i])&(pct_NA[i]<=100)){ + Nb_NA_80_100=Nb_NA_80_100+1} + } + data_bp[1,j] <- Nb_NA_0_20 + data_bp[2,j] <- Nb_NA_20_40 + data_bp[3,j] <- Nb_NA_40_60 + data_bp[4,j] <- Nb_NA_60_80 + data_bp[5,j] <- Nb_NA_80_100 + rownames(data_bp) <- c("0%-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%") + + if(j == nb_class){ + + # Alert message if there is no missing value in data matrix + sum_total <- sum(NA.res) + alerte <- NULL + if(sum_total==0){ + alerte <- c(alerte, "Data Matrix contains no NA.\n") + } + if(length(alerte) != 0){ + cat(alerte,"\n") + } + + + colnames(data_bp) <- classnames + data_bp <- as.matrix(data_bp) + } + } + + + # Mean fold change ------------ + + if(test.fold=="Yes"){ + if(nb_class >= 2){ + if(j!=nb_class){ + ratio1 <- NULL + ratio2 <- NULL + if(method=="each_class"){ + fold.frac <- "Top" + } + for(k in (j+1):nb_class) { + if(fold.frac=="Bottom"){ + ratio1 <- classnames[k] + ratio2 <- classnames[j] + }else{ + ratio1 <- classnames[j] + ratio2 <- classnames[k] + } + fold <- colMeans(DM[which(DM$c_class==ratio1),-1],na.rm=TRUE)/ + colMeans(DM[which(DM$c_class==ratio2),-1],na.rm=TRUE) + if(logarithm=="log2"){ + fold.res <- cbind(fold.res,log2(fold)) + }else if(logarithm=="log10"){ + fold.res <- cbind(fold.res,log10(fold)) + }else{ + fold.res <- cbind(fold.res, fold) + } + if(logarithm == "none"){ + fold.names <- cbind(fold.names,paste("fold",ratio1,"VS", ratio2, sep="_")) + }else{ + fold.names <- cbind(fold.names,paste(logarithm, "fold", ratio1, "VS", ratio2, sep="_")) + } + } + + }else{ + stat.res <- cbind(stat.res,fold.res) + names <- cbind(names, fold.names) + } + } + } + + } + + ############ + + + # check columns names in variableMetadata + + VM.names <- colnames(VM) + for (i in 1:length(VM.names)){ + for (j in 1:length(names)){ + if (VM.names[i]==names[j]){ + names[j] <- paste(names[j], "2", sep="_") } } } + + colnames(stat.res) <- names + - DM <- cbind(DM,c_class) - - # fold calculation ------------------------------------------------------------------------------------------- - - if(nb_class >= 2){ - - - fold <- data.frame() - n <- 1 - ratio1 <- NULL - ratio2 <- NULL - - if(type=="Each_class"){ - fold.frac <- "Top" - } - - for(j in 1:(nb_class-1)){ - for(k in (j+1):nb_class) { - - if(fold.frac=="Bottom"){ - ratio1 <- classnames[k] - ratio2 <- classnames[j] - }else{ - ratio1 <- classnames[j] - ratio2 <- classnames[k] - } - - for (i in 1:(length(DM)-1)){ - fold[i,n] <- mean(DM[which(DM$c_class==ratio1),i], na.rm=TRUE)/ - mean(DM[which(DM$c_class==ratio2),i], na.rm=TRUE) - if(logarithm=="log2"){ - fold[i,n] <- log2(fold[i,n]) - }else if(logarithm=="log10"){ - fold[i,n] <- log10(fold[i,n]) - } - } - names(fold)[n] <- paste("fold",ratio1,"VS", ratio2, sep="_") - if(logarithm != "none"){ - names(fold)[n] <- paste(logarithm,names(fold)[n], sep="_") - } - n <- n + 1} - } - - } - - # number and proportion of NA --------------------------------------------------------------------------------- - - calcul_NA <- data.frame() - pct_NA <- data.frame() - for (i in 1:(length(DM)-1)){ - for (j in 1:nb_class){ - n <- 0 - new_DM <- DM[which(DM$c_class==classnames[j]),i] - for(k in 1:length(new_DM)){ - if (is.na(new_DM[k])){ - n <- n + 1} - calcul_NA[i,j] <- n - pct_NA[i,j] <- (calcul_NA[i,j]/length(new_DM))*100} - } - } - names(calcul_NA) <- paste("NA",classnames, sep="_") - names(pct_NA) <- paste("Pct_NA", classnames, sep="_") - - # Alert message if there is no NA in data matrix - - sumNA <- colSums(calcul_NA) - sum_total <- sum(sumNA) - alerte <- NULL - if(sum_total==0){ - alerte <- c(alerte, "Data Matrix contains no NA.\n") - } - - if(length(alerte) != 0){ - cat(alerte,"\n") - } - table_NA <- cbind(calcul_NA, pct_NA) - - - - # check columns names --------------------------------------------------------------------------------------- - - VM.names <- colnames(VM) - - # Fold - - if(nb_class >=2){ - fold.names <- colnames(fold) - - for (i in 1:length(VM.names)){ - for (j in 1:length(fold.names)){ - if (VM.names[i]==fold.names[j]){ - fold.names[j] <- paste(fold.names[j],"2", sep="_") - } - } - } - colnames(fold) <- fold.names - - VM <- cbind(VM,fold) - } - - # NA - NA.names <- colnames(table_NA) - - for (i in 1:length(VM.names)){ - for (j in 1:length(NA.names)){ - if (VM.names[i]==NA.names[j]){ - NA.names[j] <- paste(NA.names[j],"2", sep="_") - } - } - } - colnames(table_NA) <- NA.names - VM <- cbind(VM,table_NA) - - - #for NA barplots ------------------------------------------------------------------------------------------- - - data_bp <- data.frame() - - for (j in 1:ncol(pct_NA)){ - Nb_NA_0_20 <- 0 - Nb_NA_20_40 <- 0 - Nb_NA_40_60 <- 0 - Nb_NA_60_80 <- 0 - Nb_NA_80_100 <- 0 - for (i in 1:nrow(pct_NA)){ - - if ((0<=pct_NA[i,j])&(pct_NA[i,j]<20)){ - Nb_NA_0_20=Nb_NA_0_20+1 - } - - if ((20<=pct_NA[i,j])&(pct_NA[i,j]<40)){ - Nb_NA_20_40=Nb_NA_20_40+1} - - if ((40<=pct_NA[i,j])&(pct_NA[i,j]<60)){ - Nb_NA_40_60=Nb_NA_40_60+1} - - if ((60<=pct_NA[i,j])&(pct_NA[i,j]<80)){ - Nb_NA_60_80=Nb_NA_60_80+1} - - if ((80<=pct_NA[i,j])&(pct_NA[i,j]<=100)){ - Nb_NA_80_100=Nb_NA_80_100+1} - } - data_bp[1,j] <- Nb_NA_0_20 - data_bp[2,j] <- Nb_NA_20_40 - data_bp[3,j] <- Nb_NA_40_60 - data_bp[4,j] <- Nb_NA_60_80 - data_bp[5,j] <- Nb_NA_80_100 - } - rownames(data_bp) <- c("0%-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%") - colnames(data_bp) <- classnames - data_bp <- as.matrix(data_bp) - # Output --------------------------------------------------------------------------------------------------- + VM <-cbind(VM,stat.res) write.table(VM, VM.output,sep="\t", quote=FALSE, row.names=FALSE) - #graphics pdf + + ### graphics pdf + + if(graphs == 1){ pdf(graphs.output) + #Barplots for NA + if("NA" %in% stat.list){ + graph.colors <- c("green3","palegreen3","lightblue","orangered","red") par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) - - bp=barplot(data_bp, col=rainbow(nrow(data_bp)), main="Proportion of NA", xlab="Classes", ylab="Variables") - legend("topright", fill=rainbow(nrow(data_bp)),rownames(data_bp), inset=c(-0.3,0)) - + + bp=barplot(data_bp, col=graph.colors, main="Proportion of NA", xlab="Classes", ylab="Variables") + legend("topright", fill=graph.colors,rownames(data_bp), inset=c(-0.3,0)) + stock=0 for (i in 1:nrow(data_bp)){ text(bp, stock+data_bp[i,]/2, data_bp[i,], col="white", cex=0.7) stock <- stock+data_bp[i,] } - - #Boxplots for fold test + } + + # Boxplots for fold test - if(nb_class >= 2){ - - clean_fold <- fold + if((test.fold=="Yes")&&(nb_class >= 2)){ + + clean_fold <- fold.res for(i in 1:nrow(clean_fold)){ for(j in 1:ncol(clean_fold)){ if(is.infinite(clean_fold[i,j])){ @@ -316,12 +415,23 @@ } for (j in 1:ncol(clean_fold)){ title <- paste(fold.names[j]) - boxplot(clean_fold[j], main=title) + boxplot(clean_fold[,j], main=title) } - } + } + + dev.off() - dev.off() + }else{ + pdf(graphs.output) + plot.new() + legend("center","You did not select any option with graphical output.") + dev.off() + } -} + } + + + + \ No newline at end of file diff -r a7553caa2572 -r bdee2c2c484b Intchecks/wrapper_intensity_check.R --- a/Intchecks/wrapper_intensity_check.R Mon Jan 14 08:17:26 2019 -0500 +++ b/Intchecks/wrapper_intensity_check.R Fri Mar 08 09:07:12 2019 -0500 @@ -9,7 +9,7 @@ # # # # # Input: Data Matrix, VariableMetadata, SampleMetadata # -# Output: VariableMetadata, Graphics (barplots and boxplots) # +# Output: VariableMetadata, Graphics # # # # # ############################################################################# @@ -27,14 +27,36 @@ source_local("Script_intensity_check.R", "RcheckLibrary.R") -if(length(args) < 8){ stop("NOT enough argument !!!") } +if(length(args) < 7){ stop("NOT enough argument !!!") } - +class_col <- NULL +test_fold <- NULL class1 <- NULL fold_frac <- NULL -if(args$type == "One_class"){ +logarithm <- NULL +if(args$method == "each_class"){ + class_col <- args$class_col + test_fold <- args$test_fold + if(args$test_fold == "Yes"){ + logarithm <- args$logarithm + } +} +if(args$method == "one_class"){ + class_col <- args$class_col class1 <- args$class1 - fold_frac <- args$fold_frac + test_fold <- args$test_fold + if(args$test_fold == "Yes"){ + fold_frac <- args$fold_frac + logarithm <- args$logarithm + } +} + +err_no_option <- NULL + +if(((args$method == "no_class")&&(args$chosen_stat == "None"))|| + ((args$method != "no_class") && (args$chosen_stat == "None") && (test_fold == "No"))){ + err_no_option<- "You did not select any computational option. Program could not be executed." + stop("\n- - - - - - - - -\n",err_no_option,"\n- - - - - - - - -\n") } @@ -43,10 +65,12 @@ print(args) cat("--------------------------------\n") +if(is.null(err_no_option)){ -intens_check(args$dataMatrix_in, args$sampleMetadata_in, args$variableMetadata_in, args$class_col, args$type, - class1, fold_frac, args$logarithm, args$variableMetadata_out, args$graphs_out) - + intens_check(args$dataMatrix_in, args$sampleMetadata_in, args$variableMetadata_in, args$method, args$chosen_stat, + class_col, test_fold, class1, fold_frac, logarithm, args$variableMetadata_out, args$graphs_out) +} + sessionInfo() cat("--------------------------------\n") diff -r a7553caa2572 -r bdee2c2c484b Intchecks/xml_intensity_check.xml --- a/Intchecks/xml_intensity_check.xml Mon Jan 14 08:17:26 2019 -0500 +++ b/Intchecks/xml_intensity_check.xml Fri Mar 08 09:07:12 2019 -0500 @@ -1,7 +1,7 @@ - - Adding information about intensities (mean fold change and missing values) in the variable metadata table + + Statistical measures, number of missing values and mean fold change - r-batch + r-batch @@ -10,19 +10,34 @@ dataMatrix_in "$dataMatrix_in" sampleMetadata_in "$sampleMetadata_in" variableMetadata_in "$variableMetadata_in" - - class_col "$class_col" - - type "${type_cond.type}" - #if $type_cond.type == "One_class" : - class1 "${type_cond.class1}" - fold_frac "${type_cond.fold_frac}" + + method "${method_cond.method}" + + chosen_stat "${method_cond.chosen_stat}" + + #if $method_cond.method == "each_class" : + class_col "${method_cond.class_col}" + test_fold "${method_cond.test_fold_cond.test_fold}" + #if $method_cond.test_fold_cond.test_fold == "Yes" : + logarithm "${method_cond.test_fold_cond.logarithm}" + #end if #end if - logarithm "$logarithm" - + #if $method_cond.method == "one_class" : + class_col "${method_cond.class_col}" + class1 "${method_cond.class1}" + test_fold "${method_cond.test_fold_cond.test_fold}" + #if $method_cond.test_fold_cond.test_fold == "Yes" : + fold_frac "${method_cond.test_fold_cond.fold_frac}" + logarithm "${method_cond.test_fold_cond.logarithm}" + #end if + #end if + variableMetadata_out "$variableMetadata_out" graphs_out "$graphs_out" + + + @@ -30,31 +45,83 @@ - - - - - - + + + + + + - - - - - + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - + @@ -75,23 +142,6 @@ ======================== ----------- -Description ------------ - -This tool performs two tests: a mean fold change calculation and the number of missing values. - -**Fold:** -The test calculates the mean fold change between two classes. It could be converted into logarithm. - -**Missing values:** -This tool calculates the number and the proportion of missing values in the data matrix considering the class. -Missing values in numerical columns of data must be coded NA. - -**Two types of tests:** - | - Between **one class** and the remaining samples: if you have only two classes or if you want to test only one class versus all the remaining samples without class distinction. - | - **Each class**: if the class columns contains at least three classes and you want to test each of them. Can also be used with only two classes, but you will not choose which class to put as numerator/denominator. - ------------ Input files ----------- @@ -109,29 +159,37 @@ Parameters ---------- +**Computation method** + | - **Without distinction between samples:** calculates chosen statistic(s) for each variable. + | - **For each class of samples:** separates samples between each class (class column to specified). Chosen statistic(s) and/or mean fold change are calculated for each of them. + | - **Between one class versus all the remaining samples:** If you want to focus only on one class versus all the remaining samples without class distinction. + + | In the case of two classes: "each class" and "one class" give the same results for statistical measures. We recommend to choose "one class" for mean fold change calculation in order to select the class you want to put as numerator or denominator (see below). + +**Statistics** + | Select the statistical measures you want to add in the variable metadata table. If the method is "each class" or "one class", you could choose no statistical measure if you only want to calculate the mean fold change (see below). + **Class column** | Select the class column in sample metadata table. -**Type of test** - | Two options: - | - "One class" allows to perform tests on one class against the remaining samples. - | - "Each class" allows to add several columns with the ratio between all the classes and the number of missing values for each class. - **Selected class** - | If the type is "one class", specify it to calculate the ratio and the number of missing values. + | If the method is "one class", specify it. Remaining samples will be named "Other". -**Numerator or denominator** - | If the type is "one class", choose "top" or "bottom" to put the selected class as numerator or denominator (respectively) for the mean fold change. +**Calculate the mean fold change** + | Choose if you want to calculate the mean fold change. If the method is "each class": mean fold change will be calculated for all combinations of classes. If the method is "one class": it will be calculated between the selected class (see above) and the remaining samples. + +**Where should the class be placed for the mean fold change calculation?** + | If the method is "one class", choose "top" or "bottom" to put the selected class as numerator or denominator (respectively) for the mean fold change calculation. **Logarithm** | Choose if you want to transform the mean fold change with a log2 or log10. - + ------------ Output file ------------ **Variable metadata file** - | Contains the previous columns in variable metadata file and the new ones with fold tests, number and proportion of missing values. + | Contains the previous columns in variable metadata file and the new ones. | In the column names for fold, the first class specified is the one used like numerator for the ratio. **Graphs file**