corr_table: CorrTable/Corr_Script_samples

comparison CorrTable/Corr_Script_samples_row.R @ 0:b22c453e4cf4 draft

Uploaded

author	melpetera
date	Thu, 11 Oct 2018 05:35:55 -0400
parents
children	29ec7e3afdd4

comparison

equal deleted inserted replaced

--1:000000000000
+:b22c453e4cf4
+#################################################################################################
+# CORRELATION TABLE                                                                             #
+#                                                                                               #
+#                                                                                               #
+# Input : 2 tables with common samples                                                          #
+# Output : Correlation table ; Heatmap (pdf)                                                    #
+#                                                                                               #
+# Dependencies : Libraries "ggplot2" and "reshape2"                                             #
+#                                                                                               #
+#################################################################################################
+# Parameters (for dev)
+if(FALSE){
+rm(list = ls())
+setwd(dir = "Y:/Developpement")
+tab1.name <- "Test/Ressources/Inputs/CT2_DM.tabular"
+tab2.name <- "Test/Ressources/Inputs/CT2_base_Diapason_14ClinCES_PRIN.txt"
+param1.samples <- "column"
+param2.samples <- "row"
+corr.method <- "pearson"
+test.corr <- "yes"
+alpha <- 0.05
+multi.name <- "none"
+filter <- "yes"
+filters.choice <- "filters_0_thr"
+threshold <- 0.2
+reorder.var <- "yes"
+color.heatmap <- "yes"
+type.classes <-"irregular"
+reg.value <- 1/3
+irreg.vect <- c(-0.3, -0.2, -0.1, 0, 0.3, 0.4)
+output1 <- "Correlation_table.txt"
+output2 <- "Heatmap.pdf"
+}
+correlation.tab <- function(tab1.name, tab2.name, param1.samples, param2.samples, corr.method, test.corr, alpha,
+multi.name, filter, filters.choice, threshold, reorder.var, color.heatmap, type.classes,
+reg.value, irreg.vect, output1, output2){
+# This function allows to visualize the correlation between two tables
+#
+# Parameters:
+# - tab1.name: table 1 file's access
+# - tab2.name: table 2 file's access
+# - param1.samples ("row" or "column"): where the samples are in tab1
+# - param2.samples ("row" or "column"): where the samples are in tab2
+# - corr.method ("pearson", "spearman", "kendall"):
+# - test.corr ("yes" or "no"): test the significance of a correlation coefficient
+# - alpha (value between 0 and 1): risk for the correlation significance test
+# - multi.name ("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"): correction of multiple tests
+# - filter ("yes", "no"): use filter.0 or/and filter.threshold
+# - filters.choice ("filter_0" or "filters_0_thr"): zero filter removes variables with all their correlation coefficients = 0
+#   and threshold filter remove variables with all their correlation coefficients in abs < threshold
+# - threshold (value between 0 and 1): threshold for filter threshold
+# - reorder.var ("yes" or "no"): reorder variables in the correlation table thanks to the HCA
+# - color.heatmap ("yes" or "no"): color the heatmap with classes defined by the user
+# - type.classes ("regular" or "irregular"): choose to color the heatmap with regular or irregular classes
+# - reg.value (value between 0 and 1): value for regular classes
+# - irreg.vect (vector with values between -1 and 1): vector which indicates values for intervals (irregular classes)
+# - output1: correlation table file's access
+# - output2: heatmap (colored correlation table) file's access
+# Input ----------------------------------------------------------------------------------------------
+tab1 <- read.table(tab1.name, sep = "\t", header = TRUE, check.names = FALSE, row.names = 1)
+tab2 <- read.table(tab2.name, sep = "\t", header = TRUE, check.names = FALSE, row.names = 1)
+# Transpose tables according to the samples
+if(param1.samples == "column"){
+tab1 <- t(tab1)
+}
+if(param2.samples == "column"){
+tab2 <- t(tab2)
+}
+# Sorting tables in alphabetical order of the samples
+tab1 <- tab1[order(rownames(tab1)),]
+tab2 <- tab2[order(rownames(tab2)),]
+# Check if the 2 datasets match regarding samples identifiers
+# Adapt from functions "check.err" and "match2", RcheckLibrary.R
+err.stock <- NULL
+id1 <- rownames(tab1)
+id2 <- rownames(tab2)
+if(sum(id1 != id2) > 0){
+err.stock <- c("\nThe two tables do not match regarding sample identifiers.\n")
+if(length(which(id1%in%id2)) != length(id1)){
+identif <- id1[which(!(id1%in%id2))]
+if (length(identif) < 4){
+err.stock <- c(err.stock, "\nThe following identifier(s) found in the first table do not appear in the second table:\n")
+}
+else {
+err.stock <- c(err.stock, "\nFor example, the following identifiers found in the first table do not appear in the second table:\n")
+}
+identif <- identif[1:min(3,length(which(!(id1%in%id2))))]
+err.stock <- c(err.stock,"    ",paste(identif,collapse="\n    "),"\n")
+}
+if(length(which(id2%in%id1)) != length(id2)){
+identif <- id2[which(!(id2%in%id1))]
+if (length(identif) < 4){
+err.stock <- c(err.stock, "\nThe following identifier(s) found in the second table do not appear in the first table:\n")
+}
+else{
+err.stock <- c(err.stock, "\nFor example, the following identifiers found in the second table do not appear in the first table:\n")
+}
+identif <- identif[1:min(3,length(which(!(id2%in%id1))))]
+err.stock <- c(err.stock,"    ",paste(identif,collapse="\n    "),"\n")
+}
+err.stock <- c(err.stock,"\nPlease check your data.\n")
+}
+if(length(err.stock)!=0){
+stop("\n- - - - - - - - -\n",err.stock,"\n- - - - - - - - -\n\n")
+}
+# Check qualitative variables in each input tables
+err.msg <- NULL
+var1.quali <- vector()
+var2.quali <- vector()
+for (i in 1:dim(tab1)[2]){
+if(class(tab1[,i]) != "numeric" & class(tab1[,i]) != "integer"){
+var1.quali <- c(var1.quali,i)
+}
+}
+for (j in 1:dim(tab2)[2]){
+if(class(tab2[,j]) != "numeric" & class(tab2[,j]) != "integer"){
+var2.quali <- c(var2.quali, j)
+}
+}
+if (length(var1.quali) != 0 | length(var2.quali) != 0){
+err.msg <- c(err.msg, "\nThere are qualitative variables in your input tables which have been removed to compute the correlation table.\n\n")
+if(length(var1.quali) != 0 && length(var1.quali) < 4){
+err.msg <- c(err.msg, "In table 1, the following qualitative variables have been removed:\n",
+"  ",paste(colnames(tab1)[var1.quali],collapse="\n    "),"\n")
+} else if(length(var1.quali) != 0 && length(var1.quali) > 3){
+err.msg <- c(err.msg, "For example, in table 1, the following qualitative variables have been removed:\n",
+"  ",paste(colnames(tab1)[var1.quali[1:3]],collapse="\n    "),"\n")
+}
+if(length(var2.quali) != 0 && length(var2.quali) < 4){
+err.msg <- c(err.msg, "In table 2, the following qualitative variables have been removed:\n",
+"  ",paste(colnames(tab2)[var2.quali],collapse="\n    "),"\n")
+} else if(length(var2.quali) != 0 && length(var2.quali) > 3){
+err.msg <- c(err.msg, "For example, in table 2, the following qualitative variables have been removed:\n",
+"  ",paste(colnames(tab2)[var2.quali[1:3]],collapse="\n    "),"\n")
+}
+}
+if(length(var1.quali) != 0){
+tab1 <- tab1[,-var1.quali]
+}
+if(length(var2.quali) != 0){
+tab2 <- tab2[,-var2.quali]
+}
+if(length(err.msg) != 0){
+cat("\n- - - - - - - - -\n",err.msg,"\n- - - - - - - - -\n\n")
+}
+# Correlation table ---------------------------------------------------------------------------------
+tab.corr <- matrix(nrow = dim(tab2)[2], ncol = dim(tab1)[2])
+for (i in 1:dim(tab2)[2]){
+for (j in 1:dim(tab1)[2]){
+tab.corr[i,j] <- cor(tab2[,i], tab1[,j], method = corr.method, use = "pairwise.complete.obs")
+}
+}
+colnames(tab.corr) <- colnames(tab1)
+rownames(tab.corr) <- colnames(tab2)
+# Significance of correlation test  ------------------------------------------------------------------
+if (test.corr == "yes"){
+pvalue <- vector()
+for (i in 1:dim(tab.corr)[1]){
+for (j in 1:dim(tab.corr)[2]){
+suppressWarnings(corrtest <- cor.test(tab2[,i], tab1[,j], method = corr.method))
+pvalue <- c(pvalue, corrtest$p.value)
+if (multi.name == "none"){
+if (corrtest$p.value > alpha){
+tab.corr[i,j] <- 0
+}
+}
+}
+}
+if(multi.name != "none"){
+adjust <- matrix(p.adjust(pvalue, method = multi.name), nrow = dim(tab.corr)[1], ncol = dim(tab.corr)[2], byrow = T)
+tab.corr[adjust > alpha] <- 0
+}
+}
+# Filter settings ------------------------------------------------------------------------------------
+if (filter == "yes"){
+# Remove variables with all their correlation coefficients = 0 :
+if (filters.choice == "filter_0"){
+threshold <- 0
+}
+var2.thres <- vector()
+for (i in 1:dim(tab.corr)[1]){
+if (length(which(abs(tab.corr[i,]) <= threshold)) == dim(tab.corr)[2]){
+var2.thres <- c(var2.thres, i)
+}
+}
+if (length(var2.thres) != 0){
+tab.corr <- tab.corr[-var2.thres,]
+tab2 <- tab2[, -var2.thres]
+}
+var1.thres <- vector()
+for (i in 1:dim(tab.corr)[2]){
+if (length(which(abs(tab.corr[,i]) <= threshold)) == dim(tab.corr)[1]){
+var1.thres <- c(var1.thres, i)
+}
+}
+if (length(var1.thres) != 0){
+tab.corr <- tab.corr[,-var1.thres]
+tab1 <- tab1[,-var1.thres]
+}
+}
+# Reorder variables in the correlation table (with the HCA) ------------------------------------------
+if (reorder.var == "yes"){
+cormat.tab2 <- cor(tab2, method = corr.method, use = "pairwise.complete.obs")
+dist.tab2 <- as.dist(1 - cormat.tab2)
+hc.tab2 <- hclust(dist.tab2, method = "ward.D2")
+tab.corr <- tab.corr[hc.tab2$order,]
+cormat.tab1 <- cor(tab1, method = corr.method, use = "pairwise.complete.obs")
+dist.tab1 <- as.dist(1 - cormat.tab1)
+hc.tab1 <- hclust(dist.tab1, method = "ward.D2")
+tab.corr <- tab.corr[,hc.tab1$order]
+}
+# Output 1 : Correlation table -----------------------------------------------------------------------
+# Export correlation table
+write.table(x = data.frame(name = rownames(tab.corr), tab.corr), file = output1, sep = "\t", quote = FALSE, row.names = FALSE)
+# Create the heatmap ---------------------------------------------------------------------------------
+# A message if no variable kept
+if(length(tab.corr)==0){
+	pdf(output2)
+	plot.new()
+	legend("center","Filtering leads to no remaining correlation coefficient.")
+	dev.off()
+} else {
+library(ggplot2)
+library(reshape2)
+# Melt the correlation table :
+melted.tab.corr <- melt(tab.corr)
+if (color.heatmap == "yes") {
+# Add a column for the classes of each correlation coefficient
+classe <- rep(0, dim(melted.tab.corr)[1])
+melted <- cbind(melted.tab.corr, classe)
+if (type.classes == "regular"){
+vect <- vector()
+if (seq(-1,0,reg.value)[length(seq(-1,0,reg.value))] == 0){
+vect <- c(seq(-1,0,reg.value)[-length(seq(-1,0,reg.value))],
+rev(seq(1,0,-reg.value)))
+} else {
+vect <- c(seq(-1,0,reg.value), 0, rev(seq(1,0,-reg.value)))
+}
+} else if (type.classes == "irregular") {
+irreg.vect <- c(-1, irreg.vect, 1)
+vect <- irreg.vect
+}
+# Color palette :
+myPal <- colorRampPalette(c("#00CC00", "white", "red"), space = "Lab", interpolate = "spline")
+# Create vector intervals
+cl <- vector()
+cl <- paste("[", vect[1], ";", round(vect[2],3), "]", sep = "")
+for (x in 2:(length(vect)-1)) {
+if (vect[x+1] == 0) {
+cl <- c(cl, paste("]", round(vect[x],3), ";", round(vect[x+1],3), "[", sep = ""))
+} else {
+cl <- c(cl, paste("]", round(vect[x],3), ";",
+round(vect[x+1],3), "]", sep = ""))
+}
+}
+# Assign an interval to each correlation coefficient
+for (i in 1:dim(melted.tab.corr)[1]){
+for (j in 1:(length(cl))){
+if (vect[j] == -1){
+melted$classe[i][melted$value[i] >= vect[j]
+&& melted$value[i] <= vect[j+1]] <- cl[j]
+} else {
+melted$classe[i][melted$value[i] > vect[j]
+&& melted$value[i] <= vect[j+1]] <- cl[j]
+}
+}
+}
+# Find the 0 and assign it the white as name
+if (length(which(vect == 0)) == 1) {
+melted$classe[melted$value == 0] <- "0"
+indic <- which(vect == 0)
+cl <- c(cl[1:(indic-1)], 0, cl[indic:length(cl)])
+names(cl)[indic] <- "#FFFFFF"
+} else if (length(which(vect == 0)) == 0) {
+indic <- 0
+for (x in 1:(length(vect)-1)) {
+if (0 > vect[x] && 0 <= vect[x+1]) {
+names(cl)[x] <- "#FFFFFF"
+indic <- x
+}
+}
+}
+indic <- length(cl) - indic + 1
+cl <- rev(cl)
+# Assign the colors of each intervals as their name
+names(cl)[1:(indic-1)] <- myPal(length(cl[1:indic])*2-1)[1:indic-1]
+names(cl)[(indic+1):length(cl)] <- myPal(length(cl[indic:length(cl)])*2-1)[(ceiling(length(myPal(length(cl[indic:length(cl)])*2-1))/2)+1):length(myPal(length(cl[indic:length(cl)])*2-1))]
+melted$classe <- factor(melted$classe)
+melted$classe <- factor(melted$classe, levels = cl[cl%in%levels(melted$classe)])
+# Heatmap if color.heatmap = yes :
+ggplot(melted, aes(Var2, Var1, fill = classe)) +
+ggtitle("Colored correlation table" ) + xlab("Table 1") + ylab("Table 2") +
+geom_tile(color ="ghostwhite") +
+scale_fill_manual( breaks = levels(melted$classe),
+values = names(cl)[cl%in%levels(melted$classe)],
+name = paste(corr.method, "correlation", sep = "\n")) +
+theme_classic() +
+theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
+plot.title = element_text(hjust = 0.5))
+} else {
+# Heatmap if color.heatmap = no :
+ggplot(melted.tab.corr, aes(Var2, Var1, fill = value)) +
+ggtitle("Colored correlation table" ) + xlab("Table 1") + ylab("Table 2") +
+geom_tile(color ="ghostwhite") +
+scale_fill_gradient2(low = "red", high = "#00CC00", mid = "white", midpoint = 0, limit = c(-1,1),
+name = paste(corr.method, "correlation", sep = "\n")) +
+theme_classic() +
+theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
+plot.title = element_text(hjust = 0.5))
+}
+ggsave(output2, device = "pdf", width = 10+0.075*dim(tab.corr)[2], height = 5+0.075*dim(tab.corr)[1], limitsize = FALSE)
+} # End if(length(tab.corr)==0)else
+} # End of correlation.tab
+# Function call
+# correlation.tab(tab1.name, tab2.name, param1.samples, param2.samples, corr.method, test.corr, alpha, multi.name, filter,
+#                             filters.choice, threshold, reorder.var, color.heatmap, type.classes,
+#                             reg.value, irreg.vect, output1, output2)

Mercurial > repos > melpetera > corr_table

comparison CorrTable/Corr_Script_samples_row.R @ 0:b22c453e4cf4 draft