Mercurial > repos > proteore > proteore_heatmap_visualization
diff heatmap_viz.R @ 0:edbb84a94a36 draft
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
author | proteore |
---|---|
date | Tue, 18 Dec 2018 09:58:49 -0500 |
parents | |
children | b8a5139cf5b9 |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/heatmap_viz.R Tue Dec 18 09:58:49 2018 -0500
# @@ -0,0 +1,172 @@
#!/usr/bin/Rscript

suppressMessages(library("plotly", quietly = TRUE))
suppressMessages(library("heatmaply", quietly = TRUE))

#packageVersion('plotly')

# Parse the command line into a named list of option values.
#
# Every argument is expected as `--name=value`. The leading `--` is removed
# and the argument is split on its FIRST `=` only, so a value may itself
# contain `=`. A bare `--flag` (no `=`) is stored with the value "TRUE".
# With no arguments, or when `--help` is present, the usage text is printed
# and the R session exits.
#
# Returns: a named list of character strings, one element per argument.
get_args <- function() {

  ## Collect arguments
  args <- commandArgs(TRUE)

  ## Default setting when no arguments passed
  if (length(args) < 1) {
    args <- c("--help")
  }

  ## Help section
  if ("--help" %in% args) {
    cat("Heatmap visualization R script
    Arguments:
      --help                  Print this text
      --input                 path of the input file (tab-separated)
      --output                Output file
      --type                  type of output file, could be html, pdf, jpg or png
      --cols                  Columns to use for heatmap, example : '3:8' to use columns from the third to the 8th
      --row_names             Column which contains row names
      --header                True or False
      --col_text_angle        Angle of columns label ; from -90 to 90 degrees
      --dist_fun              function used to compute the distance
      --clust                 clustering method passed to heatmaply (hclust_method)
      --dendrogram            dendrogram option passed to heatmaply

    Example:
      ./heatmap_viz.R --input='dat.nucl.norm.imputed.tsv' --output='heatmap.html' --type='html' --cols='3:8' --row_names='2' --header=TRUE --col_text_angle=0 --dist_fun='euclidean' --clust='complete' --dendrogram='both' \n\n")

    q(save = "no")
  }

  stripped <- sub("^--", "", args)
  # Position of the first "=" in each argument (-1 when there is none).
  eq_pos <- regexpr("=", stripped, fixed = TRUE)
  has_value <- eq_pos > 0
  opt_names <- ifelse(has_value, substr(stripped, 1L, eq_pos - 1L), stripped)
  opt_values <- ifelse(has_value, substring(stripped, eq_pos + 1L), "TRUE")

  setNames(as.list(opt_values), opt_names)
}

# Read a tab-separated file into a data.frame.
#
# Strings are kept as character, quotes are not interpreted, short rows are
# padded and column names are left untouched (check.names = FALSE).
#
#   path   : path of the file to read
#   header : TRUE when the first line holds the column names
#
# Stops with "File not found !" when `path` is missing or does not exist,
# and with the underlying reader message when the file cannot be parsed.
read_file <- function(path, header) {
  if (is.null(path) || length(path) != 1L || is.na(path) ||
      !file.exists(path)) {
    stop("File not found !")
  }
  tryCatch(
    read.csv(path, header = header, sep = "\t", stringsAsFactors = FALSE,
             quote = "", fill = TRUE, check.names = FALSE),
    error = function(e) {
      stop("Unable to read '", path, "': ", conditionMessage(e),
           call. = FALSE)
    }
  )
}

#convert a string to boolean
# Returns TRUE for "t"/"true", FALSE for "f"/"false" (case-insensitive) and
# NULL for anything else, so callers must check for NULL.
str2bool <- function(x) {
  value <- tolower(x)
  if (any(c("t", "true") %in% value)) {
    TRUE
  } else if (any(c("f", "false") %in% value)) {
    FALSE
  } else {
    NULL
  }
}

#remove remaining quote
#only keep useful columns
#remove lines with at least one empty
# cell in a matrix between two defined columns
#
# Keep only the columns `cols` of `mat` and convert them to numeric.
#
# Decimal commas are turned into dots before conversion ("1,5" -> 1.5);
# values that still cannot be parsed become NA (R emits a coercion warning).
# The conversion is done column by column rather than with apply(), which
# would first coerce the whole data.frame to a character matrix.
#
#   mat          : data.frame as returned by read_file()
#   cols         : integer indices of the columns to keep
#   rownames_col : index of the column of `mat` holding the row names
#
# Returns: a numeric data.frame whose row names come from `rownames_col`.
clean_df <- function(mat, cols, rownames_col) {
  uto <- mat[, cols, drop = FALSE]
  uto[] <- lapply(uto, function(column) {
    as.numeric(gsub(",", ".", as.character(column), fixed = TRUE))
  })
  rownames(uto) <- mat[, rownames_col]
  #bad_lines <- which(apply(uto, 1, function(x) any(is.na(x))))
  #if (length(bad_lines) > 0) {
  #  uto <- uto[- bad_lines,]
  #  print(paste("lines",bad_lines, "has been removed: at least one non numeric content"))
  #}
  uto
}

# Turn a column specification into a vector of integer column indices.
#
# Any "c" character is removed first (so "c3" is read as "3"). The rest is
# a comma-separated list whose items are either a single index or a range
# "first:last"; "3:8", "2,4,6" and "1:3,7" are all accepted.
#
# Stops when the specification is empty or contains a non-numeric item.
get_cols <- function(input_cols) {
  spec <- gsub("c", "", input_cols)
  tokens <- trimws(unlist(strsplit(spec, ",", fixed = TRUE)))

  expand_token <- function(token) {
    bounds <- suppressWarnings(
      as.integer(unlist(strsplit(token, ":", fixed = TRUE)))
    )
    if (!grepl(":", token, fixed = TRUE)) {
      return(bounds)
    }
    if (length(bounds) < 2L || anyNA(bounds[1:2])) {
      return(NA_integer_)
    }
    bounds[1]:bounds[2]
  }

  cols <- unlist(lapply(tokens, expand_token))
  if (length(cols) == 0L || anyNA(cols)) {
    stop("Invalid column specification: '",
         paste(input_cols, collapse = ","), "'")
  }
  cols
}

#get args
args <- get_args()

#save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda")
#load("/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda")

# Look an option up the way `args$name` does: exact name first, otherwise a
# unique prefix match (so `--dist_fun=...` is found when asking for "dist").
# Returns NULL when the option was not supplied.
get_opt <- function(name) args[[name, exact = FALSE]]

header <- str2bool(get_opt("header"))
if (is.null(header)) {
  stop("--header must be TRUE or FALSE")
}

output <- get_opt("output")
if (is.null(output)) {
  stop("--output is required")
}
output_type <- get_opt("type")
if (!is.null(output_type)) {
  # Replace the extension of the file name (if it has one) with the
  # requested type; dots in directory names are left alone.
  output <- paste(sub("\\.[^./]*$", "", output), output_type, sep = ".")
}

cols <- get_cols(get_opt("cols"))
rownames_col <- as.integer(gsub("c", "", get_opt("row_names")))
if (length(rownames_col) != 1L || is.na(rownames_col)) {
  stop("--row_names must be a single column number")
}
if (length(cols) <= 1) {
  stop("You need several colums to build a heatmap")
}
dist_method <- get_opt("dist")
clust_method <- get_opt("clust")
dendrogram <- get_opt("dendrogram")
text_angle <- get_opt("col_text_angle")
if (!is.null(text_angle)) {
  text_angle <- as.numeric(text_angle)
}

#cleaning data
uto <- read_file(get_opt("input"), header)
uto <- clean_df(uto, cols, rownames_col)
uto <- uto[rowSums(is.na(uto)) != ncol(uto), , drop = FALSE] #remove emptylines

# Column labels: the names of the selected columns when the file has a
# header, otherwise the column indices themselves.
if (header) {
  col_names <- names(uto)
} else {
  col_names <- cols
}

#building heatmap
# Options the user did not supply are left out of the call so that
# heatmaply's own defaults apply instead of an explicit NULL.
optional_args <- list(
  hclust_method = clust_method,
  dendrogram = dendrogram,
  column_text_angle = text_angle
)
if (isTRUE(dist_method %in% c("pearson", "spearman", "kendall"))) {
  # correlation-based distances are requested through `distfun`
  optional_args$distfun <- dist_method
} else {
  optional_args$dist_method <- dist_method
}
optional_args <- Filter(Negate(is.null), optional_args)

heatmap_args <- c(
  list(
    quote(uto), # passed as a symbol so the call does not embed the data
    file = output, margins = c(100, 50, NA, 0), plot_method = "plotly",
    labRow = rownames(uto), labCol = col_names,
    grid_gap = 0, cexCol = 1,
    width = 1000, height = 1000,
    colors = c("blue", "green", "yellow", "red")
  ),
  optional_args
)
do.call(heatmaply, heatmap_args)

####heatmaply

# Simulate a matrix of gene expression values for two exposure groups.
#
# Values are drawn with MASS::mvrnorm from a centered multivariate normal
# distribution whose covariance between genes i and j is rho^|i-j|.
#
#   n    : total number of subjects
#   n0   : number of subjects with exposure 0 (n1 = n - n0 have exposure 1)
#   p    : number of genes
#   rho0 : rho between Z_i and Z_j for subjects with exposure 0
#   rho1 : rho between Z_i and Z_j for subjects with exposure 1
#
# Returns: a matrix with one row per subject (exposure 0 first) and one
# column per gene. This function is not called by the script itself.
simulateExprData <- function(n, n0, p, rho0, rho1) {
  n1 <- n - n0
  times <- 1:p
  H <- abs(outer(times, times, "-"))
  V0 <- rho0^H
  V1 <- rho1^H

  # rows are people, columns are genes
  genes0 <- MASS::mvrnorm(n = n0, mu = rep(0, p), Sigma = V0)
  genes1 <- MASS::mvrnorm(n = n1, mu = rep(0, p), Sigma = V1)
  genes <- rbind(genes0, genes1)
  return(genes)
}

#genes <- simulateExprData(n = 50, n0 = 25, p = 100, rho0 = 0.01, rho1 = 0.95)

#heatmaply(genes, k_row = 2, k_col = 2)

#heatmaply(cor(genes), k_row = 2, k_col = 2)