comparison heatmap_viz.R @ 0:edbb84a94a36 draft

planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
author proteore
date Tue, 18 Dec 2018 09:58:49 -0500
parents
children b8a5139cf5b9
comparison
equal deleted inserted replaced
-1:000000000000 0:edbb84a94a36
1 #!/usr/bin/Rscript
2
3 suppressMessages(library('plotly',quietly = T))
4 suppressMessages(library('heatmaply',quietly = T))
5
6 #packageVersion('plotly')
7
8 get_args <- function(){
9
10 ## Collect arguments
11 args <- commandArgs(TRUE)
12
13 ## Default setting when no arguments passed
14 if(length(args) < 1) {
15 args <- c("--help")
16 }
17
18 ## Help section
19 if("--help" %in% args) {
20 cat("Pathview R script
21 Arguments:
22 --help Print this test
23 --input path of the input file (must contains a colum of uniprot and/or geneID accession number)
24 --output Output file
25 --type type of output file, could be html, pdf, jpg or png
26 --cols Columns to use for heatmap, exemple : '3:8' to use columns from the third to the 8th
27 --row_names Column which contains row names
28 --header True or False
29 --col_text_angle Angle of columns label ; from -90 to 90 degres
30 --dist_fun function used to compute the distance
31
32 Example:
33 ./heatmap_viz.R --input='dat.nucl.norm.imputed.tsv' --output='heatmap.html' --cols='3:8' --row_names='2' --header=TRUE --col_text_angle=0 \n\n")
34
35 q(save="no")
36 }
37
38 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
39 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
40 args <- as.list(as.character(argsDF$V2))
41 names(args) <- argsDF$V1
42
43 return(args)
44 }
45
46 read_file <- function(path,header){
47 file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="",fill=TRUE,check.names = F),silent=TRUE)
48 if (inherits(file,"try-error")){
49 stop("File not found !")
50 }else{
51 return(file)
52 }
53 }
54
55 #convert a string to boolean
56 str2bool <- function(x){
57 if (any(is.element(c("t","true"),tolower(x)))){
58 return (TRUE)
59 }else if (any(is.element(c("f","false"),tolower(x)))){
60 return (FALSE)
61 }else{
62 return(NULL)
63 }
64 }
65
66 #remove remaining quote
67 #only keep usefull columns
68 #remove lines with at least one empty cell in a matrix between two defined columns
69 clean_df <- function(mat,cols,rownames_col){
70 uto = mat[,cols]
71 uto <- as.data.frame(apply(uto,c(1,2),function(x) gsub(",",".",x)))
72 uto <- as.data.frame(apply(uto,c(1,2),function(x) {ifelse(is.character(x),as.numeric(x),x)}))
73 rownames(uto) <- mat[,rownames_col]
74 #bad_lines <- which(apply(uto, 1, function(x) any(is.na(x))))
75 #if (length(bad_lines) > 0) {
76 # uto <- uto[- bad_lines,]
77 # print(paste("lines",bad_lines, "has been removed: at least one non numeric content"))
78 #}
79 return(uto)
80 }
81
82 get_cols <-function(input_cols) {
83 input_cols <- gsub("c","",input_cols)
84 if (grepl(":",input_cols)) {
85 first_col=unlist(strsplit(input_cols,":"))[1]
86 last_col=unlist(strsplit(input_cols,":"))[2]
87 cols=first_col:last_col
88 } else {
89 cols = as.integer(unlist(strsplit(input_cols,",")))
90 }
91 return(cols)
92 }
93
94 #get args
95 args <- get_args()
96
97 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda")
98 #load("/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda")
99
100 header=str2bool(args$header)
101 output <- rapply(strsplit(args$output,"\\."),c) #remove extension
102 output <- paste(output[1:length(output)-1],collapse=".")
103 output <- paste(output,args$type,sep=".")
104 cols = get_cols(args$cols)
105 rownames_col = as.integer(gsub("c","",args$row_names))
106 if (length(cols) <=1 ){
107 stop("You need several colums to build a heatmap")
108 }
109 dist=args$dist
110 clust=args$clust
111 dendrogram=args$dendrogram
112
113 #cleaning data
114 uto <- read_file(args$input,header)
115 uto <- clean_df(uto,cols,rownames_col)
116 uto <- uto[rowSums(is.na(uto)) != ncol(uto), ] #remove emptylines
117
118 if (header) {
119 col_names = names(data)
120 } else {
121 col_names = cols
122 }
123
124 #building heatmap
125 if (dist %in% c("pearson","spearman","kendall")){
126 heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, distfun=dist,
127 hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle),
128 width = 1000, height=1000, colors = c('blue','green','yellow','red'))
129 } else {
130 heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, dist_method = dist,
131 hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle),
132 width = 1000, height=1000, colors = c('blue','green','yellow','red'))
133 }
134
135 ####heatmaply
136
137 simulateExprData <- function(n, n0, p, rho0, rho1){ row
138 # n: total number of subjects
139 # n0: number of subjects with exposure 0
140 # n1: number of subjects with exposure 1
141 # p: number of genes
142 # rho0: rho between Z_i and Z_j for subjects with exposure 0
143 # rho1: rho between Z_i and Z_j for subjects with exposure 1
144
145 # Simulate gene expression values according to exposure 0 or 1,
146 # according to a centered multivariate normal distribution with
147 # covariance between Z_i and Z_j being rho^|i-j|
148 n1 <- n - n0
149 times <- 1:p
150 H <- abs(outer(times, times, "-"))
151 V0 <- rho0^H
152 V1 <- rho1^H
153
154 # rows are people, columns are genes
155 genes0 <- MASS::mvrnorm(n = n0, mu = rep(0,p), Sigma = V0)
156 genes1 <- MASS::mvrnorm(n = n1, mu = rep(0,p), Sigma = V1)
157 genes <- rbind(genes0,genes1)
158 return(genes)
159 }
160
161 #genes <- simulateExprData(n = 50, n0 = 25, p = 100, rho0 = 0.01, rho1 = 0.95)
162
163 #heatmaply(genes, k_row = 2, k_col = 2)
164
165 #heatmaply(cor(genes), k_row = 2, k_col = 2)
166
167
168
169
170
171
172