Mercurial > repos > proteore > proteore_heatmap_visualization
comparison heatmap_viz.R @ 0:edbb84a94a36 draft
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
author | proteore |
---|---|
date | Tue, 18 Dec 2018 09:58:49 -0500 |
parents | |
children | b8a5139cf5b9 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:edbb84a94a36 |
---|---|
1 #!/usr/bin/Rscript | |
2 | |
3 suppressMessages(library('plotly',quietly = T)) | |
4 suppressMessages(library('heatmaply',quietly = T)) | |
5 | |
6 #packageVersion('plotly') | |
7 | |
8 get_args <- function(){ | |
9 | |
10 ## Collect arguments | |
11 args <- commandArgs(TRUE) | |
12 | |
13 ## Default setting when no arguments passed | |
14 if(length(args) < 1) { | |
15 args <- c("--help") | |
16 } | |
17 | |
18 ## Help section | |
19 if("--help" %in% args) { | |
20 cat("Pathview R script | |
21 Arguments: | |
22 --help Print this test | |
23 --input path of the input file (must contains a colum of uniprot and/or geneID accession number) | |
24 --output Output file | |
25 --type type of output file, could be html, pdf, jpg or png | |
26 --cols Columns to use for heatmap, exemple : '3:8' to use columns from the third to the 8th | |
27 --row_names Column which contains row names | |
28 --header True or False | |
29 --col_text_angle Angle of columns label ; from -90 to 90 degres | |
30 --dist_fun function used to compute the distance | |
31 | |
32 Example: | |
33 ./heatmap_viz.R --input='dat.nucl.norm.imputed.tsv' --output='heatmap.html' --cols='3:8' --row_names='2' --header=TRUE --col_text_angle=0 \n\n") | |
34 | |
35 q(save="no") | |
36 } | |
37 | |
38 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | |
39 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | |
40 args <- as.list(as.character(argsDF$V2)) | |
41 names(args) <- argsDF$V1 | |
42 | |
43 return(args) | |
44 } | |
45 | |
46 read_file <- function(path,header){ | |
47 file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="",fill=TRUE,check.names = F),silent=TRUE) | |
48 if (inherits(file,"try-error")){ | |
49 stop("File not found !") | |
50 }else{ | |
51 return(file) | |
52 } | |
53 } | |
54 | |
55 #convert a string to boolean | |
56 str2bool <- function(x){ | |
57 if (any(is.element(c("t","true"),tolower(x)))){ | |
58 return (TRUE) | |
59 }else if (any(is.element(c("f","false"),tolower(x)))){ | |
60 return (FALSE) | |
61 }else{ | |
62 return(NULL) | |
63 } | |
64 } | |
65 | |
66 #remove remaining quote | |
67 #only keep usefull columns | |
68 #remove lines with at least one empty cell in a matrix between two defined columns | |
69 clean_df <- function(mat,cols,rownames_col){ | |
70 uto = mat[,cols] | |
71 uto <- as.data.frame(apply(uto,c(1,2),function(x) gsub(",",".",x))) | |
72 uto <- as.data.frame(apply(uto,c(1,2),function(x) {ifelse(is.character(x),as.numeric(x),x)})) | |
73 rownames(uto) <- mat[,rownames_col] | |
74 #bad_lines <- which(apply(uto, 1, function(x) any(is.na(x)))) | |
75 #if (length(bad_lines) > 0) { | |
76 # uto <- uto[- bad_lines,] | |
77 # print(paste("lines",bad_lines, "has been removed: at least one non numeric content")) | |
78 #} | |
79 return(uto) | |
80 } | |
81 | |
82 get_cols <-function(input_cols) { | |
83 input_cols <- gsub("c","",input_cols) | |
84 if (grepl(":",input_cols)) { | |
85 first_col=unlist(strsplit(input_cols,":"))[1] | |
86 last_col=unlist(strsplit(input_cols,":"))[2] | |
87 cols=first_col:last_col | |
88 } else { | |
89 cols = as.integer(unlist(strsplit(input_cols,","))) | |
90 } | |
91 return(cols) | |
92 } | |
93 | |
94 #get args | |
95 args <- get_args() | |
96 | |
97 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda") | |
98 #load("/home/dchristiany/proteore_project/ProteoRE/tools/heatmap_viz/args.rda") | |
99 | |
100 header=str2bool(args$header) | |
101 output <- rapply(strsplit(args$output,"\\."),c) #remove extension | |
102 output <- paste(output[1:length(output)-1],collapse=".") | |
103 output <- paste(output,args$type,sep=".") | |
104 cols = get_cols(args$cols) | |
105 rownames_col = as.integer(gsub("c","",args$row_names)) | |
106 if (length(cols) <=1 ){ | |
107 stop("You need several colums to build a heatmap") | |
108 } | |
109 dist=args$dist | |
110 clust=args$clust | |
111 dendrogram=args$dendrogram | |
112 | |
113 #cleaning data | |
114 uto <- read_file(args$input,header) | |
115 uto <- clean_df(uto,cols,rownames_col) | |
116 uto <- uto[rowSums(is.na(uto)) != ncol(uto), ] #remove emptylines | |
117 | |
118 if (header) { | |
119 col_names = names(data) | |
120 } else { | |
121 col_names = cols | |
122 } | |
123 | |
124 #building heatmap | |
125 if (dist %in% c("pearson","spearman","kendall")){ | |
126 heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, distfun=dist, | |
127 hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), | |
128 width = 1000, height=1000, colors = c('blue','green','yellow','red')) | |
129 } else { | |
130 heatmaply(uto, file=output, margins=c(100,50,NA,0), plot_method="plotly", labRow = rownames(uto), labCol = col_names, dist_method = dist, | |
131 hclust_method = clust, dendrogram = dendrogram, grid_gap = 0,cexCol = 1, column_text_angle = as.numeric(args$col_text_angle), | |
132 width = 1000, height=1000, colors = c('blue','green','yellow','red')) | |
133 } | |
134 | |
135 ####heatmaply | |
136 | |
137 simulateExprData <- function(n, n0, p, rho0, rho1){ row | |
138 # n: total number of subjects | |
139 # n0: number of subjects with exposure 0 | |
140 # n1: number of subjects with exposure 1 | |
141 # p: number of genes | |
142 # rho0: rho between Z_i and Z_j for subjects with exposure 0 | |
143 # rho1: rho between Z_i and Z_j for subjects with exposure 1 | |
144 | |
145 # Simulate gene expression values according to exposure 0 or 1, | |
146 # according to a centered multivariate normal distribution with | |
147 # covariance between Z_i and Z_j being rho^|i-j| | |
148 n1 <- n - n0 | |
149 times <- 1:p | |
150 H <- abs(outer(times, times, "-")) | |
151 V0 <- rho0^H | |
152 V1 <- rho1^H | |
153 | |
154 # rows are people, columns are genes | |
155 genes0 <- MASS::mvrnorm(n = n0, mu = rep(0,p), Sigma = V0) | |
156 genes1 <- MASS::mvrnorm(n = n1, mu = rep(0,p), Sigma = V1) | |
157 genes <- rbind(genes0,genes1) | |
158 return(genes) | |
159 } | |
160 | |
161 #genes <- simulateExprData(n = 50, n0 = 25, p = 100, rho0 = 0.01, rho1 = 0.95) | |
162 | |
163 #heatmaply(genes, k_row = 2, k_col = 2) | |
164 | |
165 #heatmaply(cor(genes), k_row = 2, k_col = 2) | |
166 | |
167 | |
168 | |
169 | |
170 | |
171 | |
172 |