comparison heatmap_for_variants.R @ 1:e362b3143cde draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
author iuc
date Thu, 10 Dec 2020 13:41:29 +0000
parents 1062d6ad6503
children dc51db22310c
comparison
equal deleted inserted replaced
0:1062d6ad6503 1:e362b3143cde
16 variant_files <- fapply(samples$ids, read_and_process) # nolint 16 variant_files <- fapply(samples$ids, read_and_process) # nolint
17 17
18 extractall_data <- function(id) { 18 extractall_data <- function(id) {
19 variants <- variant_files[[id]] 19 variants <- variant_files[[id]]
20 tmp <- variants %>% 20 tmp <- variants %>%
21 mutate(posalt = uni_select) %>% 21 mutate(unique_selectors = group_select) %>%
22 select(posalt, AF) 22 select(unique_selectors, AF)
23 colnames(tmp) <- c("Mutation", id) 23 colnames(tmp) <- c("Mutation", id)
24 return(tmp) 24 return(tmp)
25 } 25 }
26 26
27 extractall_annots <- function(id) { 27 extractall_annots <- function(id) {
28 variants <- variant_files[[id]] 28 variants <- variant_files[[id]]
29 tmp <- variants %>% 29 tmp <- variants %>%
30 mutate(posalt = uni_select, 30 mutate(unique_selectors = group_select,
31 effect = EFF....EFFECT, gene = EFF....GENE) %>% 31 effect = EFF....EFFECT, gene = EFF....GENE) %>%
32 select(posalt, effect, gene) 32 select(unique_selectors, effect, gene)
33 # allow "." as an alternative missing value in EFF.EFFECT and EFF.GENE
34 tmp$effect <- sub("^\\.$", "", tmp$effect)
35 tmp$gene <- sub("^\\.$", "", tmp$gene)
33 return(tmp) 36 return(tmp)
34 } 37 }
35 38
36 # process allele frequencies 39 # process allele frequencies
37 processed_files <- fapply(samples$ids, extractall_data) 40 processed_files <- fapply(samples$ids, extractall_data)
51 ## readout annotations 54 ## readout annotations
52 processed_annots <- fapply(samples$ids, extractall_annots) 55 processed_annots <- fapply(samples$ids, extractall_annots)
53 ann_final <- processed_annots %>% 56 ann_final <- processed_annots %>%
54 reduce(function(x, y) { 57 reduce(function(x, y) {
55 unique(rbind(x, y))}) %>% 58 unique(rbind(x, y))}) %>%
56 filter(posalt %in% colnames(final)) ## apply frequency filter 59 ## apply frequency filter
60 filter(unique_selectors %in% colnames(final))
57 ann_final <- as_tibble(ann_final[str_order( 61 ann_final <- as_tibble(ann_final[str_order(
58 ann_final$posalt, numeric = T), ]) %>% 62 ann_final$unique_selectors, numeric = T), ]) %>%
59 column_to_rownames("posalt") ## sort 63 column_to_rownames("unique_selectors") ## sort
60 64
61 # rename annotations 65 # rename annotations
62 trans <- function(x, mapping, replace_missing=NULL) { 66 trans <- function(x, mapping, replace_missing=NULL) {
63 # helper function for translating effects 67 # helper function for translating effects
64 mapped <- mapping[[x]] 68 mapped <- mapping[[x]]
144 pheat_number_of_clusters <- length(samples$ids) 148 pheat_number_of_clusters <- length(samples$ids)
145 print(paste0("[INFO] Number of clusters: now set to ", 149 print(paste0("[INFO] Number of clusters: now set to ",
146 pheat_number_of_clusters)) 150 pheat_number_of_clusters))
147 } 151 }
148 152
153
154 # Fix Labels
155 ## Prettify names, check for label parity between final and ann_final
156 fix_label <- function(name) {
157 ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A
158 ##' to: 424 AGT… > A
159 cols <- unlist(str_split(name, " "))
160 ## first 3 are POS REF ALT, and the rest are optional differences
161 pos_ref_alt <- cols[1:3]
162 rest <- ""
163 if (length(cols) > 3) {
164 rest <- paste0(" :: ", paste(cols[4:length(cols)], sep = " "))
165 }
166 ## Trim the REF or ALT if too long
167 if (str_length(pos_ref_alt[2]) > 3) {
168 pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…")
169 }
170 if (str_length(pos_ref_alt[3]) > 3) {
171 pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…")
172 }
173 ## Join required
174 new_name <- paste0(pos_ref_alt[1], " ",
175 pos_ref_alt[2], " > ",
176 pos_ref_alt[3])
177 ## Join rest
178 new_name <- paste0(new_name, " ", paste(rest))
179 }
180
181 colnames(final) <- sapply(colnames(final), fix_label)
182 rownames(ann_final) <- sapply(rownames(ann_final), fix_label)
183 ## sanity test
184 stopifnot(all(colnames(final) %in% rownames(ann_final)))
185
186
187 # Perform Plotting
149 get_plot_dims <- function(heat_map) { 188 get_plot_dims <- function(heat_map) {
150 ## get the dimensions of a pheatmap object 189 ## get the dimensions of a pheatmap object
151 ## useful for plot formats that can't be written to a file directly, but 190 ## useful for plot formats that can't be written to a file directly, but
152 ## for which we need to set up a plotting device 191 ## for which we need to set up a plotting device
153 ## source: https://stackoverflow.com/a/61876386 192 ## source: https://stackoverflow.com/a/61876386