Mercurial > repos > iuc > snpfreqplot
comparison heatmap_for_variants.R @ 1:e362b3143cde draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
author | iuc |
---|---|
date | Thu, 10 Dec 2020 13:41:29 +0000 |
parents | 1062d6ad6503 |
children | dc51db22310c |
comparison
equal
deleted
inserted
replaced
0:1062d6ad6503 | 1:e362b3143cde |
---|---|
16 variant_files <- fapply(samples$ids, read_and_process) # nolint | 16 variant_files <- fapply(samples$ids, read_and_process) # nolint |
17 | 17 |
18 extractall_data <- function(id) { | 18 extractall_data <- function(id) { |
19 variants <- variant_files[[id]] | 19 variants <- variant_files[[id]] |
20 tmp <- variants %>% | 20 tmp <- variants %>% |
21 mutate(posalt = uni_select) %>% | 21 mutate(unique_selectors = group_select) %>% |
22 select(posalt, AF) | 22 select(unique_selectors, AF) |
23 colnames(tmp) <- c("Mutation", id) | 23 colnames(tmp) <- c("Mutation", id) |
24 return(tmp) | 24 return(tmp) |
25 } | 25 } |
26 | 26 |
27 extractall_annots <- function(id) { | 27 extractall_annots <- function(id) { |
28 variants <- variant_files[[id]] | 28 variants <- variant_files[[id]] |
29 tmp <- variants %>% | 29 tmp <- variants %>% |
30 mutate(posalt = uni_select, | 30 mutate(unique_selectors = group_select, |
31 effect = EFF....EFFECT, gene = EFF....GENE) %>% | 31 effect = EFF....EFFECT, gene = EFF....GENE) %>% |
32 select(posalt, effect, gene) | 32 select(unique_selectors, effect, gene) |
33 # allow "." as an alternative missing value in EFF.EFFECT and EFF.GENE | |
34 tmp$effect <- sub("^\\.$", "", tmp$effect) | |
35 tmp$gene <- sub("^\\.$", "", tmp$gene) | |
33 return(tmp) | 36 return(tmp) |
34 } | 37 } |
35 | 38 |
36 # process allele frequencies | 39 # process allele frequencies |
37 processed_files <- fapply(samples$ids, extractall_data) | 40 processed_files <- fapply(samples$ids, extractall_data) |
51 ## readout annotations | 54 ## readout annotations |
52 processed_annots <- fapply(samples$ids, extractall_annots) | 55 processed_annots <- fapply(samples$ids, extractall_annots) |
53 ann_final <- processed_annots %>% | 56 ann_final <- processed_annots %>% |
54 reduce(function(x, y) { | 57 reduce(function(x, y) { |
55 unique(rbind(x, y))}) %>% | 58 unique(rbind(x, y))}) %>% |
56 filter(posalt %in% colnames(final)) ## apply frequency filter | 59 ## apply frequency filter |
60 filter(unique_selectors %in% colnames(final)) | |
57 ann_final <- as_tibble(ann_final[str_order( | 61 ann_final <- as_tibble(ann_final[str_order( |
58 ann_final$posalt, numeric = T), ]) %>% | 62 ann_final$unique_selectors, numeric = T), ]) %>% |
59 column_to_rownames("posalt") ## sort | 63 column_to_rownames("unique_selectors") ## sort |
60 | 64 |
61 # rename annotations | 65 # rename annotations |
62 trans <- function(x, mapping, replace_missing=NULL) { | 66 trans <- function(x, mapping, replace_missing=NULL) { |
63 # helper function for translating effects | 67 # helper function for translating effects |
64 mapped <- mapping[[x]] | 68 mapped <- mapping[[x]] |
144 pheat_number_of_clusters <- length(samples$ids) | 148 pheat_number_of_clusters <- length(samples$ids) |
145 print(paste0("[INFO] Number of clusters: now set to ", | 149 print(paste0("[INFO] Number of clusters: now set to ", |
146 pheat_number_of_clusters)) | 150 pheat_number_of_clusters)) |
147 } | 151 } |
148 | 152 |
153 | |
154 # Fix Labels | |
155 ## Prettify names, check for label parity between final and ann_final | |
156 fix_label <- function(name) { | |
157 ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A | |
158 ##' to: 424 AGT… > A | |
159 cols <- unlist(str_split(name, " ")) | |
160 ## first 3 are POS REF ALT, and the rest are optional differences | |
161 pos_ref_alt <- cols[1:3] | |
162 rest <- "" | |
163 if (length(cols) > 3) { | |
164 rest <- paste0(" :: ", paste(cols[4:length(cols)], sep = " ")) | |
165 } | |
166 ## Trim the REF or ALT if too long | |
167 if (str_length(pos_ref_alt[2]) > 3) { | |
168 pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…") | |
169 } | |
170 if (str_length(pos_ref_alt[3]) > 3) { | |
171 pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…") | |
172 } | |
173 ## Join required | |
174 new_name <- paste0(pos_ref_alt[1], " ", | |
175 pos_ref_alt[2], " > ", | |
176 pos_ref_alt[3]) | |
177 ## Join rest | |
178 new_name <- paste0(new_name, " ", paste(rest)) | |
179 } | |
180 | |
181 colnames(final) <- sapply(colnames(final), fix_label) | |
182 rownames(ann_final) <- sapply(rownames(ann_final), fix_label) | |
183 ## sanity test | |
184 stopifnot(all(colnames(final) %in% rownames(ann_final))) | |
185 | |
186 | |
187 # Perform Plotting | |
149 get_plot_dims <- function(heat_map) { | 188 get_plot_dims <- function(heat_map) { |
150 ## get the dimensions of a pheatmap object | 189 ## get the dimensions of a pheatmap object |
151 ## useful for plot formats that can't be written to a file directly, but | 190 ## useful for plot formats that can't be written to a file directly, but |
152 ## for which we need to set up a plotting device | 191 ## for which we need to set up a plotting device |
153 ## source: https://stackoverflow.com/a/61876386 | 192 ## source: https://stackoverflow.com/a/61876386 |