annotate heatmap_for_variants.R @ 1:e362b3143cde draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
author iuc
date Thu, 10 Dec 2020 13:41:29 +0000
parents 1062d6ad6503
children dc51db22310c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
1 #!/usr/bin/env R
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
2
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
3 suppressPackageStartupMessages(library(pheatmap))
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
4 suppressPackageStartupMessages(library(RColorBrewer))
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
5 suppressPackageStartupMessages(library(tidyverse))
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
6
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
fapply <- function(vect_ids, func) {
  # Apply `func` to every element of `vect_ids` and return the results as a
  # list whose names are the ids themselves.
  setNames(lapply(vect_ids, func), vect_ids)
}
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
13
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
# M A I N
# This script relies on objects it does not define in this section: `samples`
# (whose `ids` element lists the samples to load) and the function
# `read_and_process`. Check for them up front so a missing input fails here
# with a clear message instead of further down the pipeline.
stopifnot(exists("samples"))
if (length(samples$ids) == 0) {
  stop("`samples$ids` is empty: there are no variant files to process",
       call. = FALSE)
}
stopifnot(exists("read_and_process", mode = "function"))
# one processed variant table per sample, named by sample id
variant_files <- fapply(samples$ids, read_and_process) # nolint
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
17
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
extractall_data <- function(id) {
  # Build the two-column allele-frequency table for sample `id`: the variant
  # selector (taken from `group_select`, renamed "Mutation") and the AF
  # column (renamed to the sample id so tables can be joined side by side).
  af_table <- select(
    mutate(variant_files[[id]], unique_selectors = group_select),
    unique_selectors, AF
  )
  setNames(af_table, c("Mutation", id))
}
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
26
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
extractall_annots <- function(id) {
  # Build the annotation table for sample `id`: the variant selector (taken
  # from `group_select`) together with the effect and gene columns.
  # allow "." as an alternative missing value in EFF.EFFECT and EFF.GENE
  blank_dot <- function(values) sub("^\\.$", "", values)
  annots <- variant_files[[id]] %>%
    mutate(unique_selectors = group_select,
           effect = EFF....EFFECT, gene = EFF....GENE) %>%
    select(unique_selectors, effect, gene)
  annots$effect <- blank_dot(annots$effect)
  annots$gene <- blank_dot(annots$gene)
  annots
}
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
38
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
# process allele frequencies
processed_files <- fapply(samples$ids, extractall_data)
## join the per-sample tables on "Mutation": one row per mutation, one column
## per sample, NA where a sample has no entry for that mutation
final <- as_tibble(
  processed_files %>%
    reduce(full_join, by = "Mutation", copy = TRUE))

final <- final[str_order(final$Mutation, numeric = TRUE), ] %>%
  column_to_rownames("Mutation") ## sort and set rownames
final[final < variant_frequency] <- NA ## adjust the variant frequency:
## drop mutations that are NA in every sample; drop = FALSE keeps the data
## frame (and its rownames) intact when there is only a single sample column
final <- final[rowSums(is.na(final)) != ncol(final), , drop = FALSE]
## samples become rows, mutations become columns
final <- t(final)
final[is.na(final)] <- 0
class(final) <- "numeric"
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
52
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
# add annotations
## readout annotations
processed_annots <- fapply(samples$ids, extractall_annots)
## stack all per-sample annotation tables in one go and de-duplicate once;
## unlike a pairwise reduce this also de-duplicates a single-sample input
ann_final <- processed_annots %>%
  bind_rows() %>%
  distinct() %>%
  ## apply frequency filter
  filter(unique_selectors %in% colnames(final))
ann_final <- as_tibble(ann_final[str_order(
  ann_final$unique_selectors, numeric = TRUE), ]) %>%
  column_to_rownames("unique_selectors") ## sort
0
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
64
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
65 # rename annotations
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
trans <- function(x, mapping, replace_missing = NULL) {
  # helper function for translating effects
  # Looks `x` up in `mapping`. A hit is returned as is; on a miss the result
  # is `replace_missing` when one was supplied, otherwise `x` unchanged.
  hit <- mapping[[x]]
  if (!is.null(hit)) {
    return(hit)
  }
  if (!is.null(replace_missing)) {
    return(replace_missing)
  }
  x
}
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
75
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
# handle translation of classic SnpEff effects to sequence ontology terms
# The following list defines the complete mapping between classic and SO effect
# terms even if not all of these are likely to appear in viral variant data.
# Each entry reads `CLASSIC_TERM = "so_term"`, so a key and its value cannot
# drift apart the way two parallel vectors can.
classic_snpeff_effects_to_so <- list(
  CDS = "coding_sequence_variant",
  CODON_CHANGE = "coding_sequence_variant",
  CODON_CHANGE_PLUS_CODON_DELETION = "disruptive_inframe_deletion",
  CODON_CHANGE_PLUS_CODON_INSERTION = "disruptive_inframe_insertion",
  CODON_DELETION = "inframe_deletion",
  CODON_INSERTION = "inframe_insertion",
  DOWNSTREAM = "downstream_gene_variant",
  EXON = "exon_variant",
  EXON_DELETED = "exon_loss_variant",
  FRAME_SHIFT = "frameshift_variant",
  GENE = "gene_variant",
  INTERGENIC = "intergenic_variant",
  INTERGENIC_REGION = "intergenic_region",
  INTERGENIC_CONSERVED = "conserved_intergenic_variant",
  INTRAGENIC = "intragenic_variant",
  INTRON = "intron_variant",
  INTRON_CONSERVED = "conserved_intron_variant",
  NON_SYNONYMOUS_CODING = "missense_variant",
  RARE_AMINO_ACID = "rare_amino_acid_variant",
  SPLICE_SITE_ACCEPTOR = "splice_acceptor_variant",
  SPLICE_SITE_DONOR = "splice_donor_variant",
  SPLICE_SITE_REGION = "splice_region_variant",
  START_GAINED = "5_prime_UTR_premature_start_codon_variant",
  START_LOST = "start_lost",
  STOP_GAINED = "stop_gained",
  STOP_LOST = "stop_lost",
  SYNONYMOUS_CODING = "synonymous_variant",
  SYNONYMOUS_START = "start_retained_variant",
  SYNONYMOUS_STOP = "stop_retained_variant",
  TRANSCRIPT = "transcript_variant",
  UPSTREAM = "upstream_gene_variant",
  UTR_3_DELETED = "3_prime_UTR_truncation_+_exon_loss_variant",
  UTR_3_PRIME = "3_prime_UTR_variant",
  UTR_5_DELETED = "5_prime_UTR_truncation_+_exon_loss_variant",
  UTR_5_PRIME = "5_prime_UTR_variant",
  NON_SYNONYMOUS_START = "initiator_codon_variant",
  NONE = "None",
  CHROMOSOME_LARGE_DELETION = "chromosomal_deletion"
)
# translate classic effects into SO terms leaving unknown terms (possibly SO already) as is
# vapply() guarantees a character vector even when there are no annotations
so_effects <- vapply(
  ann_final$effect,
  function(x) trans(x, classic_snpeff_effects_to_so),
  character(1)
)
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
87
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
# handle further translation of effects we care about
# Each entry reads `so_term = "simple label"`.
so_effects_translation <- list(
  missense_variant = "non-syn",
  synonymous_variant = "syn",
  disruptive_inframe_deletion = "deletion",
  inframe_deletion = "deletion",
  chromosomal_deletion = "deletion",
  disruptive_inframe_insertion = "insertion",
  inframe_insertion = "insertion",
  frameshift_variant = "frame shift",
  stop_gained = "stop gained",
  stop_lost = "stop lost"
)
# translate to our simple terms turning undefined terms into '?'
# vapply() guarantees a character vector even when there are no annotations
simple_effects <- vapply(
  so_effects,
  function(x) trans(x, so_effects_translation, replace_missing = "?"),
  character(1)
)
# complex variant effects (those that do more than one thing) are concatenated
# with either '+' (for classic terms) or '&' (for SO terms)
simple_effects[grepl("+", so_effects, fixed = TRUE)] <- "complex"
simple_effects[grepl("&", so_effects, fixed = TRUE)] <- "complex"
# an empty effect string is labelled as non-coding
simple_effects[so_effects == ""] <- "non-coding"

ann_final$effect <- simple_effects
# an empty gene string is labelled "NCR"
ann_final$gene <- sub("^$", "NCR", ann_final$gene)
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
111
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
## automatically determine gaps for the heatmap
## Compare each gene with its successor and record the positions where the
## gene changes. head()/tail() replace `1:length(x) - 1`, which parses as
## `(1:n) - 1` and only worked because a zero index is silently dropped.
gap_vector <- which(head(ann_final$gene, -1) != tail(ann_final$gene, -1))
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
115
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
# colormanagement
my_colors <- colorRampPalette(c("grey93", "brown", "black")) #heatmap
count <- length(unique(ann_final$gene)) #annotations (genes)
## brewer.pal() returns at least 3 colors and at most the palette's
## `maxcolors`, so asking for exactly `count` colors can yield a vector of
## the wrong length. Request a size the palette supports, then trim (few
## genes) or interpolate (more genes than the palette offers) to `count`.
gene_color <- local({
  max_palette_size <- brewer.pal.info[brewer_color_gene_annotation,
                                      "maxcolors"]
  base_palette <- brewer.pal(
    brewer_color_gene_annotation,
    n = min(max(count, 3), max_palette_size)
  )
  if (count > max_palette_size) {
    colorRampPalette(base_palette)(count)
  } else {
    base_palette[seq_len(count)]
  }
})
names(gene_color) <- unique(ann_final$gene)
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
121
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
# colormanagement annotations (effect)
## Lookup table pairing every effect label we can encounter with its color.
## This is not bulletproof: the labels listed here are the ones substituted
## into ann_final$effect earlier in the script, so both the labels and their
## preferred colors are hard-coded and have to be kept in sync by hand.

all_colors <- data.frame(
  color = c("white", "green", "orange", "red", "black",
            "grey", "yellow", "blue", "purple", "brown"),
  name = c("non-coding", "syn", "non-syn", "deletion", "frame shift",
           "stop gained", "stop lost", "insertion", "complex", "?"))
## Keep only the rows whose effect label actually occurs in the data
detected_effects <- unique(ann_final$effect)
subset_colors <- all_colors[all_colors$name %in% detected_effects, ]
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
137 effect_color <- subset_colors$color
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
138 names(effect_color) <- subset_colors$name
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
139 color_list <- list(gene_color = gene_color, effect_color = effect_color)
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
140 names(color_list) <- c("gene", "effect")
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
141
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
# visualize heatmap
## A requested cluster count larger than the number of samples is lowered to
## the number of samples, and the adjustment is reported.
sample_count <- length(samples$ids)
if (sample_count < pheat_number_of_clusters) {
    print(paste0("[INFO] Number of clusters: User-specified clusters (",
                 pheat_number_of_clusters,
                 ") is greater than the number of samples (",
                 sample_count, ")"))
    pheat_number_of_clusters <- sample_count
    print(paste0("[INFO] Number of clusters: now set to ",
                 pheat_number_of_clusters))
}
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
152
1
e362b3143cde "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
iuc
parents: 0
diff changeset
153
e362b3143cde "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
iuc
parents: 0
diff changeset
154 # Fix Labels
e362b3143cde "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
iuc
parents: 0
diff changeset
155 ## Prettify names, check for label parity between final and ann_final
e362b3143cde "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
iuc
parents: 0
diff changeset
fix_label <- function(name) {
    ##' Shorten a variant label for display.
    ##'
    ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A
    ##'     to: 424 AGT… > A
    ##'
    ##' @param name A single space-separated label: POS REF ALT, optionally
    ##'   followed by further fields.
    ##' @return A single string "POS REF > ALT " in which REF and ALT are cut
    ##'   to their first three characters plus an ellipsis when longer,
    ##'   followed by " :: " and the space-joined extra fields if there are
    ##'   any.
    fields <- unlist(strsplit(name, " ", fixed = TRUE))
    ## first 3 are POS REF ALT, and the rest are optional differences
    pos_ref_alt <- fields[1:3]
    rest <- ""
    if (length(fields) > 3) {
        ## collapse joins the extra fields into ONE string; sep would leave
        ## one element per field and make the label a vector
        rest <- paste0(" :: ", paste(fields[-(1:3)], collapse = " "))
    }
    ## Trim the REF or ALT if too long
    for (i in 2:3) {
        if (nchar(pos_ref_alt[i]) > 3) {
            pos_ref_alt[i] <- paste0(substring(pos_ref_alt[i], 1, 3), "…")
        }
    }
    ## Join required fields and the rest; the single space in front of `rest`
    ## is emitted even when `rest` is empty, so three-field labels end in a
    ## space
    paste0(pos_ref_alt[1], " ",
           pos_ref_alt[2], " > ",
           pos_ref_alt[3],
           " ", rest)
}
e362b3143cde "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
iuc
parents: 0
diff changeset
180
e362b3143cde "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
iuc
parents: 0
diff changeset
## Apply the prettified labels to both tables. vapply insists on exactly one
## string per label, so a malformed label fails right here instead of silently
## turning the names into a list or matrix.
colnames(final) <- vapply(colnames(final), fix_label, character(1))
rownames(ann_final) <- vapply(rownames(ann_final), fix_label, character(1))
## sanity test: every column of final must still have a row in ann_final
stopifnot(all(colnames(final) %in% rownames(ann_final)))
e362b3143cde "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
iuc
parents: 0
diff changeset
185
e362b3143cde "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
iuc
parents: 0
diff changeset
186
e362b3143cde "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
iuc
parents: 0
diff changeset
187 # Perform Plotting
0
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
get_plot_dims <- function(heat_map) {
    ## Measure a pheatmap object: returns list(height = , width = ) holding
    ## the summed gtable heights and widths after conversion to inches.
    ## Needed for plot formats that cannot be written to a file directly and
    ## for which a plotting device of the right size has to be opened first.
    ## source: https://stackoverflow.com/a/61876386
    total_inches <- function(extents, converter) {
        sum(sapply(extents, converter, "in"))
    }
    list(
        height = total_inches(heat_map$gtable$heights, grid::convertHeight),
        width = total_inches(heat_map$gtable$widths, grid::convertWidth)
    )
}
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
199
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
## Cell height: take the larger of 160 and 16 per distinct effect/gene value
## in ann_final, divide it over the rows of final, and never go below 15.
n_annotation_values <- length(unique(ann_final$effect)) +
    length(unique(ann_final$gene))
min_total_height <- max(16 * n_annotation_values, 160)
height <- round(max(min_total_height / nrow(final), 15))
## Cell width is `ratio` times the cell height
width <- round(ratio * height)
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
205
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
206
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
## Only extensions that name a graphics device function are accepted.
if (!(out_ext %in% c("svg", "jpeg", "png", "pdf"))) {
    ## report the variable that was actually tested
    stop("Unknown extension: ", out_ext, ", aborting.")
}
## Look up the device function of the same name; restricting the lookup to
## functions keeps a plain variable called e.g. `pdf` from being picked up
plot_device <- get(out_ext, mode = "function")
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
211
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
212
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
## A constant scaling factor based on the calculated dimensions
## above does not work for PNG, so for the bitmap formats (jpeg, png) we
## resort to feeding pheatmap with a direct filename; all other formats
## get NA here.
plot_filename <- if (out_ext %in% c("jpeg", "png")) out_file else NA
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
220
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
## SVG is not a format pheatmap knows how to write to a file directly.
## As a workaround we
## 1. create the plot object
## 2. get its dimensions
## 3. set up a svg plotting device with these dimensions
## 4. print the heatmap object to the device
## For jpeg/png, plot_filename holds the output path and is passed to
## pheatmap through `filename`; otherwise it is NA.
hm <- pheatmap(
    final,
    color = my_colors(100),
    cellwidth = width,
    cellheight = height,
    ## label font sizes are a third of the cell size
    fontsize_col = round(1 / 3 * width),
    fontsize_row = round(1 / 3 * min(c(height, width))),
    clustering_method = pheat_clustering_method,
    cluster_rows = pheat_clustering,
    ## spelled out as FALSE: the alias F is an ordinary, reassignable variable
    cluster_cols = FALSE,
    cutree_rows = pheat_number_of_clusters,
    annotation_col = ann_final,
    annotation_colors = color_list,
    filename = plot_filename,
    gaps_col = gap_vector
)
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
243
1062d6ad6503 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
iuc
parents:
diff changeset
## pdf and svg output is not written by the pheatmap call itself: open a
## device sized to the rendered heatmap, draw the stored object on it, and
## close the device again.
writes_via_device <- out_ext %in% c("pdf", "svg")
if (writes_via_device) {
    plot_dims <- get_plot_dims(hm)
    plot_device(out_file,
                height = plot_dims$height,
                width = plot_dims$width)
    print(hm)
    dev.off()
}