# HG changeset patch # User iuc # Date 1607607689 0 # Node ID e362b3143cde66c51dc3c4e53264def316f741b7 # Parent 1062d6ad65032cf464e0ae733cf3d637adba3057 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2" diff -r 1062d6ad6503 -r e362b3143cde heatmap_for_variants.R --- a/heatmap_for_variants.R Wed Dec 02 21:23:06 2020 +0000 +++ b/heatmap_for_variants.R Thu Dec 10 13:41:29 2020 +0000 @@ -18,8 +18,8 @@ extractall_data <- function(id) { variants <- variant_files[[id]] tmp <- variants %>% - mutate(posalt = uni_select) %>% - select(posalt, AF) + mutate(unique_selectors = group_select) %>% + select(unique_selectors, AF) colnames(tmp) <- c("Mutation", id) return(tmp) } @@ -27,9 +27,12 @@ extractall_annots <- function(id) { variants <- variant_files[[id]] tmp <- variants %>% - mutate(posalt = uni_select, + mutate(unique_selectors = group_select, effect = EFF....EFFECT, gene = EFF....GENE) %>% - select(posalt, effect, gene) + select(unique_selectors, effect, gene) + # allow "." as an alternative missing value in EFF.EFFECT and EFF.GENE + tmp$effect <- sub("^\\.$", "", tmp$effect) + tmp$gene <- sub("^\\.$", "", tmp$gene) return(tmp) } @@ -53,10 +56,11 @@ ann_final <- processed_annots %>% reduce(function(x, y) { unique(rbind(x, y))}) %>% - filter(posalt %in% colnames(final)) ## apply frequency filter + ## apply frequency filter + filter(unique_selectors %in% colnames(final)) ann_final <- as_tibble(ann_final[str_order( - ann_final$posalt, numeric = T), ]) %>% - column_to_rownames("posalt") ## sort + ann_final$unique_selectors, numeric = T), ]) %>% + column_to_rownames("unique_selectors") ## sort # rename annotations trans <- function(x, mapping, replace_missing=NULL) { @@ -146,6 +150,41 @@ pheat_number_of_clusters)) } + + # Fix Labels +## Prettify names, check for label parity between final and ann_final +fix_label <- function(name) { + ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A + ##' to: 424 AGT… > A + cols <- unlist(str_split(name, " ")) + ## first 3 are POS REF ALT, and the rest are optional differences + pos_ref_alt <- cols[1:3] + rest <- "" + if (length(cols) > 3) { + rest <- paste0(" :: ", paste(cols[4:length(cols)], sep = " ")) + } + ## Trim the REF or ALT if too long + if (str_length(pos_ref_alt[2]) > 3) { + pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…") + } + if (str_length(pos_ref_alt[3]) > 3) { + pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…") + } + ## Join required + new_name <- paste0(pos_ref_alt[1], " ", + pos_ref_alt[2], " > ", + pos_ref_alt[3]) + ## Join rest + new_name <- paste0(new_name, " ", paste(rest)) +} + +colnames(final) <- sapply(colnames(final), fix_label) +rownames(ann_final) <- sapply(rownames(ann_final), fix_label) +## sanity test +stopifnot(all(colnames(final) %in% rownames(ann_final))) + + + # Perform Plotting get_plot_dims <- function(heat_map) { ## get the dimensions of a pheatmap object ## useful for plot formats that can't be written to a file directly, but diff -r 1062d6ad6503 -r e362b3143cde helperFunctions.R --- a/helperFunctions.R Wed Dec 02 21:23:06 2020 +0000 +++ b/helperFunctions.R Thu Dec 10 13:41:29 2020 +0000 @@ -38,8 +38,8 @@ } } } - uni_select <- c("POS", "ALT", diff.colnames) - return(lines[, uni_select] %>% unite(uni_select, sep = " ")) # nolint + group_select <- c("POS", "REF", "ALT", diff.colnames) + return(lines[, group_select] %>% unite(group_select, sep = " ")) # nolint } split_table_and_process <- function(tab) { @@ -51,21 +51,21 @@ #' #' This function is necessary because tidyr is difficult #' to write custom group binding functions. - posalts <- tab %>% group_by(POS, ALT) %>% select(POS, ALT) # nolint + group_ind <- tab %>% group_by(POS, REF, ALT) %>% select(POS, REF, ALT) # nolint nlines <- nrow(tab) groups <- list() groups[[1]] <- c(1, 1) - last_pa <- paste(posalts[1, ]) + last_pa <- paste(group_ind[1, ]) for (r in 2:nlines) { - curr_pa <- paste(posalts[r, ]) - posalt_diff_between_lines <- !all(last_pa == curr_pa) - if (posalt_diff_between_lines) { + curr_pa <- paste(group_ind[r, ]) + group_ind_diff_between_lines <- !all(last_pa == curr_pa) + if (group_ind_diff_between_lines) { ## end of current group, start of new groups[[length(groups)]][2] <- r - 1 ## change prev end groups[[length(groups) + 1]] <- c(r, r) ## set (start, end) } else if (r == nlines) { ## i.e. if the very last line shares - ## the same POS ALT as the one before, + ## the same POS REF ALT as the one before, ## close current group. groups[[length(groups)]][2] <- r } diff -r 1062d6ad6503 -r e362b3143cde snpfreqplot.xml --- a/snpfreqplot.xml Wed Dec 02 21:23:06 2020 +0000 +++ b/snpfreqplot.xml Thu Dec 10 13:41:29 2020 +0000 @@ -3,14 +3,13 @@ Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data 1.0 - 0 + 1 r-base r-pheatmap r-tidyverse bioconductor-variantannotation - xorg-libxt topic_0797 @@ -187,7 +186,7 @@ - + @@ -202,7 +201,7 @@ - + @@ -219,7 +218,7 @@ - + @@ -238,7 +237,7 @@ - + @@ -251,7 +250,7 @@ - + @@ -292,6 +291,11 @@ Such files can be produced with SnpSift Extract Fields and can be useful if preprocessing of the lists with standard text processing tools is required. + .. class:: infomark + + To represent empty EFF fields in the tabular format you can choose between + ``.`` and the empty string. + ---- Example output: diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.clustering2.jpeg Binary file test-data/heatmap.clustering2.jpeg has changed diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.default.pdf Binary file test-data/heatmap.default.pdf has changed diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.from_vcf.pdf Binary file test-data/heatmap.from_vcf.pdf has changed diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.imageopts.png Binary file test-data/heatmap.imageopts.png has changed diff -r 1062d6ad6503 -r e362b3143cde test-data/input436.tabular --- a/test-data/input436.tabular Wed Dec 02 21:23:06 2020 +0000 +++ b/test-data/input436.tabular Thu Dec 10 13:41:29 2020 +0000 @@ -1,5 +1,5 @@ CHROM POS REF ALT AF EFF[*].AA EFF[*].GENE EFF[*].EFFECT -NC_045512 241 C T 0.992195 +NC_045512 241 C T 0.992195 . . . NC_045512 685 AAAGTCATTT A 0.363753 KSF141 ORF1ab CODON_DELETION NC_045512 1059 C T 0.988211 T265I ORF1ab NON_SYNONYMOUS_CODING NC_045512 3037 C T 0.988485 F924 ORF1ab SYNONYMOUS_CODING