Mercurial > repos > iuc > snpfreqplot
changeset 1:e362b3143cde draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
author | iuc |
---|---|
date | Thu, 10 Dec 2020 13:41:29 +0000 |
parents | 1062d6ad6503 |
children | dc51db22310c |
files | heatmap_for_variants.R helperFunctions.R snpfreqplot.xml test-data/heatmap.clustering2.jpeg test-data/heatmap.default.pdf test-data/heatmap.from_vcf.pdf test-data/heatmap.imageopts.png test-data/input436.tabular |
diffstat | 8 files changed, 66 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/heatmap_for_variants.R Wed Dec 02 21:23:06 2020 +0000 +++ b/heatmap_for_variants.R Thu Dec 10 13:41:29 2020 +0000 @@ -18,8 +18,8 @@ extractall_data <- function(id) { variants <- variant_files[[id]] tmp <- variants %>% - mutate(posalt = uni_select) %>% - select(posalt, AF) + mutate(unique_selectors = group_select) %>% + select(unique_selectors, AF) colnames(tmp) <- c("Mutation", id) return(tmp) } @@ -27,9 +27,12 @@ extractall_annots <- function(id) { variants <- variant_files[[id]] tmp <- variants %>% - mutate(posalt = uni_select, + mutate(unique_selectors = group_select, effect = EFF....EFFECT, gene = EFF....GENE) %>% - select(posalt, effect, gene) + select(unique_selectors, effect, gene) + # allow "." as an alternative missing value in EFF.EFFECT and EFF.GENE + tmp$effect <- sub("^\\.$", "", tmp$effect) + tmp$gene <- sub("^\\.$", "", tmp$gene) return(tmp) } @@ -53,10 +56,11 @@ ann_final <- processed_annots %>% reduce(function(x, y) { unique(rbind(x, y))}) %>% - filter(posalt %in% colnames(final)) ## apply frequency filter + ## apply frequency filter + filter(unique_selectors %in% colnames(final)) ann_final <- as_tibble(ann_final[str_order( - ann_final$posalt, numeric = T), ]) %>% - column_to_rownames("posalt") ## sort + ann_final$unique_selectors, numeric = T), ]) %>% + column_to_rownames("unique_selectors") ## sort # rename annotations trans <- function(x, mapping, replace_missing=NULL) { @@ -146,6 +150,41 @@ pheat_number_of_clusters)) } + + # Fix Labels +## Prettify names, check for label parity between final and ann_final +fix_label <- function(name) { + ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A + ##' to: 424 AGT… > A + cols <- unlist(str_split(name, " ")) + ## first 3 are POS REF ALT, and the rest are optional differences + pos_ref_alt <- cols[1:3] + rest <- "" + if (length(cols) > 3) { + rest <- paste0(" :: ", paste(cols[4:length(cols)], sep = " ")) + } + ## Trim the REF or ALT if too long + if (str_length(pos_ref_alt[2]) > 3) { + pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…") + } + if (str_length(pos_ref_alt[3]) > 3) { + pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…") + } + ## Join required + new_name <- paste0(pos_ref_alt[1], " ", + pos_ref_alt[2], " > ", + pos_ref_alt[3]) + ## Join rest + new_name <- paste0(new_name, " ", paste(rest)) +} + +colnames(final) <- sapply(colnames(final), fix_label) +rownames(ann_final) <- sapply(rownames(ann_final), fix_label) +## sanity test +stopifnot(all(colnames(final) %in% rownames(ann_final))) + + + # Perform Plotting get_plot_dims <- function(heat_map) { ## get the dimensions of a pheatmap object ## useful for plot formats that can't be written to a file directly, but
--- a/helperFunctions.R Wed Dec 02 21:23:06 2020 +0000 +++ b/helperFunctions.R Thu Dec 10 13:41:29 2020 +0000 @@ -38,8 +38,8 @@ } } } - uni_select <- c("POS", "ALT", diff.colnames) - return(lines[, uni_select] %>% unite(uni_select, sep = " ")) # nolint + group_select <- c("POS", "REF", "ALT", diff.colnames) + return(lines[, group_select] %>% unite(group_select, sep = " ")) # nolint } split_table_and_process <- function(tab) { @@ -51,21 +51,21 @@ #' #' This function is necessary because tidyr is difficult #' to write custom group binding functions. - posalts <- tab %>% group_by(POS, ALT) %>% select(POS, ALT) # nolint + group_ind <- tab %>% group_by(POS, REF, ALT) %>% select(POS, REF, ALT) # nolint nlines <- nrow(tab) groups <- list() groups[[1]] <- c(1, 1) - last_pa <- paste(posalts[1, ]) + last_pa <- paste(group_ind[1, ]) for (r in 2:nlines) { - curr_pa <- paste(posalts[r, ]) - posalt_diff_between_lines <- !all(last_pa == curr_pa) - if (posalt_diff_between_lines) { + curr_pa <- paste(group_ind[r, ]) + group_ind_diff_between_lines <- !all(last_pa == curr_pa) + if (group_ind_diff_between_lines) { ## end of current group, start of new groups[[length(groups)]][2] <- r - 1 ## change prev end groups[[length(groups) + 1]] <- c(r, r) ## set (start, end) } else if (r == nlines) { ## i.e. if the very last line shares - ## the same POS ALT as the one before, + ## the same POS REF ALT as the one before, ## close current group. groups[[length(groups)]][2] <- r }
--- a/snpfreqplot.xml Wed Dec 02 21:23:06 2020 +0000 +++ b/snpfreqplot.xml Thu Dec 10 13:41:29 2020 +0000 @@ -3,14 +3,13 @@ <description>Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data</description> <macros> <token name="@VERSION@">1.0</token> - <token name="@GALAXY_VERSION@">0</token> + <token name="@GALAXY_VERSION@">1</token> </macros> <requirements> <requirement type="package" version="4.0">r-base</requirement> <requirement type="package" version="1.0.12">r-pheatmap</requirement> <requirement type="package" version="1.3.0">r-tidyverse</requirement> <requirement type="package" version="1.36.0">bioconductor-variantannotation</requirement> - <requirement type="package" version="">xorg-libxt</requirement> </requirements> <edam_topics> <edam_topic>topic_0797</edam_topic> @@ -187,7 +186,7 @@ <param name="color" value="Spectral" /> <param name="output_type" value="png" /> </section> - <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="86000" /> + <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="100000" /> </test> <test expect_num_outputs="1"> <!-- SVG, clustering defaults --> @@ -202,7 +201,7 @@ </section> <output name="outfile" ftype="svg"> <assert_contents> - <has_text text="viewBox="0 0 1156 335"" /> + <has_text text="viewBox="0 0 1156 361"" /> </assert_contents> </output> </test> @@ -219,7 +218,7 @@ <param name="ratio" value="1.2" /> <param name="output_type" value="jpeg" /> </section> - <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="121000" /> + <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="130000" /> </test> <test expect_num_outputs="1"> <!-- PDF, vcf test --> @@ -238,7 +237,7 @@ </section> <output name="outfile" ftype="svg"> <assert_contents> - <has_text text="viewBox="0 0 754 271"" /> + <has_text text="viewBox="0 0 754 292"" /> </assert_contents> </output> </test> @@ -251,7 +250,7 @@ </section> <output name="outfile" ftype="svg"> <assert_contents> - <has_text text="viewBox="0 0 3101 697"" /> + <has_text text="viewBox="0 0 3101 879"" /> </assert_contents> </output> </test> @@ -292,6 +291,11 @@ Such files can be produced with SnpSift Extract Fields and can be useful if preprocessing of the lists with standard text processing tools is required. + .. class:: infomark + + To represent empty EFF fields in the tabular format you can choose between + ``.`` and the empty string. + ---- Example output:
--- a/test-data/input436.tabular Wed Dec 02 21:23:06 2020 +0000 +++ b/test-data/input436.tabular Thu Dec 10 13:41:29 2020 +0000 @@ -1,5 +1,5 @@ CHROM POS REF ALT AF EFF[*].AA EFF[*].GENE EFF[*].EFFECT -NC_045512 241 C T 0.992195 +NC_045512 241 C T 0.992195 . . . NC_045512 685 AAAGTCATTT A 0.363753 KSF141 ORF1ab CODON_DELETION NC_045512 1059 C T 0.988211 T265I ORF1ab NON_SYNONYMOUS_CODING NC_045512 3037 C T 0.988485 F924 ORF1ab SYNONYMOUS_CODING