# HG changeset patch
# User iuc
# Date 1608335281 0
# Node ID dc51db22310c7bf74761bd1bb9c49df6fb65e36d
# Parent e362b3143cde66c51dc3c4e53264def316f741b7
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit d1c54d077cfc0eeb9699719760e668948cb9bbbc"
diff -r e362b3143cde -r dc51db22310c heatmap_for_variants.R
--- a/heatmap_for_variants.R Thu Dec 10 13:41:29 2020 +0000
+++ b/heatmap_for_variants.R Fri Dec 18 23:48:01 2020 +0000
@@ -116,7 +116,7 @@
# colormanagement
my_colors <- colorRampPalette(c("grey93", "brown", "black")) #heatmap
count <- length(unique(ann_final$gene)) #annotations (genes)
-gene_color <- c(brewer.pal(brewer_color_gene_annotation, n = count))
+gene_color <- rep(c(brewer.pal(brewer_color_gene_annotation, n = count)), length.out = count)
names(gene_color) <- unique(ann_final$gene)
# colormanagement annotations (effect)
@@ -161,7 +161,7 @@
pos_ref_alt <- cols[1:3]
rest <- ""
if (length(cols) > 3) {
- rest <- paste0(" :: ", paste(cols[4:length(cols)], sep = " "))
+ rest <- paste0(" :: ", paste0(cols[4:length(cols)], collapse = " "))
}
## Trim the REF or ALT if too long
if (str_length(pos_ref_alt[2]) > 3) {
@@ -175,7 +175,7 @@
pos_ref_alt[2], " > ",
pos_ref_alt[3])
## Join rest
- new_name <- paste0(new_name, " ", paste(rest))
+ new_name <- paste0(new_name, " ", rest)
}
colnames(final) <- sapply(colnames(final), fix_label)
diff -r e362b3143cde -r dc51db22310c helperFunctions.R
--- a/helperFunctions.R Thu Dec 10 13:41:29 2020 +0000
+++ b/helperFunctions.R Fri Dec 18 23:48:01 2020 +0000
@@ -54,22 +54,28 @@
group_ind <- tab %>% group_by(POS, REF, ALT) %>% select(POS, REF, ALT) # nolint
nlines <- nrow(tab)
groups <- list()
- groups[[1]] <- c(1, 1)
- last_pa <- paste(group_ind[1, ])
- for (r in 2:nlines) {
- curr_pa <- paste(group_ind[r, ])
- group_ind_diff_between_lines <- !all(last_pa == curr_pa)
- if (group_ind_diff_between_lines) {
- ## end of current group, start of new
- groups[[length(groups)]][2] <- r - 1 ## change prev end
- groups[[length(groups) + 1]] <- c(r, r) ## set (start, end)
- } else if (r == nlines) {
- ## i.e. if the very last line shares
- ## the same POS REF ALT as the one before,
- ## close current group.
- groups[[length(groups)]][2] <- r
+ if (nlines) {
+ groups[[1]] <- c(1, 1)
+ } else {
+ groups[[1]] <- c(0, 0)
+ }
+ if (nlines >= 2) {
+ last_pa <- paste(group_ind[1, ])
+ for (r in 2:nlines) {
+ curr_pa <- paste(group_ind[r, ])
+ group_ind_diff_between_lines <- !all(last_pa == curr_pa)
+ if (group_ind_diff_between_lines) {
+ ## end of current group, start of new
+ groups[[length(groups)]][2] <- r - 1 ## change prev end
+ groups[[length(groups) + 1]] <- c(r, r) ## set (start, end)
+ } else if (r == nlines) {
+ ## i.e. if the very last line shares
+ ## the same POS REF ALT as the one before,
+ ## close current group.
+ groups[[length(groups)]][2] <- r
+ }
+ last_pa <- curr_pa
}
- last_pa <- curr_pa
}
as_tibble(do.call(
"rbind",
@@ -82,7 +88,8 @@
read_and_process <- function(id) {
file <- (samples %>% filter(ids == id))$files # nolint
- variants <- read.table(file, header = T, sep = "\t")
+ variants <- read.table(file, header = T, sep = "\t", colClasses = "character")
+ variants["AF"] <- lapply(variants["AF"], as.numeric)
uniq_ids <- split_table_and_process(variants)
if (nrow(variants) != nrow(uniq_ids)) {
stop(paste0(id, " '", file, "' failed: ", file, "\"",
diff -r e362b3143cde -r dc51db22310c snpEffExtract.R
--- a/snpEffExtract.R Thu Dec 10 13:41:29 2020 +0000
+++ b/snpEffExtract.R Fri Dec 18 23:48:01 2020 +0000
@@ -5,6 +5,12 @@
tsv_eff_from_vcf <- function(input_vcf, output_tab) {
read_vcf <- readVcf(input_vcf) # nolint
+ if (!nrow(read_vcf@fixed)) {
+ # no variants in file -> just write a valid header line
+ write(c("CHROM", "POS", "REF", "ALT", "AF", "EFF[*].GENE", "EFF[*].EFFECT"),
+ ncolumns = 7, file = output_tab, sep = "\t")
+ return()
+ }
chrom_pos <- data.frame(read_vcf@rowRanges)[, c("seqnames", "start")]
ref_alt_filter <- read_vcf@fixed[, c("REF", "ALT", "FILTER")]
dp_af <- read_vcf@info[c("DP", "AF")]
diff -r e362b3143cde -r dc51db22310c snpfreqplot.xml
--- a/snpfreqplot.xml Thu Dec 10 13:41:29 2020 +0000
+++ b/snpfreqplot.xml Fri Dec 18 23:48:01 2020 +0000
@@ -3,7 +3,7 @@
Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data
1.0
- 1
+ 2
r-base
@@ -179,6 +179,14 @@
+
+
+
+
+
+
@@ -242,15 +250,16 @@
-
-
+
+
diff -r e362b3143cde -r dc51db22310c test-data/no_variants.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/no_variants.vcf Fri Dec 18 23:48:01 2020 +0000
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.0
+##fileDate=20200707
+##source=lofreq call --verbose --ref reference.fa --call-indels --min-cov 5 --max-depth 1000000 --min-bq 30 --min-alt-bq 30 --min-mq 20 --max-mq 255 --min-jq 0 --min-alt-jq 0 --def-alt-jq 0 --sig 0.0005 --bonf dynamic --no-default-filter --no-default-filter -r NC_045512.2:1-14951 -o /data/dnb02/galaxy_db/job_working_directory/009/430/9430166/working/pp-tmp/lofreq2_call_parallelpfou9vt9/0.vcf.gz reads.bam
+##reference=reference.fa
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER= 0.001000">
+##SnpEffVersion="4.5covid19 (build 2020-04-15 22:26), by Pablo Cingolani"
+##SnpEffCmd="SnpEff -i vcf -o vcf -formatEff -classic -no-downstream -no-intergenic -no-upstream -no-utr -stats /data/dnb02/galaxy_db/job_working_directory/009/430/9430172/galaxy_dataset_24243789.dat NC_045512.2 /data/dnb02/galaxy_db/files/022/094/dataset_22094491.dat "
+##INFO=
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO
diff -r e362b3143cde -r dc51db22310c test-data/single_variant.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/single_variant.vcf Fri Dec 18 23:48:01 2020 +0000
@@ -0,0 +1,26 @@
+##fileformat=VCFv4.0
+##fileDate=20200707
+##source=lofreq call --verbose --ref reference.fa --call-indels --min-cov 5 --max-depth 1000000 --min-bq 30 --min-alt-bq 30 --min-mq 20 --max-mq 255 --min-jq 0 --min-alt-jq 0 --def-alt-jq 0 --sig 0.0005 --bonf dynamic --no-default-filter --no-default-filter -r NC_045512.2:1-14951 -o /data/dnb02/galaxy_db/job_working_directory/009/430/9430166/working/pp-tmp/lofreq2_call_parallelpfou9vt9/0.vcf.gz reads.bam
+##reference=reference.fa
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER= 0.001000">
+##SnpEffVersion="4.5covid19 (build 2020-04-15 22:26), by Pablo Cingolani"
+##SnpEffCmd="SnpEff -i vcf -o vcf -formatEff -classic -no-downstream -no-intergenic -no-upstream -no-utr -stats /data/dnb02/galaxy_db/job_working_directory/009/430/9430172/galaxy_dataset_24243789.dat NC_045512.2 /data/dnb02/galaxy_db/files/022/094/dataset_22094491.dat "
+##INFO=
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO
+NC_045512.2 241 . C T 13172.0 PASS DP=363;AF=0.980716;SB=0;DP4=1,1,167,193
+