Mercurial > repos > galaxy-australia > plot_ragtag_paf
changeset 0:664d63b0e817 draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 1d55571817c874123ca77eb12c8e579aa936eeab
author | galaxy-australia |
---|---|
date | Wed, 25 Sep 2024 07:52:38 +0000 |
parents | |
children | 94e0834ae4ed |
files | plot_ragtag_paf.R plot_ragtag_paf.xml static/images/ragtag.png test-data/ragtag.agp test-data/ragtag.paf.gz |
diffstat | 5 files changed, 496 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plot_ragtag_paf.R Wed Sep 25 07:52:38 2024 +0000 @@ -0,0 +1,257 @@ +#!/usr/bin/env Rscript + +options( + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } +) + +library(data.table) +library(ggplot2) +library(pafr) +library(viridisLite) +library(yaml) + +args <- commandArgs(trailingOnly = TRUE) + +############# +# FUNCTIONS # +############# + + +# Adjust the PAF coordinates to the continuous x-axis +adjust_coords <- function(qname, qstart, qend, tname, tstart, tend) { + my_qstart <- lookup_qstart(qname) + my_tstart <- lookup_tstart(tname) + return( + data.table( + adj_qstart = qstart + my_qstart, + adj_qend = qend + my_qstart, + adj_tstart = tstart + my_tstart, + adj_tend = tend + my_tstart + ) + ) +} + +# calculate spacing between contigs so the ref and query take up the same x-axis +# space +get_padding <- function(paf) { + tlen_sum <- unique(paf, by = "tname")[, sum(tlen)] + tgaps_total <- paf[, length(unique(tname))] + + qlen_sum <- unique(paf, by = "qname")[, sum(qlen)] + qgaps_total <- paf[, length(unique(qname))] + + # the minumum gap is going to be one tenth of the x-axis space + if (tlen_sum > qlen_sum) { + min_gap <- (tlen_sum * gap_size) / tgaps_total + full_tlen <- tlen_sum + (tgaps_total * min_gap) + + total_qpadding <- full_tlen - qlen_sum + return( + c( + "t_padding" = min_gap, + "q_padding" = total_qpadding / qgaps_total + ) + ) + } else if (tlen_sum < qlen_sum) { + min_gap <- (qlen_sum * gap_size) / qgaps_total + full_qlen <- qlen_sum + (qgaps_total * min_gap) + + total_tpadding <- full_qlen - tlen_sum + + return( + c( + "t_padding" = total_tpadding / tgaps_total, + "q_padding" = min_gap + ) + ) + } else { + min_gap <- (tlen_sum * gap_size) / tgaps_total + return( + c( + "t_padding" = min_gap, + "q_padding" = min_gap + ) + ) + } +} + +# offsets for the adjusted coordinates +lookup_tstart <- function(x) { + return(unique(tstarts[tname == x, shift_tstart])) +} + + +lookup_qstart <- function(x) { + return(unique(qstarts[qname == x, shift_qstart])) +} + + +########### +# GLOBALS # +########### + +# PAF column spec +sort_columns <- c("tname", "tstart", "tend", "qname", "qstart", "qend") + +config_file <- args[1] +config <- yaml.load_file(config_file) +typed_config <- lapply(config, type.convert, as.is = TRUE) +list2env(typed_config, envir = .GlobalEnv) + +######## +# MAIN # +######## + +# read the data +agp <- fread(agp_file, fill = TRUE, skip = 2)[!V5 %in% c("N", "U")] +raw_paf <- read_paf(paf_file) +paf_dt <- data.table(raw_paf) + +# calculate spacing +padding <- get_padding(paf_dt[tp == "P" & nmatch >= min_nmatch]) + +# order the reference contigs +paf_dt[, tname := factor(tname, levels = gtools::mixedsort(unique(tname)))] +setkeyv(paf_dt, cols = sort_columns) + +# generate continuous reference coordinates +tpaf <- unique(paf_dt, by = "tname") +tpaf[, pad_tstart := shift(cumsum(tlen + padding[["t_padding"]]), 1, 0)] +tpaf[, shift_tstart := pad_tstart + (padding[["t_padding"]] / 2)] +tpaf[, pad_tend := shift_tstart + tlen] + +# order the query contigs by their position in the AGP file +query_order <- agp[, V6] + +query_paf <- paf_dt[qname %in% query_order & tp == "P" & nmatch >= min_nmatch] +subset_query_order <- query_order[query_order %in% query_paf[, qname]] + +# Map query contigs onto a universal x-scale +query_paf[, qname := factor(qname, levels = subset_query_order)] +setkeyv(query_paf, cols = sort_columns) + +qpaf <- unique(query_paf, by = "qname") +qpaf[, pad_qstart := shift(cumsum(qlen + padding[["q_padding"]]), 1, 0)] +qpaf[, shift_qstart := pad_qstart + (padding[["q_padding"]] / 2)] +qpaf[, pad_qend := shift_qstart + qlen] + +# generate offsets for the alignment records +tstarts <- unique(tpaf[, .(tname, shift_tstart)]) +qstarts <- unique(qpaf[, .(qname, shift_qstart)]) + +# adjust the alignment coordinates +paf_dt[, + c( + "adj_qstart", + "adj_qend", + "adj_tstart", + "adj_tend" + ) := adjust_coords(qname, qstart, qend, tname, tstart, tend), + by = .(qname, qstart, qend, tname, tstart, tend) +] + +# generate polygons. P is for primary alignments only +polygon_y_bump <- 0.017 # account for contig thickness +paf_polygons <- paf_dt[ + tp == "P" & nmatch >= min_nmatch, + .( + x = c(adj_tstart, adj_qstart, adj_qend, adj_tend), + y = c( + t_y + polygon_y_bump, + q_y - polygon_y_bump, + q_y - polygon_y_bump, + t_y + polygon_y_bump + ), + id = paste0("polygon", .GRP) + ), + by = .(qname, qstart, qend, tname, tstart, tend) +] + +# set up plot +total_height <- (q_y - t_y) * 1.618 +y_axis_space <- (total_height - (q_y - t_y)) / 2 +middle_x <- tpaf[1, shift_tstart] + tpaf[.N, pad_tend] / 2 + + + +all_contig_names <- c(tpaf[, unique(tname)]) +all_colours <- viridis( + length(all_contig_names) + palette_space + 1 +) +names(all_colours) <- c( + "query", + rep("blank", palette_space), + all_contig_names +) + +# Plot the ideogram with ribbons connecting the two sets of contigs +gp <- ggplot() + + theme_void(base_family = "Lato", base_size = 12) + + scale_fill_manual( + values = all_colours, guide = "none" + ) + + scale_colour_manual( + values = all_colours, guide = "none" + ) + + geom_polygon( + data = paf_polygons, + aes( + x = x, y = y, group = id, fill = tname + ), alpha = 0.5 + ) + + geom_segment( + data = tpaf, + aes( + x = shift_tstart, + xend = pad_tend, + colour = tname + ), + y = t_y, + linewidth = 5, + lineend = "butt" + ) + + geom_segment( + data = qpaf, + aes( + x = shift_qstart, + xend = pad_qend + ), + colour = all_colours[["query"]], + y = q_y, + linewidth = 5, + lineend = "butt" + ) + + ylim( + t_y - y_axis_space, + q_y + y_axis_space + ) + + annotate( + geom = "text", + label = "Query contigs", + x = middle_x, + y = q_y + (y_axis_space / 3), + hjust = 0.5, + vjust = 0.5 + ) + + annotate( + geom = "text", + label = "Reference contigs", + x = middle_x, + y = t_y - (y_axis_space / 3), + hjust = 0.5, + vjust = 0.5 + ) + +ggsave(plot_file, + gp, + width = plot_width, + height = plot_height, + units = "mm", + device = cairo_pdf +) + +sessionInfo()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plot_ragtag_paf.xml Wed Sep 25 07:52:38 2024 +0000 @@ -0,0 +1,81 @@ +<tool id="plot_ragtag_paf" name="Plot RagTag output" version="0.0.1"> + <description>to compare query contigs to the reference</description> + <requirements> + <container type="docker">ghcr.io/tomharrop/r-containers:r2u_24.04_cv1</container> + <!-- The following requirements work but the fonts aren't configured + correctly --> + <!-- <requirement type="package" version="0.0.2">r-pafr</requirement> --> + <!-- <requirement type="package" version="0.4.2">r-viridislite</requirement> --> + <!-- <requirement type="package" version="1.15.4">r-data.table</requirement> --> + <!-- <requirement type="package" version="2.3.10">r-yaml</requirement> --> + <!-- <requirement type="package" version="3.5.1">r-ggplot2</requirement> --> + <!-- <requirement type="package" version="3.9.5">r-gtools</requirement> --> + </requirements> + <command detect_errors="exit_code"><![CDATA[ +Rscript +'${__tool_directory__}/plot_ragtag_paf.R' +'${plot_config}' + ]]></command> + <configfiles> + <configfile name="plot_config"> +agp_file: '${input_agp}' +paf_file: '${input_paf}' +plot_file: 'ragtag.pdf' +fontsize: '${plot_params.fontsize}' +gap_size: '${plot_params.gap_size}' +min_nmatch: '${min_nmatch}' +palette_space: '${plot_params.palette_space}' +plot_height: '${plot_params.plot_height}' +plot_width: '${plot_params.plot_width}' +q_y: '${plot_params.q_y}' +t_y: '${plot_params.t_y}' + </configfile> + </configfiles> + <inputs> + <param format="paf" name="input_paf" label="PAF output from RagTag" type="data"/> + <param format="agp" name="input_agp" label="AGP output from RagTag" type="data"/> + <param type="integer" name="min_nmatch" label="Minimum alignment length to include in plot" value="20000"/> + <section name="plot_params" title="Plot parameters"> + <param type="integer" name="plot_width" label="Plot width (mm)" value="254"/> + <param type="integer" name="plot_height" label="Plot height (mm)" value="191"/> + <param type="integer" name="fontsize" label="Fontsize (pt)" value="12"/> + <param type="integer" name="palette_space" label="Number of unused colours in the colour palette between the query contig colour and the colour of the first reference contig." help="Try a higher value if the query contigs look too similar to the reference contigs." value="4"/> + <param type="float" min="0" max="1" name="gap_size" label="Total length of gaps between contigs relative to the total assembly length." help="Try a higher value if the contigs look too close together." value="0.1"/> + <param type="integer" name="t_y" label="Position of reference contigs on the y-axis" value="1"/> + <param type="integer" name="q_y" label="Position of query contigs on the y-axis" value="2"/> + </section> + </inputs> + <outputs> + <data name="plot" format="pdf" label="${tool.name} on ${on_string}" from_work_dir="ragtag.pdf"/> + </outputs> + <tests> + <test expect_num_outputs="1"> + <param name="input_paf" ftype="paf.gz" value="ragtag.paf.gz"/> + <param name="input_agp" ftype="agp" value="ragtag.agp"/> + <output name="plot"> + <assert_contents> + <has_size size="8758" delta="1000" /> + </assert_contents> + </output> + </test> + </tests> + <help> + Accepts the PAF and AGP produced by the RagTag Scaffold and draws a plot + showing the alignment of the query contigs to the reference contigs. + + .. figure:: $PATH_TO_IMAGES/ragtag.png + :alt: RagTag plot + + </help> + <citations> + <citation type="bibtex"> + @misc{plot_ragtag_paf, + title = {plot_ragtag_paf}, + author = {Harrop, Tom}, + year = {2024}, + organization = {Galaxy Australia}, + url = {https://github.com/usegalaxy-au/tools-au}, + } + </citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ragtag.agp Wed Sep 25 07:52:38 2024 +0000 @@ -0,0 +1,158 @@ +## agp-version 2.1 +# AGP created by RagTag v2.1.0 +1_RagTag 1 243556 1 W contig_63 1 243556 - +1_RagTag 243557 243656 2 U 100 scaffold yes align_genus +1_RagTag 243657 769839 3 W contig_62 1 526183 + +1_RagTag 769840 769939 4 U 100 scaffold yes align_genus +1_RagTag 769940 1077133 5 W contig_100 1 307194 - +1_RagTag 1077134 1077233 6 U 100 scaffold yes align_genus +1_RagTag 1077234 1080142 7 W contig_80 1 2909 - +1_RagTag 1080143 1080242 8 U 100 scaffold yes align_genus +1_RagTag 1080243 1753612 9 W contig_60 1 673370 + +1_RagTag 1753613 1754048 10 N 436 scaffold yes align_genus +1_RagTag 1754049 2012384 11 W contig_16 1 258336 + +1_RagTag 2012385 2086929 12 N 74545 scaffold yes align_genus +1_RagTag 2086930 4693729 13 W contig_38 1 2606800 - +2_RagTag 1 12683 1 W contig_96 1 12683 - +2_RagTag 12684 12783 2 U 100 scaffold yes align_genus +2_RagTag 12784 1826104 3 W contig_30 1 1813321 + +2_RagTag 1826105 1862437 4 N 36333 scaffold yes align_genus +2_RagTag 1862438 1863805 5 W contig_11 1 1368 - +2_RagTag 1863806 1877480 6 N 13675 scaffold yes align_genus +2_RagTag 1877481 2472318 7 W contig_8 1 594838 - +2_RagTag 2472319 2472418 8 U 100 scaffold yes align_genus +2_RagTag 2472419 2480086 9 W contig_103 1 7668 - +2_RagTag 2480087 2480186 10 U 100 scaffold yes align_genus +2_RagTag 2480187 3401419 11 W contig_102 1 921233 - +2_RagTag 3401420 3401519 12 U 100 scaffold yes align_genus +2_RagTag 3401520 3987789 13 W contig_64 1 586270 + +2_RagTag 3987790 3987889 14 U 100 scaffold yes align_genus +2_RagTag 3987890 4003446 15 W contig_95 1 15557 + +2_RagTag 4003447 4003546 16 U 100 scaffold yes align_genus +2_RagTag 4003547 4302373 17 W contig_46 1 298827 - +2_RagTag 4302374 4302473 18 U 100 scaffold yes align_genus +2_RagTag 4302474 4416170 19 W contig_75 1 113697 + +2_RagTag 4416171 4435718 20 N 19548 scaffold yes align_genus +2_RagTag 4435719 4707751 21 W contig_93 1 272033 + +2_RagTag 4707752 4707851 22 U 100 scaffold yes align_genus +2_RagTag 4707852 4709556 23 W contig_131 1 1705 - +2_RagTag 4709557 4709656 24 U 100 scaffold yes align_genus +2_RagTag 4709657 4877510 25 W contig_83 1 167854 - +3_RagTag 1 1230166 1 W contig_67 1 1230166 - +3_RagTag 1230167 1251655 2 N 21489 scaffold yes align_genus +3_RagTag 1251656 1257278 3 W contig_135 1 5623 + +3_RagTag 1257279 1257378 4 U 100 scaffold yes align_genus +3_RagTag 1257379 1260195 5 W contig_105 1 2817 + +3_RagTag 1260196 1260295 6 U 100 scaffold yes align_genus +3_RagTag 1260296 4010002 7 W contig_73 1 2749707 - +3_RagTag 4010003 4017979 8 N 7977 scaffold yes align_genus +3_RagTag 4017980 4032481 9 W contig_78 1 14502 + +4_RagTag 1 421154 1 W contig_53 1 421154 + +4_RagTag 421155 421254 2 U 100 scaffold yes align_genus +4_RagTag 421255 428922 3 W contig_128 1 7668 + +4_RagTag 428923 429022 4 U 100 scaffold yes align_genus +4_RagTag 429023 1107160 5 W contig_48 1 678138 + +4_RagTag 1107161 1171788 6 N 64628 scaffold yes align_genus +4_RagTag 1171789 1397297 7 W contig_54 1 225509 + +4_RagTag 1397298 1397728 8 N 431 scaffold yes align_genus +4_RagTag 1397729 1457766 9 W contig_79 1 60038 + +4_RagTag 1457767 1457866 10 U 100 scaffold yes align_genus +4_RagTag 1457867 1563580 11 W contig_129 1 105714 - +4_RagTag 1563581 1563737 12 N 157 scaffold yes align_genus +4_RagTag 1563738 3581177 13 W contig_57 1 2017440 + +5_RagTag 1 30814 1 W contig_120 1 30814 + +5_RagTag 30815 75326 2 N 44512 scaffold yes align_genus +5_RagTag 75327 160509 3 W contig_108 1 85183 + +5_RagTag 160510 160609 4 U 100 scaffold yes align_genus +5_RagTag 160610 1269222 5 W contig_42 1 1108613 + +5_RagTag 1269223 1274879 6 N 5657 scaffold yes align_genus +5_RagTag 1274880 2139564 7 W contig_1 1 864685 - +5_RagTag 2139565 2139664 8 U 100 scaffold yes align_genus +5_RagTag 2139665 2455401 9 W contig_94 1 315737 + +5_RagTag 2455402 2455501 10 U 100 scaffold yes align_genus +5_RagTag 2455502 2458003 11 W contig_88 1 2502 - +5_RagTag 2458004 2458103 12 U 100 scaffold yes align_genus +5_RagTag 2458104 3882881 13 W contig_58 1 1424778 - +6_RagTag 1 1229661 1 W contig_59 1 1229661 - +6_RagTag 1229662 1229761 2 U 100 scaffold yes align_genus +6_RagTag 1229762 1237423 3 W contig_136 1 7662 - +6_RagTag 1237424 1263700 4 N 26277 scaffold yes align_genus +6_RagTag 1263701 1266598 5 W contig_25 1 2898 - +6_RagTag 1266599 1290831 6 N 24233 scaffold yes align_genus +6_RagTag 1290832 2200919 7 W contig_34 1 910088 - +6_RagTag 2200920 2201019 8 U 100 scaffold yes align_genus +6_RagTag 2201020 2255521 9 W contig_116 1 54502 - +6_RagTag 2255522 2255621 10 U 100 scaffold yes align_genus +6_RagTag 2255622 2295397 11 W contig_121 1 39776 + +6_RagTag 2295398 2295497 12 U 100 scaffold yes align_genus +6_RagTag 2295498 2348024 13 W contig_87 1 52527 + +6_RagTag 2348025 2348124 14 U 100 scaffold yes align_genus +6_RagTag 2348125 2924414 15 W contig_77 1 576290 + +6_RagTag 2924415 2924514 16 U 100 scaffold yes align_genus +6_RagTag 2924515 2946454 17 W contig_123 1 21940 - +6_RagTag 2946455 2946554 18 U 100 scaffold yes align_genus +6_RagTag 2946555 3673848 19 W contig_55 1 727294 - +6_RagTag 3673849 3673948 20 U 100 scaffold yes align_genus +6_RagTag 3673949 3844665 21 W contig_99 1 170717 + +7_RagTag 1 788918 1 W contig_44 1 788918 + +7_RagTag 788919 828815 2 N 39897 scaffold yes align_genus +7_RagTag 828816 1758265 3 W contig_56 1 929450 + +8_RagTag 1 495564 1 W contig_41 1 495564 + +8_RagTag 495565 495664 2 U 100 scaffold yes align_genus +8_RagTag 495665 500444 3 W contig_82 1 4780 + +8_RagTag 500445 500544 4 U 100 scaffold yes align_genus +8_RagTag 500545 742185 5 W contig_39 1 241641 + +8_RagTag 742186 742405 6 N 220 scaffold yes align_genus +8_RagTag 742406 773752 7 W contig_76 1 31347 - +8_RagTag 773753 773852 8 U 100 scaffold yes align_genus +8_RagTag 773853 976942 9 W contig_66 1 203090 + +8_RagTag 976943 977042 10 U 100 scaffold yes align_genus +8_RagTag 977043 1832718 11 W contig_61 1 855676 + +contig_101_RagTag 1 1302 1 W contig_101 1 1302 + +contig_104_RagTag 1 691 1 W contig_104 1 691 + +contig_106_RagTag 1 2823 1 W contig_106 1 2823 + +contig_109_RagTag 1 2768 1 W contig_109 1 2768 + +contig_110_RagTag 1 919 1 W contig_110 1 919 + +contig_112_RagTag 1 1174 1 W contig_112 1 1174 + +contig_113_RagTag 1 5303 1 W contig_113 1 5303 + +contig_114_RagTag 1 2947 1 W contig_114 1 2947 + +contig_115_RagTag 1 3079 1 W contig_115 1 3079 + +contig_117_RagTag 1 8274 1 W contig_117 1 8274 + +contig_119_RagTag 1 6118 1 W contig_119 1 6118 + +contig_122_RagTag 1 3904 1 W contig_122 1 3904 + +contig_124_RagTag 1 26365 1 W contig_124 1 26365 + +contig_126_RagTag 1 1109 1 W contig_126 1 1109 + +contig_127_RagTag 1 699 1 W contig_127 1 699 + +contig_130_RagTag 1 5634 1 W contig_130 1 5634 + +contig_132_RagTag 1 4033 1 W contig_132 1 4033 + +contig_134_RagTag 1 2622 1 W contig_134 1 2622 + +contig_137_RagTag 1 10758 1 W contig_137 1 10758 + +contig_140_RagTag 1 22358 1 W contig_140 1 22358 + +contig_142_RagTag 1 6894 1 W contig_142 1 6894 + +contig_144_RagTag 1 30504 1 W contig_144 1 30504 + +contig_145_RagTag 1 6073 1 W contig_145 1 6073 + +contig_17_RagTag 1 3562 1 W contig_17 1 3562 + +contig_18_RagTag 1 579 1 W contig_18 1 579 + +contig_2_RagTag 1 1966 1 W contig_2 1 1966 + +contig_24_RagTag 1 657 1 W contig_24 1 657 + +contig_26_RagTag 1 875 1 W contig_26 1 875 + +contig_27_RagTag 1 2290 1 W contig_27 1 2290 + +contig_28_RagTag 1 973 1 W contig_28 1 973 + +contig_3_RagTag 1 1885 1 W contig_3 1 1885 + +contig_33_RagTag 1 680 1 W contig_33 1 680 + +contig_36_RagTag 1 572 1 W contig_36 1 572 + +contig_37_RagTag 1 831 1 W contig_37 1 831 + +contig_40_RagTag 1 559 1 W contig_40 1 559 + +contig_45_RagTag 1 682 1 W contig_45 1 682 + +contig_50_RagTag 1 1846 1 W contig_50 1 1846 + +contig_51_RagTag 1 548 1 W contig_51 1 548 + +contig_6_RagTag 1 822 1 W contig_6 1 822 + +contig_68_RagTag 1 1319 1 W contig_68 1 1319 + +contig_69_RagTag 1 28525 1 W contig_69 1 28525 + +contig_7_RagTag 1 1050 1 W contig_7 1 1050 + +contig_71_RagTag 1 8337 1 W contig_71 1 8337 + +contig_72_RagTag 1 1700 1 W contig_72 1 1700 + +contig_74_RagTag 1 7707 1 W contig_74 1 7707 + +contig_81_RagTag 1 9978 1 W contig_81 1 9978 + +contig_9_RagTag 1 2661 1 W contig_9 1 2661 + +contig_97_RagTag 1 2205 1 W contig_97 1 2205 +