Mercurial > repos > galaxy-australia > plot_ragtag_paf
changeset 1:94e0834ae4ed draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit c0fbce102933ff20902a65795000339a8314d12e
author | galaxy-australia |
---|---|
date | Mon, 14 Oct 2024 02:33:22 +0000 |
parents | 664d63b0e817 |
children | c1bed78b9ed5 |
files | plot_ragtag_paf.R plot_ragtag_paf.xml |
diffstat | 2 files changed, 145 insertions(+), 34 deletions(-) [+] |
line wrap: on
line diff
--- a/plot_ragtag_paf.R Wed Sep 25 07:52:38 2024 +0000 +++ b/plot_ragtag_paf.R Mon Oct 14 02:33:22 2024 +0000 @@ -98,20 +98,35 @@ sort_columns <- c("tname", "tstart", "tend", "qname", "qstart", "qend") config_file <- args[1] -config <- yaml.load_file(config_file) -typed_config <- lapply(config, type.convert, as.is = TRUE) -list2env(typed_config, envir = .GlobalEnv) + +# fixed plotting paramaters +t_y <- 1 +q_y <- 2 ######## # MAIN # ######## +# process the config +message("Reading the plot config") +config <- yaml.load_file(config_file) +typed_config <- lapply(config, type.convert, as.is = TRUE) +invisible( + list2env( + typed_config, + envir = .GlobalEnv + ) +) + + # read the data +message("Reading the plot data") agp <- fread(agp_file, fill = TRUE, skip = 2)[!V5 %in% c("N", "U")] raw_paf <- read_paf(paf_file) paf_dt <- data.table(raw_paf) # calculate spacing +message("Calculating spacing between contigs") padding <- get_padding(paf_dt[tp == "P" & nmatch >= min_nmatch]) # order the reference contigs @@ -119,6 +134,7 @@ setkeyv(paf_dt, cols = sort_columns) # generate continuous reference coordinates +message("Generating coordinates for contigs") tpaf <- unique(paf_dt, by = "tname") tpaf[, pad_tstart := shift(cumsum(tlen + padding[["t_padding"]]), 1, 0)] tpaf[, shift_tstart := pad_tstart + (padding[["t_padding"]] / 2)] @@ -140,6 +156,7 @@ qpaf[, pad_qend := shift_qstart + qlen] # generate offsets for the alignment records +message("Generating coordinates for alignments") tstarts <- unique(tpaf[, .(tname, shift_tstart)]) qstarts <- unique(qpaf[, .(qname, shift_qstart)]) @@ -155,6 +172,7 @@ ] # generate polygons. P is for primary alignments only +message("Generating polygons for alignments") polygon_y_bump <- 0.017 # account for contig thickness paf_polygons <- paf_dt[ tp == "P" & nmatch >= min_nmatch, @@ -176,8 +194,6 @@ y_axis_space <- (total_height - (q_y - t_y)) / 2 middle_x <- tpaf[1, shift_tstart] + tpaf[.N, pad_tend] / 2 - - all_contig_names <- c(tpaf[, unique(tname)]) all_colours <- viridis( length(all_contig_names) + palette_space + 1 @@ -188,9 +204,12 @@ all_contig_names ) -# Plot the ideogram with ribbons connecting the two sets of contigs +# Plot the ideogram with ribbons connecting the two sets of contigs. To +# accomodate upside plots we have to draw the polygons first then add the +# contigs later +message("Plotting") gp <- ggplot() + - theme_void(base_family = "Lato", base_size = 12) + + theme_void(base_family = "Lato", base_size = fontsize) + scale_fill_manual( values = all_colours, guide = "none" ) + @@ -207,10 +226,10 @@ data = tpaf, aes( x = shift_tstart, + y = t_y, xend = pad_tend, colour = tname ), - y = t_y, linewidth = 5, lineend = "butt" ) + @@ -218,22 +237,21 @@ data = qpaf, aes( x = shift_qstart, + y = q_y, xend = pad_qend ), colour = all_colours[["query"]], - y = q_y, linewidth = 5, lineend = "butt" ) + - ylim( - t_y - y_axis_space, - q_y + y_axis_space - ) + annotate( geom = "text", label = "Query contigs", x = middle_x, - y = q_y + (y_axis_space / 3), + y = ifelse(label_query_contigs == TRUE, + q_y + (y_axis_space * 2 / 3), + q_y + (y_axis_space / 3) + ), hjust = 0.5, vjust = 0.5 ) + @@ -241,11 +259,61 @@ geom = "text", label = "Reference contigs", x = middle_x, - y = t_y - (y_axis_space / 3), + y = ifelse(label_ref_contigs == TRUE, + t_y - (y_axis_space * 2 / 3), + t_y - (y_axis_space / 3) + ), hjust = 0.5, vjust = 0.5 ) +# Reverse the scales and the limits to plot reference on top +if (upside_down == TRUE) { + gp <- gp + scale_y_continuous( + limits = c( + q_y + y_axis_space, + t_y - y_axis_space + ), + transform = "reverse" + ) +} else { + gp <- gp + scale_y_continuous( + limits = c( + t_y - y_axis_space, + q_y + y_axis_space + ) + ) +} + +# reference contig names +if (label_ref_contigs == TRUE) { + gp <- gp + geom_text( + data = tpaf, + aes( + x = (shift_tstart + pad_tend) / 2, + y = t_y - (2.5 * polygon_y_bump), + label = tname + ), + angle = 30, + hjust = ifelse(upside_down == TRUE, 0, 1) + ) +} + +# query contig names +if (label_query_contigs == TRUE) { + gp <- gp + geom_text( + data = qpaf, + aes( + x = (shift_qstart + pad_qend) / 2, + y = q_y + (2.5 * polygon_y_bump), + label = qname + ), + angle = 30, + hjust = ifelse(upside_down == TRUE, 1, 0), + ) +} + +message("Writing the plot to file") ggsave(plot_file, gp, width = plot_width, @@ -254,4 +322,8 @@ device = cairo_pdf ) +# Print session info to stderr +message("\nsessionInfo():\n") +sink(stderr()) sessionInfo() +sink()
--- a/plot_ragtag_paf.xml Wed Sep 25 07:52:38 2024 +0000 +++ b/plot_ragtag_paf.xml Mon Oct 14 02:33:22 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="plot_ragtag_paf" name="Plot RagTag output" version="0.0.1"> +<tool id="plot_ragtag_paf" name="Plot RagTag output" version="0.0.2" profile="24.1" license="MIT"> <description>to compare query contigs to the reference</description> <requirements> <container type="docker">ghcr.io/tomharrop/r-containers:r2u_24.04_cv1</container> @@ -12,37 +12,54 @@ <!-- <requirement type="package" version="3.9.5">r-gtools</requirement> --> </requirements> <command detect_errors="exit_code"><![CDATA[ -Rscript -'${__tool_directory__}/plot_ragtag_paf.R' -'${plot_config}' - ]]></command> + + ## Suppress R warnings about container + if [ -f /etc/R/Rprofile.site ]; then + sed -i + '/options(bspm.version.check=FALSE)/i options(bspm.sudo = TRUE)' + /etc/R/Rprofile.site ; + fi ; + + ## Print the plot config to stderr + sed -e 's/^[ \t]*//' -e '/^$/d' '${plot_config}' > config.yaml && + echo "PLOT CONFIG:" >&2 && + cat config.yaml >&2 && + + ## Draw the plot + printf '\n%s\n' "Running plot_ragtag_paf.R" >&2 && + Rscript + '${__tool_directory__}/plot_ragtag_paf.R' + config.yaml + ]]></command> <configfiles> <configfile name="plot_config"> -agp_file: '${input_agp}' -paf_file: '${input_paf}' -plot_file: 'ragtag.pdf' -fontsize: '${plot_params.fontsize}' -gap_size: '${plot_params.gap_size}' -min_nmatch: '${min_nmatch}' -palette_space: '${plot_params.palette_space}' -plot_height: '${plot_params.plot_height}' -plot_width: '${plot_params.plot_width}' -q_y: '${plot_params.q_y}' -t_y: '${plot_params.t_y}' + agp_file: '${input_agp}' + paf_file: '${input_paf}' + plot_file: 'ragtag.pdf' + fontsize: '${plot_params.fontsize}' + gap_size: '${plot_params.gap_size}' + min_nmatch: '${min_nmatch}' + palette_space: '${plot_params.palette_space}' + plot_height: '${plot_params.plot_height}' + plot_width: '${plot_params.plot_width}' + label_ref_contigs: '${plot_params.label_ref_contigs}' + label_query_contigs: '${plot_params.label_query_contigs}' + upside_down: '${upside_down}' </configfile> </configfiles> <inputs> <param format="paf" name="input_paf" label="PAF output from RagTag" type="data"/> <param format="agp" name="input_agp" label="AGP output from RagTag" type="data"/> <param type="integer" name="min_nmatch" label="Minimum alignment length to include in plot" value="20000"/> + <param type="boolean" name="upside_down" label="Plot the reference contigs on top" value="false" truevalue="TRUE" falsevalue="FALSE"/> <section name="plot_params" title="Plot parameters"> <param type="integer" name="plot_width" label="Plot width (mm)" value="254"/> <param type="integer" name="plot_height" label="Plot height (mm)" value="191"/> <param type="integer" name="fontsize" label="Fontsize (pt)" value="12"/> + <param type="boolean" name="label_ref_contigs" label="Print the names of the reference contigs" value="false" truevalue="TRUE" falsevalue="FALSE"/> + <param type="boolean" name="label_query_contigs" label="Print the names of the query contigs" value="false" truevalue="TRUE" falsevalue="FALSE"/> <param type="integer" name="palette_space" label="Number of unused colours in the colour palette between the query contig colour and the colour of the first reference contig." help="Try a higher value if the query contigs look too similar to the reference contigs." value="4"/> <param type="float" min="0" max="1" name="gap_size" label="Total length of gaps between contigs relative to the total assembly length." help="Try a higher value if the contigs look too close together." value="0.1"/> - <param type="integer" name="t_y" label="Position of reference contigs on the y-axis" value="1"/> - <param type="integer" name="q_y" label="Position of query contigs on the y-axis" value="2"/> </section> </inputs> <outputs> @@ -54,7 +71,29 @@ <param name="input_agp" ftype="agp" value="ragtag.agp"/> <output name="plot"> <assert_contents> - <has_size size="8758" delta="1000" /> + <has_size size="8758" delta="1000"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="input_paf" ftype="paf.gz" value="ragtag.paf.gz"/> + <param name="input_agp" ftype="agp" value="ragtag.agp"/> + <param name="upside_down" value="true"/> + <output name="plot"> + <assert_contents> + <has_size size="8756" delta="1000"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="input_paf" ftype="paf.gz" value="ragtag.paf.gz"/> + <param name="input_agp" ftype="agp" value="ragtag.agp"/> + <param name="upside_down" value="true"/> + <param name="label_query_contigs" value="true"/> + <param name="label_ref_contigs" value="true"/> + <output name="plot"> + <assert_contents> + <has_size size="11159" delta="1000"/> </assert_contents> </output> </test>