Mercurial > repos > recetox > rcx_boxplot
changeset 0:92325ed91115 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools commit 68159a987b0597222625834e235441b95e8c3a5e
| author | recetox | 
|---|---|
| date | Mon, 03 Feb 2025 16:18:52 +0000 | 
| parents | |
| children | ebb0730f6175 | 
| files | help.xml macros.xml rcx_boxplot.xml test-data/test_data.txt test-data/test_expDesign.txt | 
| diffstat | 5 files changed, 295 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/help.xml Mon Feb 03 16:18:52 2025 +0000 @@ -0,0 +1,53 @@ +<macros> + +<token name="@GENERAL_HELP@"> +recetox-boxplot help +===================== + +Overview +-------- + +The recetox-boxplot tool can be used to plot boxplots for tabular data. The input is expected to be a dataframe in tabular/csv/parquet format containing only the columns to be plotted (pre-filtering can be achieved e.g. using the `Cut` Galaxy tool). If the first column of the data contains the rownames (i.e. identifiers such as ProteinID), please set the `Does the first column of input table contain rownames?` option to TRUE. + +Typically, a table where rows are features and columns are samples is expected - if one wishes to plot the boxplots for the features, we recommend transposing the table beforehand. + +Sometimes, it is better to transform the data for visualization (or processing) purposes (`Should the quantitative variable be transformed?`). If no transformation option is selected, the data will be plotted as it is. Otherwise, one can choose between replacing all zero values by NA, log2 transformation or log10 transformation. Please note that NA values are omitted while plotting. + +The `Plot the boxplots horizontally?` option flips the axes: the categorical variable (e.g. samples) is placed on the y-axis, whereas the quantitative variable (e.g. intensity) is placed on the x-axis. This improves legibility for larger datasets. + +It is also possible to use a different variable for plotting and coloring - in that case, a metadata table (in a tabular format) can be supplied. The metadata table must contain a column which maps to the data table column names (e.g. SampleName). + +It is also possible to use faceting, meaning splitting the plot based on multiple variables. One can then choose which variable to split the x axis and y axis on. 
+ +Example data table +------------------- + ++----------------------+-------------------+-----------------------+--------------------+ +| RowID | sample1 | sample2 | sample3 | ++======================+===================+=======================+====================+ +| 1 | 350.58 | 211.33 | 288.90 | ++----------------------+-------------------+-----------------------+--------------------+ +| 2 | 130.17 | 287.54 | 100.11 | ++----------------------+-------------------+-----------------------+--------------------+ +| 3 | 134.80 | 683.15 | 112.34 | ++----------------------+-------------------+-----------------------+--------------------+ +| 4 | 183.99 | 920.57 | 590.44 | ++----------------------+-------------------+-----------------------+--------------------+ +| ... | ... | ... | ... | ++----------------------+-------------------+-----------------------+--------------------+ + + +Example metadata table +----------------------- + ++----------------------+-------------------+-----------------------+--------------------+ +| SampleName | replicate | condition | batch | ++======================+===================+=======================+====================+ +| sample1 | 1 | control | A | ++----------------------+-------------------+-----------------------+--------------------+ +| sample2 | 1 | treatment | A | ++----------------------+-------------------+-----------------------+--------------------+ +| sample3 | 2 | treatment | A | ++----------------------+-------------------+-----------------------+--------------------+ +</token> +</macros> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Feb 03 16:18:52 2025 +0000
@@ -0,0 +1,66 @@
+<macros>
+    <token name="@TOOL_VERSION@">3.5.1</token>
+    <xml name="creator">
+        <creator>
+            <person
+                givenName="Kristina"
+                familyName="Gomoryova"
+                url="https://github.com/KristinaGomoryova"
+                identifier="0000-0003-4407-3917" />
+            <person
+                givenName="Helge"
+                familyName="Hecht"
+                url="https://github.com/hechth"
+                identifier="0000-0001-6744-996X" />
+            <organization
+                url="https://www.recetox.muni.cz/"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                name="RECETOX MUNI" />
+        </creator>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">r-ggplot2</requirement>
+            <requirement type="package" version="1.3.1">r-tidyr</requirement>
+            <requirement type="package" version="19.0.0">r-arrow</requirement>
+            <requirement type="package" version="1.1.5">r-rlang</requirement>
+        </requirements>
+    </xml>
+    <!-- Input parameters of the boxplot tool; the parameter names are referenced by the R config file of the tool. -->
+    <xml name="boxplot_param">
+        <param name="input_data" type="data" format="csv,tsv,txt,tabular,parquet" label="Input table" help="Input file in a tabular/tsv/csv/parquet format"/>
+        <param name="has_rownames" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Does the first column of input table contain rownames?" help="Whether the first column of the input data table identifies the rownames (e.g. proteinID) - i.e. it is not a part of data matrix to be plotted."/>
+        <param name="transform_data" type="select" display="radio" label="Should the quantitative variable be transformed?" optional="false" help="Whether to transform the quantitative variable (e.g. intensity, counts, etc.)">
+            <option value="none" selected="true">No transformation</option>
+            <option value="replace_zero">Replace zeroes with NA values</option>
+            <option value="log2">Log2 transformation</option>
+            <option value="log10">Log10 transformation</option>
+        </param>
+        <param name="flip_axes" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Plot the boxplots horizontally? (flip the axes)" help="Whether to flip the axes, so the boxplots will be horizontal instead of vertical."/>
+        <conditional name="grouping_boxplot">
+            <param type="select" name="use_grouping" label="Plot boxplot based on a column from the metadata table?" help="Whether to base the boxplot on a different variable than column names (usually corresponding to the samples) from the input table.">
+                <option value="no" selected="true">no</option>
+                <option value="yes">yes</option>
+            </param>
+            <when value="yes">
+                <param name="input_metadata" type="data" format="tabular" label="Input metadata table" help="Input metadata file in a tabular format"/>
+                <param name="sampleID" type="data_column" data_ref="input_metadata" use_header_names="true" label="Sample identification column in metadata table" help="Column containing sample names - it should correspond to the colNames in the data table."/>
+                <param name="groupingCol" type="data_column" data_ref="input_metadata" use_header_names="true" label="Which variable column to plot on the x-axis?" help="Which column from the metadata table should be plotted on x axis?"/>
+                <param name="colorCol" type="data_column" data_ref="input_metadata" use_header_names="true" label="Color the boxplot based on a variable?" help="Which column from the metadata table should be used for coloring?" optional="true"/>
+                <param name="facet_x" type="data_column" data_ref="input_metadata" use_header_names="true" label="Column to use as facet on x-axis" optional="true" help="If using faceting, which column should be plotted on x-axis? Default 'Nothing selected' means no faceting will be done on x-axis."/>
+                <param name="facet_y" type="data_column" data_ref="input_metadata" use_header_names="true" label="Column to use as facet on y-axis" optional="true" help="If using faceting, which column should be plotted on y-axis? Default 'Nothing selected' means no faceting will be done on y-axis."/>
+            </when>
+            <when value="no"/>
+        </conditional>
+        <param name="xlab" type="text" label="Label for the x axis" optional="true"/>
+        <param name="ylab" type="text" label="Label for the y axis" optional="true"/>
+        <param name="export_R_script" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Export the R script to reproduce the analysis"
+            help="Check this box to export the script executed in the Galaxy tool as an R file to be able to reproduce the same processing offline. Note that in this case, the file paths need to be altered and all the dependencies have to be managed manually."/>
+    </xml>
+    <!-- A citation of type "doi" takes the bare DOI, not a https://doi.org/ resolver URL. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1007/978-0-387-98141-3</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rcx_boxplot.xml Mon Feb 03 16:18:52 2025 +0000
@@ -0,0 +1,151 @@
+<tool id="rcx_boxplot" name="recetox-boxplot" version="@TOOL_VERSION@+galaxy0" profile="23.0">
+    <description>Boxplot visualization tool using ggplot2</description>
+    <macros>
+        <import>macros.xml</import>
+        <import>help.xml</import>
+    </macros>
+
+    <expand macro="creator" />
+    <expand macro="requirements" />
+    <!-- Runs the generated R script; when requested, the script itself is copied to the 'script' output. -->
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '${run_script}'
+        #if $export_R_script
+            && cat '${run_script}' > '${script}'
+        #end if
+    ]]></command>
+
+    <configfiles>
+        <configfile name="run_script"><![CDATA[
+## Read the input table with the reader that matches its Galaxy datatype.
+#if $input_data.ext == "csv"
+data_input <- read.csv("$input_data", check.names = FALSE)
+#else if $input_data.ext in ["tsv", "txt", "tabular"]
+data_input <- read.delim("$input_data", sep="\t", check.names = FALSE)
+#else if $input_data.ext == "parquet"
+data_input <- arrow::read_parquet("$input_data")
+#end if
+## Optionally move the first column into the rownames so that it is not plotted.
+#if $has_rownames
+rownames(data_input) <- data_input[[1]]
+data_input <- data_input[, -1, drop = FALSE]
+#end if
+## Reshape to long format: one row per value, with its column name stored in "samples".
+y_colname <- "intensity"
+data_long <- tidyr::pivot_longer(data_input,
+                                 cols = c(1:ncol(data_input)),
+                                 names_to = "samples",
+                                 values_to = y_colname)
+## Optional transformation of the quantitative variable.
+#if $transform_data == "replace_zero"
+data_long[data_long == 0] <- NA
+#else if $transform_data == "log2"
+data_long[[y_colname]] <- log2(data_long[[y_colname]])
+#else if $transform_data == "log10"
+data_long[[y_colname]] <- log10(data_long[[y_colname]])
+#end if
+## With grouping enabled, join the metadata table and plot by the selected metadata column.
+#if $grouping_boxplot.use_grouping == "yes"
+metadata_input <- read.delim("$grouping_boxplot.input_metadata", sep="\t", check.names = FALSE)
+sampleID_column <- colnames(metadata_input)[$grouping_boxplot.sampleID]
+plotting_column <- colnames(metadata_input)[$grouping_boxplot.groupingCol]
+metadata_input <- data.frame(lapply(metadata_input, as.factor))
+
+data_long <- dplyr::left_join(data_long, metadata_input, by = c("samples" = sampleID_column), keep = TRUE)
+
+#if $grouping_boxplot.facet_x
+facet_x <- rlang::sym(colnames(metadata_input)[$grouping_boxplot.facet_x])
+#else
+facet_x <- NULL
+#end if
+
+#if $grouping_boxplot.facet_y
+facet_y <- rlang::sym(colnames(metadata_input)[$grouping_boxplot.facet_y])
+#else
+facet_y <- NULL
+#end if
+
+plot_boxplot <- ggplot2::ggplot(data_long, ggplot2::aes(
+    x = !!rlang::sym(plotting_column),
+    y = intensity,
+    #if $grouping_boxplot.colorCol
+    fill = !!rlang::sym(colnames(metadata_input)[$grouping_boxplot.colorCol])
+    #end if
+    )) +
+    ggplot2::geom_boxplot() +
+    ggplot2::theme_bw() +
+    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust=1)) +
+    ggplot2::facet_grid(rows = if(!is.null(facet_y)) dplyr::vars(!!facet_y) else NULL,
+                        cols = if(!is.null(facet_x)) dplyr::vars(!!facet_x) else NULL,
+                        scales = "free")
+
+
+#else
+
+plot_boxplot <- ggplot2::ggplot(data_long, ggplot2::aes(x = samples, y = intensity)) +
+    ggplot2::geom_boxplot() +
+    ggplot2::theme_bw() +
+    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust=1))
+
+#end if
+
+#if $xlab
+plot_boxplot <- plot_boxplot + ggplot2::xlab("$xlab")
+#end if
+
+#if $ylab
+plot_boxplot <- plot_boxplot + ggplot2::ylab("$ylab")
+#end if
+## The truevalue of flip_axes is "TRUE", so test the boolean itself instead of comparing it with "true".
+#if $flip_axes
+plot_boxplot <- plot_boxplot + ggplot2::coord_flip()
+#end if
+
+ggplot2::ggsave(filename = "boxplot.png", plot_boxplot)
+
+        ]]></configfile>
+    </configfiles>
+
+    <inputs>
+        <expand macro="boxplot_param"/>
+    </inputs>
+
+    <outputs>
+        <data name="boxplot" format="png" label="Boxplot on ${on_string}" from_work_dir="boxplot.png"/>
+        <data name="script" format="txt" label="R script">
+            <filter>export_R_script</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input_data" value="test_data.txt"/>
+            <param name="has_rownames" value="true"/>
+            <output name="boxplot" ftype="png">
+                <assert_contents>
+                    <has_size size="1164615" delta="200"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <param name="input_data" value="test_data.txt"/>
+            <param name="has_rownames" value="true"/>
+            <param name="use_grouping" value="yes"/>
+            <param name="input_metadata" value="test_expDesign.txt"/>
+            <param name="sampleID" value="1"/>
+            <param name="groupingCol" value="1"/>
+            <param name="export_R_script" value="TRUE"/>
+            <output name="boxplot" ftype="png">
+                <assert_contents>
+                    <has_size size="1164615" delta="200"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        @GENERAL_HELP@
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_data.txt Mon Feb 03 16:18:52 2025 +0000 @@ -0,0 +1,15 @@ +RowID File_1 File_2 File_3 File_4 File_5 File_6 File_7 File_8 File_9 +12 95.97218086 42.13959786 6.11506479 30.15646347 94.23814082 57.36969732 60.86347654 19.91409937 68.71355704 +14 59.66064371 64.93522126 32.32890474 40.14978233 2.891311477 32.96107785 66.75097997 0.920440008 24.22636581 +5 38.66154396 36.42452677 84.36958354 10.69964404 39.46448065 77.47277323 32.52771639 97.57285906 93.70459001 +36 32.97909046 43.52631758 59.83839323 69.87436805 98.12738555 4.751636512 47.47689026 41.3876903 67.78716531 +89 83.25413765 7.549367861 20.50592254 0.349459671 73.89946997 31.40388713 4.366449277 38.84340215 95.44116332 +965 71.74909204 40.82129731 21.73719426 32.67826786 93.7897543 92.54811762 57.16246361 98.97950241 38.61134734 +11 40.10030753 0.619259961 89.65946872 82.98278412 5.022064589 35.6513286 91.42743566 1.293400215 95.96990548 +2 82.35962922 25.48196256 47.51041934 71.45794006 90.23170047 34.52642435 88.0453423 63.2896977 28.82679247 +456 69.23813955 53.77332434 84.61367461 51.17705197 69.40525804 79.68268428 4.526957668 46.81700494 80.93458495 +68 88.78897848 85.84282896 51.53708603 71.26727107 94.03802875 77.02830433 50.12587615 30.93374275 15.06805862 +90 15.55940741 75.14719125 78.0377742 51.71345723 59.47299196 79.63191036 86.12417779 66.88062611 11.96856299 +23 51.55755631 26.99985192 85.64996749 74.54372275 97.09925394 47.50634659 43.94265016 60.73243208 89.94399967 +44 38.5549221 96.81614325 99.76966908 75.41077107 75.34246484 83.57435092 73.67006881 81.33326567 37.3100164 +567 72.74969753 17.0015972 64.01806032 37.71428705 41.30061893 84.5375675 44.21028268 9.076252458 50.67516692
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_expDesign.txt Mon Feb 03 16:18:52 2025 +0000 @@ -0,0 +1,10 @@ +SampleID Condition Replicate Batch +File_1 control 1 1 +File_2 control 2 1 +File_3 control 3 1 +File_4 treatment_A 1 1 +File_5 treatment_A 2 1 +File_6 treatment_A 3 2 +File_7 treatment_B 1 2 +File_8 treatment_B 2 2 +File_9 treatment_B 3 2
