changeset 0:92325ed91115 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools commit 68159a987b0597222625834e235441b95e8c3a5e
author recetox
date Mon, 03 Feb 2025 16:18:52 +0000
parents
children
files help.xml macros.xml rcx_boxplot.xml test-data/test_data.txt test-data/test_expDesign.txt
diffstat 5 files changed, 295 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/help.xml	Mon Feb 03 16:18:52 2025 +0000
@@ -0,0 +1,53 @@
+<macros>
+ 
+<token name="@GENERAL_HELP@">
+recetox-boxplot help
+=====================
+
+Overview
+--------
+
+recetox-boxplot tool can be used to plot boxplots for tabular data. The input is expected to be a dataframe in tabular/csv/parquet format containing only the columns to be plotted (pre-filtering can be achieved e.g. using the `Cut` Galaxy tool). If the first column of the data contains the rownames - meaning identifiers, ProteinID, etc. - please set `Does the first column of input table contain rownames?` to TRUE.
+
+Typically, a table where rows are features and columns are samples is expected - if one wishes to plot the boxplots for the features, we recommend transposing the table beforehand.
+
+Sometimes, it is better to transform the data for visualization (or processing) purposes (`Should the quantitative variable be transformed?`). If no transformation option is selected, the data will be plotted as is. Otherwise, one can choose between replacing all zero values with NA, log2 transformation, or log10 transformation. Please note that NA values are omitted while plotting.
+
+The `Plot the boxplots horizontally?` option flips the axes: the categorical variable (e.g. samples) is placed on the y-axis, whereas the quantitative variable (e.g. intensity) is placed on the x-axis. This improves legibility for larger datasets.
+
+It is also possible to use a different variable for plotting and coloring - in that case, a metadata table (in a tabular format) can be supplied. The metadata table must contain a column whose values map to the data table column names (e.g. SampleName).
+
+It is also possible to use faceting, meaning splitting the plot based on multiple variables. One can then choose which variable to split the x axis and y axis on.
+
+Example data table
+-------------------
+
++----------------------+-------------------+-----------------------+--------------------+
+| RowID                |    sample1        |    sample2            |    sample3         |
++======================+===================+=======================+====================+
+| 1                    |    350.58         |    211.33             |    288.90          |
++----------------------+-------------------+-----------------------+--------------------+
+| 2                    |    130.17         |    287.54             |    100.11          |
++----------------------+-------------------+-----------------------+--------------------+
+| 3                    |    134.80         |    683.15             |    112.34          |
++----------------------+-------------------+-----------------------+--------------------+
+| 4                    |    183.99         |    920.57             |    590.44          |
++----------------------+-------------------+-----------------------+--------------------+
+| ...                  |    ...            |    ...                |    ...             |
++----------------------+-------------------+-----------------------+--------------------+
+
+
+Example metadata table
+-----------------------
+
++----------------------+-------------------+-----------------------+--------------------+
+| SampleName           |    replicate      |    condition          |    batch           |
++======================+===================+=======================+====================+
+| sample1              |    1              |    control            |    A               |
++----------------------+-------------------+-----------------------+--------------------+
+| sample2              |    1              |    treatment          |    A               |
++----------------------+-------------------+-----------------------+--------------------+
+| sample3              |    2              |    treatment          |    A               |
++----------------------+-------------------+-----------------------+--------------------+
+</token>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Feb 03 16:18:52 2025 +0000
@@ -0,0 +1,66 @@
+<macros>
+    <token name="@TOOL_VERSION@">3.5.1</token> <!-- pins the r-ggplot2 requirement below and prefixes the tool version in rcx_boxplot.xml -->
+    <xml name="creator"> <!-- authorship metadata; pulled into the tool with an expand of macro "creator" -->
+        <creator>
+            <person
+                identifier="0000-0003-4407-3917"
+                givenName="Kristina"
+                familyName="Gomoryova"
+                url="https://github.com/KristinaGomoryova" />
+            <person
+                identifier="0000-0001-6744-996X"
+                givenName="Helge"
+                familyName="Hecht"
+                url="https://github.com/hechth" />
+            <organization
+                name="RECETOX MUNI"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                url="https://www.recetox.muni.cz/" />
+        </creator>
+    </xml>
+    <xml name="requirements"> <!-- Conda packages needed by the R script in rcx_boxplot.xml -->
+        <requirements> <!-- NOTE(review): the script also calls dplyr:: functions but r-dplyr is not listed; presumably it arrives as a dependency of r-tidyr - confirm or pin it explicitly -->
+            <requirement version="@TOOL_VERSION@" type="package">r-ggplot2</requirement>
+            <requirement version="1.3.1" type="package">r-tidyr</requirement>
+            <requirement version="19.0.0" type="package">r-arrow</requirement>
+            <requirement version="1.1.5" type="package">r-rlang</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="boxplot_param"> <!-- every tool input; expanded inside the inputs section of rcx_boxplot.xml -->
+        <param name="input_data" type="data" format="csv,tsv,txt,tabular,parquet" label="Input table" help="Input file in a tabular/tsv/csv/parquet format"/>
+        <param name="has_rownames" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Does the first column of input table contain rownames?" help="Whether the first column of the input data table identifies the rownames (e.g. proteinID) - i.e. it is not a part of data matrix to be plotted."/>
+        <param name="transform_data" type="select" display="radio" label="Should the quantitative variable be transformed?" optional="false" help="Whether to transform the quantitative variable (e.g. intensity, counts, etc.)">
+            <option value="none" selected="true">No transformation</option>
+            <option value="replace_zero">Replace zeroes with NA values</option>
+            <option value="log2">Log2 transformation</option>
+            <option value="log10">Log10 transformation</option>
+        </param>
+        <param name="flip_axes" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Plot the boxplots horizontally? (flip the axes)" help="Whether to flip the axes, so the boxplots will be horizontal instead of vertical."/>
+        <conditional name="grouping_boxplot">
+            <param type="select" name="use_grouping" label="Plot boxplot based on a column from the metadata table?" help="Whether to base the boxplot on a different variable than column names (usually corresponding to the samples) from the input table.">
+                <option value="no" selected="true">no</option>
+                <option value="yes">yes</option>
+            </param>
+            <when value="yes">
+                <param name="input_metadata" type="data" format="tabular" label="Input metadata table" help="Input metadata file in a tabular format"/>
+                <param name="sampleID" type="data_column" data_ref="input_metadata" use_header_names="true" label="Sample identification column in metadata table" help="Column containing sample names - it should correspond to the colNames in the data table."/>
+                <param name="groupingCol" type="data_column" data_ref="input_metadata" use_header_names="true" label="Which variable column to plot on the x-axis?" help="Which column from the metadata table should be plotted on x axis?"/>
+                <param name="colorCol" type="data_column" data_ref="input_metadata" use_header_names="true" label="Color the boxplot based on a variable?" help="Which column from the metadata table should be used for coloring?" optional="true"/>
+                <param name="facet_x" type="data_column" data_ref="input_metadata" use_header_names="true" label="Column to use as facet on x-axis" optional="true" help="If using faceting, which column should be plotted on x-axis? Default 'Nothing selected' means no faceting will be done on x-axis."/>
+                <param name="facet_y" type="data_column" data_ref="input_metadata" use_header_names="true" label="Column to use as facet on y-axis" optional="true" help="If using faceting, which column should be plotted on y-axis? Default 'Nothing selected' means no faceting will be done on y-axis."/>
+            </when>
+            <when value="no"/>
+        </conditional>
+        <param name="xlab" type="text" label="Label for the x axis" optional="true"/>
+        <param name="ylab" type="text" label="Label for the y axis" optional="true"/>
+        <param name="export_R_script" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Export the R script to reproduce the analysis"
+                help="Check this box to export the script executed in the Galaxy tool as an R file to be able to reproduce the same processing offline. Note that in this case, the file paths need to be altered and all the dependencies have to be managed manually."/>
+    </xml>
+
+    <xml name="citations"> <!-- citation for ggplot2; expanded at the end of rcx_boxplot.xml -->
+        <citations>
+            <citation type="doi">10.1007/978-0-387-98141-3</citation> <!-- bare DOI: Galaxy adds the resolver prefix itself -->
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rcx_boxplot.xml	Mon Feb 03 16:18:52 2025 +0000
@@ -0,0 +1,151 @@
+<tool id="rcx_boxplot" name="recetox-boxplot" version="@TOOL_VERSION@+galaxy0" profile="23.0">
+    <description>Boxplot visualization tool using ggplot2</description>
+    <macros>
+        <import>macros.xml</import>
+        <import>help.xml</import>
+    </macros>
+    <!-- The macros expanded in this tool are defined in macros.xml; the @GENERAL_HELP@ token is defined in help.xml -->
+    <expand macro="creator"/>
+    <expand macro="requirements"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '${run_script}'
+        #if $export_R_script
+        && cat '${run_script}' > '${script}'
+        #end if
+    ]]></command> <!-- runs the rendered configfile; when requested, also copies it to the "script" output (paths quoted) -->
+
+    <configfiles>
+        <configfile name="run_script"><![CDATA[
+        ## Cheetah template rendered to an R script: read the table, reshape it to long format, build the boxplot, save boxplot.png
+        #if $input_data.ext == "csv"
+        data_input <- read.csv("$input_data", check.names = FALSE)
+        #else if $input_data.ext == "parquet"
+        data_input <- arrow::read_parquet("$input_data")
+        #else
+        data_input <- read.delim("$input_data", sep="\t", check.names = FALSE)
+        #end if
+        ## the fallback branch reads every other accepted datatype as tab-delimited, so data_input is always defined
+        #if $has_rownames
+        ## row names are not used downstream, so the identifier column is simply dropped (drop = FALSE keeps a data frame)
+        data_input <- data_input[, -1, drop = FALSE]
+        #end if
+
+        y_colname <- "intensity"
+        data_long <- tidyr::pivot_longer(data_input,
+                                         cols = c(1:ncol(data_input)),
+                                         names_to = "samples",
+                                         values_to = y_colname)
+
+        #if $transform_data == "replace_zero"
+        data_long[[y_colname]][which(data_long[[y_colname]] == 0)] <- NA
+        #else if $transform_data == "log2"
+        data_long[[y_colname]] <- log2(data_long[[y_colname]])
+        #else if $transform_data == "log10"
+        data_long[[y_colname]] <- log10(data_long[[y_colname]])
+        #end if
+
+        #if $grouping_boxplot.use_grouping == "yes"
+        metadata_input <- read.delim("$grouping_boxplot.input_metadata", sep="\t", check.names = FALSE)
+        sampleID_column <- colnames(metadata_input)[$grouping_boxplot.sampleID]
+        plotting_column <- colnames(metadata_input)[$grouping_boxplot.groupingCol]
+        metadata_input[] <- lapply(metadata_input, as.factor)
+        ## the in-place conversion above keeps the original column names, so the names captured before it stay valid
+        data_long <- dplyr::left_join(data_long, metadata_input, by = c("samples" = sampleID_column), keep = TRUE)
+
+        #if $grouping_boxplot.facet_x
+        facet_x <- rlang::sym(colnames(metadata_input)[$grouping_boxplot.facet_x])
+        #else
+        facet_x <- NULL
+        #end if
+
+        #if $grouping_boxplot.facet_y
+        facet_y <- rlang::sym(colnames(metadata_input)[$grouping_boxplot.facet_y])
+        #else
+        facet_y <- NULL
+        #end if
+
+        plot_boxplot <- ggplot2::ggplot(data_long, ggplot2::aes(
+            x = !!rlang::sym(plotting_column),
+            y = intensity,
+            #if $grouping_boxplot.colorCol
+            fill = !!rlang::sym(colnames(metadata_input)[$grouping_boxplot.colorCol])
+            #end if
+        )) +
+            ggplot2::geom_boxplot() +
+            ggplot2::theme_bw() +
+            ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust=1)) +
+            ggplot2::facet_grid(rows = if(!is.null(facet_y)) dplyr::vars(!!facet_y) else NULL,
+                                cols = if(!is.null(facet_x)) dplyr::vars(!!facet_x) else NULL,
+                                scales = "free")
+
+
+        #else
+
+        plot_boxplot <- ggplot2::ggplot(data_long, ggplot2::aes(x = samples, y = intensity)) +
+            ggplot2::geom_boxplot() +
+            ggplot2::theme_bw() +
+            ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust=1))
+
+        #end if
+
+        #if $xlab
+        plot_boxplot <- plot_boxplot + ggplot2::xlab("$xlab")
+        #end if
+
+        #if $ylab
+        plot_boxplot <- plot_boxplot + ggplot2::ylab("$ylab")
+        #end if
+        ## flip_axes is a boolean parameter with truevalue "TRUE", so it is tested for truthiness, not compared to a string
+        #if $flip_axes
+        plot_boxplot <- plot_boxplot + ggplot2::coord_flip()
+        #end if
+
+        ggplot2::ggsave(filename = "boxplot.png", plot_boxplot)
+
+        ]]></configfile>
+    </configfiles>
+
+    <inputs>
+        <expand macro="boxplot_param"/>
+    </inputs>
+    <!-- boxplot.png is collected from the working directory; the script output exists only when export_R_script is set -->
+    <outputs>
+        <data name="boxplot" from_work_dir="boxplot.png" format="png" label="Boxplot on ${on_string}"/>
+        <data name="script" label="R script" format="txt">
+            <filter>export_R_script</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="1"> <!-- defaults only: samples on the x axis, no metadata table -->
+            <param name="input_data" value="test_data.txt"/>
+            <param name="has_rownames" value="true"/>
+            <output ftype="png" name="boxplot">
+                <assert_contents>
+                    <has_size delta="200" size="1164615"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2"> <!-- metadata grouping plus exported R script; NOTE(review): expected PNG size equals the first test - confirm it was measured for this plot -->
+            <param name="input_data" value="test_data.txt"/>
+            <param name="has_rownames" value="true"/>
+            <param name="use_grouping" value="yes"/>
+            <param name="input_metadata" value="test_expDesign.txt"/>
+            <param name="sampleID" value="1"/>
+            <param name="groupingCol" value="1"/>
+            <param name="export_R_script" value="TRUE"/>
+            <output ftype="png" name="boxplot">
+                <assert_contents>
+                    <has_size delta="200" size="1164615"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        @GENERAL_HELP@
+    ]]></help>
+    <!-- the help text comes from the @GENERAL_HELP@ token in help.xml; the citations macro comes from macros.xml -->
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_data.txt	Mon Feb 03 16:18:52 2025 +0000
@@ -0,0 +1,15 @@
+RowID	File_1	File_2	File_3	File_4	File_5	File_6	File_7	File_8	File_9
+12	95.97218086	42.13959786	6.11506479	30.15646347	94.23814082	57.36969732	60.86347654	19.91409937	68.71355704
+14	59.66064371	64.93522126	32.32890474	40.14978233	2.891311477	32.96107785	66.75097997	0.920440008	24.22636581
+5	38.66154396	36.42452677	84.36958354	10.69964404	39.46448065	77.47277323	32.52771639	97.57285906	93.70459001
+36	32.97909046	43.52631758	59.83839323	69.87436805	98.12738555	4.751636512	47.47689026	41.3876903	67.78716531
+89	83.25413765	7.549367861	20.50592254	0.349459671	73.89946997	31.40388713	4.366449277	38.84340215	95.44116332
+965	71.74909204	40.82129731	21.73719426	32.67826786	93.7897543	92.54811762	57.16246361	98.97950241	38.61134734
+11	40.10030753	0.619259961	89.65946872	82.98278412	5.022064589	35.6513286	91.42743566	1.293400215	95.96990548
+2	82.35962922	25.48196256	47.51041934	71.45794006	90.23170047	34.52642435	88.0453423	63.2896977	28.82679247
+456	69.23813955	53.77332434	84.61367461	51.17705197	69.40525804	79.68268428	4.526957668	46.81700494	80.93458495
+68	88.78897848	85.84282896	51.53708603	71.26727107	94.03802875	77.02830433	50.12587615	30.93374275	15.06805862
+90	15.55940741	75.14719125	78.0377742	51.71345723	59.47299196	79.63191036	86.12417779	66.88062611	11.96856299
+23	51.55755631	26.99985192	85.64996749	74.54372275	97.09925394	47.50634659	43.94265016	60.73243208	89.94399967
+44	38.5549221	96.81614325	99.76966908	75.41077107	75.34246484	83.57435092	73.67006881	81.33326567	37.3100164
+567	72.74969753	17.0015972	64.01806032	37.71428705	41.30061893	84.5375675	44.21028268	9.076252458	50.67516692
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_expDesign.txt	Mon Feb 03 16:18:52 2025 +0000
@@ -0,0 +1,10 @@
+SampleID	Condition	Replicate	Batch
+File_1	control	1	1
+File_2	control	2	1
+File_3	control	3	1
+File_4	treatment_A	1	1
+File_5	treatment_A	2	1
+File_6	treatment_A	3	2
+File_7	treatment_B	1	2
+File_8	treatment_B	2	2
+File_9	treatment_B	3	2