view ggplot2_pca.xml @ 7:7f2ef7216cf8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ggplot2 commit 075aa3ab0ebc5eef83aac753a2cac7dffe2c6b0c
author iuc
date Fri, 02 Dec 2022 09:36:01 +0000
parents fcb6a19960c6
children b1268b7544d3
line wrap: on
line source

<tool id="ggplot2_pca" name="PCA plot w ggplot2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
    <expand macro="bio_tools"/>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="0.4.13">r-ggfortify</requirement>
        <requirement type="package" version="2.0.0">r-svglite</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
cat '$script' &&
Rscript '$script'
    ]]></command>
    <configfiles>
        <configfile name="script"><![CDATA[
@R_INIT@

## Import libraries
library(ggfortify)
library(scales)

## static VARs
## Defaults for the rest of the script. groups and circles_boolean start as
## TRUE and are only switched to FALSE further down, when the form asks for
## no groups or no ellipses. The three group_* lists are filled per group by
## the template loop that follows this section.
groups <- TRUE
circles_boolean <- TRUE
group_name <- list()
group_cols <- list()
group_colors <- list()
## placeholder; replaced by a per-sample label vector before plotting
group_feature <- ""
## NOTE(review): not referenced again in the visible script - possibly unused
group_colors_vector <- ""

## Everything below is filled in by the template engine before R runs.
input <- '$input1'
## expands to a bare TRUE/FALSE literal (hidden form parameter)
header <- ${inputdata.header}
## 0 means "no row-name column"; any positive value is passed to read.table
rowname_index <- as.integer('$inputdata.row_names_index')

## bare TRUE/FALSE literal; TRUE when the header holds the sample names
horizontal <- ${inputdata.sample_name_orientation}
## title <- '$title'
transform <- '$adv.transform'
## background <- '$adv.theme'
## scaling <- '$adv.scaling.plot_scaling'
## legend <- '$adv.legend'
## "no_groups" or "define_groups"
group_type <- '$groups.group_type'

## The directives below run at template time: for every group defined in the
## form they emit three append() calls, so R only sees the unrolled lines.
## Per group: its name, its selected indices as one comma-separated string,
## and its colour ("none" when the default colour should be kept).
##if(group_type == "define_groups"){
#if str($groups.group_type) == "define_groups":
    #for $q in $groups.group_names:
        group_name <- append(group_name, '$q.groupName')
        group_cols <- append(group_cols, '$q.groupCols')
        group_colors <- append(group_colors, '$q.color_cond.plot_color')
    #end for
#end if
## flatten the lists into plain vectors (NULL when no group was appended)
group_name <- unlist(group_name)
group_cols <- unlist(group_cols)
group_colors <- unlist(group_colors)

## ellipse type chosen in the form; the value "no" disables ellipses later on
circles <- '$groups.circle'
## layout choice from the form: "shape", "label" or "shape,label"
plot_param <- '$plotoptions'

## split plotoptions
## plot_options becomes a logical pair: [1] is TRUE when "shape" was selected,
## [2] is TRUE when "label" was selected
plot_param_list <- strsplit(x = plot_param, split = ",")
plot_options <- c("shape", "label") %in% plot_param_list[[1]]

## Load the tab-separated input table. The header flag is always passed on;
## a row-name column is only requested when a positive index was selected.
read_args <- list(file = input, header = header, sep = "\t")
if(rowname_index > 0){
    read_args[["row.names"]] <- rowname_index
}
df <- do.call(read.table, read_args)

## check if indices are out of range
## The selected indices are later used to index the per-sample label vector,
## which has one entry per column when sample names are horizontal and one
## entry per row otherwise. The upper bound therefore depends on orientation.
## Missing or non-positive indices are rejected as well, so a bad selection
## ends in this clear message instead of a cryptic failure further down.
num_cols <- length(names(df))
if(group_type == "define_groups"){
    check_col_indices <- as.integer(unlist(strsplit(group_cols, split = ",")))
    if(horizontal){
        num_samples <- num_cols
    }else{
        num_samples <- nrow(df)
    }
    if(anyNA(check_col_indices) ||
       any(check_col_indices < 1 | check_col_indices > num_samples)){
        stop("Error: column indices for grouping are out of range! Check help!")
    }
}

## Abort unless every column of the table was read as numeric
numeric_columns <- vapply(df, is.numeric, logical(1))
if(!all(numeric_columns)){
    stop("Error: table contains not only numbers!")
}

## check if group_names are unique
## "no_group" is reserved for samples that belong to no user-defined group,
## so it takes part in the uniqueness check. The message lists all names once,
## comma-separated.
all_group_names <- c(group_name, "no_group")
if(anyDuplicated(all_group_names) > 0){
    stop("Error: group_names must be unique: ", paste(all_group_names, collapse = ","), " is not unique!")
}

## Start with one "no_group" label per sample. Samples are the columns when
## sample names are horizontal and the rows otherwise.
if(!horizontal){
    num_rows <- nrow(df)
    sample_count <- num_rows
}else{
    num_cols <- length(names(df))
    sample_count <- num_cols
}
group_feature <- rep("no_group", sample_count)

## Colour bookkeeping. Both variables stay "" unless groups are defined.
default_ggplot_colors <- ""
default_ggplot_colors_autoplot <- ""
## label every selected sample with its group and build the colour palette
if(group_type == "define_groups"){
    ## one default hue per group plus one for the implicit "no_group"
    color_names <- c(group_name, "no_group")
    cat("\ncolor_names: ", color_names)
    default_ggplot_colors <- setNames(hue_pal()(length(color_names)), color_names)
    cat("\ndefault_ggplot_colors: ", default_ggplot_colors)
    ## user-picked colours replace the defaults; "none" keeps the default hue
    names(group_colors) <- group_name
    group_colors <- group_colors[group_colors != "none"]
    default_ggplot_colors[names(group_colors)] <- group_colors
    cat("\ndefault_ggplot_colors: ", default_ggplot_colors)
    ## integer sample indices, each one named after the group it belongs to
    cols_per_group <- strsplit(group_cols, split = ",")
    group_string <- as.integer(unlist(cols_per_group))
    names(group_string) <- rep(group_name, lengths(cols_per_group))
    group_feature[group_string] <- names(group_string)
    ## keep only the colours of labels that occur ("no_group" may drop out)
    default_ggplot_colors <- default_ggplot_colors[unique(group_feature)]
    default_ggplot_colors_autoplot <- default_ggplot_colors[group_feature]
    cat("\ndefault_ggplot_colors: ", default_ggplot_colors)
}

@LEGEND@

@XY_SCALING@

@THEME@

## Flip the table when sample names run horizontally, so that every sample
## becomes one row of the data frame
if(horizontal){
    df <- as.data.frame(t(df))
}

plot_df <- df
## attach the group labels as an extra column unless grouping is disabled
if(!(group_type %in% "no_groups")){
    plot_df[["group"]] <- group_feature
    group_name <- "group"
}else{
    groups <- FALSE
}

## ellipses are only drawn when a type other than "no" was chosen
if(circles %in% "no"){
    circles_boolean <- FALSE
}

## numeric matrix for the PCA (taken before any group column is involved)
plot_mat <- df

## Optional log transformation, looked up by the name chosen in the form.
## Any other value leaves the data untouched and prints nothing.
log_transforms <- list(
    log2 = function(m) log2(m),
    log2plus1 = function(m) log2(m + 1),
    log10 = function(m) log10(m),
    log10plus1 = function(m) log10(m + 1)
)
if(transform %in% names(log_transforms)){
    plot_mat <- log_transforms[[transform]](plot_mat)
    cat("\n ", transform, " transformed")
}

## plot with or without groups and set more plotting options using autoplot
## The PCA is computed once on the numeric matrix; plot_df only supplies the
## extra columns (the group label) used for colouring.
## shape and label are switched on or off by the layout chosen in the form.
## TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
pca_result <- prcomp(plot_mat)
if(groups){
    if(circles_boolean){
        ## coloured by group, with a frame of the selected type around each group
        plot_out <- autoplot(pca_result, data = plot_df, colour = group_name,
        frame = TRUE, frame.type = circles, shape = plot_options[1],
        label = plot_options[2])
    }else{
        plot_out <- autoplot(pca_result, data = plot_df, colour = group_name,
        frame = FALSE, shape = plot_options[1], label = plot_options[2])
    }
}else{
    ## without groups no frame is ever drawn, whatever circles_boolean says
    plot_out <- autoplot(pca_result, data = plot_df, frame = FALSE,
    shape = plot_options[1], label = plot_options[2])
}

## Layer the remaining options onto the plot one step at a time: manual
## colour and fill scales, theme and legend settings, then a centred title.
## NOTE(review): gg_theme and gg_legend are not defined in the visible script;
## presumably they come from the macro sections expanded earlier - confirm.
plot_out <- plot_out + scale_color_manual(values = default_ggplot_colors)
plot_out <- plot_out + scale_fill_manual(values = default_ggplot_colors)
plot_out <- plot_out + gg_theme + gg_legend
plot_out <- plot_out + ggtitle('$title')
plot_out <- plot_out + theme(plot.title = element_text(hjust = 0.5))

@SAVE_OUTPUT@
        ]]></configfile>
    </configfiles>
    <inputs>
        <param name="input1" type="data" format="tabular" label="Select table"/>
        <!-- Describes the layout of the input table. Every branch provides the same
             three parameters (header, row_names_index, sample_name_orientation) so the
             script can read them without knowing which branch was selected. -->
        <conditional name="inputdata">
            <param name="input_type" type="select" label="Select input dataset options" help="specific dataset input for reading">
                <option value="with_header" selected="true">Dataset with header</option>
                <option value="with_rownames">Dataset with row names</option>
                <option value="with_header_rownames">Dataset with header and row names</option>
                <option value="no_header_rownames">Dataset without header or row names</option>
            </param>
            <when value="with_header">
                <param name="header" type="hidden" value="TRUE"/>
                <!-- 0 tells the script that there is no row name column -->
                <param name="row_names_index" type="hidden" value="0"/>
                <param name="sample_name_orientation" type="select" display="radio" multiple="false" label="Sample names orientation" help="Default horizontal: header names are interpreted as sample names">
                    <option value="TRUE" selected="true">horizontal</option>
                    <option value="FALSE">vertical</option>
                </param>
            </when>
            <when value="with_rownames">
                <param name="header" type="hidden" value="FALSE"/>
                <param name="row_names_index" type="data_column" data_ref="input1" label="Select column, for row names" help="WARNING: please consider that using row names might shift the grouping columns"/>
                <param name="sample_name_orientation" type="select" display="radio" label="Sample names orientation" help="default vertical: row names are interpreted as sample names">
                    <option value="TRUE">horizontal</option>
                    <option value="FALSE" selected="true">vertical</option>
                </param>
            </when>
            <when value="with_header_rownames">
                <param name="header" type="hidden" value="TRUE"/>
                <param name="row_names_index" label="Select column, for row names" type="data_column" data_ref="input1" help="WARNING: please consider that using row names might shift the grouping columns"/>
                <param name="sample_name_orientation" type="select" label="Sample names orientation" display="radio" multiple="false" help="decide whether header names or row names are interpreted as sample names">
                    <option value="TRUE" selected="true">horizontal</option>
                    <option value="FALSE">vertical</option>
                </param>
            </when>
            <when value="no_header_rownames">
                <param name="header" type="hidden" value="FALSE"/>
                <param name="row_names_index" type="hidden" value="0"/>
                <param name="sample_name_orientation" type="hidden" value="TRUE"/>
            </when>
        </conditional>
        <expand macro="title"/>
        <conditional name="groups">
            <param name="group_type" type="select" label="Select groups">
                <option value="no_groups" selected="true">No groups</option>
                <option value="define_groups">Define groups</option>
            </param>
            <when value="no_groups">
                <param name="circle" type="hidden" value="no"/>
            </when>
            <when value="define_groups">
                <repeat name="group_names" min="2" title="Group" help="Select the samples for this group">
                    <param name="groupName" type="text" optional="false" label="Group name"
                        help="Use short names, avoid special characters and numbers at the beginning of the name (The names might be changed by the program to make them conform to processing in R)">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits">
                                <add value=":" />
                                <add value="-" />
                                <add value=" " />
                                <add value="_" />
                            </valid>
                        </sanitizer>
                        <!-- The dash is placed last in the character class so that it is a literal
                             dash. Written between ":" and "_" it formed a range that accepted
                             characters such as ";", "?" and "@" and did not accept the dash itself. -->
                        <validator type="regex" message="Value may include alphanumeric characters, colons, dashes, underscores and spaces.">[0-9a-zA-Z:_ -]+</validator>
                    </param>
                    <param name="groupCols" type="data_column" data_ref="input1" multiple="true" min="1" label="Columns for group"
                        help="WARNING: please use columns indices after removing the row name column if selected in second section!">
                        <validator type="regex" message="Select at least one column for each group!">[0-9a-zA-Z:-_ ]+</validator>
                        <!--<filter type="remove_value" meta_ref="groupCols" />-->
                    </param>
                    <conditional name="color_cond">
                        <param name="color_bool" type="select" label="Change colors">
                            <option value="default" selected="true">Default ggplot colors</option>
                            <option value="selected_colors">Select colors</option>
                        </param>
                        <when value="selected_colors">
                            <param name="plot_color" type="color" value="" label="Pick color">
                                 <sanitizer sanitize="false"/>
                            </param>
                        </when>
                        <when value="default">
                            <param name="plot_color" type="hidden" value="none"/>
                        </when>
                    </conditional>
                </repeat>
                <!-- Ellipse type drawn around each group; "no" turns ellipses off in the script -->
                <param name="circle" type="select" display="radio" multiple="false" label="Select type of ellipses">
                    <option value="no">no ellipses</option>
                    <option value="convex">convex</option>
                    <option value="t" selected="true">t-distribution</option>
                    <option value="norm">normal distribution</option>
                    <option value="euclid">euclidean distance</option>
                </param>
            </when>
        </conditional>
        <param name="plotoptions" type="select" label="Select plot layout" display="radio" >
            <option value="shape" >show shapes</option>
            <option value="label" selected="true">show labels (group elements)</option>
            <option value="shape,label" >show shapes and labels</option>
        </param>
        <!-- Advanced Options  -->
        <section name="adv" title="Advanced Options" expanded="false">
            <expand macro="transform"/>
            <expand macro="xy_scaling" />
            <expand macro="theme"/>
            <expand macro="legend"/>
        </section>
        <!-- Output Options  -->
        <section name="out" title="Output Options" expanded="true">
            <expand macro="dimensions" />
        </section>
    </inputs>
    <outputs>
        <expand macro="additional_output" />
    </outputs>
    <tests>
        <test>
            <param name="input1" value="mtcars.txt" ftype="tabular"/>
            <conditional name="inputdata">
                <param name="input_type" value="with_header_rownames"/>
                <param name="header" value="TRUE"/>
                <param name="row_names_index" value="1"/>
                <param name="sample_name_orientation" value="TRUE"/>
            </conditional>
            <conditional name="groups">
                <param name="group_type" value="define_groups"/>
                <repeat name="group_names">
                    <param name="groupName" value="group1"/>
                    <param name="groupCols" value="3,4"/>
                    <conditional name="color_cond">
                        <param name="color_bool" value="default"/>
                        <param name="plot_color" value="none"/>
                    </conditional>
                </repeat>
                <repeat name="group_names">
                    <param name="groupName" value="group2"/>
                    <param name="groupCols" value="2,5,6,8,9,10,11"/>
                    <conditional name="color_cond">
                        <param name="color_bool" value="default"/>
                        <param name="plot_color" value="none"/>
                    </conditional>
                </repeat>
                <param name="circle" value="convex"/>
            </conditional>
            <param name="additional_output_format" value="pdf"/>
            <output name="output2" file="ggplot_pca_result1.pdf" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool generates a Principal component analysis (PCA) for a given table using a combination of ggplot2 and ggfortify.

-----

**Example**

**WARNING:** Be careful when selecting row names in the second option, because the grouping elements do not update automatically before the script is executed. This means that columns have to be chosen as if the row name column had already been removed.

**Example for row names in table**

+--------+-----------+-----------+---------------+---------------+
| name   | control 1 | control 2 |  treatment 1  |  treatment 2  |
+========+===========+===========+===============+===============+
| gene 1 |   10      |     12    |       3455    |        232    |
+--------+-----------+-----------+---------------+---------------+
| gene 2 |      20   |     2     |       345     |        334    |
+--------+-----------+-----------+---------------+---------------+
| gene 3 |      200  |     210   |       20      |        2      |
+--------+-----------+-----------+---------------+---------------+
|        |           |           |               |               |
+--------+-----------+-----------+---------------+---------------+
|   1    |       2   |      3    |      4        |       5       |
+--------+-----------+-----------+---------------+---------------+

The new index after reading the table will be:

+--------+-----------+-----------+--------------+---------------+
| name   | control 1 | control 2 | treatment 1  |  treatment 2  |
+========+===========+===========+==============+===============+
|*       |      1    |     2     |      3       |      4        |
+--------+-----------+-----------+--------------+---------------+

-----

Pictures coming soon.
    ]]></help>
    <expand macro="citations">
        <citation type="bibtex">@article{tang2016ggfortify,
            title={ggfortify: unified interface to visualize statistical results of popular R packages},
            author={Tang, Yuan and Horikoshi, Masaaki and Li, Wenxuan},
            journal={The R Journal},
            volume={8},
            number={2},
            pages={478-489},
            year={2016},
            url = {https://journal.r-project.org/archive/2016/RJ-2016-060/RJ-2016-060.pdf}
            }
        </citation>
        <citation type="bibtex">@manual{gu2016getoptlong,
            title = {GetoptLong: Parsing Command-Line Arguments and Variable Interpolation},
            author = {Zuguang Gu},
            year = {2016},
            note = {R package version 0.1.5},
            url = {https://CRAN.R-project.org/package=GetoptLong},
            }
        </citation>
    </expand>
</tool>