diff ggplot2_pca.xml @ 0:c21099566418 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ggplot2 commit f9fb73a88ab8b52ce11c25a966d4fe99e67c9fbf
author iuc
date Mon, 11 Jun 2018 16:05:29 -0400
parents
children 7e616a1bdb07
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ggplot2_pca.xml	Mon Jun 11 16:05:29 2018 -0400
@@ -0,0 +1,395 @@
+<tool id="ggplot2_pca" name="PCA plot w ggplot2" version="@VERSION@">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="0.4.3">r-ggfortify</requirement>
+        <requirement type="package" version="1.2.1">r-svglite</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+cat '$script' &&
+Rscript '$script'
+    ]]></command>
+    <configfiles>
+        <configfile name="script"><![CDATA[
+@R_INIT@
+
+## Import libraries
+library(ggfortify)
+library(scales)
+
+## static VARs
+groups <- TRUE
+circles_boolean <- TRUE
+group_name <- list()
+group_cols <- list()
+group_colors <- list()
+group_feature <- ""
+group_colors_vector <- ""
+
+input <- '$input1'
+header <- ${inputdata.header}
+rowname_index <- as.integer('$inputdata.row_names_index')
+
+horizontal <- ${inputdata.sample_name_orientation}
+## title <- '$title'
+transform <- '$adv.transform'
+## background <- '$adv.theme'
+## scaling <- '$adv.scaling.plot_scaling'
+## legend <- '$adv.legend'
+group_type <- '$groups.group_type'
+
+##if(group_type == "define_groups"){
+#if str($groups.group_type) == "define_groups":
+    #for $q in $groups.group_names:
+        group_name <- append(group_name, '$q.groupName')
+        group_cols <- append(group_cols, '$q.groupCols')
+        group_colors <- append(group_colors, '$q.color_cond.plot_color')
+    #end for
+#end if
+group_name <- unlist(group_name)
+group_cols <- unlist(group_cols)
+group_colors <- unlist(group_colors)
+
+circles <- '$groups.circle'
+plot_param <- '$plotoptions'
+
+## split plotoptions
+plot_param_list <- strsplit(x = plot_param, split = ",")
+plot_options <- c("shape", "label") %in% plot_param_list[[1]]
+
+## read table with or with out header or row_names
+if(rowname_index > 0){
+    df <- read.table(input, header = header, row.names = rowname_index, sep = "\t")
+}else{
+    df <- read.table(input, header = header, sep = "\t")
+}
+
+## check if indices are out of range
+num_cols <- length(names(df))
+if(group_type == "define_groups"){
+    check_col_indices <- as.integer(unlist(strsplit(group_cols, split = ",")))
+    if(any(check_col_indices > num_cols)){
+        stop("Error: column indices for grouping are out of range! Check help!")
+    }
+}
+
+## check if table has only numbers
+if(any(!sapply(df, is.numeric))){
+    stop("Error: table contains not only numbers!")
+}
+
+## check if group_names are unique
+if(length(unique(c(group_name, "no_group"))) != length(c(group_name, "no_group"))){
+    stop("Error: group_names must be unique: ", paste(group_name, "no_group", collapse = ","), " is not unique!")
+}
+
+## prepare group_features for grouping of samples accouring to orientation
+if(horizontal){
+    num_cols <- length(names(df))
+    group_feature <- rep("no_group", num_cols)
+}else{
+    num_rows <- nrow(df)
+    group_feature <- rep("no_group", num_rows)
+}
+
+default_ggplot_colors <- ""
+default_ggplot_colors_autoplot <- ""
+## split group elements and assign indexes
+if(group_type == "define_groups"){
+    ## set up colors
+    color_names <- c(group_name, "no_group")
+    cat("\ncolor_names: ", color_names)
+    default_ggplot_colors <- hue_pal()(length(color_names))
+    names(default_ggplot_colors) <- color_names
+    cat("\ndefault_ggplot_colors: ", default_ggplot_colors)
+    names(group_colors) <- group_name
+    group_colors <- group_colors[group_colors != "none"]
+    default_ggplot_colors[names(group_colors)] <- group_colors
+    cat("\ndefault_ggplot_colors: ", default_ggplot_colors)
+    group_string <- lapply(seq_along(group_name), function(k){
+        gname <- group_name[k]
+        gindex <- as.integer(strsplit(group_cols[k], split = ",")[[1]])
+        gnames <- rep(gname, length(gindex))
+        names(gindex) <- gnames
+        gindex
+    })
+    group_string <- do.call(c, group_string)
+    group_feature[group_string] <- names(group_string)
+    ## subset colors on groups if and check if there is "no_group" present
+    default_ggplot_colors <- default_ggplot_colors[unique(group_feature)]
+    default_ggplot_colors_autoplot <- default_ggplot_colors[group_feature]
+    cat("\ndefault_ggplot_colors: ", default_ggplot_colors)
+}
+
+@LEGEND@
+
+@XY_SCALING@
+
+@THEME@
+
+## transpose data.frame for plotting if sample names are horizontal
+if(horizontal){
+    df <- as.data.frame(t(df))
+}
+
+plot_df <- df
+## set group column if wanted
+if(group_type %in% "no_groups"){
+    groups <- FALSE
+}else{
+    plot_df\$group <- group_feature
+    group_name <- "group"
+}
+
+## set boolean elipes_value to plot circle options
+if(circles %in% "no"){
+    circles_boolean <- FALSE
+}
+
+plot_mat <- df
+
+## transform dataset
+if(transform == "log2"){
+    plot_mat <- log2(plot_mat)
+    cat("\n ", transform, " transformed")
+}else if(transform == "log2plus1"){
+    plot_mat <- log2(plot_mat+1)
+    cat("\n ", transform, " transformed")
+}else if(transform == "log10"){
+    plot_mat <- log10(plot_mat)
+    cat("\n ", transform, " transformed")
+}else if(transform == "log10plus1"){
+    plot_mat <- log10(plot_mat+1)
+    cat("\n ", transform, " transformed")
+}else{
+    plot_mat <- plot_mat
+}
+
+## plot with or without groups and set more plotting options using autoplot
+if(groups){
+    if(circles_boolean){
+        plot_out <- autoplot(prcomp(plot_mat), data = plot_df, colour = group_name,
+        frame = T, frame.type=circles, shape = plot_options[1],
+        label = plot_options[2])
+    }else{
+        plot_out <- autoplot(prcomp(plot_mat), data = plot_df, colour = group_name,
+        frame = F, shape = plot_options[1], label = plot_options[2])
+    }
+}else{
+    if(!circles_boolean){
+        plot_out <- autoplot(prcomp(plot_mat), data = plot_df, frame = F, shape = plot_options[1], label = plot_options[2])
+    }else{
+        plot_out <- autoplot(prcomp(plot_mat), data = plot_df, frame = F, shape = plot_options[1], label = plot_options[2])
+    }
+}
+
+## add advanced plotting options for final plot
+plot_out <- plot_out +
+    scale_color_manual(values=default_ggplot_colors) +
+    scale_fill_manual(values=default_ggplot_colors) +
+    gg_theme +
+    gg_legend +
+    ggtitle('$title') +
+    theme(plot.title = element_text(hjust = 0.5))
+
+@SAVE_OUTPUT@
+        ]]></configfile>
+    </configfiles>
+    <inputs>
+        <param name="input1" type="data" format="tabular" label="Select table"/>
+        <conditional name="inputdata">
+            <param name="input_type" type="select" label="Select input dataset options" help="specific dataset input for reading">
+                <option value="with_header" selected="true">Dataset with header</option>
+                <option value="with_rownames">Dataset with row names</option>
+                <option value="with_header_rownames">Dataset with header and row names</option>
+                <option value="no_header_rownames">Dataset without header or row names</option>
+            </param>
+            <when value="with_header">
+                <param name="header" type="hidden" value="TRUE"/>
+                <param name="row_names_index" type="hidden" value="0"/>
+                <param name="sample_name_orientation" type="select" display="radio" multiple="false" label="Sample names orientation" help="Default horizontal: header names are interpreted as sample names">
+                    <option value="TRUE" selected="true">horizontal</option>
+                    <option value="FALSE">vertial</option>
+                </param>
+            </when>
+            <when value="with_rownames">
+                <param name="header" type="hidden" value="FALSE"/>
+                <param name="row_names_index" type="data_column" data_ref="input1" label="Select column, for row names" help="WARNING: please consider that using row names might shift the grouping columns"/>
+                <param name="sample_name_orientation" type="select" display="radio" label="Sample names orientation" help="default vertial: row names are interpreted as sample names">
+                    <option value="TRUE">horizontal</option>
+                    <option value="FALSE" selected="true">vertial</option>
+                </param>
+            </when>
+            <when value="with_header_rownames">
+                <param name="header" type="hidden" value="TRUE"/>
+                <param name="row_names_index" label="Select column, for row names" type="data_column" data_ref="input1" help="WARNING: please consider that using row names might shift the grouping columns"/>
+                <param name="sample_name_orientation" type="select" label="Sample names orientation" display="radio" multiple="false" help="deside whether header names or row names are interpreted as sample names">
+                    <option value="TRUE" selected="true">horizontal</option>
+                    <option value="FALSE">vertial</option>
+                </param>
+            </when>
+            <when value="no_header_rownames">
+                <param name="header" type="hidden" value="FALSE"/>
+                <param name="row_names_index" type="hidden" value="0"/>
+                <param name="sample_name_orientation" type="hidden" value="TRUE"/>
+            </when>
+        </conditional>
+        <expand macro="title"/>
+        <conditional name="groups">
+            <param name="group_type" type="select" label="Select groups">
+                <option value="no_groups" selected="true">No groups</option>
+                <option value="define_groups">Define groups</option>
+            </param>
+            <when value="no_groups">
+                <param name="circle" type="hidden" value="no"/>
+            </when>
+            <when value="define_groups">
+                <repeat name="group_names" min="2" title="Group" help="Select the samples for this group">
+                    <param name="groupName" type="text" optional="false" label="Group name"
+                        help="Use short names, avoid special characters and numbers at the beginning of the name (The names might be changed by the program to make them conform to processing in R)">
+                        <validator type="no_options" message="Please choose a name for this group!"/>
+                    </param>
+                    <param name="groupCols" type="data_column" data_ref="input1" multiple="true" min="1" label="Columns for group"
+                        help="WARNING: please use columns indices after removing the row name column if selected in second section!">
+                        <validator type="no_options" message="Select at least one column for each group!"/>
+                        <!--<filter type="remove_value" meta_ref="groupCols" />-->
+                    </param>
+                    <conditional name="color_cond">
+                        <param name="color_bool" type="select" label="Change colors">
+                            <option value="default" selected="true">Default ggplot colors</option>
+                            <option value="selected_colors">Select colors</option>
+                        </param>
+                        <when value="selected_colors">
+                            <param name="plot_color" type="color" value="" label="Pick color">
+                                 <sanitizer sanitize="false"/>
+                            </param>
+                        </when>
+                        <when value="default">
+                            <param name="plot_color" type="hidden" value="none"/>
+                        </when>
+                    </conditional>
+                </repeat>
+                <param name="circle" type="select" display="radio" multiple="false" label="Select type of ellipses">
+                    <option value="no">no ellipes</option>
+                    <option value="convex">convex</option>
+                    <option value="t" selected="true">t-distribution</option>
+                    <option value="norm">normal distribution</option>
+                    <option value="euclid">euclidean distance</option>
+                </param>
+            </when>
+        </conditional>
+        <param name="plotoptions" type="select" label="Select plot layout" display="radio" >
+            <option value="shape" >show shapes</option>
+            <option value="label" selected="true">show labels (group elements)</option>
+            <option value="shape,label" >show shapes and labels</option>
+        </param>
+        <!-- Advanced Options  -->
+        <section name="adv" title="Advanced Options" expanded="false">
+            <expand macro="transform"/>
+            <expand macro="xy_scaling" />
+            <expand macro="theme"/>
+            <expand macro="legend"/>
+        </section>
+        <!-- Output Options  -->
+        <section name="out" title="Output Options" expanded="true">
+            <expand macro="dimensions" />
+        </section>
+    </inputs>
+    <outputs>
+        <expand macro="additional_output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input1" value="mtcars.txt" ftype="tabular"/>
+            <conditional name="inputdata">
+                <param name="input_type" value="with_header_rownames"/>
+                <param name="header" value="TRUE"/>
+                <param name="row_names_index" value="1"/>
+                <param name="sample_name_orientation" value="TRUE"/>
+            </conditional>
+            <conditional name="groups">
+                <param name="group_type" value="define_groups"/>
+                <repeat name="group_names">
+                    <param name="groupName" value="group1"/>
+                    <param name="groupCols" value="3,4"/>
+                    <conditional name="color_cond">
+                        <param name="color_bool" value="default"/>
+                        <param name="plot_color" value="none"/>
+                    </conditional>
+                </repeat>
+                <repeat name="group_names">
+                    <param name="groupName" value="group2"/>
+                    <param name="groupCols" value="2,5,6,8,9,10,11"/>
+                    <conditional name="color_cond">
+                        <param name="color_bool" value="default"/>
+                        <param name="plot_color" value="none"/>
+                    </conditional>
+                </repeat>
+                <param name="circle" value="convex"/>
+            </conditional>
+            <param name="additional_output_format" value="pdf"/>
+            <output name="output2" file="ggplot_pca_result1.pdf" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+This tool generates a Principal component analysis (PCA) for a given table using a combination of ggplot2 and ggfortify.
+
+-----
+
+**Example**
+
+**WARNING:** Be carefull when selecting row names in the second option because the grouping elements do not update automaticly before executing the script. This means that columns have to be chosen as if the row name column was already be removed.
+
+**Example for row names in table**
+
++--------+-----------+-----------+---------------+---------------+
+| name   | control 1 | control 2 |  treatment 1  |  treatment 2  |
++========+===========+===========+===============+===============+
+| gene 1 |   10      |     12    |       3455    |        232    |
++--------+-----------+-----------+---------------+---------------+
+| gene 2 |      20   |     2     |       345     |        334    |
++--------+-----------+-----------+---------------+---------------+
+| gene 3 |      200  |     210   |       20      |        2      |
++--------+-----------+-----------+---------------+---------------+
+|        |           |           |               |               |
++--------+-----------+-----------+---------------+---------------+
+|   1    |       2   |      3    |      4        |       5       |
++--------+-----------+-----------+---------------+---------------+
+
+The new index after reading the table will be:
+
++--------+-----------+-----------+--------------+---------------+
+| name   | control 1 | control 2 | treatment 1  |  treatment 2  |
++========+===========+===========+==============+===============+
+|*       |      1    |     2     |      3       |      4        |
++--------+-----------+-----------+--------------+---------------+
+
+-----
+
+Pictures coming soon.
+    ]]></help>
+    <expand macro="citations">
+        <citation type="bibtex">@article{tang2016ggfortify,
+            title={ggfortify: unified interface to visualize statistical results of popular R packages},
+            author={Tang, Yuan and Horikoshi, Masaaki and Li, Wenxuan},
+            journal={The R Journal},
+            volume={8},
+            number={2},
+            pages={478-489},
+            year={2016},
+            url = {https://journal.r-project.org/archive/2016/RJ-2016-060/RJ-2016-060.pdf}
+            }
+        </citation>
+        <citation type="bibtex">@manual{gu2016getoptlong,
+            title = {GetoptLong: Parsing Command-Line Arguments and Variable Interpolation},
+            author = {Zuguang Gu},
+            year = {2016},
+            note = {R package version 0.1.5},
+            url = {https://CRAN.R-project.org/package=GetoptLong},
+            }
+        </citation>
+    </expand>
+</tool>