view raceid_inspectclusters.xml @ 5:37a47c4fd84d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit 1ad3837c2251aba8bd997386c3c27f6cd5b1c9e7"
author iuc
date Wed, 29 Jan 2020 17:17:19 -0500
parents 20f522154663
children 41f34e925bd5
line wrap: on
line source

<tool id="raceid_inspectclusters" name="Cluster Inspection using RaceID" version="@VERSION_RACEID@.@VERSION_WRAPPER@" >
    <!-- Short summary of what this tool does. -->
    <description>examines gene expression within clusters</description>
    <macros>
        <import>macros.xml</import>
        <import>macros_cheetah.xml</import>
        <!--
            select_cells: reusable section describing one set of cells.
            It is expanded twice in the differential gene testing inputs
            (as "set_a" and "set_b"); the sectionname / sectiontitle tokens
            supply the section's name and title.
            Every branch of the "meth" conditional defines a text parameter
            named "selector", each with its own sanitizer macro.
        -->
        <macro name="select_cells" token_sectionname="@SECTIONNAME@" token_sectiontitle="@SECTIONTITLE@" >
            <section name="@SECTIONNAME@" title="@SECTIONTITLE@" expanded="true" >
                <param name="name_set" type="text" optional="true" label="Name of Set" >
                    <!-- Aname, Bname -->
                    <expand macro="sanitize_title" />
                </param>
                <conditional name="meth" >
                    <param name="type" type="select" label="Selection method" >
                        <option value="cln" selected="true">Cluster Numbers</option>
                        <option value="regex" >Regular Expression</option>
                        <option value="manual" >Manual Selection</option>
                    </param>
                    <when value="cln" >
                        <param name="selector" type="text" value="" label="List of clusters" help="Clusters should be listed delimited by commas (e.g. 3,4,5)" >
                            <expand macro="sanitize_numeric_vector" />
                        </param>
                    </when>
                    <when value="regex" >
                        <!-- This section selects cells (see the section titles and the tests, which pass cell names), so the label names cells rather than genes. -->
                        <param name="selector" type="text" value="" label="Regular Expression to select cell names" >
                            <expand macro="sanitize_regex" />
                        </param>
                    </when>
                    <when value="manual" >
                        <!-- Same reasoning as the regex branch: the list holds cell names. -->
                        <param name="selector" type="text" value="" label="List of cells" >
                            <expand macro="sanitize_string_vector" />
                        </param>
                    </when>
                </conditional>
            </section>
        </macro>
    </macros>
    <!-- Both macros below are defined in the imported macro files, which are not part of this file. -->
    <expand macro="requirements" />
    <!--
        Passes the script name, the Cheetah token name and an output suffix to the macro.
        The "out" attribute decodes to the text: ampersand, greater-than, space, '$outlog'.
    -->
    <expand macro="version_command_config" prog="clusterinspect.R" cheetah="INSPECTCLUSTERS_CHEETAH" out="&amp;&gt; '$outlog'" />
    <inputs>
        <!-- The dataset to inspect. -->
        <param name="inputrds" type="data" format="rdata" label="Input RaceID RDS" help="Requires the RDS output from the cluster analysis" />
        <!-- General cluster plots: switched on by default, optionally restricted to the listed clusters. -->
        <conditional name="plotgen">
            <param name="do_opt" type="select" label="Plot Clusters?" help="Generates tSNE and F-R plots">
                <option value="yes" selected="true">Yes</option>
                <option value="no">No</option>
            </param>
            <when value="no" />
            <when value="yes">
                <param name="clusts_plot" type="text" optional="true" value="" label="Cluster Numbers to plot" help="Leave blank to plot all clusters">
                    <expand macro="sanitize_numeric_vector" />
                </param>
            </when>
        </conditional>
        <!--
            Subset analysis: off unless the user opts in.
            "types_regex" removes parts of the cell names to derive cell types;
            "subset_regex" and "fr" sit inside the use_defaults_no macro
            (defined in an imported macro file, not shown here).
        -->
        <conditional name="plotsym" >
            <param name="do_opt" type="select" label="Perform Subset Analysis?" >
                <expand macro="yesno_checkedno" />
            </param>
            <when value="no" />
            <when value="yes">
                <param name="types_regex" type="text" optional="true" label="Types Regex (removal)" help="Regular expression to be used to remove portions of the cell names, in order to indicate cell type. e.g. if your cells are labelled as 'cdk5_1, cdk5_2, glow_1, glow_2', then a regex of '_\\d$' would trim off the ends and set the cell types to 'cdk5, cdk5, glow, glow'. " >
                    <expand macro="sanitize_regex" />
                </param>
                <expand macro="use_defaults_no" >
                    <param name="subset_regex" type="text" optional="true" label="Subset of Types: Regex (select)" help="Regular expression to be used to select for cell types of interest. e.g. if we are only interested in 'cdk5' positive cells, then we would simply put 'cdk5' here. ">
                        <expand macro="sanitize_regex" />
                    </param>
                    <!-- Spelling matches the help section of this file: Fruchterman-Reingold. -->
                    <param name="fr" type="boolean" checked="false" label="Output FR map instead of tSNE" help="Performs a Fruchterman-Reingold projection instead of tSNE" />
                </expand>
            </when>
        </conditional>
        <!--
            Genes of interest: off unless the user opts in.
            The gene list and optional cell list are always shown once enabled;
            the remaining plotting parameters sit inside the use_defaults_no macro
            (defined in an imported macro file, not shown here).
        -->
        <conditional name="gois" >
            <param name="do_opt" type="select" label="Examine Genes of Interest" >
                <expand macro="yesno_checkedno" />
            </param>
            <when value="no" />
            <when value="yes">
                <param name="inspect_goi_genes" type="text" value="" label="Genes to Examine" >
                    <expand macro="sanitize_string_vector" />
                </param>
                <param name="inspect_goi_cells" type="text" optional="true" label="Cells to Examine" help="Leave blank to examine all" ><!-- "":NULL -->
                    <expand macro="sanitize_string_vector" />
                </param>
                <expand macro="use_defaults_no" >
                    <!-- Yes, examine genes of interest, but no do not use defaults... -->
                    <param name="inspect_goi_imputed" type="boolean" checked="false" label="Impute Genes" help="Only works if k-nearest neighbours was used in the clustering" />
                    <param name="plotexp_n" type="text" optional="true" label="Title of plot" help="If blank, the list of genes are used" >
                        <expand macro="sanitize_title" />
                    </param>
                    <param name="plotmarkg_cthr" type="integer" min="0" value="0" label="Cluster Threshold" help="Include only clusters with cells greater than this" />
                    <param name="plotmarkg_cl" type="text" optional="true" label="Cluster List" help="List of cluster numbers to include. Leave blank to include all" >
                        <expand macro="sanitize_numeric_vector" />
                    </param><!-- "":NULL -->
                    <param name="plotmarkg_order_cells" type="boolean" checked="false" label="Order Cells" help="Order heatmap by cell names, not by cluster" />
                    <param name="plotmarkg_aggr" type="boolean" checked="false" label="Average Expression" help="Show only average expression for each cluster" />
                    <param name="plotmarkg_norm" type="boolean" checked="false" label="Normalise Gene Expression " help="Normalise gene expression to 1 to depict genes on the same scale" />
                    <!-- NOTE(review): max is -5, which is stricter than the "negative or null" rule in the comment below; confirm the bound is intended. -->
                    <param name="plotmarkg_flo" type="integer" max="-5" optional="true" label="Lower-bound for Gene Expression" /><!-- negative or null vals only for this seem to work-->
                    <param name="plotmarkg_cap" type="integer" min="1" optional="true" label="Upper-bound for Gene Expression" />
                    <param name="plotmarkg_samples" type="text" optional="true" label="Regex to Select Sample Names For Each cell" >
                        <expand macro="sanitize_regex" />
                    </param>
                    <param name="plotmarkg_cluster_cols" type="boolean" checked="false" label="Cluster columns" />
                    <param name="plotmarkg_cluster_rows" type="boolean" checked="true" label="Cluster rows" />
                    <!-- Label spelling corrected: "Hierarchical". -->
                    <param name="plotmarkg_cluster_set" type="boolean" checked="false" label="Order Clusters by Hierarchical Clustering of Cluster Medoids." />
                </expand>
            </when>
        </conditional>
        <!--
            Differential gene testing between two sets of cells: off unless the user opts in.
            Set A and Set B are both built from the select_cells macro declared in this file.
        -->
        <conditional name="diffgtest">
            <param name="do_opt" type="select" label="Differential Gene Testing">
                <expand macro="yesno_checkedno" />
            </param>
            <when value="no" />
            <when value="yes">
                <expand macro="select_cells" sectionname="set_a" sectiontitle="Cells in Set A" />
                <expand macro="select_cells" sectionname="set_b" sectiontitle="Cells in Set B" />
                <!-- Display thresholds and options, wrapped by the use_defaults_no macro. -->
                <expand macro="use_defaults_no">
                    <param name="plotdiffg_pthr" type="float" min="0" max="1" value="0.05" label="P-value cutoff" help="Cutoff for displaying differentially expressed genes" />
                    <param name="plotdiffg_padj" type="boolean" checked="true" label="Apply B-H Correction" help="Display genes with a Benjamini-Hochberg corrected false discovery rate lower than the above P-value threshold" />
                    <param name="plotdiffg_lthr" type="float" min="0" value="0" label="Log2-Fold Threshold" help="Differentially expressed genes are displayed only for log2 fold-changes higher than this" />
                    <param name="plotdiffg_mthr" type="float" optional="true" label="Log2 Mean Threshold" help="Differentially expressed genes are displayed only for log2 mean expression greater than this" /><!-- opt:-Inf -->
                    <param name="plotdiffg_show_names" type="boolean" checked="false" label="Display Gene Names" />
                </expand>
            </when>
        </conditional>
    </inputs>
    <!-- Two outputs: the PDF report shown to the user, and a hidden text log. -->
    <outputs>
        <data name="outpdf" format="pdf" label="${tool.name} on ${on_string}: PDF Report" />
        <!-- useful for debugging, but hide from user -->
        <data name="outlog" format="txt" hidden="true" />
    </outputs>
    <tests>
        <!-- general plots, plotgen: cluster plots enabled, no cluster restriction -->
        <test>
            <param name="inputrds" value="out_cluster_default.rdat" />
            <conditional name="plotgen">
                <param name="do_opt" value="yes" />
            </conditional>
            <output name="outpdf" value="out_cluster_general_default.pdf" lines_diff="30" />
        </test>
        <!-- general plots, plotgen with cluster 1,8,5 only -->
        <!-- NOTE(review): the expected file is the same one used by the unrestricted plotgen test; confirm this is intended. -->
        <test>
            <param name="inputrds" value="out_cluster_default.rdat" />
            <conditional name="plotgen">
                <param name="do_opt" value="yes" />
                <param name="clusts_plot" value="1,8,5" />
            </conditional>
            <output name="outpdf" value="out_cluster_general_default.pdf" lines_diff="30" />
        </test>
        <!-- default test, plotsym: subset analysis with a subset regex, plus genes of interest with only the gene list set -->
        <test>
            <param name="inputrds" value="out_cluster_default.rdat" />
            <conditional name="plotsym">
                <param name="do_opt" value="yes" />
                <param name="types_regex" value="\\_\\d+" />
                <expand macro="test_nondef">
                    <param name="subset_regex" value="IV|V" />
                </expand>
            </conditional>
            <conditional name="gois">
                <param name="do_opt" value="yes" />
                <param name="inspect_goi_genes" value="Apoa1,Apoa1bp,Apoa2,Apoa4,Apoa5" />
            </conditional>
            <output name="outpdf" value="out_cluster_inspect_default.pdf" lines_diff="30" />
        </test>
        <!-- Advanced 1: all three optional analyses enabled with non-default values; set A by regex, set B by manual list -->
        <test>
            <param name="inputrds" value="out_cluster_default.rdat" />
            <conditional name="plotsym">
                <param name="do_opt" value="yes" />
                <param name="types_regex" value="\\_\\d+" />
                <expand macro="test_nondef">
                    <param name="subset_regex" value="IV|V" />
                    <param name="fr" value="true" />
                </expand>
            </conditional>
            <conditional name="gois">
                <param name="do_opt" value="yes" />
                <param name="inspect_goi_genes" value="Apoa1,Apoa1bp,Apoa2,Apoa4,Apoa5" />
                <expand macro="test_nondef">
                    <param name="inspect_goi_imputed" value="false" />
                    <param name="plotexp_n" value="Test Title" />
                    <param name="plotmarkg_order_cells" value="true" />
                    <param name="plotmarkg_aggr" value="true" />
                    <param name="plotmarkg_norm" value="true" />
                    <param name="plotmarkg_flo" value="-10" />
                    <param name="plotmarkg_cap" value="100" />
                    <param name="plotmarkg_samples" value="(\\_\\d+)$" />
                    <param name="plotmarkg_cluster_cols" value="true" />
                    <param name="plotmarkg_cluster_rows" value="false" />
                    <param name="plotmarkg_cluster_set" value="true" />
                </expand>
            </conditional>
            <conditional name="diffgtest">
                <param name="do_opt" value="yes" />
                <!-- Set A: selected by regular expression -->
                <section name="set_a">
                    <param name="name_set" value="Test set A" />
                    <conditional name="meth">
                        <param name="type" value="regex" />
                        <param name="selector" value="^V5.*" />
                    </conditional>
                </section>
                <!-- Set B: selected by an explicit list -->
                <section name="set_b">
                    <param name="name_set" value="Test set B" />
                    <conditional name="meth">
                        <param name="type" value="manual" />
                        <param name="selector" value="I5d_3,I5d_4,I5d_6,I5d_8,I5d_9,I5d_10,I5d_11,I5d_12,I5d_13,I5d_14" />
                    </conditional>
                </section>
                <expand macro="test_nondef">
                    <param name="plotdiffg_pthr" value="0.1" />
                    <param name="plotdiffg_padj" value="false" />
                    <param name="plotdiffg_lthr" value="0.2" />
                    <param name="plotdiffg_mthr" value="0.2" />
                    <param name="plotdiffg_show_names" value="true" />
                </expand>
            </conditional>
            <output name="outpdf" value="out_cluster_inspect_advanced1.pdf" lines_diff="30" />
        </test>
        <!-- Advanced 2: a different gene list and thresholds; set A by regex, set B by cluster number -->
        <test>
            <param name="inputrds" value="out_cluster_default.rdat" />
            <conditional name="plotsym">
                <param name="do_opt" value="yes" />
                <expand macro="test_nondef">
                    <param name="fr" value="false" />
                </expand>
            </conditional>
            <conditional name="gois">
                <param name="do_opt" value="yes" />
                <param name="inspect_goi_genes" value="Sp1,Spc24,Spcs1,Spcs2,Spcs3" />
                <expand macro="test_nondef">
                    <param name="plotexp_n" value="Test Title 2" />
                    <param name="plotmarkg_order_cells" value="true" />
                    <param name="plotmarkg_aggr" value="true" />
                    <param name="plotmarkg_norm" value="false" />
                    <param name="plotmarkg_flo" value="-10" />
                    <param name="plotmarkg_cap" value="10" />
                    <param name="plotmarkg_cluster_cols" value="true" />
                    <param name="plotmarkg_cluster_rows" value="true" />
                    <param name="plotmarkg_cluster_set" value="true" />
                </expand>
            </conditional>
            <conditional name="diffgtest">
                <param name="do_opt" value="yes" />
                <!-- Set A: selected by regular expression -->
                <section name="set_a">
                    <param name="name_set" value="Test set A" />
                    <conditional name="meth">
                        <param name="type" value="regex" />
                        <param name="selector" value="^IV.*" />
                    </conditional>
                </section>
                <!-- Set B: selected by cluster number -->
                <section name="set_b">
                    <param name="name_set" value="Test set B" />
                    <conditional name="meth">
                        <param name="type" value="cln" />
                        <param name="selector" value="3" />
                    </conditional>
                </section>
                <expand macro="test_nondef">
                    <param name="plotdiffg_pthr" value="0.8" />
                    <param name="plotdiffg_padj" value="true" />
                    <param name="plotdiffg_lthr" value="0.8" />
                    <param name="plotdiffg_mthr" value="0.8" />
                    <param name="plotdiffg_show_names" value="false" />
                </expand>
            </conditional>
            <output name="outpdf" value="out_cluster_inspect_advanced2.pdf" lines_diff="30" />
        </test>
        <!-- Advanced 3: differential gene testing only, both sets chosen by several cluster numbers -->
        <test>
            <param name="inputrds" value="out_cluster_default.rdat" />
            <conditional name="plotsym">
                <param name="do_opt" value="no" />
            </conditional>
            <conditional name="gois">
                <param name="do_opt" value="no" />
            </conditional>
            <conditional name="diffgtest">
                <param name="do_opt" value="yes" />
                <section name="set_a">
                    <param name="name_set" value="Test set A" />
                    <conditional name="meth">
                        <param name="type" value="cln" />
                        <param name="selector" value="1,2" />
                    </conditional>
                </section>
                <section name="set_b">
                    <param name="name_set" value="Test set B" />
                    <conditional name="meth">
                        <param name="type" value="cln" />
                        <param name="selector" value="3,4,6" />
                    </conditional>
                </section>
            </conditional>
            <output name="outpdf" value="out_diffgene_multiple.pdf" lines_diff="30" />
        </test>
    </tests>
    <help><![CDATA[

RaceID3
=========

RaceID is a clustering algorithm for the identification of cell types from single-cell RNA-sequencing data. It was specifically designed for the detection of rare cells which correspond to outliers in conventional clustering methods.

This module inspects the clusters generated from the previous clustering step (and requires the output RData file from it as input).

The tool offers four modes of inspection which can all be activated at the same time, resulting in a single PDF report:

 * Plot All Clusters:
   * Produces a tSNE of all clusters, as well as a force-directed Fruchterman-Reingold (F-R) layout which may have tidier plots.
 * Perform Subset Analysis:
   * tSNE and F-R plots with cells whose names match the specified regex highlighted

 * Examine Genes of Interest:
   * Expression plots highlighting a gene or genes of interest across all clusters

 * Differential Gene Testing:
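   * Examines differential gene expression between two user-defined sets of cells (Set A and Set B), each selected by cluster number, regular expression, or a manual list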

 * A list of the most differentially expressed genes in each cluster
 * An output PDF that provides heatmaps for:
    * The initial and final clustering (as determined using random forest)
    * Heatmaps for each of the most differentially expressed genes in each cluster

The tool requires the RData output from the previous clustering step as its input.



]]></help>
    <!-- Expands the "citations" macro, which is defined in an imported macro file (not shown here). -->
    <expand macro="citations" />
</tool>