view seurat.xml @ 15:fab6ff46e019 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit b437a46efb50e543b6d7c9988f954efe2caa9046
author iuc
date Fri, 07 Jul 2023 01:43:02 +0000
parents c0fd285eb553
children
line wrap: on
line source

<tool id="seurat" name="Seurat" version="@TOOL_VERSION@+galaxy1">
    <description>- toolkit for exploration of single-cell RNA-seq data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Builds a single Rscript call that loads rmarkdown and render()s one of two
        scripts from the tool directory into out.html in the working directory:
          * function_select == "base": Seurat.R. The optional stages are nested
            behind the variable / pca / clusters / markers "continue" conditionals,
            and end_step is set to "1" .. "5" according to the deepest stage that
            was enabled (1 = no optional stage, 5 = all of them, markers included).
          * otherwise (Cite-seq): citeseq_Seurat.R. Every stage's parameters are
            passed unconditionally, plus the comparison / marker_compare switches
            (passed as "T"/"F", with an empty string for the dependent value when
            the switch is off).
        Every value is handed to R as a quoted string. All R quotes are written as
        \" so that the whole R expression stays inside one shell double-quoted word.
    -->
    <command detect_errors="exit_code"><![CDATA[
    #if $function.function_select == "base":
        Rscript -e "library(\"rmarkdown\"); render(\"$__tool_directory__/Seurat.R\",
        params = list(counts = \"${function.input}\",
        min_cells = \"${function.min_cells}\",
        min_genes = \"${function.min_genes}\",
        low_thresholds = \"${function.low_thresholds}\",
        high_thresholds = \"${function.high_thresholds}\",
        vlnfeat = \"${function.vlnfeat}\",
        norm_out = \"${function.norm_file}\",
        #if $function.variable_continue.variable_continue == "yes":
            featplot = \"${function.variable_continue.featplot}\",
            variable_out = \"${function.variable_continue.var_file}\",
            #if $function.variable_continue.pca_continue.pca_continue == "yes":
                numPCs = \"${function.variable_continue.pca_continue.num_PCs}\",
                PCplots = \"${function.variable_continue.pca_continue.pc_plots}\",
                pca_out = \"${function.variable_continue.pca_continue.pca_file}\",
                #if $function.variable_continue.pca_continue.clusters_continue.clusters_continue == "yes":
                    perplexity = \"${function.variable_continue.pca_continue.clusters_continue.perplexity}\",
                    resolution = \"${function.variable_continue.pca_continue.clusters_continue.resolution}\",
                    nmds = \"${function.variable_continue.pca_continue.clusters_continue.nmds}\",
                    clusters_out = \"${function.variable_continue.pca_continue.clusters_continue.clusters_file}\",
                    #if $function.variable_continue.pca_continue.clusters_continue.markers_continue.markers_continue == "yes":
                        min_pct = \"${function.variable_continue.pca_continue.clusters_continue.markers_continue.min_pct}\",
                        logfc_threshold = \"${function.variable_continue.pca_continue.clusters_continue.markers_continue.logfc_threshold}\",
                        heatmaps = \"${function.variable_continue.pca_continue.clusters_continue.markers_continue.heatmaps}\",
                        markers_out = \"${function.variable_continue.pca_continue.clusters_continue.markers_continue.markers_file}\",
                        end_step = \"5\",
                    #else:
                        end_step = \"4\",
                    #end if
                #else:
                    end_step = \"3\",
                #end if
            #else:
                end_step = \"2\",
            #end if
        #else:
            end_step = \"1\",
        #end if
        varstate = \"${meta.varstate}\",
        warn = \"${meta.warn}\",
        showcode = \"${meta.showcode}\"),
        intermediates_dir = \".\",
        output_format = html_document(),
        output_dir = \".\",
        output_file = \"out.html\")"
    #else:
        Rscript -e "library(\"rmarkdown\"); render(\"$__tool_directory__/citeseq_Seurat.R\",
        params = list(rna = \"${function.rna}\",
        prot = \"${function.prot}\",
        cite_markers = \"${function.cite_markers}\",
        #if $function.comparison.comparison == "yes":
            comparison = \"T\",
            feat_comp = \"${function.comparison.feat_comp}\",
        #else:
            comparison = \"F\",
            feat_comp = \"\",
        #end if
        #if $function.marker_compare.marker_compare == "yes":
            marker_compare = \"T\",
            top_x = \"${function.marker_compare.top_x}\",
        #else:
            marker_compare = \"F\",
            top_x = \"\",
        #end if
        min_cells = \"${function.min_cells}\",
        min_genes = \"${function.min_genes}\",
        low_thresholds = \"${function.low_thresholds}\",
        high_thresholds = \"${function.high_thresholds}\",
        vlnfeat = \"${function.vlnfeat}\",
        norm_out = \"${function.norm_file}\",
        featplot = \"${function.featplot}\",
        variable_out = \"${function.var_file}\",
        numPCs = \"${function.num_PCs}\",
        PCplots = \"${function.pc_plots}\",
        pca_out = \"${function.pca_file}\",
        perplexity = \"${function.perplexity}\",
        resolution = \"${function.resolution}\",
        nmds = \"${function.nmds}\",
        clusters_out = \"${function.clusters_file}\",
        min_pct = \"${function.min_pct}\",
        logfc_threshold = \"${function.logfc_threshold}\",
        heatmaps = \"${function.heatmaps}\",
        markers_out = \"${function.markers_file}\",
        varstate = \"${meta.varstate}\",
        warn = \"${meta.warn}\",
        showcode = \"${meta.showcode}\"),
        intermediates_dir = \".\",
        output_format = html_document(),
        output_dir = \".\",
        output_file = \"out.html\")"
    #end if
    ]]></command>
    <inputs>
        <!--
            Top-level method switch.
              * "base": one counts file; each later stage is only offered once the
                user answers "yes" to the preceding "Continue workflow ..." select,
                so the stage conditionals are nested one inside the other.
              * "cite": RNA and protein counts files; all stage macros are expanded
                flat, without "continue" gates.
            The norm / variable / pca / clusters / markers / cite-seq macros come
            from macros.xml (not shown here); they are expected to supply the
            remaining parameters referenced by the command template.
        -->
        <conditional name="function">
            <param name="function_select" type="select" label="Which Seurat method should be run?">
                <option value="base">Base</option>
                <option value="cite">Cite-seq</option>
            </param>
            <when value="base">
                <param name="input" type="data" format="tabular,tsv" label="Counts file" help="This should be a TAB-separated count matrix with gene identifiers in the first column and a header row"/>
                <param name="min_cells" type="integer" min="0" value="0" label="Minimum cells" help="Include genes with detected expression in at least this many cells"/>
                <param name="min_genes" type="integer" min="0" value="0" label="Minimum genes" help="Include cells where at least this many genes are detected"/>
                <expand macro="norm"/>
                <!-- Gate 1: continue past normalization. -->
                <conditional name="variable_continue">
                    <param name="variable_continue" type="select" label="Continue workflow after Normalization step?">
                        <option value="yes">Yes</option>
                        <option value="no" selected="true">No</option>
                    </param>
                    <when value="yes">
                        <expand macro="variable"/>
                        <!-- Gate 2: continue past scaling. -->
                        <conditional name="pca_continue">
                            <param name="pca_continue" type="select" label="Continue workflow after scaling step?">
                                <option value="yes">Yes</option>
                                <option value="no" selected="true">No</option>
                            </param>
                            <when value="yes">
                                <expand macro="pca"/>
                                <!-- Gate 3: continue past PCA. -->
                                <conditional name="clusters_continue">
                                    <param name="clusters_continue" type="select" label="Continue workflow after PCA step?">
                                        <option value="yes">Yes</option>
                                        <option value="no" selected="true">No</option>
                                    </param>
                                    <when value="yes">
                                        <expand macro="clusters"/>
                                        <!-- Gate 4: continue past TSNE/UMAP to marker detection. -->
                                        <conditional name="markers_continue">
                                            <param name="markers_continue" type="select" label="Continue workflow after TSNE and UMAP step?">
                                                <option value="yes">Yes</option>
                                                <option value="no" selected="true">No</option>
                                            </param>
                                            <when value="yes">
                                                <expand macro="markers"/>
                                            </when>
                                            <when value="no"/>
                                        </conditional>
                                    </when>
                                    <when value="no"/>
                                </conditional>
                            </when>
                            <when value="no"/>
                        </conditional>
                    </when>
                    <when value="no"/>
                </conditional>
            </when>
            <when value="cite">
                <param name="rna" type="data" format="tabular,tsv" label="RNA counts file" help="This should be a TAB-separated count matrix with gene identifiers in the first column and a header row"/>
                <param name="prot" type="data" format="tabular,tsv" label="Protein counts file" help="This should be a TAB-separated count matrix with gene identifiers in the first column and a header row"/>
                <param name="min_cells" type="integer" min="0" value="0" label="Minimum cells" help="Include genes with detected expression in at least this many cells"/>
                <param name="min_genes" type="integer" min="0" value="0" label="Minimum genes" help="Include cells where at least this many genes are detected"/>
                <expand macro="norm"/>
                <expand macro="variable"/>
                <expand macro="pca"/>
                <expand macro="clusters"/>
                <expand macro="markers"/>
                <expand macro="cite-seq"/>
            </when>
        </conditional>
        <!--
            Report rendering switches. Each boolean is substituted into the command
            template as the string "T" or "F" (see truevalue / falsevalue).
        -->
        <section name="meta" title="Output options" expanded="true">
            <param name="showcode"
                   type="boolean"
                   truevalue="T"
                   falsevalue="F"
                   checked="false"
                   label="Show code alongside outputs?"/>
            <param name="warn"
                   type="boolean"
                   truevalue="T"
                   falsevalue="F"
                   checked="false"
                   label="Include warnings in the output file (Yes) or pipe to stdout (No)"/>
            <param name="varstate"
                   type="boolean"
                   truevalue="T"
                   falsevalue="F"
                   checked="false"
                   label="Display variable values used in code at the beginning of output file?"/>
        </section>
    </inputs>
    <outputs>
        <!-- HTML report: the only output without a filter, so it is created in both modes. -->
        <data name="out_html"
              format="html"
              from_work_dir="out.html"
              label="${tool.name} on ${on_string}"/>

        <!--
            Base-mode outputs. Each one is kept only when every filter passes: the
            @...@ tokens (expected to be defined in macros.xml) select the mode and
            the enabled stages, and the last filter is the stage's own
            "*_file" parameter.
        -->
        <data name="norm_out"
              format="rdata"
              from_work_dir="norm_out.rds"
              label="${tool.name} on ${on_string}: normalization intermediate output">
            <filter>@FUNCTION_BASE@</filter>
            <filter>function['norm_file']</filter>
        </data>
        <data name="var_out"
              format="rdata"
              from_work_dir="var_out.rds"
              label="${tool.name} on ${on_string}: normalization and scaling intermediate output">
            <filter>@FUNCTION_BASE@</filter>
            <filter>@VARIABLE_CONTINUE@</filter>
            <filter>function['variable_continue']['var_file']</filter>
        </data>
        <data name="pca_out"
              format="rdata"
              from_work_dir="pca_out.rds"
              label="${tool.name} on ${on_string}: PCA embedding data">
            <filter>@FUNCTION_BASE@</filter>
            <filter>@VARIABLE_CONTINUE@</filter>
            <filter>@PCA_CONTINUE@</filter>
            <filter>function['variable_continue']['pca_continue']['pca_file']</filter>
        </data>
        <!-- The TSNE and UMAP outputs are both controlled by the same clusters_file parameter. -->
        <data name="cluters_out"
              format="rdata"
              from_work_dir="tsne_out.rds"
              label="${tool.name} on ${on_string}: TSNE embedding data">
            <filter>@FUNCTION_BASE@</filter>
            <filter>@VARIABLE_CONTINUE@</filter>
            <filter>@PCA_CONTINUE@</filter>
            <filter>@CLUSTERS_CONTINUE@</filter>
            <filter>function['variable_continue']['pca_continue']['clusters_continue']['clusters_file']</filter>
        </data>
        <data name="umap_out"
              format="rdata"
              from_work_dir="umap_out.rds"
              label="${tool.name} on ${on_string}: UMAP embedding data">
            <filter>@FUNCTION_BASE@</filter>
            <filter>@VARIABLE_CONTINUE@</filter>
            <filter>@PCA_CONTINUE@</filter>
            <filter>@CLUSTERS_CONTINUE@</filter>
            <filter>function['variable_continue']['pca_continue']['clusters_continue']['clusters_file']</filter>
        </data>
        <!-- The markers RDS and the markers table are both controlled by markers_file. -->
        <data name="markers_out"
              format="rdata"
              from_work_dir="markers_out.rds"
              label="${tool.name} on ${on_string}: Markers data">
            <filter>@FUNCTION_BASE@</filter>
            <filter>@VARIABLE_CONTINUE@</filter>
            <filter>@PCA_CONTINUE@</filter>
            <filter>@CLUSTERS_CONTINUE@</filter>
            <filter>@MARKERS_CONTINUE@</filter>
            <filter>function['variable_continue']['pca_continue']['clusters_continue']['markers_continue']['markers_file']</filter>
        </data>
        <data name="markers_tabular"
              format="tabular"
              from_work_dir="markers_out.tsv"
              label="${tool.name} on ${on_string}: Markers list">
            <filter>@FUNCTION_BASE@</filter>
            <filter>@VARIABLE_CONTINUE@</filter>
            <filter>@PCA_CONTINUE@</filter>
            <filter>@CLUSTERS_CONTINUE@</filter>
            <filter>@MARKERS_CONTINUE@</filter>
            <filter>function['variable_continue']['pca_continue']['clusters_continue']['markers_continue']['markers_file']</filter>
            <actions>
                <action name="column_names" type="metadata" default="\\,p_val,avg_log2FC,pct.1,pct.2,p_val_adj,cluster,gene" />
                <action name="column_types" type="metadata" default="str,float,float,float,float,float,int,str" />
            </actions>
        </data>
        <!--
            Cite-seq outputs. All are restricted to Cite-seq mode by @FUNCTION_CITE@
            (token expected to be defined in macros.xml); the second filter is the
            parameter that switches the individual file on. Several of them read
            the same work-dir files as the base-mode outputs.
        -->
        <!-- Protein and RNA marker tables: both controlled by cite_markers. -->
        <data name="protmarkerst"
              format="tabular"
              from_work_dir="protein_out.tsv"
              label="${tool.name} cite-seq on ${on_string}: Protein markers">
            <filter>@FUNCTION_CITE@</filter>
            <filter>function['cite_markers']</filter>
            <actions>
                <action name="column_names" type="metadata" default="\\,p_val,avg_log2FC,pct.1,pct.2,p_val_adj,cluster,gene" />
                <action name="column_types" type="metadata" default="str,float,float,float,float,float,int,str" />
            </actions>
        </data>
        <data name="rnamarkerst"
              format="tabular"
              from_work_dir="rna_out.tsv"
              label="${tool.name} cite-seq on ${on_string}: RNA markers">
            <filter>@FUNCTION_CITE@</filter>
            <filter>function['cite_markers']</filter>
            <actions>
                <action name="column_names" type="metadata" default="\\,p_val,avg_log2FC,pct.1,pct.2,p_val_adj,cluster,gene" />
                <action name="column_types" type="metadata" default="str,float,float,float,float,float,int,str" />
            </actions>
        </data>
        <!-- Graphs PDF: only when the marker_compare conditional is set to "yes". -->
        <data name="cite_graps"
              format="pdf"
              from_work_dir="citeseq_out.pdf"
              label="${tool.name} cite-seq on ${on_string}: Citeseq graphs">
            <filter>@FUNCTION_CITE@</filter>
            <filter>function['marker_compare']['marker_compare'] == "yes"</filter>
        </data>
        <!-- Per-stage intermediate files, each switched by its own "*_file" parameter. -->
        <data name="norm_cite"
              format="rdata"
              from_work_dir="norm_out.rds"
              label="${tool.name} cite-seq on ${on_string}: normalization intermediate output">
            <filter>@FUNCTION_CITE@</filter>
            <filter>function['norm_file']</filter>
        </data>
        <data name="var_cite"
              format="rdata"
              from_work_dir="var_out.rds"
              label="${tool.name} cite-seq on ${on_string}: normalization and scaling intermediate output">
            <filter>@FUNCTION_CITE@</filter>
            <filter>function['var_file']</filter>
        </data>
        <data name="pca_cite"
              format="rdata"
              from_work_dir="pca_out.rds"
              label="${tool.name} cite-seq on ${on_string}: PCA embedding data">
            <filter>@FUNCTION_CITE@</filter>
            <filter>function['pca_file']</filter>
        </data>
        <!-- The TSNE and UMAP outputs are both controlled by clusters_file. -->
        <data name="cluters_cite"
              format="rdata"
              from_work_dir="tsne_out.rds"
              label="${tool.name} cite-seq on ${on_string}: TSNE embedding data">
            <filter>@FUNCTION_CITE@</filter>
            <filter>function['clusters_file']</filter>
        </data>
        <data name="umap_cite"
              format="rdata"
              from_work_dir="umap_out.rds"
              label="${tool.name} cite-seq on ${on_string}: UMAP embedding data">
            <filter>@FUNCTION_CITE@</filter>
            <filter>function['clusters_file']</filter>
        </data>
        <!-- The markers RDS and the markers table are both controlled by markers_file. -->
        <data name="markers_cite"
              format="rdata"
              from_work_dir="markers_out.rds"
              label="${tool.name} cite-seq on ${on_string}: Markers data">
            <filter>@FUNCTION_CITE@</filter>
            <filter>function['markers_file']</filter>
        </data>
        <data name="markers_cite_tabular"
              format="tabular"
              from_work_dir="markers_out.tsv"
              label="${tool.name} cite-seq on ${on_string}: Markers list">
            <filter>@FUNCTION_CITE@</filter>
            <filter>function['markers_file']</filter>
            <actions>
                <action name="column_names" type="metadata" default="\\,p_val,avg_log2FC,pct.1,pct.2,p_val_adj,cluster,gene" />
                <action name="column_types" type="metadata" default="str,float,float,float,float,float,int,str" />
            </actions>
        </data>
    </outputs>
    <tests>
        <!--
            Base mode, every stage enabled and every "*_file" switch on.
            Expects 8 datasets: the HTML report, the five stage RDS files
            (norm, var, pca, tsne, umap), the markers RDS and the markers table.
            The PC count is set through num_PCs, the parameter name the command
            template reads (pca_continue.num_PCs).
        -->
        <test expect_num_outputs="8">
            <conditional name="function">
                <param name="function_select" value="base"/>
                <param name="input" ftype="tabular" value="counts.tab.gz"/>
                <param name="min_cells" value="3"/>
                <param name="min_genes" value="200"/>
                <param name="low_thresholds" value="1"/>
                <param name="high_thresholds" value="20000000"/>
                <param name="vlnfeat" value="T"/>
                <param name="norm_file" value="T"/>
                <conditional name="variable_continue">
                    <param name="variable_continue" value="yes"/>
                    <param name="featplot" value="T"/>
                    <param name="var_file" value="T"/>
                    <conditional name="pca_continue">
                        <param name="pca_continue" value="yes"/>
                        <param name="num_PCs" value="10"/>
                        <param name="pc_plots" value="T"/>
                        <param name="pca_file" value="T"/>
                        <conditional name="clusters_continue">
                            <param name="clusters_continue" value="yes"/>
                            <param name="resolution" value="0.6"/>
                            <param name="nmds" value="T"/>
                            <param name="clusters_file" value="T"/>
                            <conditional name="markers_continue">
                                <param name="markers_continue" value="yes"/>
                                <param name="min_pct" value="0.25"/>
                                <param name="logfc_threshold" value="0.25"/>
                                <param name="heatmaps" value="T"/>
                                <param name="markers_file" value="T"/>
                            </conditional>
                        </conditional>
                    </conditional>
                </conditional>
            </conditional>
            <section name="meta">
                <param name="showcode" value="T"/>
                <param name="warn" value="F"/>
                <param name="varstate" value="F"/>
            </section>
            <output name="out_html" ftype="html">
                <assert_contents>
                    <has_text text="Seurat Analysis"/>
                    <has_text text="Performed using Galaxy"/>
                    <has_text text="img src=&quot;data:image/png;base64"/>
                </assert_contents>
            </output>
            <!-- Size-only comparison with a 500-byte tolerance. -->
            <output name="markers_tabular" file="markers.tsv" compare="sim_size" delta="500"/>
        </test>
        <!--
            Base mode, every stage enabled but every "*_file" switch off, with an
            explicit perplexity. Only the HTML report should remain, which checks
            the output filters.
            The PC count is set through num_PCs, the parameter name the command
            template reads (pca_continue.num_PCs).
        -->
        <test expect_num_outputs="1">
            <!-- test perplexity and output filters -->
            <conditional name="function">
                <param name="function_select" value="base"/>
                <param name="input" ftype="tabular" value="counts.tab.gz"/>
                <param name="min_cells" value="3"/>
                <param name="min_genes" value="200"/>
                <param name="low_thresholds" value="1"/>
                <param name="high_thresholds" value="20000000"/>
                <param name="vlnfeat" value="T"/>
                <param name="norm_file" value="F"/>
                <conditional name="variable_continue">
                    <param name="variable_continue" value="yes"/>
                    <param name="featplot" value="T"/>
                    <param name="var_file" value="F"/>
                    <conditional name="pca_continue">
                        <param name="pca_continue" value="yes"/>
                        <param name="num_PCs" value="10"/>
                        <param name="pc_plots" value="T"/>
                        <param name="pca_file" value="F"/>
                        <conditional name="clusters_continue">
                            <param name="clusters_continue" value="yes"/>
                            <param name="perplexity" value="16"/>
                            <param name="resolution" value="0.6"/>
                            <param name="nmds" value="T"/>
                            <param name="clusters_file" value="F"/>
                            <conditional name="markers_continue">
                                <param name="markers_continue" value="yes"/>
                                <param name="min_pct" value="0.25"/>
                                <param name="logfc_threshold" value="0.25"/>
                                <param name="heatmaps" value="T"/>
                                <param name="markers_file" value="F"/>
                            </conditional>
                        </conditional>
                    </conditional>
                </conditional>
            </conditional>
            <section name="meta">
                <param name="showcode" value="T"/>
                <param name="warn" value="F"/>
                <param name="varstate" value="F"/>
            </section>
            <output name="out_html" ftype="html">
                <assert_contents>
                    <has_text text="Seurat Analysis"/>
                    <has_text text="Performed using Galaxy"/>
                    <has_text text="img src=&quot;data:image/png;base64"/>
                </assert_contents>
            </output>
        </test>
        <!--
            Cite-seq mode with all intermediate "*_file" switches off, cite_markers
            on and marker_compare set to "yes". Expects 4 datasets: the HTML report,
            the protein and RNA marker tables and the graphs PDF.
        -->
        <test expect_num_outputs="4">
            <conditional name="function">
                <param name="function_select" value="cite"/>
                <param name="rna" ftype="tabular" value="rna.tab.gz"/>
                <param name="prot" ftype="tabular" value="adt.tab.gz"/>
                <param name="min_cells" value="0"/>
                <param name="min_genes" value="0"/>
                <param name="low_thresholds" value="1"/>
                <param name="high_thresholds" value="20000000"/>
                <param name="vlnfeat" value="F"/>
                <param name="norm_file" value="F"/>
                <param name="featplot" value="T"/>
                <param name="var_file" value="F"/>
                <param name="num_PCs" value="20"/>
                <param name="pc_plots" value="T"/>
                <param name="pca_file" value="F"/>
                <param name="resolution" value="0.6"/>
                <param name="nmds" value="F"/>
                <param name="clusters_file" value="F"/>
                <param name="min_pct" value="0.25"/>
                <param name="logfc_threshold" value="0.25"/>
                <param name="heatmaps" value="T"/>
                <param name="markers_file" value="F"/>
                <param name="cite_markers" value="T"/>
                <conditional name="comparison">
                    <param name="comparison" value="yes"/>
                    <param name="feat_comp" value="CD4,CD19"/>
                </conditional>
                <conditional name="marker_compare">
                    <param name="marker_compare" value="yes"/>
                    <param name="top_x" value="3"/>
                </conditional>
            </conditional>
            <section name="meta">
                <param name="showcode" value="T"/>
                <param name="warn" value="F"/>
                <param name="varstate" value="F"/>
            </section>
            <!-- Size-only comparisons with a 500-byte tolerance. -->
            <output name="rnamarkerst" ftype="tabular" file="rna_out.tsv" compare="sim_size" delta="500"/>
            <output name="protmarkerst" ftype="tabular" file="protein_out.tsv" compare="sim_size" delta="500"/>
        </test>
        <!--
            Cite-seq mode with every intermediate "*_file" switch on, cite_markers
            off and marker_compare set to "yes". Expects 9 datasets: the HTML
            report, the graphs PDF, the five stage RDS files (norm, var, pca, tsne,
            umap), the markers RDS and the markers table. Only the output count is
            checked; no output content is asserted.
        -->
        <test expect_num_outputs="9">
            <conditional name="function">
                <param name="function_select" value="cite"/>
                <param name="rna" ftype="tabular" value="rna.tab.gz"/>
                <param name="prot" ftype="tabular" value="adt.tab.gz"/>
                <param name="min_cells" value="0"/>
                <param name="min_genes" value="0"/>
                <param name="low_thresholds" value="1"/>
                <param name="high_thresholds" value="20000000"/>
                <param name="vlnfeat" value="F"/>
                <param name="norm_file" value="T"/>
                <param name="featplot" value="T"/>
                <param name="var_file" value="T"/>
                <param name="num_PCs" value="20"/>
                <param name="pc_plots" value="T"/>
                <param name="pca_file" value="T"/>
                <param name="resolution" value="0.6"/>
                <param name="nmds" value="T"/>
                <param name="clusters_file" value="T"/>
                <param name="min_pct" value="0.25"/>
                <param name="logfc_threshold" value="0.25"/>
                <param name="heatmaps" value="T"/>
                <param name="markers_file" value="T"/>
                <param name="cite_markers" value="F"/>
                <conditional name="comparison">
                    <param name="comparison" value="yes"/>
                    <param name="feat_comp" value="CD4,CD19"/>
                </conditional>
                <conditional name="marker_compare">
                    <param name="marker_compare" value="yes"/>
                    <param name="top_x" value="3"/>
                </conditional>
            </conditional>
            <section name="meta">
                <param name="showcode" value="T"/>
                <param name="warn" value="F"/>
                <param name="varstate" value="F"/>
            </section>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
types of single cell data. See the `Seurat Guided Clustering tutorial`_ for more information.

-----

**Inputs**

    * Gene count matrix in TAB-separated format (Base method), or
    * RNA and Protein count matrices in TAB-separated format (Cite-seq method)

-----

**Outputs**

    * HTML of plots

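Optionally you can choose to output

    * R commands used to generate plots printed alongside figures
    * Intermediate results of each step (normalization, scaling, PCA, TSNE/UMAP, markers) as RDS files
    * Marker lists as TAB-separated tables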

.. _Seurat: https://www.nature.com/articles/nbt.4096
.. _Satija Lab: https://satijalab.org/seurat/
.. _Seurat Guided Clustering tutorial: https://satijalab.org/seurat/pbmc3k_tutorial.html

]]></help>
    <!-- Publication to cite for this tool, given by DOI (the same article the help text links as "Seurat"). -->
    <citations>
        <citation type="doi">10.1038/nbt.4096</citation>
    </citations>
</tool>