view seurat_find_markers.xml @ 1:7895b4bb6247 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 0463f230d18201c740851d72e31a5024f391207f
author ebi-gxa
date Mon, 25 Nov 2019 06:09:57 -0500
parents fde95fe95f15
children 4039a99ae846
line wrap: on
line source

<tool id="seurat_find_markers" name="Seurat FindMarkers" version="@SEURAT_VERSION@_@VERSION@+galaxy0">
    <description>find markers (differentially expressed genes)</description>
    <!-- Shared macro file. The macros expanded in this tool and the @...@ tokens it uses are not
         defined in this file; presumably they all come from this import (confirm against seurat_macros.xml). -->
    <macros>
        <import>seurat_macros.xml</import>
    </macros>
    <!-- Macro expansions named "requirements" and "version"; their content is defined outside this file. -->
    <expand macro="requirements" />
    <expand macro="version" />
    <command detect_errors="exit_code"><![CDATA[
## Invoke the marker-finding script. Results are always written to output.txt
## in the job working directory.
seurat-find-markers.R
@INPUT_OBJECT@
--output-text-file output.txt
## Optional gene subset: only passed when a dataset was selected.
#if $genes_use
--genes-use '$genes_use'
#end if
## Optional numeric settings are rendered as an empty string when left blank,
## so each flag below is emitted only if the user typed a value (0 included).
#if str($logfc_threshold) != ''
--logfc-threshold '$logfc_threshold'
#end if
#if str($adv.min_pct) != ''
--min-pct '$adv.min_pct'
#end if
#if str($adv.min_diff_pct) != ''
--min-diff-pct '$adv.min_diff_pct'
#end if
## Boolean switch: emitted only when the checkbox is ticked.
#if $adv.only_pos
--only-pos '$adv.only_pos'
#end if
## The test selector always has a value, so it is passed unconditionally.
--test-use '$adv.test_use'
#if str($max_cells_per_ident) != ''
--max-cells-per-ident '$max_cells_per_ident'
#end if
#if str($min_cells_per_gene) != ''
--min-cells-gene '$min_cells_per_gene'
#end if
#if str($min_cells_group) != ''
--min-cells-group '$min_cells_group'
#end if
]]></command>

    <inputs>
        <!-- Input object and optional gene-subset selectors; both macros are defined outside this file. -->
        <expand macro="input_object_params"/>
        <expand macro="genes-use-input"/>

        <!-- Every numeric parameter is optional: when left blank the flag is not passed
             and the script's own default applies. -->
        <param name="logfc_threshold"
               label="LogFC threshold"
               optional="true"
               argument="--logfc-threshold"
               type="float"
               help="Limit testing to genes which show, on average, at least X-fold difference (log-scale) between the two groups of cells. Default is 0.25. Increasing logfc.threshold speeds up the function, but can miss weaker signals."/>
        <param name="max_cells_per_ident"
               label="Max cells per ident"
               optional="true"
               argument="--max-cells-per-ident"
               type="integer"
               help="Down sample each identity class to a maximum number of cells. Not activated by default (set to Inf, i.e. no downsampling)."/>
        <param name="min_cells_per_gene"
               label="Min cells per gene"
               optional="true"
               argument="--min-cells-gene"
               type="integer"
               help="Minimum number of cells expressing the gene in at least one of the two groups, currently only used for poisson and negative binomial tests."/>
        <param name="min_cells_group"
               label="Min cells in one of the groups"
               optional="true"
               argument="--min-cells-group"
               type="integer"
               help="Minimum number of cells in one of the groups."/>

        <section name="adv" title="Advanced Options">
            <param name="min_pct"
                   type="float"
                   label="Min Pct"
                   optional="true"
                   argument="--min-pct"
                   help="Only test genes that are detected in a minimum fraction of min.pct cells in either of the two populations. Meant to speed up the function by not testing genes that are very infrequently expressed. Default is 0.1."/>
            <param name="min_diff_pct"
                   type="float"
                   label="Min diff Pct"
                   optional="true"
                   argument="--min-diff-pct"
                   help="Only test genes that show a minimum difference in the fraction of detection between the two groups. Set to -Inf by default."/>
            <!-- falsevalue is empty so that the unchecked state evaluates as false in the command template. -->
            <param name="only_pos"
                   type="boolean"
                   label="Only positive markers"
                   truevalue="TRUE"
                   falsevalue=""
                   argument="--only-pos"
                   help="Only return positive markers (FALSE by default)."/>
            <param name="test_use"
                   type="select"
                   label="Test to use"
                   argument="--test-use"
                   help="Statistical test used to identify differentially expressed genes. The Wilcoxon test is selected by default.">
              <option value="wilcox" selected="true">Wilcoxon</option>
              <option value="bimod">Bi-modal</option>
              <option value="roc">ROC</option>
              <option value="t">t-Test</option>
              <option value="tobit">tobit</option>
              <option value="poisson">Poisson</option>
              <option value="negbinom">Negative binomial</option>
              <option value="MAST">MAST</option>
              <option value="DESeq2">DESeq2</option>
            </param>
        </section>
    </inputs>

    <outputs>
        <!-- Single result dataset, collected from output.txt in the job working directory
             and registered with the csv datatype. -->
        <data name="output"
              format="csv"
              from_work_dir="output.txt"
              label="${tool.name} on ${on_string}: Text file"/>
    </outputs>

    <tests>
        <!-- Runs the tool with default settings on the bundled object and compares the
             marker table against the stored reference file. -->
        <test>
            <param name="input" ftype="rdata" value="out_runtsne.rds"/>
            <!-- ftype must match the datatype declared for the "output" dataset (csv);
                 expecting txt here would fail the datatype check. -->
            <output name="output" ftype="csv" value="out_findmark.txt"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

This tool finds markers (differentially expressed genes) for each of the identity classes in a dataset.
It outputs a text file containing a ranked list of putative markers and associated statistics (p-values, ROC score, etc.).

P-value adjustment is performed using Bonferroni correction based on the total
number of genes in the dataset. Other correction methods are not recommended,
as Seurat pre-filters genes using the filtering parameters of this tool, reducing
the number of tests performed. Lastly, as Aaron Lun has pointed out, p-values should be
interpreted cautiously, as the genes used for clustering are the same genes
tested for differential expression.

@SEURAT_INTRO@

-----

**Inputs**

    * RDS object

-----

**Outputs**

    * Text file

.. _Seurat: https://www.nature.com/articles/nbt.4096
.. _Satija Lab: https://satijalab.org/seurat/

@VERSION_HISTORY@
]]></help>
      <!-- Citation entries are supplied by the "citations" macro, which is not defined in this file. -->
      <expand macro="citations" />
</tool>