view seacr.xml @ 0:c8ff96b9fb97 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seacr commit 77e83ae050cdc9cbb8099a31375d1dbc004e15e8"
author iuc
date Wed, 12 Feb 2020 11:27:36 -0500
parents
children 66b7657f0bd6
line wrap: on
line source

<?xml version="1.0"?>
<tool id="chromhmm_seacr" name="SEACR " version="@TOOL_VERSION@+@WRAPPER_VERSION@">
    <description>for sparse enrichment analysis</description>
    <!-- Version tokens; each token is substituted wherever its @NAME@ placeholder appears in this file. -->
    <macros>
        <!-- SEACR release; referenced by the tool version string, the seacr package requirement and the script name in the command. -->
        <token name="@TOOL_VERSION@">1.3</token>
        <!-- Wrapper revision; appended after '+' in the tool version string. -->
        <token name="@WRAPPER_VERSION@">galaxy0</token>
    </macros>
    <!-- Packages that have to be present in the job environment. -->
    <requirements>
        <!-- SEACR itself, tied to the version token so it matches the script name used in the command. -->
        <requirement type="package" version="@TOOL_VERSION@">seacr</requirement>
        <!-- NOTE(review): bedtools is pinned explicitly; presumably it is called by the SEACR script, confirm before changing the pin. -->
        <requirement type="package" version="2.29.2">bedtools</requirement>
    </requirements>
    <!--
        Runs the SEACR shell script with five positional arguments:
          1. target bedgraph dataset
          2. control bedgraph dataset or numeric threshold (chosen in control_cond)
          3. normalization switch (norm / non, from the normalize boolean)
          4. mode (relaxed / stringent)
          5. output prefix
        The prefix 'results' must stay in sync with the from_work_dir values
        declared in the outputs section.
        Job failure is derived from the exit code via detect_errors, so no
        separate (empty) stdio element is declared.
    -->
    <command detect_errors="exit_code"><![CDATA[
        SEACR_@TOOL_VERSION@.sh
            '$bedgraph'
            #if $control_cond.control_sel == 'f'
                '$control_cond.f'
            #else if $control_cond.control_sel == 't'
                '$control_cond.t'
            #end if
            '$normalize'
            '$mode'
            'results'
        ]]></command>
    <inputs>
        <!-- Target signal track; passed as the first argument of the command. -->
        <param argument="bedgraph"
               type="data"
               format="bedgraph"
               label="Select target bedgraph file"
               help="Target data bedgraph file in UCSC bedgraph format that omits regions containing 0 signal."/>

        <!-- Second argument of the command: either a control track (f) or a numeric cutoff (t). -->
        <conditional name="control_cond">
            <param name="control_sel"
                   type="select"
                   label="Select control type">
                <option value="f">Control bedgraph</option>
                <option value="t">Threshold</option>
            </param>
            <when value="f">
                <param argument="f"
                       type="data"
                       format="bedgraph"
                       label="Select control bedgraph file"
                       help="Control (IgG) data bedgraph file to generate an empirical threshold for peak calling."/>
            </when>
            <when value="t">
                <!-- The default is left empty; the accepted range is limited to 0.0 - 1.0. -->
                <param argument="t"
                       type="float"
                       value=""
                       min="0.0"
                       max="1.0"
                       label="Set threshold"
                       help="A numeric threshold n between 0 and 1 returns the top n fraction of peaks based on total signal within peaks."/>
            </when>
        </conditional>

        <!-- Rendered in the command as the literal string norm (checked) or non (unchecked). -->
        <param name="normalize"
               type="boolean"
               truevalue="norm"
               falsevalue="non"
               checked="true"
               label="Normalize control to target data"
               help="'norm' denotes normalization of control to target data, 'non' skips this behavior. 'norm' is recommended unless experimental and control data are already rigorously normalized to each other (e.g. via spike-in)."/>

        <!-- The selected value is passed to the command and also decides which output dataset is created. -->
        <param name="mode"
               type="select"
               label="Select mode"
               help="'relaxed' uses a total signal threshold between the knee and peak of the total signal curve, and corresponds to the 'relaxed' mode described in the text, whereas 'stringent' uses the peak of the curve, and corresponds to 'stringent' mode.">
            <option value="relaxed">relaxed</option>
            <option value="stringent">stringent</option>
        </param>
    </inputs>
    <outputs>
        <!--
            Each dataset is collected from the working directory under the
            'results' prefix given to the command. The filters are mutually
            exclusive, so a job yields exactly one of the two datasets,
            depending on the mode parameter.
        -->
        <data name="out_s"
              format="tabular"
              from_work_dir="results.stringent.bed"
              label="${tool.name} on ${on_string}: stringent">
            <filter>mode == 'stringent'</filter>
        </data>
        <data name="out_r"
              format="tabular"
              from_work_dir="results.relaxed.bed"
              label="${tool.name} on ${on_string}: relaxed">
            <filter>mode == 'relaxed'</filter>
        </data>
    </outputs>
    <!--
        Functional tests. Both cases use the numeric threshold branch of
        control_cond. The boolean parameter normalize is set with true/false
        rather than with its truevalue/falsevalue strings (norm/non), since
        using the latter in test cases is deprecated.
    -->
    <tests>
        <!-- #1: threshold 0.2, normalization on, stringent mode -> only out_s is produced -->
        <test expect_num_outputs="1">
            <param name="bedgraph" value="test.bedgraph"/>
            <conditional name="control_cond">
                <param name="control_sel" value="t"/>
                <param name="t" value="0.2"/>
            </conditional>
            <param name="normalize" value="true"/>
            <param name="mode" value="stringent"/>
            <output name="out_s">
                <assert_contents>
                    <has_n_lines n="103"/>
                    <has_line line="I&#009;3463&#009;3717&#009;609&#009;4&#009;I:3557-3599"/>
                    <has_line line="I&#009;225524&#009;225978&#009;1100&#009;5&#009;I:225655-225672"/>
                </assert_contents>
            </output>
        </test>
        <!-- #2: threshold 0.1, normalization off, relaxed mode -> only out_r is produced -->
        <test expect_num_outputs="1">
            <param name="bedgraph" value="test.bedgraph"/>
            <conditional name="control_cond">
                <param name="control_sel" value="t"/>
                <param name="t" value="0.1"/>
            </conditional>
            <param name="normalize" value="false"/>
            <param name="mode" value="relaxed"/>
            <output name="out_r">
                <assert_contents>
                    <has_n_lines n="483"/>
                    <has_line line="I&#009;507&#009;665&#009;200&#009;2&#009;I:565-607"/>
                    <has_line line="I&#009;226156&#009;226330&#009;199&#009;2&#009;I:226231-226256"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

SEACR (Sparse Enrichment Analysis for CUT&RUN) is intended to call peaks and enriched regions from sparse CUT&RUN or chromatin profiling data in which background is dominated by "zeroes" (i.e. regions with no read coverage).

CUT&RUN is an efficient epigenome profiling method that identifies sites of DNA binding protein enrichment genome-wide with high signal to noise and low sequencing requirements. Currently, the analysis of CUT&RUN data is complicated by its exceptionally low background, which renders programs designed for analysis of ChIP-seq data vulnerable to oversensitivity in identifying sites of protein binding.

SEACR is a highly selective peak caller that definitively validates the accuracy of CUT&RUN for datasets with known true negatives. SEACR uses the global distribution of background signal to calibrate a simple threshold for peak calling. SEACR discriminates between true and false-positive peaks with near-perfect specificity from gold standard CUT&RUN datasets and efficiently identifies enriched regions for several different protein targets.

**Input**

SEACR requires files in UCSC bedgraph format from paired-end sequencing as input, which can be generated from read pair BED files.

**Output**

Results are stored in BED files with the following format.

::

    <chr>   <start> <end>   <total signal>  <max signal>    <max signal region>

with

- <chr> Chromosome
- <start> Start coordinate
- <end> End coordinate
- <total signal> Total signal contained within denoted coordinates
- <max signal> Maximum bedgraph signal attained at any base pair within denoted coordinates
- <max signal region> Region representing the farthest upstream and farthest downstream bases within the denoted coordinates that are represented by the maximum bedgraph signal

.. class:: infomark

**References**

More information is available on `GitHub <https://github.com/FredHutch/SEACR>`_. A web interface can be found `here <https://seacr.fredhutch.org>`_.
    ]]></help>
    <!-- Publication offered to users as the citation for this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1186/s13072-019-0287-4</citation>
    </citations>
</tool>