view sinto_fragments.xml @ 2:29e6bfb2cf49 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sinto commit e61735c77351c0e1886b16f04687f384b8f4d2c8
author iuc
date Wed, 19 Apr 2023 13:58:26 +0000
parents 02f96be6c3c9
children
line wrap: on
line source

<tool id="sinto_fragments" name="Sinto fragments" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <!-- Galaxy wrapper around the "sinto fragments" subcommand. -->
    <description>Create an ATAC-seq fragment file from a BAM file</description>
    <macros>
        <!-- NOTE(review): @TOOL_VERSION@ and @VERSION_SUFFIX@ are presumably tokens defined in macros.xml; confirm there. -->
        <import>macros.xml</import>
    </macros>
    <requirements>
        <!-- The package version is tied to the same token used in the tool version above. -->
        <requirement type="package" version="@TOOL_VERSION@">sinto</requirement>
    </requirements>
    <version_command>sinto --version</version_command>
    <command><![CDATA[
    ## Link the BAM and its index under fixed names in the working directory
    ## (presumably so the index can be found via the conventional .bai suffix).
    ln -s '$bam' 'input.bam' &&
    ln -s '$bam.metadata.bam_index' 'input.bam.bai' &&
    sinto fragments
    --bam 'input.bam'
    --min_mapq $min_mapq
    --barcodetag '$barcodetag'
    ## sinto reads the barcode from the read name whenever a regex is supplied and
    ## then ignores the read tag. Only forward a non-empty regex, so that clearing
    ## the field makes --barcodetag effective instead of passing an empty pattern.
    #if str($barcode_regex).strip()
    --barcode_regex '${ str( $barcode_regex ) }'
    #end if
    --max_distance $max_distance
    --min_distance $min_distance
    --shift_plus $shift_plus
    --shift_minus $shift_minus
    $collapse_within
    --fragments '$fragments'
    --nproc "\${GALAXY_SLOTS:-1}"
]]>    </command>
    <inputs>
        <param argument="--bam" type="data" format="bam" label="Input BAM file" />
        <param argument="--min_mapq" type="integer" value="30" min="0" label="Minimum MAPQ required to retain fragment" />
        <param argument="--barcodetag" type="text" value="CB" label="Read tag storing cell barcode information"
            help="Not used by sinto when a barcode regular expression is supplied, because the barcode is then taken from the read name." />
        <param argument="--barcode_regex" type="text" value="[^:]*" label="Regular expression used to extract cell barcode from read name"
            help="[^:]* matches all characters up to the first colon. When a regular expression is given, the barcode is read from the read name rather than from the read tag.">
            <!-- Single quotes are excluded from the allowed characters because the command wraps this value in single quotes. -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="&apos;" />
                </valid>
            </sanitizer>
        </param>
        <param argument="--max_distance" type="integer" value="5000" min="0" label="Maximum distance between integration sites for the fragment to be retained"
            help="Allows filtering of implausible fragments that likely result from incorrect mapping positions." />
        <param argument="--min_distance" type="integer" value="10" min="0" label="Minimum distance between integration sites for the fragment to be retained"
            help="Allows filtering of implausible fragments that likely result from incorrect mapping positions." />
        <param argument="--shift_plus" type="integer" value="4" label="Number of bases to shift Tn5 insertion position by on the forward strand" />
        <param argument="--shift_minus" type="integer" value="-5" label="Number of bases to shift Tn5 insertion position by on the reverse strand" />
        <!-- Attribute values are kept on one line: line breaks inside an attribute become runs of spaces in the displayed text. -->
        <param argument="--collapse_within" type="boolean" truevalue="--collapse_within" falsevalue="" checked="false"
            label="Take cell barcode into account when collapsing duplicate fragments"
            help="Setting this flag means that fragments with the same coordinates can be identified provided they originate from a different cell barcode." />
    </inputs>
    <outputs>
        <!-- Single BED dataset; the command passes its path to sinto as the fragments file. -->
        <data name="fragments" format="bed" label="${tool.name} on ${on_string}: fragments BED" />
    </outputs>
    <tests>
        <!-- Test 1: only the BAM is supplied, so every other parameter uses its declared default.
             Two expected fragment lines are matched in the output. -->
        <test expect_num_outputs="1">
            <param name="bam" ftype="bam" value="aligned.bam" />
            <output name="fragments">
                <assert_contents>
                    <has_text_matching expression="chrY\t2790323\t2790683\tAACTGGTAGCTCCGGT\t2" />
                    <has_text_matching expression="chrY\t3680091\t3680228\tGACCGACGTACGGTTT\t2" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: same BAM with shift_minus set to 5 instead of the default of minus 5 (shift_plus stays at 4).
             The expected end coordinates are 10 bases larger than in test 1; the start coordinates are unchanged. -->
        <test expect_num_outputs="1">
            <param name="bam" ftype="bam" value="aligned.bam" />
            <param name="shift_plus" value="4" />
            <param name="shift_minus" value="5" />
            <output name="fragments">
                <assert_contents>
                    <has_text_matching expression="chrY\t2790323\t2790693\tAACTGGTAGCTCCGGT\t2" />
                    <has_text_matching expression="chrY\t3680091\t3680238\tGACCGACGTACGGTTT\t2" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

Sinto: single-cell analysis tools
--------------------------------------------------------------------------------------------------------------

An ATAC-seq fragment file can be created from a BAM file using the fragments command. The fragment file contains the position of each
Tn5 integration site, the cell barcode associated with the fragment, and the number of times the fragment was sequenced. PCR duplicates are collapsed.

**Input**

A BAM file. Alignments in the input BAM file must contain cell barcodes either as a part of read names or in a tag (typically the CB tag).

**Output**

The fragment file. It contains the positions of Tn5 integration sites, the cell barcode that the DNA fragment originated from, and the number of times
the fragment was sequenced.

    ]]>    </help>
    <!-- NOTE(review): expands the "citations" macro, presumably defined in the imported macros.xml; confirm there. -->
    <expand macro="citations" />
</tool>