view umi-tools_counts.xml @ 14:43bcc103b1e9 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit 11a7415c7f8a44a3f990080533c1de43a41d1e2e
author iuc
date Fri, 28 Feb 2025 20:40:52 +0000
parents 71ad4a56c40c
children
line wrap: on
line source

<tool id="umi_tools_count" name="UMI-tools count" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>performs quantification of UMIs from BAM files</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <!--
        Cheetah command template, executed in three steps:
          1. @LINK_SAM_BAM_INPUT@ (token imported from macros.xml) prepares the
             SAM/BAM input. NOTE(review): it is presumably what defines
             $input_file, which is passed to -I below; confirm in macros.xml.
          2. "umi_tools count" writes its table to $out_counts.
          3. Unless the "prepender" select is "none", sed rewrites line 1 of
             $out_counts in place, prefixing every token made solely of
             A/C/G/T with "<label>_".
    -->
    <command detect_errors="exit_code"><![CDATA[
        #import re
        @LINK_SAM_BAM_INPUT@

        umi_tools count
            ## Boolean flag: deliberately unquoted so that the unchecked state expands
            ## to nothing instead of an empty positional argument.
            $wide_format_cell_counts
            @BARCODE_OPTIONS@
            @UMI_GROUPING_OPTIONS@
            @SC_OPTIONS@
            @SAMBAM_OPTIONS@
            @ADVANCED_OPTIONS@
            -I '$input_file' -S '$out_counts'
            @LOG@
        #if str($cond_extra.prepender) != "none":
            #if str($cond_extra.prepender) == "string":
                #set $replacer = str($cond_extra.custom_label)
            #else
                ## Dataset name: drop everything after the last dot, then collapse each
                ## run of non-word characters into a single underscore.
                #set $replacer = re.sub(r'\W+', '_', str($input.element_identifier.rsplit('.',1)[0]))
            #end if
            ## Header line only: every token consisting purely of A/C/G/T gets the label prefix.
            && sed -i -r '1s|\b([ACGT]+)\b|'"$replacer"'_\1|g' '$out_counts'
        #end if
    ]]></command>
    <inputs>
        <!-- Alignment file to quantify; SAM and BAM are both accepted -->
        <param name="input" type="data" format="sam,bam" label="Reads to count in SAM or BAM format" help="Please use the samtools sort tool to ensure a correct BAM input" />
        <!-- Checked by default: adds the wide-format flag to the umi_tools count call -->
        <param name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="true" label="Output a matrix of genes and cells, instead of a flat file" />
        <expand macro="barcode_options_macro"/>
        <expand macro="umi_grouping_options_macro"/>
        <expand macro="sambam_options_macro"/>
        <expand macro="sc_options_macro"/>
        <expand macro="advanced_options_macro"/>
        <!--
            Optional post-processing of the header line of the count table.
            The command section prefixes header tokens made solely of A/C/G/T
            with either the custom label or a cleaned-up dataset name.
        -->
        <conditional name="cond_extra">
            <param name="prepender" type="select" label="Prepend a label to all column headers" help="This preserves uniqueness when merging with other files with the same headers. Only header entries consisting solely of A, C, G and T (e.g. cell barcodes) are prefixed. With 'Dataset Name', the text after the last '.' is dropped and any remaining non-word characters are replaced by '_'.">
                <option value="none" selected="true">No modifications</option>
                <option value="string">Custom Label</option>
                <option value="dataset name">Dataset Name</option>
            </param>
            <when value="none"/>
            <when value="dataset name"/>
            <when value="string">
                <param name="custom_label" type="text" label="Label to Prepend">
                    <!-- Characters other than letters, digits, '-', '_' and '.' are removed (invalid_char is empty) -->
                    <sanitizer invalid_char="">
                        <valid initial="string.letters,string.digits">
                            <add value="-"/>
                            <add value="_"/>
                            <add value="."/>
                        </valid>
                    </sanitizer>
                </param>
            </when>
        </conditional>
        <expand macro="log_input_macro"/>
    </inputs>
    <outputs>
        <!-- Tabular count table; the command section passes it to umi_tools count as the -S target -->
        <data format="tabular" name="out_counts"/>
        <!-- Further output(s) contributed by the log macro from macros.xml -->
        <expand macro="log_output_macro"/>
    </outputs>
    <tests>
        <!-- count_single_gene_tag: BAM input, flat output, not per cell -->
        <test expect_num_outputs="1">
            <param name="input" value="chr19_gene_tags.bam"/>
            <param name="wide_format_cell_counts" value="false"/>
            <conditional name="bc">
                <param name="extract_umi_method" value="umis"/>
            </conditional>
            <section name="umi">
                <param name="method" value="directional"/>
            </section>
            <section name="sc">
                <param name="gene_tag" value="XF"/>
                <param name="skip_tags_regex" value="^[__|Unassigned]"/>
                <param name="per_cell" value="false"/>
            </section>
            <section name="advanced">
                <param name="random_seed" value="123456789"/>
            </section>
            <output name="out_counts" value="count_single_gene_tag.tsv"/>
        </test>

        <!-- count_single_gene_tag again, this time with SAM input -->
        <test expect_num_outputs="1">
            <param name="input" value="chr19_gene_tags.sam"/>
            <param name="wide_format_cell_counts" value="false"/>
            <conditional name="bc">
                <param name="extract_umi_method" value="umis"/>
            </conditional>
            <section name="umi">
                <param name="method" value="directional"/>
            </section>
            <section name="sc">
                <param name="gene_tag" value="XF"/>
                <param name="skip_tags_regex" value="^[__|Unassigned]"/>
                <param name="per_cell" value="false"/>
            </section>
            <section name="advanced">
                <param name="random_seed" value="123456789"/>
            </section>
            <output name="out_counts" value="count_single_gene_tag.tsv"/>
        </test>

        <!-- count_single_cells_gene_tag: per cell, flat output -->
        <test expect_num_outputs="1">
            <param name="input" value="chr19_gene_tags.bam"/>
            <param name="wide_format_cell_counts" value="false"/>
            <conditional name="bc">
                <param name="extract_umi_method" value="umis"/>
            </conditional>
            <section name="umi">
                <param name="method" value="directional"/>
            </section>
            <section name="sc">
                <param name="gene_tag" value="XF"/>
                <param name="skip_tags_regex" value="^[__|Unassigned]"/>
                <param name="per_cell" value="true"/>
            </section>
            <section name="advanced">
                <param name="random_seed" value="123456789"/>
            </section>
            <output name="out_counts" value="count_single_cells_gene_tag.tsv"/>
        </test>

        <!-- count_single_cells_wide_gene_tag: per cell, wide (matrix) output -->
        <test expect_num_outputs="1">
            <param name="input" value="chr19_gene_tags.bam"/>
            <param name="wide_format_cell_counts" value="true"/>
            <conditional name="bc">
                <param name="extract_umi_method" value="umis"/>
            </conditional>
            <section name="umi">
                <param name="method" value="directional"/>
            </section>
            <section name="sc">
                <param name="gene_tag" value="XF"/>
                <param name="skip_tags_regex" value="^[__|Unassigned]"/>
                <param name="per_cell" value="true"/>
            </section>
            <section name="advanced">
                <param name="random_seed" value="123456789"/>
            </section>
            <output name="out_counts" value="count_single_cells_gene_tag_wide.tsv"/>
        </test>

        <!-- count ENSDARG00000019692, with defaults -->
        <test expect_num_outputs="1">
            <param name="input" value="fc.ENSDARG00000019692.bam"/>
            <section name="umi">
                <param name="method" value="unique"/>
            </section>
            <section name="sc">
                <param name="gene_tag" value="XT"/>
                <param name="per_cell" value="true"/>
            </section>
            <section name="advanced">
                <param name="random_seed" value="0"/>
            </section>
            <output name="out_counts" value="fc.ENSDARG00000019692.counts"/>
        </test>

        <!-- count ENSDARG00000019692, relabel string -->
        <test expect_num_outputs="1">
            <param name="input" value="fc.ENSDARG00000019692.bam"/>
            <section name="umi">
                <param name="method" value="unique"/>
            </section>
            <section name="sc">
                <param name="gene_tag" value="XT"/>
                <param name="per_cell" value="true"/>
            </section>
            <section name="advanced">
                <param name="random_seed" value="0"/>
            </section>
            <conditional name="cond_extra">
                <param name="prepender" value="string"/>
                <param name="custom_label" value="test"/>
            </conditional>
            <output name="out_counts" value="fc.ENSDARG00000019692.counts.test"/>
        </test>

        <!-- count ENSDARG00000019692, relabel filename -->
        <test expect_num_outputs="1">
            <param name="input" value="fc.ENSDARG00000019692.bam"/>
            <section name="umi">
                <param name="method" value="unique"/>
            </section>
            <section name="sc">
                <param name="gene_tag" value="XT"/>
                <param name="per_cell" value="true"/>
            </section>
            <section name="advanced">
                <param name="random_seed" value="0"/>
            </section>
            <conditional name="cond_extra">
                <param name="prepender" value="dataset name"/>
            </conditional>
            <output name="out_counts" value="fc.ENSDARG00000019692.counts.name"/>
        </test>
    </tests>
    <help><![CDATA[

count - Count reads per gene from BAM using UMIs and mapping coordinates
========================================================================

This tool is only designed to work with library preparation
methods where the fragmentation occurs after amplification, as per
most single cell RNA-Seq methods (e.g. 10x, inDrop, Drop-seq, SCRB-seq
and CEL-seq2). Since the precise mapping co-ordinate is no longer
informative for such library preparations, it is simplified to the
gene. This is a reasonable approach providing the number of available
UMIs is sufficiently high and the sequencing depth is sufficiently low
that the probability of two reads from the same gene having the same
UMIs is acceptably low.

If you want to count reads per gene for library preparations which
fragment prior to amplification (e.g. bulk RNA-Seq), please use
``umi_tools dedup`` to remove the duplicate reads as this will use the
full information from the mapping co-ordinate. Then use a read
counting tool such as FeatureCounts or HTSeq to count the reads per
gene.

In the rare case of bulk RNA-Seq using a library preparation method
with fragmentation after amplification, one can still use ``count`` but
note that it has not been tested on bulk RNA-Seq.

This tool deviates from group and dedup in that the ``--per-gene`` option
is hardcoded on.

@BARCODE_HELP@

@UMI_GROUPING_HELP@

    ]]></help>
    <expand macro="citations" />
</tool>