view hal_halCoverage.xml @ 1:0f5bcf7e18a7 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/haltools commit f46096b336bf460424143fc9d633b5e7c20bf1cf
author iuc
date Thu, 05 Mar 2026 11:31:35 +0000
parents 2ead05695f9f
children
line wrap: on
line source

<tool id="hal_halcoverage" name="halCoverage" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>calculates coverage by sampling bases</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <!--
        Galaxy wrapper for halCoverage: runs the program on a HAL file and a
        reference genome name and stores the comma-separated report as a
        tab-separated dataset.

        NOTE(review): the field separator written to the output changed from
        "tab + space" to a plain tab. The tool version suffix (presumably
        defined in macros.xml) should be bumped together with this change.

        NOTE(review): detect_errors on the command element may take precedence
        over the error handling expanded from the "stdio" macro; confirm against
        macros.xml which of the two is intended.
    -->
    <command detect_errors="aggressive"><![CDATA[
        ## pipefail makes the pipeline report halCoverage's exit status when it fails,
        ## instead of only the status of the trailing sed.
        set -o pipefail;
        halCoverage
            --numSamples $numSamples
            --seed $seed
            $bySequence
            '$input_hal' '$refGenome'
        ## halCoverage separates fields with a comma followed by a space. Replace the whole
        ## separator (comma plus any following spaces) with a single tab so that no cell
        ## starts with a blank. Output is mostly numerical, and genome names contain no
        ## commas, as this would invalidate the HAL Newick tree.
        | sed 's/, */\t/g' > '$out_file'
    ]]></command>
    <inputs>
        <expand macro="input_hal"/>
        <expand macro="params_refGenome"/>
        <expand macro="params_numSamples"/>
        <expand macro="params_seed"/>
        <!-- Emits the flag only when checked; an unchecked box contributes nothing to the command line. -->
        <param argument="--bySequence" type="boolean" truevalue="--bySequence" falsevalue="" checked="false" label="Coverage breakdown by sequence" help="Provide coverage breakdown by sequence in reference genome"/>
    </inputs>
    <outputs>
        <data name="out_file" format="tabular" label="${tool.name} on ${on_string}: ${refGenome}"/>
    </outputs>
    <tests>
        <!-- Test 1: numSamples left at its default, fixed seed. -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <param name="refGenome" value="Genome_1"/>
            <param name="seed" value="100"/>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="Genome&#009;sitesCovered1Times&#009;sitesCovered2Times&#009;sitesCovered3Times&#009;sitesCovered4Times&#009;sitesCovered5Times"/>
                    <has_line line="Genome_2&#009;856475&#009;482343&#009;482343&#009;214021&#009;0"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: explicit numSamples of 1000, fixed seed. -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <param name="refGenome" value="Genome_1"/>
            <param name="numSamples" value="1000"/>
            <param name="seed" value="100"/>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="Genome&#009;sitesCovered1Times&#009;sitesCovered2Times&#009;sitesCovered3Times&#009;sitesCovered4Times&#009;sitesCovered5Times"/>
                    <has_line line="Genome_2&#009;839&#009;500&#009;500&#009;233&#009;0"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: per-sequence breakdown enabled; the report gains "Coverage on ..." sections. -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <param name="refGenome" value="Genome_1"/>
            <param name="bySequence" value="true"/>
            <param name="seed" value="100"/>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="Genome&#009;sitesCovered1Times&#009;sitesCovered2Times&#009;sitesCovered3Times&#009;sitesCovered4Times&#009;sitesCovered5Times"/>
                    <has_line line="Genome_2&#009;856475&#009;482343&#009;482343&#009;214021&#009;0"/>
                    <has_line line="Coverage on Genome_1_seq"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
halCoverage estimates how well a reference genome is represented in a HAL alignment by randomly sampling bases.
It takes a HAL file and a reference genome name as input and outputs a tab-separated table summarizing the result.
A random seed can be adjusted for reproducibility.

Use it for assessing alignment completeness and identifying underrepresented regions in a genome alignment.

-----

.. class:: warningmark

Running the tool on a HAL file in mmap format may fail or run indefinitely if 'Coverage breakdown by sequence' is enabled, whereas input in HDF5 format runs successfully. It is recommended to convert the input to HDF5 format first using halExtract.
    ]]></help>
    <expand macro="citation"/>
    <expand macro="creator"/>
</tool>