<!--
view hal_halCoverage.xml @ 2:6ee278f53917 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/haltools commit 30ac0af6dfa729277011bcd73cd5560efa03fb97
author iuc
date Wed, 18 Mar 2026 15:17:13 +0000
parents 2ead05695f9f
children
line wrap: on
line source
-->

<tool id="hal_halcoverage" name="halCoverage" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>calculates coverage by sampling bases</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/> 
    <expand macro="stdio"/>
    <!--
        Command template (Cheetah). Runs halCoverage on the input HAL file for the
        selected reference genome and pipes its output through sed, which rewrites
        every comma (together with any spaces that follow it) as a single tab
        before the result is redirected into the out_file dataset.
        pipefail is switched on so that a failing halCoverage is not hidden by the
        exit status of sed at the end of the pipeline.
        Placeholders used: numSamples, seed, bySequence, input_hal, refGenome and
        out_file. bySequence expands either to the flag itself or to an empty
        string, as set by its truevalue/falsevalue in the inputs section.
        NOTE(review): pipefail and the $'...' quoting presumably require the job
        script to be run by bash; confirm for the target deployment.
    -->
    <command detect_errors="aggressive"><![CDATA[
        set -o pipefail; ## Sets the pipeline’s exit code to halCoverage’s on failure.
        halCoverage
            --numSamples $numSamples
            --seed $seed
            $bySequence
            ## Pipes output to replace commas with tabs. Output is mostly numerical, and Genome names contain no commas, as this would invalidate the HAL Newick tree.
            '$input_hal' '$refGenome' | sed $'s/, */\t/g' > '$out_file' 
    ]]></command>
    <!-- Tool parameters: four macro expansions followed by one inline boolean switch. -->
    <inputs>
        <expand macro="input_hal"/>
        <expand macro="params_refGenome"/>
        <expand macro="params_numSamples"/>
        <expand macro="params_seed"/>
        <!-- When ticked, the flag text is inserted into the command line; otherwise nothing is added. -->
        <param argument="--bySequence"
               type="boolean"
               truevalue="--bySequence"
               falsevalue=""
               checked="false"
               label="Coverage breakdown by sequence"
               help="Provide coverage breakdown by sequence in reference genome"/>
    </inputs>
    <!-- Single tabular dataset; its history label ends with the chosen reference genome. -->
    <outputs>
        <data name="out_file"
              format="tabular"
              label="${tool.name} on ${on_string}: ${refGenome}"/>
    </outputs>
    <!--
        Functional tests. All three run on halTest.hal with reference genome
        Genome_1 and seed 100, and assert on the tab-separated output text
        (the numeric character reference 009 used in the expected lines is a tab).
    -->
    <tests>
        <!-- Test 1: numSamples left unset; expects the header row, the Genome_2 row and 4 lines in total. -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <param name="refGenome" value="Genome_1"/>
            <param name="seed" value="100"/>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="Genome&#009;sitesCovered1Times&#009;sitesCovered2Times&#009;sitesCovered3Times&#009;sitesCovered4Times&#009;sitesCovered5Times"/>
                    <has_line line="Genome_2&#009;856475&#009;482343&#009;482343&#009;214021&#009;0"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: numSamples set to 1000; same header and line count, with a different expected Genome_2 row. -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <param name="refGenome" value="Genome_1"/>
            <param name="numSamples" value="1000"/>
            <param name="seed" value="100"/>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="Genome&#009;sitesCovered1Times&#009;sitesCovered2Times&#009;sitesCovered3Times&#009;sitesCovered4Times&#009;sitesCovered5Times"/>
                    <has_line line="Genome_2&#009;839&#009;500&#009;500&#009;233&#009;0"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: bySequence enabled; same header and Genome_2 row as test 1, plus a "Coverage on Genome_1_seq" line, 9 lines in total. -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <param name="refGenome" value="Genome_1"/>
            <param name="bySequence" value="true"/>
            <param name="seed" value="100"/>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="Genome&#009;sitesCovered1Times&#009;sitesCovered2Times&#009;sitesCovered3Times&#009;sitesCovered4Times&#009;sitesCovered5Times"/>
                    <has_line line="Genome_2&#009;856475&#009;482343&#009;482343&#009;214021&#009;0"/>
                    <has_line line="Coverage on Genome_1_seq"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
halCoverage estimates how well a reference genome is represented in a HAL alignment by randomly sampling bases. 
It takes a HAL file and a reference genome name as input and outputs a table summarizing the result. 
A random seed can be adjusted for reproducibility.

Use it for assessing alignment completeness and identifying underrepresented regions in a genome alignment.

-----

.. class:: warningmark

Running the tool on a HAL file in mmap format may fail or run indefinitely if 'Coverage breakdown by sequence' is enabled, whereas files in HDF5 format are processed successfully. It is recommended to convert the input to HDF5 format first using halExtract.
    ]]></help>
    <expand macro="citation"/>
    <expand macro="creator"/>
</tool>