view biscot.xml @ 0:ef4b5ce71745 draft default tip

planemo upload for repository https://github.com/bgruening/iuc/tree/master/tools/biscot commit cce7b9b092030d3bb23a706ca3bf2dece9c20b5f
author iuc
date Fri, 06 Jan 2023 22:10:44 +0000
parents
children
line wrap: on
line source

<tool id="biscot" name="BiSCoT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
    <description>Bionano scaffolding correction</description>
    <!-- Import the shared macro file. The "requirements" macro expanded below must be
         defined there; the version tokens used on the tool element are presumably
         defined there as well (TODO confirm against macros.xml). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Build the biscot command line.
        ## Every dataset path is single-quoted so that paths are passed as one argument.
        biscot
        --cmap-ref '${cmap_ref}'
        --cmap-1 '${cmap_1}'
        --xmap-1 '${xmap_1}'
        --key '${key}'
        --contigs '${contigs}'
        ## Optional secondary map: both files live in the "secondary_map" section and are
        ## tested independently, so an unset dataset is never rendered as the string 'None'.
        #if $secondary_map.cmap_2
            --cmap-2 '${secondary_map.cmap_2}'
        #end if
        #if $secondary_map.xmap_2
            --xmap-2 '${secondary_map.xmap_2}'
        #end if
        ## Optional XMAP holding the mappings of both enzymes.
        #if $xmap_2enz
            --xmap-2enz '${xmap_2enz}'
        #end if
        ## $aggressive # disabled due to dependency conflicts
        ## Boolean switch: expands to the flag itself or to an empty string.
        $only_confirmed_pos
    ]]></command>
    <inputs>
        <!-- Mandatory inputs: reference and query CMAP files, their XMAP alignment,
             the scaffolded contigs and the Bionano key file. -->
        <param argument="--cmap-ref" type="data" format="cmap" label="Reference CMAP file" help="This file describes the positions of enzymatic labelling sites on the reference genome" />
        <param argument="--cmap-1" type="data" format="cmap" label="Query CMAP file" help="This file describes the positions of enzymatic labelling sites on the contigs" />
        <param argument="--xmap-1" type="data" format="xml,txt" label="XMAP alignment file" help="This file describes the alignments of contig labels on the anchor" />
        <param argument="--contigs" type="data" format="fasta" label="Contigs file" help="Contigs file (FASTA) that was scaffolded by the Bionano scaffold" />
        <param argument="--key" type="data" format="tabular" label="Bionano key file" help="Bionano key file giving the correspondence between maps and contigs" />
        <!-- Optional second CMAP/XMAP pair; both parameters may be left empty. -->
        <section name="secondary_map" title="Secondary map">
            <param argument="--cmap-2" type="data" format="cmap" optional="true" label="Query CMAP file" help="This file describes the positions of enzymatic labelling sites on the contigs" />
            <param argument="--xmap-2" type="data" format="xml,txt" optional="true" label="XMAP alignment file" help="This file describes the alignments of contig labels on the anchor" />
        </section>
        <!-- Help texts are kept on a single line so that source indentation does not end up inside the attribute value. -->
        <param argument="--xmap-2enz" type="data" format="xml,txt" optional="true" label="Two-enzyme XMAP file" help="This argument is used to provide the final XMAP file containing the mappings of labels of both enzymes. This argument is useful (and recommended) to ensure that no mapping has been missed inside one of the individual XMAP files" />
        <!-- The command template currently has this switch commented out, so the help text says that it has no effect. -->
        <param argument="--aggressive" type="boolean" truevalue="--aggressive" falsevalue="" checked="false" label="Enable BLAT phase" help="Enables the sequence similarity scaffolding. In a first phase, BiSCoT will search similarities between contigs based on label mappings. If this parameter is set, BiSCoT will search for sequence similarity to close gaps created by the first step. NOTE: this option is currently disabled in this wrapper due to dependency conflicts, so selecting it has no effect" />
        <param argument="--only-confirmed-pos" type="boolean" truevalue="--only-confirmed-pos" falsevalue="" checked="false" label="Only confirmed positions" help="To be retained, an alignment position in --xmap-1 or --xmap-2 has to be present in --xmap-2enz" />
        <!-- Wrapper-only switch: it is not passed to biscot, it only controls whether the log output is collected. -->
        <param name="log_file" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate log file" help="Log file can be helpful for debugging purposes" />
    </inputs>
    <outputs>
        <!-- Run log; only collected when the "log_file" switch is enabled. -->
        <data name="log"
              format="txt"
              from_work_dir="biscot/biscot.log"
              label="${tool.name} on ${on_string}: log file">
            <filter>log_file</filter>
        </data>
        <!-- Scaffold sequences picked up from the biscot working directory. -->
        <data name="fasta"
              format="fasta"
              from_work_dir="biscot/scaffolds.fasta"
              label="${tool.name} on ${on_string}: scaffolds (FASTA)"/>
        <!-- Scaffold description in AGP format picked up from the biscot working directory. -->
        <data name="agp"
              format="agp"
              from_work_dir="biscot/scaffolds.agp"
              label="${tool.name} on ${on_string}: scaffolds (AGP)"/>
    </outputs>
    <tests>
        <!-- Test 1: mandatory inputs only, with the log output requested (three outputs expected). -->
        <test expect_num_outputs="3">
            <param name="cmap_ref" value="reference_cmap_file.cmap"/>
            <param name="cmap_1" value="query_cmap_file.cmap"/>
            <param name="xmap_1" value="xml_configuration_file.xmap"/>
            <param name="contigs" value="small.fasta.gz"/>
            <param name="key" value="bionano_key_file.txt"/>
            <param name="log_file" value="true"/>
            <output name="log">
                <assert_contents>
                    <has_text text="Final AGP and scaffolds file are in biscot/scaffolds.agp and biscot/scaffolds.fasta"/>
                    <has_text text="Converting agp file to fasta"/>
                    <has_n_lines n="13"/>
                </assert_contents>
            </output>
            <output name="fasta">
                <assert_contents>
                    <has_text text="Super-Scaffold_1"/>
                    <has_text text="TCATCGGTTAAATCATCGCCCAGAAATACGGGCGT"/>
                    <has_size value="2097147"/>
                </assert_contents>
            </output>
            <output name="agp">
                <assert_contents>
                    <has_text text="Super-Scaffold_1"/>
                    <has_text text="4753350"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: secondary map, two-enzyme XMAP and both boolean switches set;
             no log is requested, so only two outputs are expected. -->
        <test expect_num_outputs="2">
            <param name="cmap_ref" value="reference_cmap_file.cmap"/>
            <param name="cmap_1" value="query_cmap_file.cmap"/>
            <param name="xmap_1" value="xml_configuration_file.xmap"/>
            <param name="contigs" value="small.fasta.gz"/>
            <param name="key" value="bionano_key_file.txt"/>
            <param name="xmap_2enz" value="xml_configuration_file.xmap"/>
            <param name="aggressive" value="true"/>
            <param name="only_confirmed_pos" value="true"/>
            <section name="secondary_map">
                <param name="cmap_2" value="query_cmap_file.cmap"/>
                <param name="xmap_2" value="xml_configuration_file.xmap"/>
            </section>
            <output name="fasta">
                <assert_contents>
                    <has_text text="Super-Scaffold_1"/>
                    <has_text text="TCATCGGTTAAATCATCGCCCAGAAATACGGGCGT"/>
                    <has_size value="2097147"/>
                </assert_contents>
            </output>
            <output name="agp">
                <assert_contents>
                    <has_text text="Super-Scaffold_1"/>
                    <has_text text="4753350"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark
            
**Purpose**

BiSCoT is a tool that examines data generated during a previous Bionano scaffolding and merges contigs separated by a 13-Ns gap if needed. BiSCoT also re-evaluates gap sizes and searches for an
alignment between two contigs if the gap size is smaller than 1,000 nucleotides. BiSCoT is therefore not a traditional scaffolder, since it can only be used to improve an existing scaffolding based
on an optical map.

-----

.. class:: infomark
            
**Required files**

During scaffolding, Bionano generates a visual representation of the hybrid scaffolds that is called an *anchor*. It also generates one *.key* file, which describes the mapping between map
identifiers and contig names, several CMAP files, which contain the positions of enzymatic labelling sites on contig maps and on the anchor, and an XMAP file, which describes the alignment between a contig map
and an anchor. BiSCoT first loads the contigs into memory based on the key file. Then, the anchor CMAP file and contig CMAP files are loaded into memory. Finally, the XMAP file is parsed and loaded.

    ]]>    </help>
    <expand macro="citations"/>
</tool>