view salsa2.xml @ 3:f77f7a7f3b83 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/salsa2 commit 4904594e8df7cbd6eeee4be24023c6bd15e162de"
author iuc
date Thu, 11 Nov 2021 15:03:17 +0000
parents ab5b7f6b7198
children 9a22227bb6d0
line wrap: on
line source

<tool id="salsa" name="SALSA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
    <description>scaffold long read assemblies with Hi-C</description>
    <!-- Version tokens: @TOOL_VERSION@ is used for the salsa2 package
         requirement and, combined with @VERSION_SUFFIX@, for the version
         attribute of the tool element. -->
    <macros>
        <token name="@TOOL_VERSION@">2.3</token>
        <token name="@VERSION_SUFFIX@">2</token>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry. -->
    <xrefs>
        <xref type="bio.tools">SALSA</xref>
    </xrefs>
    <!-- Packages needed by the command block: salsa2 provides
         run_pipeline.py, samtools provides the "faidx" indexing step. -->
    <requirements>
        <requirement version="@TOOL_VERSION@" type="package">salsa2</requirement>
        <requirement version="1.11" type="package">samtools</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## Link the assembly into the working directory so that the index produced by
## "samtools faidx" (input.fasta.fai) is created there as well.
ln -s '$fasta_in' input.fasta &&
samtools faidx input.fasta &&

run_pipeline.py
-a '$fasta_in'
-l input.fasta.fai
#if $enzyme_conditional.enzyme_options == 'preconfigured':
    ## preconfigured_enzymes is a multiple select, so its string form is a
    ## comma separated list of preset names and cannot be compared to a single
    ## name with "==". The presets are nested (every site of dovetail is in
    ## arima1, every site of arima1 is in arima2), so the largest selected
    ## preset already covers the whole selection.
    #set $presets = str($enzyme_conditional.preconfigured_enzymes).split(',')
    #if 'arima2' in $presets:
        -e 'GATC,GANTC,CTNAG,TTAA'
    #else if 'arima1' in $presets:
        -e 'GATC,GANTC'
    #else if 'dovetail' in $presets:
        -e 'GATC'
    #else:
        ## Unrecognised or empty selection: keep the historical fallback.
        -e 'GATC,GANTC,CTNAG,TTAA'
    #end if
#else:
    -e '${enzyme_conditional.manual_enzyme}'
#end if
-b '$bed_file'
## Optional integers are tested through str() so that an unset value (empty
## string) is skipped while an explicit 0 is still passed on.
#if str($cutoff):
    -c '$cutoff'
#end if
#if $gfa_file:
    -g '$gfa_file'
#end if
#if str($iter):
    -i '$iter'
#end if
-o ./out
    ]]></command>
    <inputs>
        <!-- Assembly to scaffold and the Hi-C read alignments in BED format. -->
        <param name="fasta_in" type="data" format="fasta" label="Initial assembly file" help="Headers must not contain ':'."/>
        <param name="bed_file" type="data" format="bed" label="Bed alignment" help="To start scaffolding with SALSA, reads need to be mapped to the assembly. 
            BWA or BOWTIE2 are recommended. SALSA requires a bed file as the input. The alignment bam file can be converted using the bamToBed command from 
            the Bedtools package."/>
        <!-- Optional tuning parameters; each is only added to the command line when set. -->
        <param name="cutoff" argument="-c" type="integer" min="1" label="Cutoff" optional="true" help="Minimum contig length to scaffold"/>
        <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs" 
            help="An assembly graph can be optionally provided to guide the scaffolding, potentially reducing the scaffolding errors"/>
        <!-- Restriction enzyme site(s): either a kit preset or a free-text list, both passed to -e. -->
        <conditional name="enzyme_conditional">
            <param name="enzyme_options" type="select" label="Enzyme selection" help="Hi-C experiments can use different restriction enzymes.
                The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual 
                sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you 
                use an enzyme-free prep, e.g. Omni-C.">
                <option value="preconfigured">Preconfigured restriction enzymes</option>
                <option value="specific">Enter a specific sequence</option>
            </param>
            <when value="preconfigured">
                <!-- NOTE(review): the presets are nested supersets of each other, so a
                     single-choice select would probably be enough; kept as a multiple
                     select here to leave the existing parameter interface untouched. -->
                <param name="preconfigured_enzymes" type="select" multiple="true" label="Preconfigured enzymes">
                    <option value="dovetail">Dovetail Chicago, Dovetail Hi-C or Phase: GATC</option>
                    <option value="arima1">Arima Hi-C 1.0: GATC, GANTC</option>
                    <option value="arima2">Arima Hi-C 2.0: GATC, GANTC, CTNAG, TTAA</option>
                </param>
            </when>
            <when value="specific">
                <param name="manual_enzyme" argument="-e" type="text" label="Restriction enzyme sequence(s)"
                    help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT').">
                    <!-- The value is placed inside single quotes on the command line, so only letters and commas are accepted. -->
                    <validator type="expression" message="Only letters and commas can be used to define restriction enzyme sequences.">value.replace(',', '').isalpha()</validator>
                </param>
            </when>
        </conditional>
        <param name="iter" argument="-i" type="integer" min="0" max="20" label="Iterations" optional="true" 
            help="SALSA will scaffold through sequential iterations. The default number of iterations is 3. Increasing the number of iterations will 
                potentially increase the number of joins, however it could also introduce additional misjoins"/>
    </inputs>
    <!-- Both results are collected from the "out" directory that the command
         block passes to run_pipeline.py with -o. -->
    <outputs>
        <data name="scaffolds_fasta"
              format="fasta"
              label="${tool.name} on ${on_string}: FASTA assembly"
              from_work_dir="out/scaffolds_FINAL.fasta"/>
        <data name="scaffolds_agp"
              format="tabular"
              label="${tool.name} on ${on_string}: agp output"
              from_work_dir="out/scaffolds_FINAL.agp"/>
    </outputs>
    <tests>
        <!-- Manually entered enzyme sequences, with every optional parameter set -->
        <test expect_num_outputs="2">
            <param name="fasta_in" value="test.fasta"/>
            <param name="bed_file" value="test.bed"/>
            <param name="gfa_file" value="test.gfa1"/>
            <conditional name="enzyme_conditional">
                <param name="enzyme_options" value="specific"/>
                <param name="manual_enzyme" value="GATC,GANTC"/>
            </conditional>
            <param name="cutoff" value="1000"/>
            <param name="iter" value="3"/>
            <output name="scaffolds_fasta" file="out.fasta"/>
            <output name="scaffolds_agp" file="out.agp"/>
        </test>
        <!-- Preconfigured enzyme preset (Arima Hi-C 1.0: GATC, GANTC); expected to
             give the same result as the manual test since the sites are identical -->
        <test expect_num_outputs="2">
            <param name="fasta_in" value="test.fasta"/>
            <param name="bed_file" value="test.bed"/>
            <param name="gfa_file" value="test.gfa1"/>
            <conditional name="enzyme_conditional">
                <param name="enzyme_options" value="preconfigured"/>
                <param name="preconfigured_enzymes" value="arima1"/>
            </conditional>
            <param name="cutoff" value="1000"/>
            <param name="iter" value="3"/>
            <output name="scaffolds_fasta" file="out.fasta"/>
            <output name="scaffolds_agp" file="out.agp"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**Purpose**

SALSA (Simple AssembLy ScAffolder) is a scaffolding tool based on a computational method that exploits the genomic proximity
information in Hi-C data sets for long range scaffolding of de novo genome assemblies.

----

.. class:: infomark

**Mapping reads**

To start the scaffolding, the first step is to map reads to the assembly. We recommend using the `BWA <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.2>`_ 
or `BOWTIE2 <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.4.2+galaxy0>`_ aligner to map reads. The read mapping generates a BAM file. SALSA requires 
a BED file as the input. The conversion can be done using the bamToBed command from the `Bedtools package <http://bedtools.readthedocs.io/en/latest/>`_. Also, SALSA requires BED files to be sorted by the 
read name, rather than the alignment coordinates. Once you have the BAM file, convert it to BED and sort the result by read name to get the BED file needed as an input to SALSA.

Since Hi-C reads and alignments contain experimental artifacts, the alignments need some postprocessing. To align and postprocess 
the alignments, you can use the pipeline released by Arima Genomics, which can be found in the `GitHub repository <https://github.com/ArimaGenomics>`_.

Additional information on how to generate and filter the BAM file is available `here <https://github.com/marbl/SALSA#mapping-reads>`_.

    ]]></help>
    <!-- Publications to cite when using this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1101/261149</citation>
        <citation type="doi">10.1186/s12864-017-3879-z</citation>
    </citations>
</tool>