Mercurial > repos > iuc > colibread_mapsembler2

<tool id="mapsembler2" name="Mapsembler2" version="2.2.4+galaxy0" profile="@PROFILE@">
    <description>is a targeted assembly software</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="2.2.4">mapsembler2</requirement>
    </requirements>
    <!--
        Command template (Cheetah). Steps, in order:
        1. Build a working file name for the starter dataset "s": every character of
           its element identifier that is not a word character is replaced by "_",
           and ".fasta" is appended. If the dataset extension ends in ".gz" the file
           is decompressed into that name, otherwise it is symlinked to it.
        2. For each dataset in "r", compute a sanitised base name the same way and
           expand the single_reads token.
           NOTE(review): that token is defined in macros.xml, which is not shown
           here; it presumably stages the read file and sets $filename, the value
           collected into $samples. Confirm against macros.xml before renaming
           $input, $base_filename or $filename.
        3. Run run_mapsembler2_pipeline.sh with the starter file, the collected read
           names joined by single spaces into one quoted argument for the r option,
           and the remaining parameters passed through unchanged.
    -->
    <command><![CDATA[
        #import re
        #set $starter_filename = re.sub('[^\w_]', '_', $s.element_identifier) + ".fasta"
        #if $s.ext.endswith('.gz'):
            gunzip -c '${s}' > '${starter_filename}' &&
        #else:
            ln -sf '${s}' '${starter_filename}' &&
        #end if
        #set $samples = []
        #for $input in $r
            #set $base_filename = re.sub('[^\w_]', '_', $input.element_identifier)
            @single_reads@
            #silent $samples.append($filename)
        #end for
        run_mapsembler2_pipeline.sh
        -s '${starter_filename}'
        -r "${ ' '.join(['%s' % read for read in $samples]) }"
        -t ${t}
        -k ${k}
        -c ${c}
        -d ${d}
        -g ${g}
        -f ${f}
        -x ${x}
        -y ${y}
    ]]></command>
    <inputs>
        <!-- Input data files -->
        <param argument="-s" type="data" format="fasta,fasta.gz" label="Starters" help="Set of input sequences" />
        <param argument="-r" type="data" multiple="true" format="fasta,fasta.gz,fastq,fastq.gz" label="List of reads" />
        <!--
            Pipeline options. Each value is placed on the command line after the
            flag named by its "argument" attribute.
        -->
        <param argument="-t" type="select" label="Select your output extension type">
            <option value="1">a strict sequence</option>
            <option value="2">a consensus sequence</option>
        </param>
        <!-- The help text requires an odd kmer size; the validator enforces it at form submission instead of letting the job fail. -->
        <param argument="-k" type="integer" min="1" label="Size of kmers" value="31" help="Set the length of used kmers. Must fit the compiled value. Only uneven number">
            <validator type="expression" message="The kmer size must be an odd number">int(value) % 2 == 1</validator>
        </param>
        <!-- Lower bounds below reject negative values, which have no meaning for counts, sizes or depths. -->
        <param argument="-c" type="integer" min="0" label="Minimal coverage" value="5" help="Set the minimal coverage: Used by Phaser (don't use kmers with lower coverage) "/>
        <param argument="-d" type="integer" min="0" label="Number of authorized substitutions" value="1" help="Set the number of authorized substitutions used while mapping reads on finding SNPs"/>
        <param argument="-g" type="integer" min="1" label="Estimated genome size" value="10000000" help="Used only to control memory usage. e.g.3 billion (3000000000) uses 4Gb of RAM." />
        <param argument="-f" type="select" label="Process of search" help="Set the process of search in the graph" >
            <option value="1">Breadth</option>
            <option value="2">Depth</option>
        </param>
        <param argument="-x" type="integer" min="1" label="Max length of nodes" value="40" help="Set the maximal length of nodes"/>
        <param argument="-y" type="integer" min="1" label="Max depth of nodes" value="10000" help="Set the maximal depth of the graph"/>
    </inputs>
    <outputs>
        <!-- Primary result, collected from the job working directory and shown in the history. -->
        <data name="fasta"
              format="fasta"
              from_work_dir="res_k_*.fasta"
              label="Assembly with ${tool.name} on $on_string"/>
        <!-- Auxiliary results, also collected from the working directory but created hidden. -->
        <data name="coherent"
              format="fasta"
              from_work_dir="res_coherent_*.fasta"
              label="Coherent with ${tool.name} on $on_string"
              hidden="true"/>
        <data name="uncoherent"
              format="fasta"
              from_work_dir="res_uncoherent_*.fasta"
              label="Uncoherent with ${tool.name} on $on_string"
              hidden="true"/>
        <data name="extremities"
              format="fasta"
              from_work_dir="starter_extremities.fa"
              label="Starter extremities with ${tool.name} on $on_string"
              hidden="true"/>
    </outputs>
    <tests>
        <!-- Consensus extension (t=2) on a gzipped starter and two gzipped read sets; only the assembly is compared. -->
        <test>
            <param name="s" ftype="fasta.gz" value="mapsembler2/starter.fa.gz"/>
            <param name="r" ftype="fasta.gz" value="mapsembler2/reads1.gz,mapsembler2/reads2.gz"/>
            <param name="t" value="2"/>
            <output name="fasta" file="mapsembler2/assembly.fa"/>
        </test>
        <!-- Strict extension (t=1) with the read sets given in the opposite order; assembly, coherent and extremities outputs are compared. -->
        <test>
            <param name="s" ftype="fasta.gz" value="mapsembler2/starter.fa.gz"/>
            <param name="r" ftype="fasta.gz" value="mapsembler2/reads2.gz,mapsembler2/reads1.gz"/>
            <param name="t" value="1"/>
            <output name="fasta" file="mapsembler2/assembly_2.fa"/>
            <output name="coherent" file="mapsembler2/coherent.fa"/>
            <output name="extremities" file="mapsembler2/starter_extremities.fa"/>
        </test>
    </tests>
    <help><![CDATA[

**Description**

Mapsembler2 is a targeted assembly software. It takes as input a set of NGS raw reads (fasta or fastq, gzipped or not) and a set of input sequences (starters). It first determines whether each starter is read-coherent, i.e. whether the reads confirm the presence of the starter in the original sequence. Then, for each read-coherent starter, Mapsembler2 outputs its sequence neighborhood as a linear sequence or as a graph, depending on the user's choice.
Mapsembler2 may be used to (non-exhaustive list):

- Validate an assembled sequence (input as starter), e.g. from a de Bruijn graph assembly where read-coherence was not enforced.

- Check whether a gene (input as starter) has a homolog in a set of reads.

- Check whether a known enzyme is present in a metagenomic NGS read set.

- Enrich unmappable reads by extending them, possibly making them mappable.

- Check what happens at the extremities of a contig.

- Remove contaminant or symbiont reads from a read set.

-------

**Web site**

http://colibread.inria.fr/mapsembler2/


    ]]></help>
    <!--
        Expands the "citations" macro and passes one DOI citation as its child content.
        NOTE(review): the macro is presumably defined in the imported macros.xml,
        which is not shown here; confirm where the nested citation is inserted.
    -->
    <expand macro="citations">
        <citation type="doi">10.1186/1471-2105-13-48</citation>
    </expand>

</tool>
author	iuc
date	Thu, 06 May 2021 12:46:55 +0000
parents	00fd0b4274fd
children