view progressivemauve.xml @ 1:bd1901839519 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:46:06 +0000
parents
children faa752f96ab9
line wrap: on
line source

<tool id="progressivemauve" name="progressiveMauve" version="19.1.0.0" profile="16.04">
    <description>constructs multiple genome alignments</description>
    <macros>
        <import>macros.xml</import>
        <import>cpt-macros.xml</import>
    </macros>
    <requirements>
        <!-- NOTE(review): no version attribute is set on this requirement; consider pinning a specific progressivemauve release so installs are reproducible. -->
        <requirement type="package">progressivemauve</requirement>
    </requirements>
    <command detect_errors="aggressive"><![CDATA[
## Stage every selected dataset in the job working directory under its own
## basename, so the aligner can be handed short relative file names below.
## The loop has to iterate over the dataset list itself: a quoted placeholder
## inside a directive is a plain string literal and would be walked one
## character at a time.
#for $file in $sequences:
    ln -s '$file' `basename '$file'`;
#end for

progressiveMauve
## Input Options

#if $apply_backbone:
    --apply-backbone='$apply_backbone'
#end if
--island-gap-size='$island_gap_size'
## Boolean parameters expand to either their flag or an empty string. They are
## deliberately left unquoted: a quoted empty value would reach the program as
## an empty positional argument, i.e. an empty sequence file name.
$mums

## Optional numeric settings are only passed when the form value is truthy, so
## the form default of 0 leaves the program's own default in place.
#if $seed_weight:
    --seed-weight='$seed_weight'
#end if

#if $max_gapped_aligner_length:
    --max-gapped-aligner-length='$max_gapped_aligner_length'
#end if

#if $match_input:
    --match-input='$match_input'
#end if

## Forward the optional guide tree selected in the form.
#if $input_guide_tree:
    --input-guide-tree='$input_guide_tree'
#end if

$collinear
--scoring-scheme='$scoring_scheme'
$no_weight_scaling

--max-breakpoint-distance-scale='$max_breakpoint_distance_scale'
--conservation-distance-scale='$conservation_distance_scale'
$skip_refinement
$skip_gapped_alignment

#if $bp_dist_estimate_min_score:
    --bp-dist-estimate-min-score='$bp_dist_estimate_min_score'
#end if

#if $gap_open:
    --gap-open='$gap_open'
#end if

## Forward the repeat penalty mode chosen in the form (negative or zero).
--repeat-penalty='$repeat_penalty'

#if $gap_extend:
    --gap-extend='$gap_extend'
#end if

#if $weight:
    --weight='$weight'
#end if

#if $min_scaled_penalty:
    --min-scaled-penalty='$min_scaled_penalty'
#end if

--hmm-p-go-homologous='$hmm_p_go_homologous'
--hmm-p-go-unrelated='$hmm_p_go_unrelated'
--hmm-identity='$hmm_identity'

$seed_family
$solid_seeds
$coding_seeds
$no_recursion
$disable_backbone

## Outputs
--output='$output'
## The two extra outputs are only requested when their checkbox is ticked,
## matching the filters on the corresponding output datasets.
#if $output_guide_tree:
    --output-guide-tree='$output_guide_tree_file'
#end if

#if $output_backbone:
    --backbone-output='$output_backbone_file'
#end if

## Sequences
## Positional arguments: the symlink names created at the top of the script.
#for $file in $sequences:
    `basename '$file'`
#end for

]]></command>
    <inputs>
        <!-- Form parameters, kept in their original display order.
             Attributes follow one fixed order: name, type, constraints, default, label, help.
             Each help text names, in parentheses, the progressiveMauve option the parameter stands for.
             Boolean parameters whose truevalue is a command-line flag yield that flag when checked and an empty string otherwise. -->
        <param name="sequences" type="data" format="fasta" multiple="True" label="Select sequences to align" help="in fasta format"/>
        <param name="apply_backbone" type="data" format="xmfa" optional="True" label="Apply Backbone" help="Read an existing sequence alignment in XMFA format and apply backbone statistics to it (--apply-backbone)"/>
        <param name="island_gap_size" type="integer" value="20" label="Island gap size" help="Alignment gaps above this size in nucleotides are considered to be islands (--island-gap-size)"/>
        <param name="disable_backbone" type="boolean" truevalue="--disable-backbone" falsevalue="" label="Disable backbone" help="Disable backbone detection (--disable-backbone)"/>
        <param name="output_guide_tree" type="boolean" truevalue="True" falsevalue="" label="Output Guide Tree" help="Write out the guide tree used for alignment to a file (--output-guide-tree)"/>
        <param name="output_backbone" type="boolean" truevalue="True" falsevalue="" label="Output Backbone" help="Write out the backbone to a file (--backbone-output)"/>
        <param name="mums" type="boolean" truevalue="--mums" falsevalue="" label="MUMs" help="Find MUMs only, do not attempt to determine locally collinear blocks (LCBs) (--mums)"/>
        <param name="seed_weight" type="integer" optional="True" value="0" label="Seed weight" help="Use the specified seed weight for calculating initial anchors (--seed-weight)"/>
        <param name="match_input" type="data" format="tabular" optional="True" label="Match Input" help="Use specified match file instead of searching for matches (--match-input)"/>
        <!--<param type="file" label="input-id-matrix" help="An identity matrix describing similarity among all pairs of input sequences/alignments (- -input-id-matrix)" />-->
        <param name="max_gapped_aligner_length" type="integer" optional="True" value="0" label="Max gapped aligner length" help="Maximum number of base pairs to attempt aligning with the gapped aligner (--max-gapped-aligner-length)"/>
        <param name="input_guide_tree" type="data" format="nhx" optional="True" label="input-guide-tree" help="A phylogenetic guide tree in Newick format that describes the order in which sequences will be aligned (--input-guide-tree)"/>
        <param name="collinear" type="boolean" truevalue="--collinear" falsevalue="" label="Collinear inputs" help="Assume that input sequences are collinear--they have no rearrangements (--collinear)"/>
        <param name="scoring_scheme" type="select" label="Scoring scheme" help="Selects the anchoring score function. (--scoring-scheme)">
            <option value="sp" selected="True">Extant sum-of-pairs (sp)</option>
            <option value="ancestral_sp">Sum-of-pairs + Ancestral (ancestral_sp)</option>
            <option value="ancestral">Ancestral (ancestral)</option>
        </param>
        <param name="no_weight_scaling" type="boolean" truevalue="--no-weight-scaling" falsevalue="" label="No weight scaling" help="Don't scale LCB weights by conservation distance and breakpoint distance (--no-weight-scaling)"/>
        <param name="max_breakpoint_distance_scale" type="float" min="0" max="1" value="0.5" label="max-breakpoint-distance-scale" help="Set the maximum weight scaling by breakpoint distance. (--max-breakpoint-distance-scale)"/>
        <param name="conservation_distance_scale" type="float" min="0" max="1" value="0.5" label="conservation-distance-scale" help="Scale conservation distances by this amount. (--conservation-distance-scale)"/>
        <param name="skip_refinement" type="boolean" truevalue="--skip-refinement" falsevalue="" label="Skip refinement" help="Do not perform iterative refinement (--skip-refinement)"/>
        <param name="skip_gapped_alignment" type="boolean" truevalue="--skip-gapped-alignment" falsevalue="" label="Skip gapped alignment" help="Do not perform gapped alignment (--skip-gapped-alignment)"/>
        <param name="bp_dist_estimate_min_score" type="integer" optional="True" value="0" label="BP dist estimate min score" help="Minimum LCB score for estimating pairwise breakpoint distance (--bp-dist-estimate-min-score)"/>
        <param name="gap_open" type="integer" optional="True" value="0" label="Gap open" help="Gap open penalty (--gap-open)"/>
        <param name="repeat_penalty" type="select" label="Repeat penalty" help="Sets whether the repeat scores go negative or go to zero for highly repetitive sequences. (--repeat-penalty)">
            <option value="negative" selected="True">Negative</option>
            <option value="zero">Zero</option>
        </param>
        <param name="gap_extend" type="integer" optional="True" value="0" label="Gap extend" help="Gap extend penalty (--gap-extend)"/>
        <!--<param type="data" label="Substitution matrix" -->
        <!--help="Nucleotide substitution matrix in NCBI format (- -substitution-matrix)" />-->
        <param name="weight" type="integer" optional="True" value="0" label="Weight" help="Minimum pairwise LCB score (--weight)"/>
        <param name="min_scaled_penalty" type="integer" optional="True" value="0" label="Min scaled penalty" help="Minimum breakpoint penalty after scaling the penalty by expected divergence (--min-scaled-penalty)"/>
        <param name="hmm_p_go_homologous" type="float" min="0" max="1" value="0.00001" label="HMM p go homologous" help="Probability of transitioning from the unrelated to the homologous state (--hmm-p-go-homologous)"/>
        <param name="hmm_p_go_unrelated" type="float" min="0" max="1" value="0.000000001" label="HMM p go unrelated" help="Probability of transitioning from the homologous to the unrelated state (--hmm-p-go-unrelated)"/>
        <param name="hmm_identity" type="float" min="0" max="1" value="0.7" label="HMM identity" help="Expected level of sequence identity among pairs of sequences(--hmm-identity)"/>
        <param name="seed_family" type="boolean" truevalue="--seed-family" falsevalue="" label="Seed family" help="Use a family of spaced seeds to improve sensitivity (--seed-family)"/>
        <param name="solid_seeds" type="boolean" truevalue="--solid-seeds" falsevalue="" label="Solid seeds" help="Use solid seeds. Do not permit substitutions in anchor matches. (--solid-seeds)"/>
        <param name="coding_seeds" type="boolean" truevalue="--coding-seeds" falsevalue="" label="Coding seeds" help="Use coding pattern seeds. Useful to generate matches coding regions with 3rd codon position degeneracy. (--coding-seeds)"/>
        <param name="no_recursion" type="boolean" truevalue="--no-recursion" falsevalue="" label="No recursion" help="Disable recursive anchor search (--no-recursion)"/>
    </inputs>
    <outputs>
        <!-- Main result: XMFA by default, switched to tabular when the MUMs checkbox is ticked. -->
        <data name="output" format="xmfa" label="${tool.name} alignment of ${on_string}">
            <change_format>
                <when input="mums" value="--mums" format="tabular"/>
            </change_format>
        </data>
        <!-- Guide tree dataset, only created when "Output Guide Tree" is ticked. -->
        <data name="output_guide_tree_file" format="nhx" label="${tool.name} alignment of ${on_string}: Guide tree">
            <filter>output_guide_tree</filter>
        </data>
        <!-- Backbone dataset, only created when "Output Backbone" is ticked. -->
        <data name="output_backbone_file" format="tabular" label="${tool.name} alignment of ${on_string}: Backbone">
            <filter>output_backbone</filter>
        </data>
    </outputs>
    <tests>
        <!-- Two separate fasta inputs, default settings; alignment may differ from 1.xmfa by up to 20 lines. -->
        <test>
            <param name="sequences" value="phagey.fa,karma.fa"/>
            <output name="output" file="1.xmfa" lines_diff="20"/>
        </test>
        <!-- A single fasta input (merged.fa), default settings. -->
        <test>
            <param name="sequences" value="merged.fa"/>
            <output name="output" file="2.xmfa" lines_diff="20"/>
        </test>
        <!-- Same input with the guide tree output enabled; the tree is compared against 3.nhx. -->
        <test>
            <param name="sequences" value="merged.fa"/>
            <param name="output_guide_tree" value="True"/>
            <output name="output" file="3.xmfa" lines_diff="20"/>
            <output name="output_guide_tree_file" file="3.nhx"/>
        </test>
        <!-- MUMs-only mode; the output is compared by size only, within 1000 bytes of 4.mums. -->
        <test>
            <param name="sequences" value="merged.fa"/>
            <param name="mums" value="True"/>
            <output name="output" file="4.mums" compare="sim_size" delta="1000"/>
        </test>
        <!-- Alignment driven by a precomputed match file (4.mums) instead of a fresh match search. -->
        <test>
            <param name="sequences" value="merged.fa"/>
            <param name="match_input" value="4.mums"/>
            <output name="output" file="5.xmfa" lines_diff="24"/>
        </test>
    </tests>
    <help><![CDATA[
What it does
============

Mauve is a system for efficiently constructing multiple genome alignments in
the presence of large-scale evolutionary events such as rearrangement and
inversion. Multiple genome alignment provides a basis for research into
comparative genomics and the study of evolutionary dynamics. Aligning whole
genomes is a fundamentally different problem than aligning short sequences.

Mauve has been developed with the idea that a multiple genome aligner should
require only modest computational resources. It employs algorithmic techniques
that scale well in the amount of sequence being aligned. For example, a pair of
Y. pestis genomes can be aligned in under a minute, while a group of 9
divergent Enterobacterial genomes can be aligned in a few hours.


Example Usage
=============

+-----------------------------------+-------------+
| Usage                             | Notes       |
+===================================+=============+
| Align genomes                     |Simply       |
|                                   |select as    |
|                                   |many fasta   |
|                                   |files with   |
|                                   |one or more  |
|                                   |sequences as |
|                                   |necessary    |
+-----------------------------------+-------------+
| Align genomes but also save       |Use the      |
| the guide tree and produce a      |**Output     |
| backbone file                     |Guide Tree** |
|                                   |and **Output |
|                                   |Backbone**   |
|                                   |options      |
+-----------------------------------+-------------+
| Align genomes, but do not         |Use the      |
| detect forced alignment of        |**Disable    |
| unrelated sequences               |backbone**   |
|                                   |option       |
+-----------------------------------+-------------+
| Detect forced alignment of        |Use the      |
| unrelated sequence in the         |**Apply      |
| alignment produced                |Backbone**   |
| in previous example, use          |option and   |
| custom Homology HMM transition    |specify the  |
| parameters.                       |XMFA file    |
|                                   |produced     |
|                                   |in the       |
|                                   |previous     |
|                                   |example      |
+-----------------------------------+-------------+
| Compute ungapped                  |Use the      |
| local-multiple alignments among   |**MUMs**     |
| the input sequences               |option       |
+-----------------------------------+-------------+
| Compute an alignment of the       |Set the      |
| same genomes, using previously    |**Match      |
| computed local-multiple           |Input** to   |
| alignments                        |the tabular  |
|                                   |MUMs file    |
|                                   |produced in  |
|                                   |the previous |
|                                   |example      |
+-----------------------------------+-------------+
| Set a minimum scaled              |Use the      |
| breakpoint penalty to cope with   |**Min Scaled |
| the case where most genomes       |Penalty** and|
| are aligned correctly, but manual |set to a     |
| inspection reveals that           |value like   |
| a divergent genome has too        |5000         |
| many predicted rearrangements.    |             |
+-----------------------------------+-------------+
| Globally align a set of           |Use the      |
| collinear virus                   |**Collinear**|
| genomes, using seed families      |and **Seed   |
| to improve anchoring sensitivity  |Family**     |
| in regions below 70% sequence     |options      |
| identity.                         |             |
+-----------------------------------+-------------+

]]></help>
    <citations>
        <expand macro="citation/progressive_mauve"/>
        <expand macro="citation/mijalisrasche"/>
    </citations>
</tool>