view trimal.xml @ 2:e379c0202766 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/trimal commit f6575973c143041bfbcb8afa12077c77e65e91a5
author iuc
date Wed, 20 Nov 2024 22:30:36 +0000
parents 2a156ec81e7a
children
line wrap: on
line source

<tool id="trimal" name="trimAl" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0" license="MIT">
    <description>for automated alignment trimming</description>
    <!-- Version tokens: combined into the tool version string and reused for the package requirement below. -->
    <macros>
        <token name="@TOOL_VERSION@">1.5.0</token>
        <token name="@VERSION_SUFFIX@">1</token>
    </macros>
    <!-- EDAM ontology annotations. -->
    <edam_topics>
        <!-- phylogenetics -->
        <edam_topic>topic_3293</edam_topic>
        <!-- sequence analysis -->
        <edam_topic>topic_0080</edam_topic>
    </edam_topics>
    <edam_operations>
        <!-- sequence trimming -->
        <edam_operation>operation_3192</edam_operation>
    </edam_operations>
    <!-- Cross-reference to the bio.tools registry entry. -->
    <xrefs>
        <xref type="bio.tools">trimal</xref>
    </xrefs>
    <!-- Runtime dependency, pinned to the same version as the wrapper token. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">trimal</requirement>
    </requirements>
    <command detect_errors="aggressive"><![CDATA[
## Cheetah template that assembles the trimAl command line.
## Layout: input/output files, optional output-format flag, HTML report,
## mode-specific trimming options, overlap filters, optional statistics.

## String renderings that mean "no value supplied" for an optional numeric input.
#set $UNSET = ('', 'None')

trimal -in '$in'
-out '$trimmed_output'
${out_format_selector}

## The HTML report is requested for every mode except "manual"
## (the html_summary output is filtered out in that mode).
#unless $trimming_mode.mode_selector == "manual"
  -htmlout '$html_summary'
#end unless

#if $trimming_mode.mode_selector == "custom"
  ## The thresholds are optional inputs: emit a flag only when a value was
  ## supplied, so that a cleared field never produces a flag without a value.
  ## An explicit string test is used because 0 is a legal value here.
  #if str($trimming_mode.gapthreshold) not in $UNSET
    -gapthreshold $trimming_mode.gapthreshold
  #end if
  #if str($trimming_mode.simthreshold) not in $UNSET
    -simthreshold $trimming_mode.simthreshold
  #end if
  #if str($trimming_mode.cons) not in $UNSET
    -cons $trimming_mode.cons
  #end if

  ## NOTE(review): the custom-mode inputs conthreshold, clusters and maxidentity
  ## are declared in the inputs section but are not referenced in this template,
  ## so they currently have no effect - TODO confirm which of them trimAl accepts
  ## together with -in before wiring them up.

  ## A general window size takes precedence over the specific ones.
  ## Empty or 0 window values are skipped, leaving trimAl's defaults in place.
  #if $trimming_mode.w
    -w $trimming_mode.w
  #else
    #if $trimming_mode.gw
      -gw $trimming_mode.gw
    #end if
    #if $trimming_mode.sw
      -sw $trimming_mode.sw
    #end if
    #if $trimming_mode.cw
      -cw $trimming_mode.cw
    #end if
  #end if
#else if $trimming_mode.mode_selector == "manual"
  ## Lists are passed between literal braces; the value is quoted so it always
  ## reaches trimAl as a single argument.
  #if $trimming_mode.selectcols
    -selectcols { '$trimming_mode.selectcols' }
  #end if
  #if $trimming_mode.selectseqs
    -selectseqs { '$trimming_mode.selectseqs' }
  #end if
#else
    ## Automated modes: the select value is itself the trimAl flag.
    $trimming_mode.mode_selector
#end if

## Overlap filters: 0 is within the declared range of both inputs, so test for
## an empty value rather than for truthiness.
#if str($resoverlap) not in $UNSET
  -resoverlap $resoverlap
#end if
#if str($seqoverlap) not in $UNSET
  -seqoverlap $seqoverlap
#end if

## Statistics are written by trimAl to standard output; tee copies them into
## the statistics dataset.
#if $statistics
-sgc
-sgt
-ssc
-sst
-sident
-soverlap
| tee '$statsout'
#end if


    ]]></command>
    <inputs>
        <!-- Input alignment, passed to trimAl as -in. -->
        <param argument="-in" type="data" format="fasta,clustal,pir,phylip,nexus,mega,txt" label="Alignment file (clustal, fasta, NBRF/PIR, nexus, phylip3.2, phylip)" />
        <!-- Trimming mode. Values starting with "-" are used verbatim as the trimAl flag;
             "custom" and "manual" open additional parameters. -->
        <conditional name="trimming_mode">
            <param name="mode_selector" type="select" label="Select trimming mode from the list">
                <option value="-nogaps">nogaps - remove all positions with gaps in the alignment.</option>
                <option value="-noallgaps">noallgaps - remove columns composed only by gaps.</option>
                <option value="-gappyout">gappyout - only uses information based on gaps' distribution.</option>
                <option value="-strict">strict - combine gappyout trimming with subsequent trimming based on an automatically selected similarity threshold.</option>
                <option value="-strictplus">strictplus - very similar to the strict method but the final step of the algorithm is slightly different.</option>
                <option value="-automated1">automated1 - heuristic approach to determine the optimal automatic method for trimming a given alignment.</option>
                <option value="custom">custom mode - define trimming parameters yourself.</option>
                <option value="manual">manual mode - specify columns or sequences to remove</option>
            </param>
            <when value="-nogaps"/>
            <when value="-noallgaps"/>
            <when value="-gappyout"/>
            <when value="-strict"/>
            <when value="-strictplus"/>
            <when value="-automated1"/>
            <when value="manual">
                <!-- The expressions are anchored at both ends so that the whole value must be a
                     comma-separated list of numbers or "a-b" ranges; nothing may trail a valid prefix. -->
                <param argument="-selectcols" type="text" optional="true" label="Columns to be removed from alignment" help="Example: '2,4,8-12'; Range: [0 - (Number of Columns - 1)]">
                    <validator type="regex" message="must be a comma-separated list of whole numbers or ranges, e.g. 2,4,8-12">^\d+(-\d+)?(,\d+(-\d+)?)*$</validator>
                </param>
                <param argument="-selectseqs" type="text" optional="true" label="Sequences to be removed from alignment" help="Example: '2,4,8-12'; Range: [0 - (Number of Sequences - 1)]">
                    <validator type="regex" message="must be a comma-separated list of whole numbers or ranges, e.g. 2,4,8-12">^\d+(-\d+)?(,\d+(-\d+)?)*$</validator>
                </param>
            </when>
            <when value="custom">
                <param argument="-gapthreshold" type="float" optional="true" value="0.9" min="0.0" max="1.0" label="Gap threshold" help="1 - (fraction of sequences with a gap allowed)."/>
                <param argument="-simthreshold" type="float" optional="true" value="0.9" min="0.0" max="1.0" label="Similarity threshold" help="Minimum average similarity allowed."/>
                <!-- Upper bound matches the documented range [0 - 1]. -->
                <param argument="-conthreshold" type="float" optional="true" min="0" max="1" label="Consistency Threshold. Minimum consistency value required" help="Range: [0 - 1]. Not allowed in combination with inputfile."/>
                <param argument="-cons" type="integer" optional="true" value="50" min="0" max="100" label="Minimum conservance percentage" help="Minimum percentage of the positions in the original alignment to conserve."/>
                <!-- Lower bound matches the documented range, which starts at 1. -->
                <param argument="-clusters" type="integer" optional="true" min="1" label="Clusters. Get the most Nth representatives sequences from a given alignment." help="Range: [1 - (Number of sequences)]"/>
                <param argument="-maxidentity" type="float" optional="true" min="0" max="1" label="Max Identity. Get the representatives sequences for a given identity threshold." help="Range: [0 - 1]"/>

                <!-- Window sizes: the general size, when given, is used instead of the specific ones. -->
                <param argument="-w" type="integer" optional="true" min="0" label="Window frame size - general" help="score of position i is the average of the window (i - n) to (i + n). Will override specific sizes set below"/>
                <param argument="-gw" type="integer" optional="true" min="0" label="Window frame size applied to Gaps" help="score of position i is the average of the window (i - n) to (i + n)."/>
                <param argument="-sw" type="integer" optional="true" min="0" label="Window frame size applied to Similarity" help="score of position i is the average of the window (i - n) to (i + n)."/>
                <param argument="-cw" type="integer" optional="true" min="0" label="Window frame size applied to Consistency" help="score of position i is the average of the window (i - n) to (i + n)."/>
            </when>
        </conditional>
        <!-- Overlap filters; per their help texts the two are meant to be set together. -->
        <param argument="-resoverlap" type="float" optional="true" min="0" max="1" label="Residue overlap" help="Minimum overlap of a position with other positions in the column to be considered a “good position”. Range: [0-1]. Must be used in combination with Sequence overlap."/>
        <param argument="-seqoverlap" type="integer" optional="true" min="0" max="100" label="Sequence overlap" help="Minimum percentage of “good positions” that a sequence must have in order to be conserved. Range: [0-100]. Must be used in combination with Residue overlap."/>

        <!-- Output format: the option value is the trimAl flag; the empty value keeps the input format. -->
        <param name="out_format_selector" type="select" label="Select trimmed alignment output format from the list">
            <option value="" selected="true">Same as input (default)</option>
            <option value="-clustal">CLUSTAL format</option>
            <option value="-fasta">FASTA format</option>
            <option value="-fasta_m10">FASTA format. Sequences name length up to 10 characters.</option>
            <option value="-nbrf">NBRF/PIR format</option>
            <option value="-nexus">NEXUS format</option>
            <option value="-mega">MEGA format</option>
            <option value="-phylip">PHYLIP/PHYLIP4 format</option>
            <option value="-phylip_m10">PHYLIP/PHYLIP4 format. Sequences name length up to 10 characters</option>
            <option value="-phylip_paml">PHYLIP format compatible with PAML</option>
            <option value="-phylip_paml_m10">PHYLIP format compatible with PAML. Sequences name length up to 10 characters.</option>
            <option value="-phylip3.2">PHYLIP3.2 format</option>
            <option value="-phylip3.2_m10">PHYLIP3.2 format. Sequences name length up to 10 characters.</option>
        </param>
        <!-- When checked, the statistics flags are added to the command and a statistics dataset is produced. -->
        <param name="statistics" type="boolean" checked="false" label="Output trimming statistics?"/>
    </inputs>
    <outputs>
        <!-- Trimmed alignment. The datatype is inherited from the input unless an explicit
             output format was selected, in which case the matching rule below applies. -->
        <data name="trimmed_output" format_source="in" label="${tool.name} on ${on_string}: Trimmed alignment.">
            <change_format>
                <!-- Rules listed in the same order as the output format select options. -->
                <when input="out_format_selector" value="-clustal" format="clustal"/>
                <when input="out_format_selector" value="-fasta" format="fasta"/>
                <when input="out_format_selector" value="-fasta_m10" format="fasta"/>
                <when input="out_format_selector" value="-nbrf" format="pir"/>
                <when input="out_format_selector" value="-nexus" format="nexus"/>
                <when input="out_format_selector" value="-mega" format="mega"/>
                <!-- Every PHYLIP flavour maps to the single phylip datatype. -->
                <when input="out_format_selector" value="-phylip" format="phylip"/>
                <when input="out_format_selector" value="-phylip_m10" format="phylip"/>
                <when input="out_format_selector" value="-phylip_paml" format="phylip"/>
                <when input="out_format_selector" value="-phylip_paml_m10" format="phylip"/>
                <when input="out_format_selector" value="-phylip3.2" format="phylip"/>
                <when input="out_format_selector" value="-phylip3.2_m10" format="phylip"/>
            </change_format>
        </data>
        <!-- HTML report: produced for every trimming mode except "manual". -->
        <data name="html_summary" format="html" label="${tool.name} on ${on_string}: HTML report">
            <filter> trimming_mode['mode_selector'] != 'manual' </filter>
        </data>
        <!-- Statistics text: only created when the statistics checkbox is set. -->
        <data name="statsout" format="txt" label="${tool.name} on ${on_string}: statistics">
            <filter> statistics </filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="2"><!-- automated mode (gappyout) with MEGA output; two differing lines are tolerated in the alignment -->
            <param name="in" value="example.009.AA.fasta"/>
            <param name="mode_selector" value="-gappyout" />
            <param name="out_format_selector" value="-mega" />
            <output name="trimmed_output" file="trimmed_example.009.AA.mega" ftype="mega" lines_diff="2"/>
            <output name="html_summary" file="trimmed_example.009.AA.html" ftype="html"/>
        </test>
        <test expect_num_outputs="2"><!-- custom mode: gap/similarity thresholds and conservation percentage, PHYLIP (PAML, 10-char names) output -->
            <param name="in" value="example.009.AA.fasta"/>
            <param name="mode_selector" value="custom" />
            <param name="gapthreshold" value="0.5" />
            <param name="simthreshold" value="0.5" />
            <param name="cons" value="5" />
            <param name="out_format_selector" value="-phylip_paml_m10" />
            <output name="trimmed_output" file="custom_trimmed_example.009.AA.phy" ftype="phylip"/>
            <output name="html_summary" file="custom_trimmed_example.009.AA.html" ftype="html"/>
        </test>
        <test expect_num_outputs="2"><!-- custom mode with additional parameters, NEXUS output. NOTE(review): conthreshold, clusters and maxidentity are not referenced by the command template, so they presumably do not influence the result; confirm -->
            <param name="in" value="example.009.AA.fasta"/>
            <param name="mode_selector" value="custom" />
            <param name="gapthreshold" value="0.75" />
            <param name="simthreshold" value="0.8" />
            <param name="cons" value="5" />
            <param name="conthreshold" value="0.5" />
            <param name="clusters" value="4" />
            <param name="maxidentity" value="0.8" />
            <param name="out_format_selector" value="-nexus" />
            <output name="trimmed_output" file="custom_trimmed_example.009.AA.nexus" ftype="nexus"/>
            <output name="html_summary" file="custom_trimmed_example.009.AA.html" ftype="html"/>
        </test>
         <test expect_num_outputs="2"><!-- custom mode with specific window sizes (gw/sw/cw). NOTE(review): the expected HTML file name says 009 although the input is example.005; confirm the fixture name -->
            <param name="in" value="example.005.AA.fasta"/>
            <param name="mode_selector" value="custom" />
            <param name="gapthreshold" value="0.5" />
            <param name="simthreshold" value="0.5" />
            <param name="conthreshold" value="0.5" />
            <param name="gw" value="3"/>
            <param name="sw" value="4"/>
            <param name="cw" value="5"/>
            <output name="trimmed_output" file="custom_trimmed_window_example.005.AA.fasta" ftype="fasta"/>
            <output name="html_summary" file="custom_trimmed_window_example.009.AA.html" ftype="html"/>
        </test>
        <test expect_num_outputs="3"><!-- PHYLIP input in strict mode with statistics enabled; adds the statistics dataset as a third output -->
            <param name="in" value="example.002.AA.phy" ftype="phylip"/>
            <param name="mode_selector" value="-strict"/>
            <param name="statistics" value="true"/>
            <output name="trimmed_output" file="example.002.AA.out.phy" ftype="phylip" lines_diff="0"/>
            <output name="html_summary" file="example.002.AA.report.html" ftype="html" lines_diff="0"/>
            <output name="statsout" file="example.002.AA.stats.txt" ftype="txt" lines_diff="0"/>
        </test>
        <test expect_num_outputs="2"><!-- nogaps mode combined with the residue/sequence overlap filters -->
            <param name="in" value="example.005.AA.fasta" ftype="fasta"/>
            <param name="mode_selector" value="-nogaps"/>
            <param name="resoverlap" value="0.8"/>
            <param name="seqoverlap" value="6"/>
            <output name="trimmed_output" file="example.005.AA.out.overlaptrim.fasta" ftype="fasta" lines_diff="0"/>
            <output name="html_summary" file="example.005.AA.report.overlaptrim.html" ftype="html" lines_diff="0"/>
        </test>
        <test expect_num_outputs="1"><!-- manual mode: explicit column and sequence selections; no HTML report is produced in this mode -->
            <param name="in" value="example.009.AA.fasta" ftype="fasta"/>
            <param name="mode_selector" value="manual"/>
            <param name="selectcols" value="2,4,7-9"/>
            <param name="selectseqs" value="1,3,5-7"/>
            <output name="trimmed_output" file="example.009.AA.out.notrim.fasta" ftype="fasta" lines_diff="0"/>
        </test>
    </tests>
    <help><![CDATA[

TrimAl is a tool for the automated removal of spurious sequences or poorly aligned regions from a multiple sequence alignment.

TrimAl can consider several parameters, alone or in multiple combinations, in order to select the most-reliable positions in the alignment.
These include the proportion of sequences with a gap, the level of residue similarity and, if several alignments for the same set of sequences are provided,
the consistency level of columns among alignments. Moreover, trimAl allows you to manually select a set of columns and sequences to be removed from the alignment.

TrimAl implements a series of automated algorithms that trim the alignment by searching for optimum thresholds based on inherent characteristics
of the input alignment, so that the signal-to-noise ratio is increased after the alignment trimming phase. Learn more about the available trimming modes here: https://trimal.readthedocs.io/en/latest/algorithms.html

Among its additional features, trimAl allows you to:

- get the complementary alignment (the columns that were trimmed),
- compute statistics from the alignment,
- select the output file format,
- get a summary of trimAl's trimming in HTML and SVG formats,
- and use many other options.

TrimAl webpage: https://trimal.readthedocs.io

License
-------
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, the last available version.
    ]]></help>
    <!-- Publication to cite when using this tool (presumably the trimAl paper; verify the DOI). -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btp348</citation>
    </citations>
</tool>