Mercurial > repos > rnateam > mafft
comparison mafft.xml @ 15:bf28a8cff401 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mafft commit 2f6456c314c010fd73f5eeaf809a9afce47353af
author | bgruening |
---|---|
date | Wed, 20 Mar 2024 07:34:52 +0000 |
parents | 6f28e90db932 |
children | 8e649f27aa0d |
comparison
equal
deleted
inserted
replaced
14:6f28e90db932 | 15:bf28a8cff401 |
---|---|
1 <?xml version="1.0" encoding="UTF-8"?> | 1 <?xml version="1.0" encoding="UTF-8"?> |
2 <tool id="rbc_mafft" name="MAFFT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | 2 <tool id="rbc_mafft" name="MAFFT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> |
3 <description>Multiple alignment program for amino acid or nucleotide sequences</description> | 3 <description>Multiple alignment program for amino acid or nucleotide sequences</description> |
4 <macros> | 4 <macros> |
5 <import>macros.xml</import> | 5 <import>macros.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="biotools"/> | 7 <expand macro="biotools"/> |
8 <expand macro="requirements" /> | 8 <expand macro="requirements" /> |
9 <stdio> | 9 <stdio> |
10 <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> | 10 <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> |
11 <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" /> | 11 <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" /> |
12 </stdio> | 12 </stdio> |
13 <version_command> <![CDATA[ | 13 <version_command><![CDATA[mafft --version]]></version_command> |
14 mafft --version | 14 <command><![CDATA[ |
15 ]]> | 15 ## Concatenate all input datasets no matter how they were provided |
16 </version_command> | 16 bash inputs.sh && |
17 <command> | 17 |
18 <![CDATA[ | 18 ## Count total number of sequences across input datasets |
19 | 19 ## Can't do this on the concatenated input data prepared above because it's |
20 #if $cond_flavour.flavourType == 'custom' | 20 ## just a regular file and we don't have Galaxy-generated metadata for it. |
21 #if $cond_flavour.dist_flavour.distance_method == '--fastapair' | 21 #set sequence_count = 0 |
22 export FASTA_4_MAFFT=`which fasta36`; | 22 #if $input.mapping == "implicit" |
23 #for $batch in $input.batches: | |
24 #set sequence_count += int($batch.inputs.metadata.sequences) | |
25 #end for | |
26 #elif $input.mapping == "merge" | |
27 #for $batch in $input.batches: | |
28 #for $dataset in $batch.inputs: | |
29 #set sequence_count += int($dataset.metadata.sequences) | |
30 #end for | |
31 #end for | |
32 #end if | |
33 | |
34 ## For those cases in which MAFFT needs fasta3, set an env variable to make it | |
35 ## find the executable. Necessary because the current version of MAFFT still | |
36 ## expects a fasta34 executable in path, but we bundle a newer version. | |
37 #if $flavour.type == "custom" | |
38 #if $flavour.guidetree.guidetree_generation == "original" | |
39 #if $flavour.guidetree.dist_flavour.distance_method == "--fastapair" | |
40 export FASTA_4_MAFFT=`which @FASTA3_EXEC@` && | |
41 #end if | |
42 #elif $flavour.guidetree.guidetree_generation == "parttree" | |
43 #if $flavour.guidetree.parttree_selection.parttree_option == "--fastaparttree" | |
44 export FASTA_4_MAFFT=`which @FASTA3_EXEC@` && | |
45 #end if | |
46 #end if | |
47 #end if | |
48 | |
49 ## groupsize warning | |
50 #if $flavour.type == "custom" | |
51 #if $flavour.guidetree.guidetree_generation == "parttree" | |
52 #if $flavour.guidetree.parttree_selection.groupsize > $sequence_count | |
53 echo "WARNING = Chosen groupsize number larger than number of input sequences. Not recommended for MAFFT." && | |
54 #end if | |
55 #end if | |
56 #end if | |
57 | |
58 ## run MAFFT with predefined MSA flavours or custom settings | |
59 #if $flavour.type == "custom" | |
60 mafft | |
61 #if $flavour.guidetree.guidetree_generation == "original" | |
62 #if $flavour.guidetree.dist_flavour.distance_method == "--6merpair" | |
63 --6merpair | |
64 --retree $flavour.guidetree.dist_flavour.retree | |
65 #elif $flavour.guidetree.dist_flavour.distance_method == "--globalpair" | |
66 --globalpair | |
67 --weighti $flavour.guidetree.dist_flavour.weighti | |
68 #if $flavour.guidetree.dist_flavour.treat_unrelated_segments.unalignlevel > 0 | |
69 --allowshift --unalignlevel $flavour.guidetree.dist_flavour.treat_unrelated_segments.unalignlevel | |
23 #end if | 70 #end if |
71 $flavour.guidetree.dist_flavour.treat_unrelated_segments.leavegappyregion | |
72 #elif $flavour.guidetree.dist_flavour.distance_method == "--localpair" | |
73 --localpair | |
74 --weighti $flavour.guidetree.dist_flavour.weighti | |
75 --lop $flavour.guidetree.dist_flavour.lop | |
76 --lep $flavour.guidetree.dist_flavour.lep | |
77 --lexp $flavour.guidetree.dist_flavour.lexp | |
78 #elif $flavour.guidetree.dist_flavour.distance_method == "--genafpair" | |
79 --genafpair | |
80 --weighti $flavour.guidetree.dist_flavour.weighti | |
81 --lop $flavour.guidetree.dist_flavour.lop | |
82 --lep $flavour.guidetree.dist_flavour.lep | |
83 --lexp $flavour.guidetree.dist_flavour.lexp | |
84 --LOP $flavour.guidetree.dist_flavour.LOP | |
85 --LEXP $flavour.guidetree.dist_flavour.LEXP | |
86 #elif $flavour.guidetree.dist_flavour.distance_method == "--fastapair" | |
87 --fastapair | |
88 --weighti $flavour.guidetree.dist_flavour.weighti | |
24 #end if | 89 #end if |
25 | 90 #elif $flavour.guidetree.guidetree_generation == "parttree" |
26 #if $cond_flavour.flavourType != 'custom' | 91 $flavour.guidetree.parttree_selection.parttree_option |
27 $cond_flavour.flavourType | 92 --retree $flavour.guidetree.parttree_selection.retree |
28 #elif $cond_flavour.flavourType == 'custom' | 93 --partsize $flavour.guidetree.parttree_selection.partsize |
29 ### full parameter options | 94 #if $flavour.guidetree.parttree_selection.groupsize != -1 |
30 mafft | 95 --groupsize $flavour.guidetree.parttree_selection.groupsize |
31 $cond_flavour.dist_flavour.distance_method | |
32 #if $cond_flavour.dist_flavour.distance_method == '--6merpair' | |
33 --retree $cond_flavour.dist_flavour.retree | |
34 $cond_flavour.dist_flavour.distance_method.usetree.parttree | |
35 | |
36 #if $cond_flavour.dist_flavour.distance_method.usetree.parttree==--parttree | |
37 $cond_flavour.dist_flavour.distance_method.usetree.treedistance | |
38 $cond_flavour.dist_flavour.distance_method.usetree.partsize | |
39 $cond_flavour.dist_flavour.distance_method.usetree.groupsize | |
40 #end if | |
41 | |
42 #elif $cond_flavour.dist_flavour.distance_method == '--globalpair' | |
43 --weighti $cond_flavour.dist_flavour.weighti | |
44 #elif $cond_flavour.dist_flavour.distance_method == '--localpair' | |
45 --weighti $cond_flavour.dist_flavour.weighti | |
46 --lop $cond_flavour.dist_flavour.lop | |
47 --lep $cond_flavour.dist_flavour.lep | |
48 --lexp $cond_flavour.dist_flavour.lexp | |
49 #elif $cond_flavour.dist_flavour.distance_method == '--genafpair' | |
50 --weighti $cond_flavour.dist_flavour.weighti | |
51 --lop $cond_flavour.dist_flavour.lop | |
52 --lep $cond_flavour.dist_flavour.lep | |
53 --lexp $cond_flavour.dist_flavour.lexp | |
54 --LOP $cond_flavour.dist_flavour.skipLOP | |
55 --EXP $cond_flavour.dist_flavour.skipEXP 1 | |
56 #elif $cond_flavour.dist_flavour.distance_method == '--fastapair' | |
57 --weighti $cond_flavour.dist_flavour.weighti | |
58 #end if | |
59 --maxiterate $cond_flavour.iterations | |
60 $cond_flavour.fft | |
61 $cond_flavour.score | |
62 #end if | 96 #end if |
63 | 97 #end if |
64 ## specify threads to use | 98 ## progressive alignment calculation |
65 --thread \${GALAXY_SLOTS:-1} | 99 --maxiterate $flavour.progressive_alignment_calculation.maxiterate |
66 $datatype | 100 $flavour.progressive_alignment_calculation.fft |
67 --ep $ep | 101 $flavour.progressive_alignment_calculation.noscore |
68 --op $op | 102 #else |
69 | 103 $flavour.type |
70 #if $matrix_condition.matrix == "BLOSUM" | 104 #if $flavour.type == "mafft-ginsi" or "--globalpair" in str($flavour.type) |
71 --bl $matrix_condition.BLOSUM | 105 #if $flavour.treat_unrelated_segments.unalignlevel > 0 |
72 #elif $matrix_condition.matrix == "PAM" | 106 --allowshift --unalignlevel $flavour.treat_unrelated_segments.unalignlevel |
73 --jtt $matrix_condition.PAM | |
74 --tm $matrix_condition.tm | |
75 #elif $matrix_condition.matrix == "custom" | |
76 --aamatrix '$matrix_condition.matrixfile' | |
77 --fmodel $matrix_condition.fmodel | |
78 #end if | 107 #end if |
79 | 108 $flavour.treat_unrelated_segments.leavegappyregion |
80 $reorder | 109 #end if |
81 $getTree | 110 #end if |
82 $outputFormat | 111 |
83 '$inputSequences' > '$outputAlignment'; | 112 ## handle scoring matrix |
84 | 113 $datatype_selection.datatype |
85 #if $getTree == "--treeout" | 114 #if $datatype_selection.datatype != "" |
86 mv '${inputSequences}.tree' '$outputTree'; | 115 #if $datatype_selection.scoring_matrix.type == "custom" |
87 #end if | 116 --aamatrix '$datatype_selection.scoring_matrix.aamatrix' |
88 ]]> | 117 #else |
89 </command> | 118 $datatype_selection.scoring_matrix.type $datatype_selection.scoring_matrix.coefficient |
90 <inputs> | 119 #end if |
91 <param name="inputSequences" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format."/> | 120 $datatype_selection.fmodel |
92 <param name="datatype" type="select" label="Data type"> | 121 ## gap penalties |
93 <option value="">Auto detection</option> | 122 #if $datatype_selection.gap_costs.use_defaults == "no" |
94 <option value="--nuc">Nucleic acids</option> | 123 --ep $datatype_selection.gap_costs.ep --op $datatype_selection.gap_costs.op |
95 <option value="--amino">Amino acids</option> | 124 #end if |
96 </param> | 125 #end if |
97 <conditional name="cond_flavour"> | 126 |
98 <param name="flavourType" type="select" label="MAFFT flavour" help="Run mafft with pre-defined input parameters. Specification of these parameters can be found in the help section."> | 127 |
99 <option value="mafft --auto">auto</option> | 128 ## output options |
100 <option value="mafft-fftns" selected="true">fftns</option> | 129 $reorder |
101 <option value="mafft-fftnsi">fftnsi</option> | 130 $outputFormat |
102 <option value="mafft-nwns">nwns</option> | 131 $treeout |
103 <option value="mafft-nwnsi">nwnsi</option> | 132 |
104 <option value="mafft-einsi">einsi</option> | 133 ## specify threads to use |
105 <option value="mafft-ginsi">ginsi</option> | 134 ## disable multithreading during iterative refinement step for reproducibility |
106 <option value="mafft-linsi">linsi</option> | 135 ## cmp. https://mafft.cbrc.jp/alignment/software/multithreading.html |
107 <option value="mafft-qinsi">qinsi</option> | 136 --thread \${GALAXY_SLOTS:-1} --threadit 0 |
108 <option value="mafft-xinsi">xinsi</option> | 137 |
109 <option value="custom">Custom Parameters</option> | 138 input.fa > '$outputAlignment' |
110 </param> | 139 |
111 <when value="mafft-fftns"/> | 140 ## Output alignment tree |
112 <when value="mafft --auto"/> | 141 #if $treeout |
113 <when value="mafft-fftnsi"/> | 142 && mv input.fa.tree '$outputTree' |
114 <when value="mafft-nwns"/> | 143 #end if |
115 <when value="mafft-nwnsi"/> | 144 ]]></command> |
116 <when value="mafft-einsi"/> | 145 <configfiles> |
117 <when value="mafft-ginsi"/> | 146 <configfile filename="inputs.sh"><![CDATA[ |
118 <when value="mafft-linsi"/> | 147 #if $input.mapping == "implicit" |
119 <when value="mafft-qinsi"/> | 148 #for $batch in $input.batches: |
120 <when value="mafft-xinsi"/> | 149 cat $batch.inputs >> input.fa |
121 <when value="custom"> | 150 #end for |
122 <conditional name="dist_flavour"> | 151 #elif $input.mapping == "merge" |
123 <param name="distance_method" type="select" display="radio" label="Distance method" help="Distance method must be chosen regarding your data"> | 152 #for $batch in $input.batches: |
124 <option value="--6merpair" selected="true">Shared 6mers distance (fastest)</option> | 153 #for $dataset in $batch.inputs: |
125 <option value="--globalpair">Global alignment (Needleman-Wunsch)</option> | 154 cat $dataset >> input.fa |
126 <option value="--localpair">Local alignment (Smith-Waterman)</option> | 155 #end for |
127 <option value="--genafpair">Local, affine gap cost</option> | 156 #end for |
128 <option value="--fastapair">All pairwise alignments are computed with FASTA</option> | 157 #end if |
129 </param> | 158 ]]></configfile> |
130 <when value="--6merpair"> | 159 </configfiles> |
131 <param name="retree" type="integer" value="2" min="1" max="100" label="Guide tree is built this number of times in the progressive stage." help="Valid with 6mer distance" /> | 160 <inputs> |
132 <conditional name="usetree"> | 161 <conditional name="input"> |
133 <param name="parttree" type="select" label="Use a fast tree-building method?" help="Recommended for a large number (> ~10,000) of sequences are input" > | 162 <param name="mapping" type="select" label="For multiple inputs generate" help="All you have is a single dataset with the sequences to align? You can skip this help text and continue with the default setting. For multiple input datasets, the first mode will launch separate MAFFT jobs for all sequences from the first, second, ..., n-th dataset/element from each input batch, respectively, resulting in n separate MSAs. The second mode will concatenate all input sequences from all inputs for a single run of MAFFT and will generate a single MSA."> |
134 <option value="--parttree" selected="true">Yes</option> | 163 <option value="implicit">one or several MSAs depending on input structure</option> |
135 <option value="">No</option> | 164 <option value="merge">a single MSA of all sequences from all inputs</option> |
136 </param> | 165 </param> |
137 <when value="--parttree"> | 166 <when value="implicit"> |
138 <param name="treedistance" type="select" display="radio" label="Distance method" help="Distance method must be chosen regarding your data"> | 167 <repeat name="batches" title="Input batch" default="1" min="1"> |
139 <option value="--fastaparttree" selected="true">Distances based on FASTA</option> | 168 <param name="inputs" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format. Add Dataset for concatenation of every additional dataset with each file of the first upload panel"/> |
140 <option value="--dpparttree">Distances based on DP. (Needleman-Wunsch)</option> | 169 </repeat> |
141 </param> | 170 </when> |
142 <param name="partsize" type="integer" value="50" min="0" max="1000" label="Number of partitions in the PartTree algorithm" /> | 171 <when value="merge"> |
143 <param name="groupsize" type="integer" value="" min="0" max="1000" label="Do not make alignment larger than ... sequences" /> | 172 <repeat name="batches" title="Input batch" default="1" min="1"> |
144 </when> | 173 <param name="inputs" multiple="true" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format."/> |
145 <when value=""/> | 174 </repeat> |
146 </conditional> | 175 </when> |
147 </when> | |
148 <when value="--globalpair"> | |
149 <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." /> | |
150 </when> | |
151 <when value="--localpair"> | |
152 <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." /> | |
153 <param name="lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" /> | |
154 <param name="lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" /> | |
155 <param name="lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" /> | |
156 </when> | |
157 <when value="--genafpair"> | |
158 <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." /> | |
159 <param name="lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" /> | |
160 <param name="lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" /> | |
161 <param name="lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" /> | |
162 <param name="skipLOP" type="float" value="-6.00" label="Gap opening penalty to skip the alignment" help="-6.00 default value" /> | |
163 <param name="skipEXP" type="float" value="0.00" label="Gap extension penalty to skip the alignment" help="0 default value" /> | |
164 </when> | |
165 <when value="--fastapair"> | |
166 <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." /> | |
167 </when> | |
168 </conditional> | 176 </conditional> |
169 <param name="iterations" type="integer" value="0" min="0" max="1000" label="Maximum number of iterations" help="1000 for maximum quality" /> | 177 <conditional name="datatype_selection"> |
170 <param name="fft" type="boolean" truevalue="--fft" falsevalue="--nofft" checked="True" label="Use FFT approximation in group-to-group alignment?" /> | 178 <param name="datatype" type="select" label="Type of sequences" help="The tool can try to detect the type of the input sequences, but you likely want to declare it explicitly. Doing so will also give you control over the scoring matrix used for the alignment, while autodetection will result in the Kimura PAM200 and the BLOSUM62 matrix being used for nucleic acids and protein alignments, respectively."> |
171 <param name="score" type="boolean" truevalue="" falsevalue="--noscore" checked="True" label="Check alignment score in the iterative refinement stage?" /> | 179 <option value="">auto-detect</option> |
172 </when> | 180 <option value="--nuc">Nucleic acids</option> |
173 </conditional> | 181 <option value="--amino">Amino acids</option> |
174 <param name="ep" type="float" value="0.0" label="Gap extend penalty for group-to-group alignment" help="Offset value, which works like gap extension penalty, for group-to-group alignment. For E-INS-i, 0 is recommended to allow large gaps" /> | 182 </param> |
175 <param name="op" type="float" value="1.53" label="Gap opening penalty at group-to-group alignment." help="1.53 default value" /> | 183 <when value="" /> |
176 <conditional name="matrix_condition"> | 184 <when value="--nuc"> |
177 <param name="matrix" type="select" label="Matrix selection" display="radio" help="Usefull only for amino acids" > | 185 <conditional name="scoring_matrix"> |
178 <option value="">No matrix</option> | 186 <param name="type" type="select" label="Type of scoring matrix" help="See the tool help below for details about the available options."> |
179 <option value="BLOSUM" selected="true">BLOSUM</option> | 187 <option value="--kimura">Kimura</option> |
180 <option value="PAM">PAM</option> | 188 </param> |
181 <option value="custom">Custom</option> | 189 <when value="--kimura"> |
182 </param> | 190 <param argument="--kimura" name="coefficient" type="integer" value="200" min="1" label="PAM value of the matrix" /> |
183 <when value=""/> | 191 </when> |
184 <when value="BLOSUM"> | 192 </conditional> |
185 <param name="BLOSUM" type="select" display="radio" label="Coefficient of the BLOSUM matrix"> | 193 <expand macro="misc_scoring_scheme" /> |
186 <option value="30">30</option> | 194 </when> |
187 <option value="45">45</option> | 195 <when value="--amino"> |
188 <option value="62" selected="true">62</option> | 196 <conditional name="scoring_matrix"> |
189 <option value="80">80</option> | 197 <param name="type" type="select" label="Type of scoring matrix" help="See the tool help below for details about the available options."> |
198 <option value="--bl" selected="true">BLOSUM</option> | |
199 <option value="--jtt">JTT</option> | |
200 <option value="--tm">transmembrane protein-optimized JTT</option> | |
201 <option value="custom">custom matrix</option> | |
202 </param> | |
203 <when value="--bl"> | |
204 <param argument="--bl" name="coefficient" type="select" display="radio" label="Coefficient of the BLOSUM matrix"> | |
205 <option value="30">30</option> | |
206 <option value="45">45</option> | |
207 <option value="62" selected="true">62</option> | |
208 <option value="80">80</option> | |
209 </param> | |
210 </when> | |
211 <when value="--jtt"> | |
212 <param argument="--jtt" name="coefficient" type="integer" value="200" min="1" label="PAM value of the matrix" /> | |
213 </when> | |
214 <when value="--tm"> | |
215 <param argument="--tm" name="coefficient" type="integer" value="200" min="1" label="PAM value of the matrix"/> | |
216 </when> | |
217 <when value="custom"> | |
218 <param argument="--aamatrix" type="data" format="txt" label="User-defined AA scoring matrix" help="The expected format of the matrix is the same as that used by BLAST."/> | |
219 </when> | |
220 </conditional> | |
221 <expand macro="misc_scoring_scheme" /> | |
222 </when> | |
223 </conditional> | |
224 <conditional name="flavour"> | |
225 <param name="type" type="select" label="MAFFT flavour" help="Run mafft with pre-defined input parameters. Specification of these parameters can be found in the help section. With 'Auto', the tool automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size from few to many respectively. Default setting: FFT-NS-2."> | |
226 <option value="mafft --auto">Auto</option> | |
227 <option value="mafft-fftns --retree 1">FFT-NS-1 (very fast, progressive method; use for >2,000 sequences)</option> | |
228 <option value="mafft-fftns" selected="true">FFT-NS-2 (fast, progressive method)</option> | |
229 <option value="mafft-nwns">NW-NS-2 (fast, progressive method without FFT approximation)</option> | |
230 <option value="mafft --retree 1 --maxiterate 0 --nofft --parttree">NW-NS-PartTree-1 (very fast, progressive method using the PartTree algorithm; for ~10,000 to ~50,000 sequences)</option> | |
231 <option value="mafft --maxiterate 0 --globalpair">G-INS-1 (slow, progressive method with an accurate guide tree)</option> | |
232 <option value="mafft-fftnsi">FFT-NS-i (slow, iterative refinement method)</option> | |
233 <option value="mafft-nwnsi">NW-NS-i (slow, iterative refinement method without FFT approximation)</option> | |
234 <option value="mafft-einsi">E-INS-i (very slow; use for <200 sequences with multiple conserved domains and long gaps)</option> | |
235 <option value="mafft-linsi">L-INS-i (very slow; use for <200 sequences with one conserved domain and long gaps)</option> | |
236 <option value="mafft-ginsi">G-INS-i (very slow; recommended for <200 sequences with global homology)</option> | |
237 <option value="custom">Custom Parameters</option> | |
238 </param> | |
239 <when value="mafft --auto"/> | |
240 <when value="mafft-fftns --retree 1"/> | |
241 <when value="mafft-fftns"/> | |
242 <when value="mafft-nwns"/> | |
243 <when value="mafft --retree 1 --maxiterate 0 --nofft --parttree"/> | |
244 <when value="mafft --maxiterate 0 --globalpair"> | |
245 <expand macro="global_align_options"/> | |
246 </when> | |
247 <when value="mafft-fftnsi"/> | |
248 <when value="mafft-nwnsi"/> | |
249 <when value="mafft-einsi"/> | |
250 <when value="mafft-linsi"/> | |
251 <when value="mafft-ginsi"> | |
252 <expand macro="global_align_options"/> | |
253 </when> | |
254 <when value="custom"> | |
255 <conditional name="guidetree"> | |
256 <param name="guidetree_generation" type="select" label="GuideTree-Generation" help="Parttree is recommended for a large number (> ~10,000) of sequences as input"> | |
257 <option value="original">Original guidetree building method of MAFFT</option> | |
258 <option value="parttree">Fast guidetree building method with PartTree-algorithm</option> | |
259 </param> | |
260 <when value="original"> | |
261 <conditional name="dist_flavour"> | |
262 <param name="distance_method" type="select" label="Distance method" help="Distance method must be chosen regarding your data"> | |
263 <option value="--6merpair" selected="true">Shared 6mers distance (fastest) (--6merpair)</option> | |
264 <option value="--globalpair">Global alignment (Needleman-Wunsch) (--globalpair)</option> | |
265 <option value="--localpair">Local alignment (Smith-Waterman) (--localpair)</option> | |
266 <option value="--genafpair">Local, affine gap cost (--genafpair)</option> | |
267 <option value="--fastapair">All pairwise alignments are computed with FASTA (--fastapair)</option> | |
268 </param> | |
269 <when value="--6merpair"> | |
270 <param argument="--retree" type="integer" value="2" min="1" max="3" label="Guide tree is build this number of times in the progressive stage."/> | |
271 </when> | |
272 <when value="--globalpair"> | |
273 <expand macro="global_align_options"/> | |
274 <expand macro="weighti_param" /> | |
275 </when> | |
276 <when value="--localpair"> | |
277 <expand macro="weighti_param" /> | |
278 <param argument="--lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value"/> | |
279 <param argument="--lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value"/> | |
280 <param argument="--lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" /> | |
281 </when> | |
282 <when value="--genafpair"> | |
283 <expand macro="weighti_param" /> | |
284 <param argument="--lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" /> | |
285 <param argument="--lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" /> | |
286 <param argument="--lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" /> | |
287 <param argument="--LOP" type="float" value="-6.00" label="Gap opening penalty to skip the alignment" help="-6.00 default value" /> | |
288 <param argument="--LEXP" type="float" value="0.00" label="Gap extension penalty to skip the alignment" help="0 default value" /> | |
289 </when> | |
290 <when value="--fastapair"> | |
291 <expand macro="weighti_param" /> | |
292 </when> | |
293 </conditional> | |
294 </when> | |
295 <when value="parttree"> | |
296 <conditional name="parttree_selection"> | |
297 <param name="parttree_option" type="select" label="Which distance for the fast tree-building method?"> | |
298 <option value="--parttree" selected="true">Fast tree-building method with the 6mer distance (--parttree)</option> | |
299 <option value="--fastaparttree">Distances based on FASTA (--fastaparttree)</option> | |
300 <option value="--dpparttree">Distances based on DP. (Needleman-Wunsch) (--dpparttree)</option> | |
301 </param> | |
302 <when value="--parttree"> | |
303 <expand macro="parttree_parameters" /> | |
304 </when> | |
305 <when value="--fastaparttree"> | |
306 <expand macro="parttree_parameters" /> | |
307 </when> | |
308 <when value="--dpparttree"> | |
309 <expand macro="parttree_parameters" /> | |
310 </when> | |
311 </conditional> | |
312 </when> | |
313 </conditional> | |
314 <section name="progressive_alignment_calculation" title="Progressive alignment calculation" expanded="true"> | |
315 <param argument="--maxiterate" type="integer" value="0" min="0" max="1000" label="Maximum number of iterations" help="1000 for maximum quality" /> | |
316 <param argument="--fft" type="boolean" truevalue="--fft" falsevalue="--nofft" checked="True" label="Use FFT approximation in group-to-group alignment?" /> | |
317 <param argument="--noscore" type="boolean" truevalue="" falsevalue="--noscore" checked="True" label="Check alignment score in the iterative refinement stage?" /> | |
318 </section> | |
319 </when> | |
320 </conditional> | |
321 <param argument="--reorder" type="boolean" truevalue="--reorder" falsevalue="" checked="False" label="Reorder output?" help="Default order is input order." /> | |
322 <param argument="--treeout" type="boolean" truevalue="--treeout" falsevalue="" checked="False" label="Keep alignment tree as output?" /> | |
323 <param name="outputFormat" type="select" label="Output format"> | |
324 <option value="" selected="true">FASTA</option> | |
325 <option value="--clustalout">ClustalW</option> | |
326 <option value="--phylipout">Phylip</option> | |
190 </param> | 327 </param> |
191 </when> | 328 </inputs> |
192 <when value="PAM"> | 329 <outputs> |
193 <param name="PAM" type="integer" value="80" min="1" max="350" label="Coefficient of the JTT PAM matrix" /> | 330 <data format="fasta" name="outputAlignment" label="${tool.name} on ${on_string}"> |
194 <param name="tm" type="integer" value="80" min="1" max="350" label="Coefficient of the transmembrane PAM matrix" /> | 331 <change_format> |
195 </when> | 332 <when input="outputFormat" value="--clustalout" format="clustal"/> |
196 <when value="custom"> | 333 <when input="outputFormat" value="--phylipout" format="phylip"/> |
197 <param name="matrixfile" type="data" format="txt" label="User-defined AA scoring matrix" help="The format of matrixfile is the same to that of BLAST. Ignored when nucleotide sequences are input."/> | 334 </change_format> |
198 <param name="fmodel" type="boolean" truevalue="--fmodel" falsevalue="" checked="False" label="Incorporate the AA/nuc composition information into the scoring matrix?" /> | 335 </data> |
199 </when> | 336 <data name="outputTree" format="txt" label="${tool.name} Guide Tree"> |
200 </conditional> | 337 <filter>treeout</filter> |
201 <param name="reorder" type="boolean" truevalue="--reorder" falsevalue="" checked="False" label="Reorder output?" /> | 338 </data> |
202 <param name="getTree" type="boolean" truevalue="--treeout" falsevalue="" checked="False" label="Display alignment tree ?" /> | 339 </outputs> |
203 <param name="outputFormat" type="select" label="Output format" help="Either FASTA or ClustalW"> | 340 <tests> |
204 <option value="" selected="true">FASTA</option> | 341 <test expect_num_outputs="1"> |
205 <option value="--clustalout">ClustalW</option> | 342 <conditional name="input"> |
206 <option value="--phylipout">Phylip</option> | 343 <param name="mapping" value="implicit"/> |
207 </param> | 344 <repeat name="batches"> |
208 </inputs> | 345 <param name="inputs" value="sample_amino.fa"/> |
209 <outputs> | 346 </repeat> |
210 <data format="fasta" name="outputAlignment" label="${tool.name} on ${on_string}"> | 347 </conditional> |
211 <change_format> | 348 <output name="outputAlignment" ftype="fasta" file="mafft_default.aln"/> |
212 <when input="outputFormat" value="--clustalout" format="clustal"/> | 349 </test> |
213 <when input="outputFormat" value="--phylipout" format="phylip"/> | 350 <!-- test autodetection of suitable algorithm from input; expected to choose L-INS-i --> |
214 </change_format> | 351 <test expect_num_outputs="1"> |
215 </data> | 352 <conditional name="input"> |
216 <data name="outputTree" format="txt" label="${tool.name} Guide Tree"> | 353 <param name="mapping" value="implicit"/> |
217 <filter>getTree == True</filter> | 354 <repeat name="batches"> |
218 </data> | 355 <param name="inputs" value="sample_amino.fa"/> |
219 </outputs> | 356 </repeat> |
220 <tests> | 357 </conditional> |
221 <test expect_num_outputs="1" > | 358 <conditional name="flavour"> |
222 <param name="inputSequences" value="sample.fa"/> | 359 <param name="type" value="mafft --auto"/> |
223 <param name="flavourType" value="mafft-fftns"/> | 360 </conditional> |
224 <param name="outputFormat" value=""/> | 361 <output name="outputAlignment" ftype="fasta" file="mafft_auto_linsi.aln"/> |
225 <output name="outputAlignment" ftype="fasta" file="mafft_fftns_result.aln"/> | 362 </test> |
226 </test> | 363 <!-- test explicit specification of L-INS-i mode --> |
227 <test expect_num_outputs="1" > | 364 <test expect_num_outputs="1"> |
228 <param name="inputSequences" value="sample.fa"/> | 365 <conditional name="input"> |
229 <param name="flavourType" value="mafft-nwns"/> | 366 <param name="mapping" value="implicit"/> |
230 <param name="outputFormat" value="--clustalout"/> | 367 <repeat name="batches"> |
231 <output name="outputAlignment" ftype="clustal" file="mafft_nwns_result.aln" lines_diff="2" /> | 368 <param name="inputs" value="sample_amino.fa"/> |
232 </test> | 369 </repeat> |
233 <!-- WARNING: the results of the following test depends on #threads. | 370 </conditional> |
234 The result seems deterministic for single threaded execution, i.e. GALAXY_SLOTS=1 planemo test | 371 <conditional name="flavour"> |
235 However, GH CI/CD uses 2 threads and results vary --> | 372 <param name="type" value="mafft-linsi"/> |
236 <test expect_num_outputs="1" > | 373 </conditional> |
237 <param name="inputSequences" value="sample.fa"/> | 374 <output name="outputAlignment" ftype="fasta" file="mafft_auto_linsi.aln"/> |
238 <param name="flavourType" value="custom"/> | 375 </test> |
239 <conditional name="matrix_condition"> | 376 <test expect_num_outputs="1"> |
240 <param name="matrix" value="BLOSUM"/> | 377 <conditional name="input"> |
241 </conditional> | 378 <param name="mapping" value="implicit"/> |
242 <param name="BLOSUM" value="62"/> | 379 <repeat name="batches"> |
243 <param name="distance_method" value="--fastapair"/> | 380 <param name="inputs" value="sample_amino.fa"/> |
244 <param name="weighti" value="2.7"/> | 381 </repeat> |
245 <param name="iterations" value="1000"/> | 382 </conditional> |
246 <param name="outputFormat" value="--clustalout"/> | 383 <conditional name="datatype_selection"> |
247 <output name="outputAlignment" ftype="clustal" file="mafft_custom_result.aln" compare="sim_size"> | 384 <param name="datatype" value="--amino"/> |
248 <assert_contents> | 385 <conditional name="scoring_matrix"> |
249 <has_n_lines n="458" delta="0"/> | 386 <param name="type" value="--bl"/> |
250 <has_text text="CLUSTAL format alignment by MAFFT F-INS-i"/> | 387 <param name="coefficient" value="80"/> |
251 <has_text text="NPIVYGISHPKY"/> | 388 </conditional> |
252 <has_text text="1=="/> | 389 </conditional> |
253 <has_text text="36=="/> | 390 <conditional name="flavour"> |
254 <has_line line="8=opsin, ------------------------------------------------------------"/> | 391 <param name="type" value="mafft-fftns"/> |
255 </assert_contents> | 392 </conditional> |
256 </output> | 393 <param name="outputFormat" value="--clustalout"/> |
257 </test> | 394 <output name="outputAlignment" ftype="clustal" file="mafft_explicit_amino_blosum80.clustal.aln" /> |
258 </tests> | 395 </test> |
259 <help> <![CDATA[ | 396 <test expect_num_outputs="1" > |
260 **What it does** | 397 <conditional name="input"> |
261 | 398 <param name="mapping" value="implicit"/> |
262 MAFFT is a multiple sequence alignment program for unix-like operating systems. | 399 <repeat name="batches"> |
263 It offers a range of multiple alignment methods, L-INS-i (accurate; for alignment of <∼200 sequences), | 400 <param name="inputs" value="sample_nuc.fa"/> |
264 FFT-NS-2 (fast; for alignment of <∼30,000 sequences), etc. | 401 </repeat> |
265 From the MAFFT man page, an overview of the different predefined flavours of the tool is as follows: | 402 </conditional> |
266 | 403 <conditional name="datatype_selection"> |
267 **Accuracy-oriented methods:** | 404 <param name="datatype" value="--nuc"/> |
268 | 405 <conditional name="scoring_matrix"> |
269 - L-INS-i (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information): | 406 <param name="type" value="--kimura"/> |
407 <param name="coefficient" value="40"/> | |
408 </conditional> | |
409 </conditional> | |
410 <conditional name="flavour"> | |
411 <param name="type" value="mafft-fftns"/> | |
412 </conditional> | |
413 <param name="outputFormat" value="--phylipout"/> | |
414 <output name="outputAlignment" ftype="phylip" file="mafft_kimura40.phylip.aln" /> | |
415 </test> | |
416 <test expect_num_outputs="1"> | |
417 <conditional name="input"> | |
418 <param name="mapping" value="implicit"/> | |
419 <repeat name="batches"> | |
420 <param name="inputs" value="sample_amino.fa"/> | |
421 </repeat> | |
422 </conditional> | |
423 <conditional name="datatype_selection"> | |
424 <param name="datatype" value="--amino"/> | |
425 </conditional> | |
426 <conditional name="flavour"> | |
427 <param name="type" value="custom"/> | |
428 <conditional name="guidetree"> | |
429 <param name="guidetree_generation" value="original"/> | |
430 <conditional name="dist_flavour"> | |
431 <param name="distance_method" value="--globalpair"/> | |
432 <param name="weighti" value="3"/> | |
433 </conditional> | |
434 </conditional> | |
435 <section name="progressive_alignment_calculation"> | |
436 <param name="maxiterate" value="1000"/> | |
437 </section> | |
438 </conditional> | |
439 <param name="outputFormat" value="--clustalout"/> | |
440 <output name="outputAlignment" ftype="clustal" file="mafft_custom_original.clustal.aln"> | |
441 </output> | |
442 </test> | |
443 <test expect_num_outputs="1"> | |
444 <conditional name="input"> | |
445 <param name="mapping" value="implicit"/> | |
446 <repeat name="batches"> | |
447 <param name="inputs" value="sample_amino.fa"/> | |
448 </repeat> | |
449 </conditional> | |
450 <conditional name="datatype_selection"> | |
451 <param name="datatype" value="--amino"/> | |
452 </conditional> | |
453 <conditional name="flavour"> | |
454 <param name="type" value="custom"/> | |
455 <conditional name="guidetree"> | |
456 <param name="guidetree_generation" value="parttree"/> | |
457 <conditional name="parttree_selection"> | |
458 <param name="parttree_option" value="--parttree"/> | |
459 <param name="retree" value="2"/> | |
460 </conditional> | |
461 </conditional> | |
462 </conditional> | |
463 <output name="outputAlignment" ftype="fasta" file="mafft_custom_parttree.aln" /> | |
464 </test> | |
465 <!-- test concatenation of multiple inputs --> | |
466 <test expect_num_outputs="2"> | |
467 <conditional name="input"> | |
468 <param name="mapping" value="merge"/> | |
469 <repeat name="batches"> | |
470 <param name="inputs" value="sample_amino.fa"/> | |
471 </repeat> | |
472 <repeat name="batches"> | |
473 <param name="inputs" value="sample_nuc.fa"/> | |
474 </repeat> | |
475 </conditional> | |
476 <param name="treeout" value="true"/> | |
477 <output name="outputAlignment" ftype="fasta"> | |
478 <metadata name="sequences" value="39"/> | |
479 </output> | |
480 </test> | |
481 </tests> | |
482 <help><![CDATA[ | |
483 **What it does** | |
484 | |
485 MAFFT is a multiple sequence alignment (MSA) program, which offers a range of multiple alignment methods. | |
486 | |
487 Input types and alignment scoring matrices | |
488 ------------------------------------------ | |
489 | |
490 For the alignment of *protein* sequences, you can choose between: | |
491 | |
492 - different flavors of BLOSUM matrices (`Henikoff S and Henikoff JG, 1992 <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC50453/>`__) | |
493 - JTT matrices with any point accepted mutation (PAM) rate (`Jones, Taylor and Thornton, 1992 <https://pubmed.ncbi.nlm.nih.gov/1633570/>`__) | |
494 - PAM-based matrices optimized for transmembrane proteins (`Jones, Taylor and Thornton, 1994 <https://pubmed.ncbi.nlm.nih.gov/8112466/>`__) | |
495 | |
496 For nucleic acid sequence alignment, MAFFT uses Kimura's two-parameter model (`Kimura 1980 <https://pubmed.ncbi.nlm.nih.gov/7463489/>`__) | |
497 with a transition-to-transversion ratio of 2 (kappa 2), but lets you configure the PAM value. | |
498 | |
499 The tool can also try to autodetect the sequence type from the input(s). | |
500 In this mode, it will use the BLOSUM 62 matrix if it detects amino acid input, and the Kimura kappa 2 PAM200 matrix for nucleic acids. | |
501 | |
502 | |
503 Pre-configured MSA methods | |
504 -------------------------- | |
505 | |
506 From the `MAFFT man page <https://mafft.cbrc.jp/alignment/software/manual/manual.html>`__, an overview of the different predefined flavours of the tool is as follows: | |
507 | |
508 **Accuracy-oriented methods:** | |
509 | |
510 - *L-INS-i* (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information): | |
511 | |
270 - mafft --localpair --maxiterate 1000 input [> output] | 512 - mafft --localpair --maxiterate 1000 input [> output] |
271 - G-INS-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information): | 513 - *G-INS-i* (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information): |
514 | |
272 - mafft --globalpair --maxiterate 1000 input [> output] | 515 - mafft --globalpair --maxiterate 1000 input [> output] |
273 - E-INS-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences): | 516 - *E-INS-i* (suitable for sequences containing large unalignable regions; recommended for <200 sequences): |
517 | |
274 - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps. | 518 - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps. |
275 | 519 |
276 **Speed-oriented methods:** | 520 **Speed-oriented methods:** |
277 | 521 |
278 - FFT-NS-i (iterative refinement method; two cycles only): | 522 - *FFT-NS-i* (iterative refinement method; two cycles only): |
279 - mafft --retree 2 --maxiterate 2 input [> output] | 523 |
280 - FFT-NS-i (iterative refinement method; max. 1000 iterations): | 524 - mafft --retree 2 --maxiterate 2 input [> output] |
281 - mafft --retree 2 --maxiterate 1000 input [> output] | 525 - *FFT-NS-2* (fast; progressive method): |
282 - FFT-NS-2 (fast; progressive method): | 526 |
283 - mafft --retree 2 --maxiterate 0 input [> output] | 527 - mafft --retree 2 --maxiterate 0 input [> output] |
284 - FFT-NS-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree): | 528 - *NW-NS-i* (iterative refinement method without FFT approximation; two cycles only): |
285 - mafft --retree 1 --maxiterate 0 input [> output] | 529 |
286 - NW-NS-i (iterative refinement method without FFT approximation; two cycles only): | 530 - mafft --retree 2 --maxiterate 2 --nofft input [> output] |
287 - mafft --retree 2 --maxiterate 2 --nofft input [> output] | 531 - *NW-NS-2* (fast; progressive method without the FFT approximation): |
288 - NW-NS-2 (fast; progressive method without the FFT approximation): | 532 |
289 - mafft --retree 2 --maxiterate 0 --nofft input [> output] | 533 - mafft --retree 2 --maxiterate 0 --nofft input [> output] |
290 - NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm): | 534 - *NW-NS-PartTree-1* (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm): |
291 - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output] | 535 |
292 | 536 - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output] |
293 **Options:** | 537 - *FFT-NS-1* (very fast; recommended for >2000 sequences; progressive method with a rough guide tree): |
294 | 538 |
295 - --auto Automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size. Default: off (always FFT-NS-2) | 539 - mafft --retree 1 --maxiterate 0 input [> output] |
296 - --adjustdirection Generate reverse complement sequences, as necessary, and align them together with the remaining sequences. In the case of protein alignment, these options are just ignored. | 540 ]]></help> |
297 - --op Gap opening penalty, default: 1.53 | 541 <expand macro="citations" /> |
298 - --ep Offset (works like gap extension penalty), default: 0.0 | |
299 - --maxiterate Maximum number of iterative refinement, default: 0 | |
300 - --clustalout Output: clustal format, default: fasta | |
301 - --retree number Guide tree is built number times in the progressive stage. Valid with 6mer distance. Default: 2 | |
302 ]]> | |
303 </help> | |
304 <expand macro="citations" /> | |
305 </tool> | 542 </tool> |