comparison kaks_analysis.xml @ 0:2f0b8e19286b draft

Uploaded
author greg
date Thu, 08 Jun 2017 12:53:38 -0400
parents
children 3aca88613abf
comparison
equal deleted inserted replaced
-1:000000000000 0:2f0b8e19286b
1 <tool id="plant_tribes_kaks_analysis" name="KaKsAnalysis" version="@WRAPPER_VERSION@.0">
2 <description>estimates paralogous and orthologous pairwise synonymous (Ks) and non-synonymous (Ka) substitution rates</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements_kaks_analysis" />
7 <command detect_errors="exit_code"><![CDATA[
8 #set output_dir = 'kaksAnalysis_dir'
9 #set comparison = $comparison_cond.comparison
10 #if str($options_type.options_type_selector) == 'advanced':
11 #set codeml_ctl_file_cond = $options_type.codeml_ctl_file_cond
12 #set codeml_ctl_file_select = $codeml_ctl_file_cond.codeml_ctl_file_select
13 #set fit_components_cond = $options_type.fit_components_cond
14 #set fit_components = $fit_components_cond.fit_components
15 #set recalibrate_cond = $options_type.recalibrate_cond
16 #set recalibrate = $recalibrate_cond.recalibrate
17 #set set_min_coverage_cond = $options_type.set_min_coverage_cond
18 #set set_min_coverage = $set_min_coverage_cond.set_min_coverage
19 #set set_lower_ks_limit_cond = $options_type.set_lower_ks_limit_cond
20 #set set_lower_ks_limit = $set_lower_ks_limit_cond.set_lower_ks_limit
21 #set set_upper_ks_limit_cond = $options_type.set_upper_ks_limit_cond
22 #set set_upper_ks_limit = $set_upper_ks_limit_cond.set_upper_ks_limit
23 #else:
24 #set codeml_ctl_file_select = 'no'
25 #set fit_components = 'no'
26 #set set_lower_ks_limit = 'no'
27 #set set_upper_ks_limit = 'no'
28 #end if
29 KaKsAnalysis
30 --num_threads \${GALAXY_SLOTS:-4}
31 --coding_sequences_species_1 '$coding_sequences_species_1'
32 --proteins_species_1 '$proteins_species_1'
33 --comparison $comparison
34 #if str($comparison) == 'orthologs':
35 --coding_sequences_species_2 '$comparison_cond.coding_sequences_species_2'
36 --proteins_species_2 '$comparison_cond.proteins_species_2'
37 #end if
38 #if str($options_type.options_type_selector) == 'advanced':
39 #if str($set_min_coverage) == 'yes':
40 --min_coverage $set_min_coverage_cond.min_coverage
41 #end if
42 #if str($recalibrate) == 'yes':
43 --recalibration_rate $recalibrate_cond.recalibration_rate
44 #end if
45 #if str($codeml_ctl_file_select) == 'yes':
46 --codeml_ctl_file '$codeml_ctl_file_cond.codeml_ctl_file'
47 ## No else block needed here because the default codeml_ctl config
48 ## will be used if the --codeml_ctl_file flag is missing.
49 #end if
50 #if str($fit_components) == 'yes':
51 --fit_components
52 --num_of_components $fit_components_cond.num_of_components
53 #end if
54 #if str($set_lower_ks_limit) == 'yes':
55 --min_ks $set_lower_ks_limit_cond.min_ks
56 #end if
57 #if str($set_upper_ks_limit) == 'yes':
58 --max_ks $set_upper_ks_limit_cond.max_ks
59 #end if
60 #end if
61 >/dev/null
62 && mv $output_dir/species1.fna '$output_species1_fna'
63 && mv $output_dir/species1.faa '$output_species1_faa'
64 #if str($comparison) == 'paralogs':
65 && mv $output_dir/species1.fna.blastn.paralogs '$output_species1_paralog'
66 #else:
67 && mv $output_dir/species2.faa '$output_species2_faa'
68 && mv $output_dir/species2.fna '$output_species2_fna'
69 && mv $output_dir/species1.fna.blastn.orthologs '$output_species1_ortholog'
70 && mv $output_dir/species2.fna.blastn.orthologs '$output_species2_ortholog'
71 #end if
72 && mv $output_dir/*.rbhb '$output_rbhb'
73 && mv $output_dir/*.kaks '$output_kaks'
74 #if str($fit_components) == 'yes':
75 && mv $output_dir/*.components '$output_components'
76 #end if
77 ]]></command>
78 <inputs>
79 <param name="coding_sequences_species_1" format="fasta" type="data" label="Coding sequences for the first species" />
80 <param name="proteins_species_1" format="fasta" type="data" label="Protein sequences for the first species" />
81 <conditional name="comparison_cond">
82 <param name="comparison" type="select" label="Type of sequence comparison">
83 <option value="paralogs" selected="true">Paralogous</option>
84 <option value="orthologs">Orthologous</option>
85 </param>
86 <when value="paralogs" />
87 <when value="orthologs">
88 <param name="coding_sequences_species_2" format="fasta" type="data" label="Coding sequences for the second species" />
89 <param name="proteins_species_2" format="fasta" type="data" label="Protein sequences for the second species" />
90 </when>
91 </conditional>
92 <conditional name="options_type">
93 <param name="options_type_selector" type="select" label="Options Configuration">
94 <option value="basic" selected="true">Basic</option>
95 <option value="advanced">Advanced</option>
96 </param>
97 <when value="basic" />
98 <when value="advanced">
99 <conditional name="set_min_coverage_cond">
100 <param name="set_min_coverage" type="select" label="Alignment coverage configuration">
101 <option value="no" selected="true">No</option>
102 <option value="yes">Yes</option>
103 </param>
104 <when value="no" />
105 <when value="yes">
106 <param name="min_coverage" type="float" value="0.5" min="0.3" max="1.0" label="match score" />
107 </when>
108 </conditional>
109 <conditional name="recalibrate_cond">
110 <param name="recalibrate" type="select" label="Species rates recalibration configuration">
111 <option value="no" selected="true">No</option>
112 <option value="yes">Yes</option>
113 </param>
114 <when value="no" />
115 <when value="yes">
116 <param name="recalibration_rate" type="float" value="0.0" min="0.0" label="Recalibration rate" />
117 </when>
118 </conditional>
119 <conditional name="codeml_ctl_file_cond">
120 <param name="codeml_ctl_file_select" type="select" label="PAML codeml configuration">
121 <option value="no" selected="true">No</option>
122 <option value="yes">Yes</option>
123 </param>
124 <when value="no" />
125 <when value="yes">
126 <param name="codeml_ctl_file" format="txt" type="data" label="PAML codeml control file" />
127 </when>
128 </conditional>
129 <conditional name="fit_components_cond">
130 <param name="fit_components" type="select" label="Rates clustering configuration">
131 <option value="no" selected="true">No</option>
132 <option value="yes">Yes</option>
133 </param>
134 <when value="no" />
135 <when value="yes">
136 <param name="num_of_components" type="integer" value="1" min="1" label="Number of components" />
137 </when>
138 </conditional>
139 <conditional name="set_lower_ks_limit_cond">
140 <param name="set_lower_ks_limit" type="select" label="Lower limit synonymous substitution rates configuration">
141 <option value="no" selected="true">No</option>
142 <option value="yes">Yes</option>
143 </param>
144 <when value="no" />
145 <when value="yes">
146 <param name="min_ks" type="float" value="0.0" min="0.0" label="Minimum rate" />
147 </when>
148 </conditional>
149 <conditional name="set_upper_ks_limit_cond">
150 <param name="set_upper_ks_limit" type="select" label="Upper limit synonymous substitution rates configuration">
151 <option value="no" selected="true">No</option>
152 <option value="yes">Yes</option>
153 </param>
154 <when value="no" />
155 <when value="yes">
156 <param name="max_ks" type="float" value="0.0" min="0.0" label="Maximum rate" />
157 </when>
158 </conditional>
159 </when>
160 </conditional>
161 <!-- Required due to the Emmix license: the validator below rejects the form unless this box is checked. -->
162 <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
163 <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
164 </param>
165 </inputs>
166 <outputs>
167 <data format="fasta" name="output_species1_fna" label="${tool.name} (coding sequences) on ${on_string}"/>
168 <data format="fasta" name="output_species1_faa" label="${tool.name} (amino acids) on ${on_string}"/>
169 <data format="fasta" name="output_species2_fna" label="${tool.name} (coding sequences) on ${on_string}">
170 <filter>comparison_cond['comparison'] == 'orthologs'</filter>
171 </data>
172 <data format="fasta" name="output_species2_faa" label="${tool.name} (amino acids) on ${on_string}">
173 <filter>comparison_cond['comparison'] == 'orthologs'</filter>
174 </data>
175 <data format="tabular" name="output_species1_paralog" label="${tool.name} (blastn results) on ${on_string}">
176 <filter>comparison_cond['comparison'] == 'paralogs'</filter>
177 </data>
178 <data format="tabular" name="output_species1_ortholog" label="${tool.name} (blastn results) on ${on_string}">
179 <filter>comparison_cond['comparison'] == 'orthologs'</filter>
180 </data>
181 <data format="tabular" name="output_species2_ortholog" label="${tool.name} (blastn results) on ${on_string}">
182 <filter>comparison_cond['comparison'] == 'orthologs'</filter>
183 </data>
184 <data format="tabular" name="output_rbhb" label="${tool.name} (paralogous pairs) on ${on_string}"/>
185 <data format="tabular" name="output_kaks" label="${tool.name} on ${on_string}"/>
186 <data format="tabular" name="output_components" label="${tool.name} (significant components in the ks distribution) on ${on_string}">
187 <filter>options_type['options_type_selector'] == 'advanced' and options_type['fit_components_cond']['fit_components'] == 'yes'</filter>
188 </data>
189 </outputs>
190 <tests>
191 <test>
192 <param ftype="fasta" name="coding_sequences_species_1" value="species1_cds.fasta" />
193 <param ftype="fasta" name="proteins_species_1" value="species1_pep.fasta" />
194 <param value="yes" name="non_commercial_use" />
195 <output ftype="fasta" compare="contains" name="output_species1_fna" file="species1_cds.fasta" />
196 <output ftype="fasta" compare="contains" name="output_species1_faa" file="species1_pep.fasta" />
197 <output ftype="tabular" compare="contains" name="output_species1_paralog" file="output_blastn_results1.tabular" />
198 <output ftype="tabular" name="output_rbhb" file="output_paralogous_pairs.tabular" />
199 <output ftype="tabular" name="output_kaks" file="output1.tabular" />
200 </test>
201 </tests>
202 <help>
203 This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary analyses
204 of genome-scale gene families and transcriptomes. This tool estimates paralogous and orthologous pairwise synonymous (Ks) and
205 non-synonymous (Ka) substitution rates for a set of gene coding sequences either produced by the AssemblyPostProcessor tool or
206 from an external source. Optionally, the resulting set of estimated Ks values can be clustered into components using a mixture
207 of multivariate normal distributions to identify significant duplication event(s) in a species or a pair of species.
208
209 -----
210
211 * **Required options**
212
213 - **Coding sequences for the first species** - coding sequence fasta file for the first species either produced by the AssemblyPostProcessor tool or from an external source selected from your history.
214 - **Protein sequences for the first species** - corresponding protein sequence fasta file for the first species either produced by the AssemblyPostProcessor tool or from an external source selected from your history.
215 - **Type of sequence comparison** - pairwise sequence comparison to determine homologous pairs. This can be either paralogous for self-species comparison or orthologous for cross-species comparison. Cross-species comparison requires input for the second species.
216
217 * **Other options**
218
219 - **Coding sequences for the second species** - coding sequence fasta file for the second species either produced by the AssemblyPostProcessor tool or from an external source selected from your history. This option is required only for orthologous comparison.
220 - **Protein sequences for the second species** - corresponding protein sequence fasta files for the second species either produced by the AssemblyPostProcessor tool or from an external source selected from your history. This option is required only for orthologous comparison.
221 - **Alignment coverage configuration** - select 'Yes' to set the minimum allowable alignment coverage length between homologous pairs. PlantTribes uses global codon alignment match score to determine the pairwise alignment coverage. By default, the match score is set to 0.5 if 'No' is selected.
222
223 - **match score** - number of base matches in a pairwise sequence alignment divided by the length of the shorter sequence. Positions in the alignment corresponding to gaps are not considered. The score is restricted to the range 0.3 - 1.0.
224
225 - **Species rates recalibration configuration** - select 'Yes' to recalibrate synonymous substitution rates of a species using a predetermined evolutionary rate. Recalibration evolutionary rate can be determined from a species tree inferred from a collection of conserved single copy genes from taxa of interest as described in [7]. Rate recalibration applies only to paralogous comparisons.
226
227 - **recalibration rate** - a predetermined evolutionary recalibration rate.
228
229 - **PAML codeml configuration** - select 'Yes' to enable selection of a PAML codeml control file to carry out maximum likelihood analysis of protein-coding DNA sequences using codon substitution models. The template file "codeml.ctl.args" can be found in the scaffold data installed into Galaxy via the PlantTribes Scaffolds Download Data Manager tool, and is also available at the PlantTribes GitHub `repository`_. Default settings shown in the template are used if 'No' is selected.
230 - **Rates clustering configuration** - select 'Yes' to estimate clusters of synonymous substitution rates using a mixture of multivariate normal distributions which represent putative duplication event(s).
231
232 - **Number of components** - number of components to include in the normal mixture model.
233
234 - **Lower limit synonymous substitution rates configuration** - select 'Yes' to set the minimum allowable synonymous substitution rate to use in the normal mixtures cluster analysis to exclude young paralogs that arise from normal gene births and deaths in a genome.
235
236 - **Minimum rate** - minimum allowable synonymous substitution rate.
237
238 - **Upper limit synonymous substitution rates configuration** - select 'Yes' to set the maximum allowable synonymous substitution rate to use in the normal mixtures cluster analysis to exclude likely ancient paralogs in a genome.
239
240 - **Maximum rate** - maximum allowable synonymous substitution rate.
241
242 .. _repository: https://github.com/dePamphilis/PlantTribes/blob/master/config/codeml.ctl.args
243
244 </help>
245 <citations>
246 <expand macro="citation1" />
247 <citation type="bibtex">
248 @article{Wall2008,
249 journal = {Nucleic Acids Research},
250 author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
251 title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
252 year = {2008},
253 volume = {36},
254 number = {suppl 1},
255 pages = {D970-D976},}
256 </citation>
257 <citation type="bibtex">
258 @article{Altschul1990,
259 journal = {Journal of molecular biology},
260 author = {3. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ},
261 title = {Basic local alignment search tool},
262 year = {1990},
263 volume = {215},
264 number = {3},
265 pages = {403-410},}
266 </citation>
267 <citation type="bibtex">
268 @article{Katoh2013,
269 journal = {Molecular biology and evolution},
270 author = {4. Katoh K, Standley DM},
271 title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability},
272 year = {2013},
273 volume = {30},
274 number = {4},
275 pages = {772-780},}
276 </citation>
277 <citation type="bibtex">
278 @article{Yang2007,
279 journal = {Molecular biology and evolution},
280 author = {5. Yang Z},
281 title = {PAML 4: phylogenetic analysis by maximum likelihood},
282 year = {2007},
283 volume = {24},
284 number = {8},
285 pages = {1586-1591},}
286 </citation>
287 <citation type="bibtex">
288 @article{McLachlan1999,
289 journal = {Journal of Statistical Software},
290 author = {6. McLachlan GJ, Peel D, Basford KE, Adams P},
291 title = {The EMMIX software for the fitting of mixtures of normal and t-components},
292 year = {1999},
293 volume = {4},
294 number = {2},
295 pages = {1-14},}
296 </citation>
297 <citation type="bibtex">
298 @article{Cui2006,
299 journal = {Genome Research},
300 author = {7. Cui L, Wall PK, Leebens-Mack JH, Lindsay BG, Soltis DE, Doyle JJ, Soltis PS, Carlson JE, Arumuganathan K, Barakat A, Albert VA},
301 title = {Widespread genome duplications throughout the history of flowering plants},
302 year = {2006},
303 volume = {16},
304 number = {6},
305 pages = {738-749},}
306 </citation>
307 </citations>
308 </tool>