comparison gene_family_classifier.xml @ 0:f8603464bea7 draft

Uploaded
author greg
date Thu, 08 Jun 2017 12:48:23 -0400
parents
children 95cb5d32a3b4
comparison
equal deleted inserted replaced
-1:000000000000 0:f8603464bea7
1 <tool id="plant_tribes_gene_family_classifier" name="GeneFamilyClassifier" version="@WRAPPER_VERSION@.0">
2 <description>classifies gene sequences into pre-computed orthologous gene family clusters</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements_gene_family_classifier" />
7 <command detect_errors="exit_code"><![CDATA[
8 #if str($options_type.options_type_selector) == 'advanced':
9     #set specify_super_orthogroups_cond = $options_type.specify_super_orthogroups_cond
10     #set specify_super_orthogroups = $specify_super_orthogroups_cond.specify_super_orthogroups
11     #set create_orthogroup_cond = $options_type.create_orthogroup_cond
12     #set create_orthogroup = $create_orthogroup_cond.create_orthogroup
13     #set specify_single_copy_cond = $options_type.specify_single_copy_cond
14     #set specify_single_copy = $specify_single_copy_cond.specify_single_copy
15     #if str($specify_super_orthogroups) == 'yes':
16         #set specify_super_orthos = True
17         #set super_orthogroups = $specify_super_orthogroups_cond.super_orthogroups
18     #else:
19         #set specify_super_orthos = False
20     #end if
21     #if str($create_orthogroup) == 'yes':
22         #set create_ortho_sequences = True
23         #set create_corresponding_coding_sequences_cond = $create_orthogroup_cond.create_corresponding_coding_sequences_cond
24         #if str($create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences) == 'yes':
25             #set create_corresponding_coding_sequences = True
26         #else:
27             #set create_corresponding_coding_sequences = False
28         #end if
29     #else:
30         #set create_ortho_sequences = False
31         #set create_corresponding_coding_sequences = False
32     #end if
33     #if str($specify_single_copy) == 'yes':
34         #set single_copy_orthogroup = True
35         #set single_copy_cond = $specify_single_copy_cond.single_copy_cond
36         #set single_copy = $single_copy_cond.single_copy
37     #else:
38         #set single_copy_orthogroup = False
39     #end if
40 #else:
41     #set single_copy_orthogroup = False
42     #set create_ortho_sequences = False
43     #set create_corresponding_coding_sequences = False
44 #end if
45 ## The boolean flags above are defined in both basic and advanced mode (except specify_super_orthos, advanced only); the script path is quoted so a tool directory containing spaces still works.
46 python '$__tool_directory__/gene_family_classifier.py'
47 --input '$input'
48 --scaffold '$scaffold.fields.path'
49 --method $method
50 --classifier $save_hmmscan_log_cond.classifier
51 --config_dir '$scaffold.fields.path'
52 --num_threads \${GALAXY_SLOTS:-4}
53 ## Advanced-only arguments: super orthogroups, single copy orthogroup selection, and orthogroup fasta creation.
54 #if str($options_type.options_type_selector) == 'advanced':
55     #if $specify_super_orthos:
56         --super_orthogroups $super_orthogroups
57     #end if
58     #if $single_copy_orthogroup:
59         #if str($single_copy) == 'custom':
60             #set single_copy_custom_cond = $single_copy_cond.single_copy_custom_cond
61             #set single_copy_custom = $single_copy_custom_cond.single_copy_custom
62             #if str($single_copy_custom) == 'no':
63                 --single_copy_custom 'default'
64             #else:
65                 --single_copy_custom '$single_copy_custom_cond.single_copy_custom_config'
66             #end if
67         #else:
68             #if str($single_copy_cond.single_copy_taxa):
69                 --single_copy_taxa $single_copy_cond.single_copy_taxa
70             #end if
71             #if str($single_copy_cond.taxa_present):
72                 --taxa_present $single_copy_cond.taxa_present
73             #end if
74         #end if
75     #end if
76     #if $create_ortho_sequences:
77         --orthogroup_fasta 'true'
78         #if $create_corresponding_coding_sequences:
79             --coding_sequences '$create_corresponding_coding_sequences_cond.coding_sequences'
80         #end if
81     #end if
82 #end if
83 ## Output arguments: the hmmscan log, then exactly one of ptorthocs/ptortho, plus ptsco when single copy selection and fasta creation are both enabled.
84 #if (str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both') and str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes':
85     --save_hmmscan_log 'true'
86     --hmmscan_log '$hmmscan_log'
87 #end if
88 #if $create_ortho_sequences:
89     #if $create_corresponding_coding_sequences:
90         --output_ptorthocs '$output_ptorthocs'
91         --output_ptorthocs_dir '$output_ptorthocs.files_path'
92     #else:
93         --output_ptortho '$output_ptortho'
94         --output_ptortho_dir '$output_ptortho.files_path'
95     #end if
96 #end if
97 #if $single_copy_orthogroup:
98     #if $create_ortho_sequences:
99         --output_ptsco '$output_ptsco'
100         --output_ptsco_dir '$output_ptsco.files_path'
101     #end if
102 #end if
103 ]]></command>
104 <inputs> <!-- Required parameters come first; every optional setting lives under the "options_type" conditional. -->
105     <param name="input" type="data" format="fasta" label="Proteins fasta file" />
106     <param name="scaffold" type="select" label="Gene family scaffold">
107         <options from_data_table="plant_tribes_scaffolds" />
108         <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
109     </param>
110     <param name="method" type="select" label="Protein clustering method">
111         <option value="gfam" selected="true">GFam</option>
112         <option value="orthofinder">OrthoFinder</option>
113         <option value="orthomcl">OrthoMCL</option>
114     </param>
115     <conditional name="save_hmmscan_log_cond"> <!-- The hmmscan log choice is only offered for the "hmmscan" and "both" classifiers. -->
116         <param name="classifier" type="select" label="Protein classifier">
117             <option value="blastp" selected="true">blastp</option>
118             <option value="hmmscan">hmmscan</option>
119             <option value="both">Both blastp and hmmscan</option>
120         </param>
121         <when value="blastp" />
122         <when value="hmmscan">
123             <param name="save_hmmscan_log" type="select" label="Save hmmscan log?">
124                 <option value="no" selected="true">No</option>
125                 <option value="yes">Yes</option>
126             </param>
127         </when>
128         <when value="both">
129             <param name="save_hmmscan_log" type="select" label="Save hmmscan log?">
130                 <option value="no" selected="true">No</option>
131                 <option value="yes">Yes</option>
132             </param>
133         </when>
134     </conditional>
135     <conditional name="options_type"> <!-- "basic" exposes nothing further; "advanced" exposes three nested yes/no conditionals. -->
136         <param name="options_type_selector" type="select" label="Options configuration">
137             <option value="basic" selected="true">Basic</option>
138             <option value="advanced">Advanced</option>
139         </param>
140         <when value="basic" />
141         <when value="advanced">
142             <conditional name="specify_super_orthogroups_cond">
143                 <param name="specify_super_orthogroups" type="select" label="Super orthogroups configuration">
144                     <option value="no" selected="true">No</option>
145                     <option value="yes">Yes</option>
146                 </param>
147                 <when value="no" />
148                 <when value="yes">
149                     <param name="super_orthogroups" type="select" label="Clustering distance measure">
150                         <option value="min_evalue" selected="true">minimum e-value</option>
151                         <option value="avg_evalue">average e-value</option>
152                     </param>
153                 </when>
154             </conditional>
155             <conditional name="specify_single_copy_cond">
156                 <param name="specify_single_copy" type="select" label="Single copy orthogroups configuration">
157                     <option value="no" selected="true">No</option>
158                     <option value="yes">Yes</option>
159                 </param>
160                 <when value="no" />
161                 <when value="yes">
162                     <conditional name="single_copy_cond">
163                         <param name="single_copy" type="select" label="Selection criterion">
164                             <option value="taxa" selected="true">Global selection</option>
165                             <option value="custom">Custom selection</option>
166                         </param>
167                         <when value="custom">
168                             <conditional name="single_copy_custom_cond">
169                                 <param name="single_copy_custom" type="select" label="Custom selection configuration">
170                                     <option value="no" selected="true">No</option>
171                                     <option value="yes">Yes</option>
172                                 </param>
173                                 <when value="no" />
174                                 <when value="yes">
175                                     <param name="single_copy_custom_config" type="data" format="txt" label="Custom selection file" />
176                                 </when>
177                             </conditional>
178                         </when>
179                         <when value="taxa">
180                             <param name="single_copy_taxa" type="integer" min="0" optional="true" label="Minimum single copy taxa" />
181                             <param name="taxa_present" type="integer" min="0" optional="true" label="Minimum taxa present" />
182                         </when>
183                     </conditional>
184                 </when>
185             </conditional>
186             <conditional name="create_orthogroup_cond">
187                 <param name="create_orthogroup" type="select" label="Orthogroups fasta configuration">
188                     <option value="no" selected="true">No</option>
189                     <option value="yes">Yes</option>
190                 </param>
191                 <when value="no" />
192                 <when value="yes">
193                     <conditional name="create_corresponding_coding_sequences_cond">
194                         <param name="create_corresponding_coding_sequences" type="select" label="Orthogroups coding sequences">
195                             <option value="no" selected="true">No</option>
196                             <option value="yes">Yes</option>
197                         </param>
198                         <when value="no" />
199                         <when value="yes">
200                             <param name="coding_sequences" type="data" format="fasta" label="Coding sequences fasta file" />
201                         </when>
202                     </conditional>
203                 </when>
204             </conditional>
205         </when>
206     </conditional>
207 </inputs>
208 <outputs> <!-- Each dataset is created only when its filter expression over the input parameters is true. -->
209     <data name="hmmscan_log" format="txt" label="${tool.name} (hmmscan.log) on ${on_string}">
210         <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter>
211     </data>
212     <data name="output_ptortho" format="ptortho" label="${tool.name} (gene family clusters) on ${on_string}">
213         <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['create_orthogroup_cond']['create_corresponding_coding_sequences_cond']['create_corresponding_coding_sequences'] == 'no'</filter>
214     </data>
215     <data name="output_ptorthocs" format="ptorthocs" label="${tool.name} (gene family clusters) on ${on_string}">
216         <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['create_orthogroup_cond']['create_corresponding_coding_sequences_cond']['create_corresponding_coding_sequences'] == 'yes'</filter>
217     </data>
218     <data name="output_ptsco" format="tabular" label="${tool.name} (single copy orthogroups) on ${on_string}">
219         <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['specify_single_copy_cond']['specify_single_copy'] == 'yes'</filter>
220         <change_format> <!-- The declared tabular format is replaced according to the coding sequences choice. -->
221             <when input="options_type.create_orthogroup_cond.create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences" value="no" format="ptortho" />
222             <when input="options_type.create_orthogroup_cond.create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences" value="yes" format="ptorthocs" />
223         </change_format>
224     </data>
225     <collection name="orthos" type="list"> <!-- Unfiltered: files found in geneFamilyClassification_dir become hidden tabular elements. -->
226         <discover_datasets directory="geneFamilyClassification_dir" pattern="__name__" ext="tabular" visible="false" />
227     </collection>
228 </outputs>
229 <tests>
230     <test> <!-- Advanced mode, "both" classifier, orthogroup fasta plus coding sequences requested. -->
231         <param name="input" ftype="fasta" value="transcripts.cleaned.nr.pep" />
232         <param name="scaffold" value="22Gv1.1" />
233         <param name="method" value="orthomcl" />
234         <param name="classifier" value="both" />
235         <param name="options_type_selector" value="advanced" />
236         <param name="create_orthogroup" value="yes" />
237         <param name="create_corresponding_coding_sequences" value="yes" />
238         <output name="output_ptorthocs" ftype="ptorthocs" file="output.ptorthocs" />
239         <output_collection name="orthos" type="list">
240             <element name="proteins.blastp.22Gv1.1" ftype="tabular" file="proteins.blastp.22Gv1.1" compare="contains" />
241             <element name="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular" file="proteins.blastp.22Gv1.1.bestOrthos" compare="contains" />
242             <element name="proteins.both.22Gv1.1.bestOrthos" ftype="tabular" file="proteins.both.22Gv1.1.bestOrthos" compare="contains" />
243             <element name="proteins.both.22Gv1.1.bestOrthos.summary" ftype="tabular" file="proteins.both.22Gv1.1.bestOrthos.summary" compare="contains" />
244             <element name="proteins.hmmscan.22Gv1.1" ftype="tabular" file="proteins.hmmscan.22Gv1.1" compare="contains" />
245             <element name="proteins.hmmscan.22Gv1.1.bestOrthos" ftype="tabular" file="proteins.hmmscan.22Gv1.1.bestOrthos" compare="contains" />
246         </output_collection>
247     </test>
248 </tests>
249 <help>
250 This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
251 analyses of genome-scale gene families and transcriptomes. This tool classifies gene coding sequences either produced by
252 the AssemblyPostProcessor tool or from an external source into pre-computed orthologous gene family clusters (orthogroups)
253 of a PlantTribes scaffold. Classified sequences are then assigned with the corresponding orthogroups’ metadata that includes
254 gene counts of backbone taxa, super clusters (super orthogroups) at multiple stringencies, and functional annotations from
255 sources such as Gene Ontology (GO), InterPro protein domains, TAIR, UniProtKB/TrEMBL, and UniProtKB/Swiss-Prot. Additionally,
256 sequences belonging to single/low-copy gene families that are mainly utilized in species tree inference can be determined.
257
258 -----
259
260 **Required options**
261
262 * **Proteins fasta file** - proteins fasta file either produced by the AssemblyPostProcessor tool or an external source selected from your history.
263 * **Gene family scaffold** - one of the PlantTribes gene family scaffolds [2-4] installed into Galaxy by the PlantTribes Scaffolds Download Data Manager tool.
264 * **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool.
265 * **Protein classifier** - classifier to assign protein sequences into a specified scaffold's orthogroups. PlantTribes implements three classification approaches: blastp (faster)[5], hmmscan (slower but more sensitive assignment of divergent homologs)[6], and both blastp and hmmscan (disagreements resolved in favor of hmmscan; more exhaustive).
266
267 **Other options**
268
269 * **Super orthogroups configuration** - select ‘Yes’ to enable super orthogroups configuration options. Super orthogroups[7] are constructed through a second iteration of MCL clustering to connect distant, but potentially related orthogroup clusters.
270
271 * **Clustering distance measure** - distance measure used in merging orthogroup clusters into super orthogroup clusters. PlantTribes pre-computed super orthogroups are based on the minimum and average blastp e-value between all pairs of scaffold orthogroups used as the input matrix for MCL clustering algorithm[8].
272
273 * **Single copy orthogroups configuration** - select ‘Yes’ to enable single/low-copy orthogroups selection configuration options.
274
275 * **Selection criterion** - single/low-copy orthogroups selection criterion. PlantTribes provides custom and global selection criteria for selecting user-defined single/low-copy scaffold orthogroups.
276
277 * **Global selection configuration** - the upper limit values of the following two parameters vary depending on the selected gene family scaffold, and the tool will produce an error if the value exceeds the number of species in the circumscribed scaffold.
278
279 * **Minimum single copy taxa** - minimum number of taxa with single copy genes in the orthogroup.
280 * **Minimum taxa present** - minimum number of taxa present in the orthogroup.
281
282 * **Custom selection configuration** - select ‘Yes’ to enable selection of a single copy configuration file. Scaffold configuration templates (.singleCopy.config) of how to customize single/low-copy orthogroups selection can be found in the scaffold data installed into Galaxy via the PlantTribes Scaffolds Download Data Manager tool, and also available at the PlantTribes GitHub `repository`_. Single/low-copy settings shown in these templates are used as defaults if ‘No’ is selected.
283
284 * **Custom selection file** - select a single/low-copy customized configuration file from your history.
285
286 * **Orthogroups fasta configuration** - select ‘Yes’ to create proteins orthogroups fasta files for the classified sequences.
287
288 * **Orthogroups coding sequences** - select ‘Yes’ to create corresponding coding sequences orthogroup fasta files for the classified protein sequences. Requires coding sequences fasta file corresponding to the proteins fasta file to be selected from your history.
289
290 * **Coding sequences fasta file** - select coding sequences fasta file corresponding to the proteins fasta file from your history.
291
292 .. _repository: https://github.com/dePamphilis/PlantTribes/tree/master/config
293
294 </help>
295 <citations> <!-- The numeric prefix in each author field matches the bracketed reference markers ([2] to [8]) used in the help text. -->
296     <expand macro="citation1" />
297     <citation type="bibtex">
298         @article{Sasidharan2012,
299         journal = {Nucleic Acids Research},
300         author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
301         title = {GFam: a platform for automatic annotation of gene families},
302         year = {2012},
303         pages = {gks631},}
304     </citation>
305     <citation type="bibtex">
306         @article{Li2003,
307         journal = {Genome Research},
308         author = {3. Li L, Stoeckert CJ, Roos DS},
309         title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
310         year = {2003},
311         volume = {13},
312         number = {9},
313         pages = {2178-2189},}
314     </citation>
315     <citation type="bibtex">
316         @article{Emms2015,
317         journal = {Genome Biology},
318         author = {4. Emms DM, Kelly S},
319         title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
320         year = {2015},
321         volume = {16},
322         number = {1},
323         pages = {157},}
324     </citation>
325     <citation type="bibtex">
326         @article{Altschul1990,
327         journal = {Journal of molecular biology},
328         author = {5. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ},
329         title = {Basic local alignment search tool},
330         year = {1990},
331         volume = {215},
332         number = {3},
333         pages = {403-410},}
334     </citation>
335     <citation type="bibtex">
336         @article{Eddy2009,
337         journal = {Genome Inform},
338         author = {6. Eddy SR},
339         title = {A new generation of homology search tools based on probabilistic inference},
340         year = {2009},
341         volume = {23},
342         number = {1},
343         pages = {205-211},}
344     </citation>
345     <citation type="bibtex">
346         @article{Wall2008,
347         journal = {Nucleic Acids Research},
348         author = {7. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
349         title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
350         year = {2008},
351         volume = {36},
352         number = {suppl 1},
353         pages = {D970-D976},}
354     </citation>
355     <citation type="bibtex">
356         @article{Enright2002,
357         journal = {Nucleic acids research},
358         author = {8. Enright AJ, Van Dongen S, Ouzounis CA},
359         title = {An efficient algorithm for large-scale detection of protein families},
360         year = {2002},
361         volume = {30},
362         number = {7},
363         pages = {1575-1584},}
364     </citation>
365 </citations>
366 </tool>