<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool definition for chewBBACA.

    One "selectFunction" conditional exposes six functions (CreateSchema,
    AlleleCall, SchemaEvaluator, TestGenomeQuality, ExtractCgMLST and
    RemoveGenes). Execution is delegated to chewBBACA.pl, located in the tool
    directory, which receives the assembled chewBBACA.py command line as its
    last argument.
-->
<tool id="chewBBACA" name="chewBBACA" version="2.0">
    <description>BSR-Based Allele Calling Algorithm</description>

    <requirements>
        <requirement type="package" version="3.6">Python</requirement>
        <requirement type="package" version="1.15.3">numpy</requirement>
        <requirement type="package" version="0.19.1">scipy</requirement>
        <requirement type="package" version="5.22.2.1">perl</requirement>
        <requirement type="package" version="1.72">biopython</requirement>
        <requirement type="package" version="3.8.1">plotly</requirement>
        <requirement type="package" version="1.8.2">SPARQLWrapper</requirement>
        <requirement type="package" version="0.23.4">pandas</requirement>
        <requirement type="package" version="2.5.0">blast</requirement>
        <requirement type="package" version="2.6.3">prodigal</requirement>
        <requirement type="package" version="2.1">clustalw</requirement>
        <requirement type="package" version="7.313">mafft</requirement>
        <requirement type="package" version="2.0.16">chewbbaca</requirement>
    </requirements>

    <!-- basic error handling -->
    <stdio>
        <!-- Assume anything other than zero is an error -->
        <exit_code range="1:" level="fatal" description="Tool exception" />
        <exit_code range=":-1" level="fatal" description="Tool exception" />
    </stdio>

    <command><![CDATA[
        ## chewBBACA.pl is called with five positional arguments:
        ##   1. the selected function
        ##   2. the input dataset path(s), comma-separated for multi-file functions
        ##   3. the input element identifiers, comma-separated (AlleleCall only)
        ##   4. the Galaxy output path(s) that apply to the selected function
        ##   5. the complete chewBBACA.py command line as one double-quoted string
        ## The literal 'NULL' fills positions that do not apply.
        perl
        $__tool_directory__/chewBBACA.pl $selectFunction.myFunctions

        ## Argument 2: input path(s).
        #if str( $selectFunction.myFunctions ) in ('CreateSchema', 'AlleleCall')
            '${",".join(map(str, $selectFunction.input1))}'
        #elif str( $selectFunction.myFunctions ) in ('SchemaEvaluator', 'TestGenomeQuality', 'ExtractCgMLST', 'RemoveGenes')
            '$selectFunction.input1'
        #else
            'NULL'
        #end if

        ## Argument 3: element identifiers of the input datasets.
        #if str( $selectFunction.myFunctions ) == 'AlleleCall'
            #set $input_names = []
            #for $input in $selectFunction.input1
                ## #silent evaluates the call without writing its result to the command.
                #silent $input_names.append($input.element_identifier)
            #end for
            #set $input_name = ",".join(map(str, $input_names))
            '$input_name'
        #else
            'NULL'
        #end if

        ## Argument 4: output dataset path(s). Outputs that declare
        ## from_work_dir are collected by Galaxy and are not listed here.
        #if str( $selectFunction.myFunctions ) == 'CreateSchema'
            '$schema'
        #elif str( $selectFunction.myFunctions ) == 'AlleleCall'
            '$statistics,$contigsinfo,$alleles,$logginginfo,$repeatedloci'
        #elif str( $selectFunction.myFunctions ) == 'SchemaEvaluator'
            '$schemaplot'
        #elif str( $selectFunction.myFunctions ) == 'TestGenomeQuality'
            '$thresholdplot,$removedgenomes'
        #elif str( $selectFunction.myFunctions ) == 'RemoveGenes'
            '$removedgenes'
        #else
            'NULL'
        #end if

        ## Argument 5: the chewBBACA.py command line. The opening double quote
        ## below is closed after the last function-specific section.
        "chewBBACA.py $selectFunction.myFunctions
        #if str( $selectFunction.myFunctions ) in ('CreateSchema', 'AlleleCall', 'TestGenomeQuality', 'ExtractCgMLST')
            -o output_dir
        #end if
        #if str( $selectFunction.myFunctions ) == 'RemoveGenes'
            -o output_removegenes
        #end if
        #if str( $selectFunction.myFunctions ) in ('CreateSchema', 'AlleleCall')
            -i input_dir
            --cpu \${GALAXY_SLOTS:-4}
        #end if
        #if str( $selectFunction.myFunctions ) in ('SchemaEvaluator', 'TestGenomeQuality', 'ExtractCgMLST', 'RemoveGenes')
            -i $selectFunction.input1
        #end if

        ## Options shared by CreateSchema and AlleleCall.
        #if str( $selectFunction.myFunctions ) in ('CreateSchema', 'AlleleCall')
            #if $selectFunction.bsr
                --bsr $selectFunction.bsr
            #end if

            ## The data table row is selected by its first column and the last
            ## column is used as the path. filter() is wrapped in list() because
            ## it returns a non-subscriptable iterator on Python 3.
            ## NOTE(review): relies on $__app__ being exposed to the template - confirm for the target Galaxy profile.
            #if $selectFunction.ptfmode.ptf_select == "system"
                --ptf ${ list( filter( lambda x: str( x[0] ) == str( $selectFunction.ptfmode.ptf_system ), $__app__.tool_data_tables[ 'chewbbaca_ptfs' ].get_fields() ) )[0][-1] }
            ## ptf_user is optional: only pass --ptf when a dataset was actually supplied.
            #elif $selectFunction.ptfmode.ptf_select == "user" and $selectFunction.ptfmode.ptf_user
                --ptf $selectFunction.ptfmode.ptf_user
            #end if
        #end if

        #if str( $selectFunction.myFunctions ) == 'CreateSchema'
            #if $selectFunction.minBpLocus
                -l $selectFunction.minBpLocus
            #end if
        #end if

        #if str( $selectFunction.myFunctions ) == 'AlleleCall'
            #if $selectFunction.mode.schema_select == "system"
                -g ${ list( filter( lambda x: str( x[0] ) == str( $selectFunction.mode.reference ), $__app__.tool_data_tables[ 'chewbbaca_schemas' ].get_fields() ) )[0][-1] }
            #else
                -g $selectFunction.mode.genes
            #end if
        #end if

        #if str( $selectFunction.myFunctions ) == 'SchemaEvaluator'
            --cpu \${GALAXY_SLOTS:-4}
            #if $selectFunction.conserved
                -p
            #end if
            -l output_rms/SchemaEvaluator.html
            ## The three numeric parameters are optional; a flag is only
            ## emitted when a value is present so it never appears without one.
            #if str( $selectFunction.ncbiTA ) != ''
                -ta $selectFunction.ncbiTA
            #end if
            #if str( $selectFunction.threshold ) != ''
                -t $selectFunction.threshold
            #end if
            #if $selectFunction.title
                --title '$selectFunction.title'
            #end if
            #if str( $selectFunction.numBoxplots ) != ''
                -s $selectFunction.numBoxplots
            #end if
            #if $selectFunction.light
                --light
            #end if
        #end if

        #if str( $selectFunction.myFunctions ) == 'TestGenomeQuality'
            -n $selectFunction.maxNumIterations
            -t $selectFunction.maxThreshold
            -s $selectFunction.stepThreshold
        #end if

        #if str( $selectFunction.myFunctions ) == 'ExtractCgMLST'
            #if $selectFunction.genes
                -r $selectFunction.genes
            #end if
            #if $selectFunction.genomes
                -g $selectFunction.genomes
            #end if
            #if $selectFunction.maxPresence
                -p $selectFunction.maxPresence
            #end if
        #end if

        #if str( $selectFunction.myFunctions ) == 'RemoveGenes'
            -g $selectFunction.genes
        #end if
        "
    ]]></command>

    <inputs>
        <!-- Every branch defines "input1"; the remaining parameters are specific to the function. -->
        <conditional name="selectFunction">
            <param name="myFunctions" type="select" label="Select function">
                <option value="CreateSchema">CreateSchema: Create a gene by gene schema based on genomes</option>
                <option value="AlleleCall" selected="true">AlleleCall: Perform allele call for target genomes</option>
                <option value="SchemaEvaluator">SchemaEvaluator: Tool that builds an html output to better navigate/visualize your schema</option>
                <option value="TestGenomeQuality">TestGenomeQuality: Analyze your allele call output to refine schemas</option>
                <option value="ExtractCgMLST">ExtractCgMLST: Select a subset of loci without missing data (to be used as PHYLOViZ input)</option>
                <option value="RemoveGenes">RemoveGenes: Remove a provided list of loci from your allele call output</option>
            </param>
            <when value="CreateSchema">
                <param name="input1" format="fasta" type="data" multiple="true" label="Selection of genome files (fasta)" />
                <param name="bsr" type="text" value="0.6" optional="true" label="minimum BSR score" />
                <conditional name="ptfmode">
                    <param name="ptf_select" type="select" display="radio" label="Which Prodigal training file would you like to use?">
                        <option value="system" selected="true">System training file</option>
                        <option value="user">Own training file</option>
                    </param>
                    <when value="system">
                        <param name="ptf_system" type="select" label="Choose Prodigal training file">
                            <options from_data_table="chewbbaca_ptfs" />
                        </param>
                    </when>
                    <when value="user">
                        <param name="ptf_user" format="binary" type="data" optional="true" multiple="false" label="Prodigal Training File" />
                    </when>
                </conditional>
                <param name="minBpLocus" type="integer" value="200" optional="true" label="minimum bp locus length" help="Integer" />
            </when>
            <when value="AlleleCall">
                <param name="input1" format="fasta" type="data" multiple="true" label="Selection of genome files (fasta)" />
                <conditional name="mode">
                    <param name="schema_select" type="select" display="radio" label="Which schema would you like to use as a reference?">
                        <option value="system" selected="true">System reference</option>
                        <option value="user">Own reference</option>
                    </param>
                    <when value="system">
                        <param name="reference" type="select" label="Choose reference">
                            <options from_data_table="chewbbaca_schemas" />
                        </param>
                    </when>
                    <when value="user">
                        <param name="genes" format="txt" type="data" label="File with list of genes (fasta)" />
                    </when>
                </conditional>
                <param name="bsr" type="text" value="0.6" optional="true" label="minimum BSR score" />
                <conditional name="ptfmode">
                    <param name="ptf_select" type="select" display="radio" label="Which Prodigal training file would you like to use?">
                        <option value="system">System training file</option>
                        <option value="user">Own training file</option>
                        <option value="noptf" selected="true">No training file</option>
                    </param>
                    <when value="system">
                        <param name="ptf_system" type="select" label="Choose Prodigal training file">
                            <options from_data_table="chewbbaca_ptfs" />
                        </param>
                    </when>
                    <when value="user">
                        <param name="ptf_user" format="binary" type="data" optional="true" multiple="false" label="Prodigal Training File" />
                    </when>
                    <when value="noptf" />
                </conditional>
                <!--<param name="forceContinue" type="boolean" truevalue="true" falsevalue="false" checked="False" label="force continue" />-->
                <!--<param name="forceReset" type="boolean" truevalue="true" falsevalue="false" checked="False" label="force reset" />-->
                <!--<param name="jsonFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="report in json file" />-->
            </when>
            <when value="SchemaEvaluator">
                <param name="input1" format="txt" type="data" label="File with list of genes (fasta)" />
                <param name="conserved" type="boolean" truevalue="true" falsevalue="false" checked="false" label="One bad allele still makes gene conserved" />
                <param name="ncbiTA" type="integer" value="11" optional="true" label="ncbi translation table" help="Integer" />
                <param name="threshold" type="float" value="0.05" optional="true" label="Threshold" />
                <param name="title" type="text" value="My Analyzed wg/cg MLST Schema - Rate My Schema" optional="true" label="title on the html plot" />
                <param name="numBoxplots" type="integer" value="500" optional="true" label="number of boxplots per page (more than 500 can make the page very slow)" help="Integer" />
                <param name="light" type="boolean" truevalue="true" falsevalue="false" checked="false" label="skip clustal and mafft run" />
            </when>
            <when value="TestGenomeQuality">
                <param name="input1" format="tsv" type="data" label="raw allele call matrix file" />
                <param name="maxNumIterations" type="integer" value="12" label="maximum number of iterations" help="Each iteration removes a set of genomes over the defined threshold (-t) and recalculates all loci presence percentages" />
                <param name="maxThreshold" type="integer" value="200" label="maximum threshold of bad calls above 95 percent" help="This threshold represents the maximum number of missing loci allowed, for each genome independently, before removing it (genome)" />
                <param name="stepThreshold" type="integer" value="5" label="step between each threshold analysis (suggested 5)" help="Integer" />
            </when>
            <when value="ExtractCgMLST">
                <param name="input1" format="tsv" type="data" label="input file to clean" />
                <param name="genes" format="txt" type="data" optional="true" label="list of genes to remove, one per line" help="e.g. the list of gene detected by ParalogPrunning.py" />
                <param name="genomes" format="txt" type="data" optional="true" label="list of genomes to remove, one per line" help="e.g. list of genomes to be removed selected based on testGenomeQuality results" />
                <param name="maxPresence" type="float" value="1.0" optional="true" label="maximum presence (e.g 0.95)">
                    <validator type="in_range" min="0" max="1" />
                </param>
            </when>
            <when value="RemoveGenes">
                <param name="input1" format="tsv" type="data" label="main matrix file from which to remove" />
                <param name="genes" format="txt" type="data" label="list of genes to remove" />
            </when>
        </conditional>
    </inputs>

    <!-- define outputs: each dataset is filtered on the selected function -->
    <outputs>
        <data format="txt" name="schema" label="${tool.name}:CreateSchema on ${on_string}">
            <filter>selectFunction['myFunctions'] == "CreateSchema"</filter>
        </data>
        <data format="tsv" name="statistics" label="${tool.name}:AlleleCall on ${on_string}: Statistics">
            <filter>selectFunction['myFunctions'] == "AlleleCall"</filter>
        </data>
        <data format="tsv" name="contigsinfo" label="${tool.name}:AlleleCall on ${on_string}: Contigs Info">
            <filter>selectFunction['myFunctions'] == "AlleleCall"</filter>
        </data>
        <data format="tsv" name="alleles" label="${tool.name}:AlleleCall on ${on_string}: Alleles">
            <filter>selectFunction['myFunctions'] == "AlleleCall"</filter>
        </data>
        <data format="txt" name="logginginfo" label="${tool.name}:AlleleCall on ${on_string}: Logging info">
            <filter>selectFunction['myFunctions'] == "AlleleCall"</filter>
        </data>
        <data format="txt" name="repeatedloci" label="${tool.name}:AlleleCall on ${on_string}: Repeated Loci">
            <filter>selectFunction['myFunctions'] == "AlleleCall"</filter>
        </data>
        <data format="tar" name="schemaplot" label="${tool.name}:SchemaEvaluator on ${on_string}">
            <filter>selectFunction['myFunctions'] == "SchemaEvaluator"</filter>
        </data>
        <data format="html" name="thresholdplot" from_work_dir="output_dir/GenomeQualityPlot.html" label="${tool.name}:TestGenomeQuality on ${on_string}: Plot">
            <filter>selectFunction['myFunctions'] == "TestGenomeQuality"</filter>
        </data>
        <data format="tsv" name="removedgenomes" from_work_dir="output_dir/removedGenomes.txt" label="${tool.name}:TestGenomeQuality on ${on_string}: Removed genomes">
            <filter>selectFunction['myFunctions'] == "TestGenomeQuality"</filter>
        </data>
        <data format="tsv" name="cgmlst" from_work_dir="output_dir/cgMLST.tsv" label="${tool.name}:ExtractCgMLST on ${on_string}: cgMLST">
            <filter>selectFunction['myFunctions'] == "ExtractCgMLST"</filter>
        </data>
        <data format="txt" name="cgmlstschema" from_work_dir="output_dir/cgMLSTschema.txt" label="${tool.name}:ExtractCgMLST on ${on_string}: cgMLSTschema">
            <filter>selectFunction['myFunctions'] == "ExtractCgMLST"</filter>
        </data>
        <data format="tsv" name="removedgenes" from_work_dir="output_removegenes.tsv" label="${tool.name}:RemoveGenes on ${on_string}">
            <filter>selectFunction['myFunctions'] == "RemoveGenes"</filter>
        </data>
    </outputs>

    <tests>
        <!-- AlleleCall against a system schema, without a Prodigal training file. -->
        <test>
            <conditional name="selectFunction">
                <param name="myFunctions" value="AlleleCall" />
                <param name="input1" value="a_contigs.fasta" ftype="fasta" />
                <conditional name="mode">
                    <param name="schema_select" value="system" />
                    <param name="reference" value="schema_pubMLST" />
                </conditional>
                <conditional name="ptfmode">
                    <param name="ptf_select" value="noptf" />
                </conditional>
            </conditional>
            <output name="statistics">
                <assert_contents>
                    <has_text text="a_contigs.fasta" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
**chewBBACA** stands for "BSR-Based Allele Calling Algorithm". The "chew" part could be thought of as "Comprehensive and Highly Efficient Workflow"
but at this point it still needs a bit of work to make that claim, so we just add "chew" to add extra coolness to the software name.

The development of the tools has been supported by the INNUENDO project (https://www.innuendoweb.org) co-funded by the European Food Safety Authority (EFSA), grant agreement GP/EFSA/AFSCO/2015/01/CT2
("New approaches in identifying and characterizing microbial and chemical hazards") and by the ONEIDA project (LISBOA-01-0145-FEDER-016417) co-funded by FEEI - “Fundos Europeus Estruturais e de Investimento”
from “Programa Operacional Regional Lisboa 2020” and by national funds from FCT - “Fundação para a Ciência e a Tecnologia” and BacGenTrack (TUBITAK/0004/2014)
[FCT/ Scientific and Technological Research Council of Turkey (Türkiye Bilimsel ve Teknolojik Araştırma Kurumu, TÜBITAK)].
    ]]></help>

    <citations>
        <citation type="bibtex">@ARTICLE{rossi_innuendo_schemas,
            author = {Rossi, M and Silva, M and Ribeiro-Gonçalves, B and Silva, DN and Machado, MP and Oleastro, M and Borges, V and Isidro, J and Gomes, JP and Vieira, L and Barker, DOR and Llarena, AK and Halkilahti, J and Jaakkonen, A and Palma, F and Culebro, A and Kivistö, R and Hänninen, ML and Laukkanen-Ninios, R and Fredriksson-Ahomaa, M and Salmenlinna, S and Hakkinen, M and Garaizer, J and Bikandi, J and Hilbert, F and Taboada, EN and Carriço, JA},
            keywords = {bioinformatics, ngs, mlst},
            title = {{INNUENDO whole and core genome MLST databases and schemas for foodborne pathogens}},
            url = {https://github.com/TheInnuendoProject/chewBBACA_schemas}
            }</citation>
        <citation type="doi">10.1099/mgen.0.000166</citation>
        <citation type="doi">10.1371/journal.pgen.1007261</citation>
    </citations>
</tool>