comparison sistr_cmd.xml @ 0:ebee10be4297 draft

planemo upload commit 1ea98fb88a93a571beda5bbd56449c946860a258
author nml
date Wed, 01 Mar 2017 12:35:39 -0500
parents
children 9d7e381dfa5a
comparison
equal deleted inserted replaced
-1:000000000000 0:ebee10be4297
1 <tool id="sistr_cmd" name="sistr_cmd" version="0.3.4">
<!-- Short description shown next to the tool name. -->
2 <description>
3 Salmonella In Silico Typing Resource commandline tool for serovar prediction
4 </description>
<!-- Runtime dependency: the sistr_cmd package, pinned to 0.3.4 (the same number as the tool's version attribute). -->
5 <requirements>
6 <requirement type="package" version="0.3.4">sistr_cmd</requirement>
7 </requirements>
<!-- Exit-code rule covering every code from 1 upwards; no level is given, so Galaxy's default level applies (presumably a fatal error; confirm against the tool XML schema). -->
8 <stdio>
9 <exit_code range="1:" />
10 </stdio>
<!-- Builds the sistr command line. Each input FASTA is passed as a "-i PATH NAME" pair, where NAME is the dataset name with ".EXT" removed. The report goes to a fixed file name in the working directory (picked up by the outputs through from_work_dir); the cgMLST side outputs are written directly to their Galaxy dataset paths, which are single-quoted like the input paths so that whitespace in a path cannot split the argument. Boolean and verbosity substitutions are left unquoted on purpose: when empty they must disappear instead of becoming an empty argument. -->
11 <command><![CDATA[
12 sistr
13 #for $fasta in $input_fastas
14 -i '$fasta' '${$fasta.name.replace("." + $fasta.ext, "")}'
15 #end for
16 -f $output_format
17 #if $output_format == "tab"
18 -o sistr-report.tab
19 #elif $output_format == "csv"
20 -o sistr-report.csv
21 #elif $output_format == "json"
22 -o sistr-report.json
23 #end if
24 -p '$cgmlst_profiles'
25 -n '$novel_alleles'
26 -a '$alleles_output'
27 $use_full_cgmlst_db
28 $no_cgmlst
29 $run_mash
30 $qc
31 --threads "\${GALAXY_SLOTS:-1}"
32 -T "\${TMPDIR:-/tmp}"
33 $keep_tmp
34 $verbosity
35 ]]></command>
<!-- User-facing parameters. Every boolean below expands to its truevalue flag when checked and to an empty string otherwise, so the command template can reference it directly. -->
36 <inputs>
<!-- One or more FASTA datasets (required); the command loops over them and emits one input pair per dataset. -->
37 <param
38 name="input_fastas"
39 type="data"
40 label="Input Genome(s)"
41 optional="false"
42 multiple="true"
43 format="fasta"
44 />
<!-- Report format: the value is passed to -f, selects the report file name in the command, and drives the filters on the three report outputs. Defaults to tab. -->
45 <param
46 name="output_format"
47 type="select"
48 label="Results output format"
49 multiple="false">
50 <option value="tab" selected="true">
51 Tabular (tab-delimited values)
52 </option>
53 <option value="csv">
54 CSV (Comma Separated Values)
55 </option>
56 <option value="json">
57 JSON (JavaScript Object Notation)
58 </option>
59 </param>
<!-- Off by default: switches from the reduced centroid allele set to the full cgMLST database. -->
60 <param
61 name="use_full_cgmlst_db"
62 type="boolean"
63 checked="false"
64 truevalue="--use-full-cgmlst-db"
65 falsevalue=""
66 label="Use full cgMLST database for serovar prediction. About 10X slower with equivalent results to reduced centroid allele database."
67 />
<!-- On by default: adds the Mash MinHash based prediction. -->
68 <param
69 name="run_mash"
70 type="boolean"
71 checked="true"
72 truevalue="--run-mash"
73 falsevalue=""
74 label="Run Mash MinHash-based serovar prediction"
75 />
<!-- Off by default: when checked, cgMLST based prediction is skipped. NOTE(review): the cgMLST outputs are declared unconditionally; confirm what they contain in that case. -->
76 <param
77 name="no_cgmlst"
78 type="boolean"
79 checked="false"
80 truevalue="--no-cgmlst"
81 falsevalue=""
82 label="Skip running cgMLST-based serovar prediction"
83 />
<!-- On by default: requests the basic QC of the prediction results. -->
84 <param
85 name="qc"
86 type="boolean"
87 checked="true"
88 truevalue="--qc"
89 falsevalue=""
90 label="Basic QC of results"
91 />
<!-- Off by default: keeps the temporary analysis directory created under the -T base directory. -->
92 <param
93 name="keep_tmp"
94 type="boolean"
95 checked="false"
96 falsevalue=""
97 truevalue="--keep-tmp"
98 label="Keep temporary analysis directory"
99 />
<!-- Logging level: each option value is the literal -v flag placed on the command line; the empty value adds no flag. Defaults to -vv (info). -->
100 <param
101 name="verbosity"
102 type="select"
103 label="Logging verbosity">
104 <option value="">
105 Error messages only
106 </option>
107 <option value="-v">
108 Show warning messages
109 </option>
110 <option value="-vv" selected="true">
111 Show info messages
112 </option>
113 <option value="-vvv">
114 Show debug messages
115 </option>
116 </param>
117 </inputs>
<!-- Output datasets. The first three are mutually exclusive report outputs: each filter matches one value of output_format, and each is collected from the fixed report file name that the command writes in the working directory. -->
118 <outputs>
119 <data
120 name="output_prediction_csv"
121 format="csv"
122 label="SISTR Results"
123 from_work_dir="sistr-report.csv">
124 <filter>output_format == "csv"</filter>
125 </data>
126 <data
127 name="output_prediction_json"
128 format="json"
129 label="SISTR Results"
130 from_work_dir="sistr-report.json">
131 <filter>output_format == "json"</filter>
132 </data>
133 <data
134 name="output_prediction_tab"
135 format="tabular"
136 label="SISTR Results"
137 from_work_dir="sistr-report.tab">
138 <filter>output_format == "tab"</filter>
139 </data>
<!-- The remaining three outputs have no filter and no from_work_dir: the command passes their dataset paths to sistr through -p, -n and -a respectively. -->
140 <data
141 name="cgmlst_profiles"
142 format="csv"
143 label="cgMLST results" />
144 <data
145 name="novel_alleles"
146 format="fasta"
147 label="Novel cgMLST alleles" />
148 <data
149 name="alleles_output"
150 format="json"
151 label="cgMLST allele match results" />
152 </outputs>
153 <tests>
<!-- Test 1: single genome AE014613-699860 with the tabular report. The novel-allele FASTA and allele JSON are compared by size only; the cgMLST profile CSV must contain the expected row of allele identifiers; the report must have 19 columns and contain the genome name plus the Typhi, enterica and -:-:- strings. -->
154 <test>
155 <param name="input_fastas" value="AE014613-699860.fasta"/>
156 <param name="output_format" value="tab"/>
157 <output
158 name="novel_alleles"
159 value="novel-alleles.fasta"
160 ftype="fasta"
161 compare="sim_size"/>
162 <output
163 name="cgmlst_profiles"
164 value="cgmlst-profiles.csv"
165 ftype="csv"
166 lines_diff="2">
167 <assert_contents>
168 <has_text text=",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3969539340,2545200385,225275747,2955003506,2353669245,2666669453,1672513023,3779563470,1301843222,2161147266,607954140,3680021500,2914087704,1062106200,3673111880,1314942441,1367997025,3293595301,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1528212814,2110459436,4160823845,1648892875,2084418558,1638162324,469721942,1317894045,1973458150,926214622,2197498164,398274060,,,,,,,,,,1123870984,278162969,490843778,3950769715,,,,,,4203409135,3569491948,,,,,,,1052128508,,,1510445340,,,4065472468,,,,,,1495737522,,,,,,,,3076491138,712233770,3105746335,625241463,3016847250,1928860657,2229984332,1341416065,2978539204,1175502179,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1328452594,2372254687,2640609716,3051501604,3258707132,,,,,,,,,,,,1007978530,,2019769394,1109678443,,,,,,,,,"/>
169 </assert_contents>
170 </output>
171 <output
172 name="output_prediction_tab"
173 value="sistr-results.tab"
174 ftype="tabular"
175 lines_diff="2">
176 <assert_contents>
177 <has_text text="AE014613-699860" />
178 <has_text text="Typhi" />
179 <has_text text="enterica" />
180 <has_text text="-:-:-" />
181 <has_n_columns n="19" />
182 </assert_contents>
183 </output>
184 <output
185 name="alleles_output"
186 value="alleles-output.json"
187 ftype="json"
188 compare="sim_size"/>
189 </test>
<!-- Test 2: single genome 13-1101-Paratyphi_B with the tabular report. The report must have 21 columns and contain the genome name, "Paratyphi B var. Java", "enterica", the "1,4,[5],12" string, PASS and the identifier 2375035975; the cgMLST profile CSV is compared to the reference file with up to 2 differing lines. -->
190 <test>
191 <param name="input_fastas" value="13-1101-Paratyphi_B.fasta"/>
192 <param name="output_format" value="tab"/>
193 <output
194 name="novel_alleles"
195 value="novel-alleles-13-1101.fasta"
196 ftype="fasta"
197 compare="sim_size"/>
198 <output
199 name="cgmlst_profiles"
200 value="cgmlst-profiles-13-1101.csv"
201 ftype="csv"
202 lines_diff="2">
203 </output>
204 <output
205 name="output_prediction_tab"
206 value="sistr-results-13-1101.tab"
207 ftype="tabular"
208 lines_diff="2">
209 <assert_contents>
210 <has_text text="13-1101-Paratyphi_B" />
211 <has_text text="Paratyphi B var. Java" />
212 <has_text text="enterica" />
213 <has_text text="1,4,[5],12" />
214 <has_text text="PASS" />
215 <has_text text="2375035975"/>
216 <has_n_columns n="21" />
217 </assert_contents>
218 </output>
219 <output
220 name="alleles_output"
221 value="alleles-output-13-1101.json"
222 ftype="json"
223 compare="sim_size"/>
224 </test>
<!-- Test 3: input file whose name contains a space ("13-1101 Paratyphi_B.fasta"). It is checked against the 13-1101 reference files, and the report must contain the genome name with the space intact. Presumably this guards the quoting of the path and genome name in the command template; confirm with the tool author. -->
225 <test>
226 <param name="input_fastas" value="13-1101 Paratyphi_B.fasta"/>
227 <param name="output_format" value="tab"/>
228 <output
229 name="novel_alleles"
230 value="novel-alleles-13-1101.fasta"
231 ftype="fasta"
232 compare="sim_size"/>
233 <output
234 name="cgmlst_profiles"
235 value="cgmlst-profiles-13-1101.csv"
236 ftype="csv"
237 lines_diff="2">
238 </output>
239 <output
240 name="output_prediction_tab"
241 value="sistr-results-13-1101.tab"
242 ftype="tabular"
243 lines_diff="2">
244 <assert_contents>
245 <has_text text="13-1101 Paratyphi_B" />
246 <has_text text="Paratyphi B var. Java" />
247 <has_text text="enterica" />
248 <has_text text="1,4,[5],12" />
249 <has_text text="PASS" />
250 <has_text text="2375035975"/>
251 <has_n_columns n="21" />
252 </assert_contents>
253 </output>
254 <output
255 name="alleles_output"
256 value="alleles-output-13-1101.json"
257 ftype="json"
258 compare="sim_size"/>
259 </test>
260 </tests>
261 <help>
262 <![CDATA[
263
264 Usage::
265
266 usage: sistr_cmd [-h] [-i fasta_path genome_name] [-f OUTPUT_FORMAT]
267 [-o OUTPUT_PREDICTION] [-p CGMLST_PROFILES]
268 [-n NOVEL_ALLELES] [-a ALLELES_OUTPUT] [-T TMP_DIR] [-K]
269 [--use-full-cgmlst-db] [--no-cgmlst] [-m] [--qc] [-t THREADS]
270 [-v] [-V]
271 [F [F ...]]
272
273 SISTR (Salmonella In Silico Typing Resource) Command-line Tool
274 ==============================================================
275 Serovar predictions from whole-genome sequence assemblies by determination of antigen gene and cgMLST gene alleles using BLAST.
276
277 Note about using the "--use-full-cgmlst-db" flag:
278 The "centroid" allele database is ~10% the size of the full set, so analysis is much quicker with the "centroid" than with the "full" set of alleles. Results from the two cgMLST allele sets should not differ.
279
280 If you find this program useful in your research, please cite as:
281
282 The Salmonella In Silico Typing Resource (SISTR): an open web-accessible tool for rapidly typing and subtyping draft Salmonella genome assemblies.
283 Catherine Yoshida, Peter Kruczkiewicz, Chad R. Laing, Erika J. Lingohr, Victor P.J. Gannon, John H.E. Nash, Eduardo N. Taboada.
284 PLoS ONE 11(1): e0147101. doi: 10.1371/journal.pone.0147101
285
286 positional arguments:
287 F Input genome FASTA file
288
289 optional arguments:
290 -h, --help show this help message and exit
291 -i fasta_path genome_name, --input-fasta-genome-name fasta_path genome_name
292 fasta file path to genome name pair
293 -f OUTPUT_FORMAT, --output-format OUTPUT_FORMAT
294 Output format (tab, json, csv, pickle)
295 -o OUTPUT_PREDICTION, --output-prediction OUTPUT_PREDICTION
296 SISTR serovar prediction output path
297 -p CGMLST_PROFILES, --cgmlst-profiles CGMLST_PROFILES
298 Output CSV file destination for cgMLST allelic
299 profiles
300 -n NOVEL_ALLELES, --novel-alleles NOVEL_ALLELES
301 Output FASTA file destination of novel cgMLST alleles
302 from input genomes
303 -a ALLELES_OUTPUT, --alleles-output ALLELES_OUTPUT
304 Output path of allele sequences and info to JSON
305 -T TMP_DIR, --tmp-dir TMP_DIR
306 Base temporary working directory for intermediate
307 analysis files.
308 -K, --keep-tmp Keep temporary analysis files.
309 --use-full-cgmlst-db Use the full set of cgMLST alleles which can include
310 highly similar alleles. By default the smaller
311 "centroid" alleles or representative alleles are used
312 for each marker.
313 --no-cgmlst Do not run cgMLST serovar prediction
314 -m, --run-mash Determine Mash MinHash genomic distances to Salmonella
315 genomes with trusted serovar designations. Mash binary
316 must be accessible via $PATH (e.g. /usr/bin).
317 --qc Perform basic QC to provide level of confidence in
318 serovar prediction results.
319 -t THREADS, --threads THREADS
320 Number of parallel threads to run sistr_cmd analysis.
321 -v, --verbose Logging verbosity level (-v == show warnings; -vvv ==
322 show debug info)
323 -V, --version show program's version number and exit
324 ]]>
325
326 </help>
327 <citations>
328 <!-- Citation for the SISTR PLOS ONE paper (Yoshida et al.); same DOI as the one quoted in the help text -->
329 <citation type="doi">10.1371/journal.pone.0147101</citation>
330 </citations>
331 </tool>