Mercurial > repos > bgruening > gfstats
comparison gfastats.xml @ 0:5f250ffcb1af draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gfastats commit 6ca9363cc4e0da886aab9accd79d52663247af29"
author | bgruening |
---|---|
date | Tue, 08 Mar 2022 21:49:13 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5f250ffcb1af |
---|---|
1 <tool id="gfastats" name="gfastats" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="20.01"> | |
2 <description>the swiss army knife for genome assembly</description> | |
3 <macros> <!-- presumably macros.xml defines the macros expanded in this file (requirements, biotools, length_macro, citations); verify against macros.xml --> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <expand macro="biotools"/> | |
8 <version_command>gfastats --version</version_command> | |
9 <!-- Assembles the gfastats command line from the selected mode. Free-text and dataset-path substitutions are single-quoted: the target_sequence sanitizer admits spaces and '|', which an unquoted shell word would split or treat as a pipe. In manipulation mode gfastats writes dataset.EXT, which is then renamed to output_dataset so it can be collected via from_work_dir; in statistics mode the report is redirected to the stats output. --> <command detect_errors="exit_code"><![CDATA[ | |
10 gfastats | |
11 '$input_file' | |
12 #if $mode_condition.selector == 'statistics' | |
13 #if $mode_condition.statistics_condition.selector == 'assembly' | |
14 $mode_condition.statistics_condition.expected_genomesize | |
15 #end if | |
16 #end if | |
17 #if $target_condition.target_option == 'true' | |
18 '$target_condition.target_sequence' | |
19 #if $target_condition.include_bed | |
20 --include-bed '$target_condition.include_bed' | |
21 #end if | |
22 #if $target_condition.exclude_bed | |
23 --exclude-bed '$target_condition.exclude_bed' | |
24 #end if | |
25 #end if | |
26 #if $mode_condition.selector == 'manipulation' | |
27 #if $mode_condition.swiss_army_knife | |
28 -k '$mode_condition.swiss_army_knife' | |
29 #end if | |
30 #if $mode_condition.sort | |
31 --sort $mode_condition.sort | |
32 #end if | |
33 $mode_condition.homopolymer_compress | |
34 -o dataset.$mode_condition.output_condition.out_format | |
35 #if $mode_condition.output_condition.out_format == 'fasta' | |
36 #if $mode_condition.output_condition.line_length | |
37 --line-length $mode_condition.output_condition.line_length | |
38 #end if | |
39 #else if $mode_condition.output_condition.out_format == 'fasta.gz' | |
40 #if $mode_condition.output_condition.line_length | |
41 --line-length $mode_condition.output_condition.line_length | |
42 #end if | |
43 #end if | |
44 #else | |
45 #if $mode_condition.statistics_condition.selector == 'size' | |
46 --out-size $mode_condition.statistics_condition.out_size | |
47 #else if $mode_condition.statistics_condition.selector == 'coordinates' | |
48 --out-coord $mode_condition.statistics_condition.out_coord | |
49 #else if $mode_condition.statistics_condition.selector == 'assembly' | |
50 --nstar-report | |
51 #else | |
52 --seq-report | |
53 $mode_condition.statistics_condition.out_sequence | |
54 #end if | |
55 $mode_condition.tabular > '$stats' | |
56 #end if | |
57 #if $mode_condition.selector == 'manipulation' | |
58 && mv dataset* output_dataset | |
59 #end if | |
60 ]]></command> | |
61 <inputs> <!-- Declares the input dataset, the optional target restriction (sequence header and/or BED intervals), and the mode selector whose branches carry the manipulation or statistics options. --> | |
62 <param name="input_file" argument="--fasta" type="data" | |
63 format="fasta,fastq,fastqsanger,gfa1,fasta.gz,fastq.gz,fastqsanger.gz,gfa1.gz" | |
64 label="Input file"/> | |
65 <conditional name="target_condition"> | |
66 <param name="target_option" type="select" label="Specify target sequences"> | |
67 <option value="false">Disabled</option> | |
68 <option value="true">Enabled</option> | |
69 </param> | |
70 <when value="false"/> | |
71 <when value="true"> | |
72 <param name="target_sequence" type="text" value="" label="Target sequence" help="Target specific sequence by header, optionally with coordinates: header[:start-end]"> | |
73 <sanitizer invalid_char=""> | |
74 <valid initial="string.digits,string.letters"> | |
75 <add value=":"/> | |
76 <add value="-"/> | |
77 <add value="_"/> | |
78 <add value="|"/> | |
79 <add value=" "/> | |
80 </valid> | |
81 </sanitizer> | |
82 <validator type="regex" message="Only letters, digits, ':', '-', '_', '|' and spaces are allowed">^[0-9A-Za-z:_| -]+$</validator> <!-- anchored so the whole value is checked; the hyphen is placed last in the class so it is a literal and not a range, matching the sanitizer whitelist above --> | |
83 </param> | |
84 <param argument="--include-bed" type="data" optional="true" | |
85 format="bed" label="Include specific intervals" | |
86 help="Generates output on a subset list of headers or coordinates | |
87 in 0-based bed format. It can be combined with --exclude-bed. Optional"/> | |
88 <param argument="--exclude-bed" type="data" format="bed" optional="true" | |
89 label="Exclude specific intervals" | |
90 help="Exclude a subset of headers or coordinates in 0-based bed format. It can be combined with --include-bed. Optional"/> | |
91 </when> | |
93 </conditional> | |
94 <conditional name="mode_condition"> | |
95 <param name="selector" type="select" label="Tool mode"> | |
96 <option value="statistics">Summary statistics generation</option> | |
97 <option value="manipulation">Genome assembly manipulation</option> | |
98 </param> | |
99 <when value="manipulation"> | |
100 <param argument="--swiss-army-knife" type="data" | |
101 format="txt" label="SAK input file" optional="true" | |
102 help="Set of instructions provided as an ordered list"/> | |
103 <conditional name="output_condition"> | |
104 <param argument="--out-format" type="select" | |
105 label="Output format" help="Outputs selected sequences."> | |
106 <option value="fasta">FASTA</option> | |
107 <option value="fasta.gz">FASTA.gz</option> | |
108 <option value="fastq">FASTQ</option> | |
109 <option value="fastq.gz" selected="true">FASTQ.gz</option> | |
110 <option value="gfa">GFA</option> | |
111 <option value="gfa.gz">GFA.gz</option> | |
112 </param> | |
113 <when value="fasta"> | |
114 <expand macro="length_macro"/> | |
115 </when> | |
116 <when value="fasta.gz"> | |
117 <expand macro="length_macro"/> | |
118 </when> | |
119 <when value="fastq"/> | |
120 <when value="fastq.gz"/> | |
121 <when value="gfa"/> | |
122 <when value="gfa.gz"/> | |
123 </conditional> | |
124 <param argument="--sort" type="select" label="Sort sequences" help="Specify how to sort the sequences. Ascending/descending use the sequence/path header."> | |
125 <option value="" selected="true">Disabled</option> | |
126 <option value="ascending">Ascending</option> | |
127 <option value="descending">Descending</option> | |
128 <option value="largest">Largest</option> | |
129 <option value="smallest">Smallest</option> | |
130 </param> | |
131 <param argument="--homopolymer-compress" type="boolean" truevalue="--homopolymer-compress" falsevalue="" checked="false" | |
132 label="Homopolymer compression" help="Compress all the homopolymers in the input"/> | |
133 </when> | |
134 <when value="statistics"> | |
135 <conditional name="statistics_condition"> | |
136 <param name="selector" type="select" label="Report mode"> | |
137 <option value="assembly" selected="true">Genome assembly statistics (--nstar-report)</option> | |
138 <option value="size">Scaffold, contig or gap sizes (--out-size)</option> | |
139 <option value="coordinates">AGP, contig or gap coordinates (--out-coord)</option> | |
140 <option value="sequence">Sequence statistics (--seq-report)</option> | |
141 </param> | |
142 <when value="size"> | |
143 <param argument="--out-size" type="select" label="Feature for reporting sizes" | |
144 help="Generate a tabular file with the sequence sizes"> | |
145 <option value="s">Scaffolds</option> | |
146 <option value="c">Contigs</option> | |
147 <option value="g">Gaps</option> | |
148 </param> | |
149 </when> | |
150 <when value="coordinates"> | |
151 <param argument="--out-coord" type="select" label="BED coordinates feature" | |
152 help="Generates bed coordinates of given feature. Default: agp"> | |
153 <option value="a">AGP</option> | |
154 <option value="c">Contigs</option> | |
155 <option value="g">Gaps</option> | |
156 </param> | |
157 </when> | |
158 <when value="assembly"> | |
159 <param name="expected_genomesize" type="integer" min="0" optional="true" | |
160 label="Expected genome size" help="Estimated genome size. This parameter is optional, but required for NG* statistics."/> | |
161 </when> | |
162 <when value="sequence"> | |
163 <param argument="--out-sequence" type="boolean" truevalue="--out-sequence" falsevalue="" checked="false" | |
164 label="Report actual sequence" help="It reports also the actual sequence"/> | |
165 </when> | |
166 </conditional> | |
167 <param argument="--tabular" type="boolean" truevalue="--tabular" falsevalue="" checked="true" | |
168 label="Tabular-format output" help="Generate output in tabular format"/> | |
169 </when> | |
170 </conditional> | |
171 </inputs> | |
172 <outputs> <!-- One dataset per mode: the statistics report in statistics mode, or the edited sequences (collected from output_dataset in the working directory) in manipulation mode. --> | |
173 <data name="stats" format="tabular" label="${tool.name} on ${on_string}: stats"> | |
174 <filter>mode_condition['selector'] == 'statistics'</filter> | |
175 <change_format> | |
176 <when input="mode_condition.tabular" value="" format="txt"/> <!-- the boolean lives inside mode_condition and is compared through its rendered falsevalue (empty string); txt is the plain-text datatype extension --> | |
177 </change_format> | |
178 </data> | |
179 <data name="output" format="fastq" from_work_dir="output_dataset" label="${tool.name} on ${on_string}: edited sequences"> | |
180 <filter>mode_condition['selector'] == 'manipulation'</filter> | |
181 <change_format> | |
182 <when input="mode_condition.output_condition.out_format" value="fasta" format="fasta"/> | |
183 <when input="mode_condition.output_condition.out_format" value="fasta.gz" format="fasta.gz"/> | |
184 <when input="mode_condition.output_condition.out_format" value="fastq" format="fastq"/> | |
185 <when input="mode_condition.output_condition.out_format" value="fastq.gz" format="fastq.gz"/> | |
186 <when input="mode_condition.output_condition.out_format" value="gfa" format="gfa1"/> | |
187 <when input="mode_condition.output_condition.out_format" value="gfa.gz" format="gfa1.gz"/> | |
188 </change_format> | |
189 </data> | |
190 </outputs> | |
191 <tests> | |
192 <!--Test 01 --> | |
193 <test expect_num_outputs="1"> | |
194 <param name="input_file" value="dataset_01.fastq.gz"/> | |
195 <conditional name="target_condition"> | |
196 <param name="target_option" value="true"/> <!-- the select is named target_option; NOTE(review): regenerate test_01.fasta.gz if it was produced without targeting enabled --> | |
197 <param name="target_sequence" value="S1_1"/> | |
198 </conditional> | |
199 <conditional name="mode_condition"> | |
200 <param name="selector" value="manipulation"/> | |
201 <param name="swiss_army_knife" value="swiss_army.sak"/> | |
202 <conditional name="output_condition"> | |
203 <param name="out_format" value="fasta.gz"/> | |
204 </conditional> | |
205 </conditional> | |
206 <output name="output" value="test_01.fasta.gz" ftype="fasta.gz"/> | |
207 </test> | |
208 <!--Test 02 --> | |
209 <test expect_num_outputs="1"> | |
210 <param name="input_file" value="dataset_01.fastq.gz"/> | |
211 <conditional name="target_condition"> | |
212 <param name="target_option" value="true"/> <!-- the select is named target_option; NOTE(review): regenerate test_02_stats.tabular if it was produced without targeting enabled --> | |
213 <param name="target_sequence" value="S1_1"/> | |
214 </conditional> | |
215 <conditional name="mode_condition"> | |
216 <param name="selector" value="statistics"/> | |
217 <conditional name="statistics_condition"> | |
218 <param name="selector" value="size"/> | |
219 <param name="out_size" value="c"/> | |
220 </conditional> | |
221 </conditional> | |
222 <output name="stats" value="test_02_stats.tabular" ftype="tabular"/> | |
223 </test> | |
224 <!--Test 03 --> | |
225 <test expect_num_outputs="1"> | |
226 <param name="input_file" value="dataset_02.fasta.gz"/> | |
227 <conditional name="mode_condition"> | |
228 <param name="selector" value="statistics"/> | |
229 <conditional name="statistics_condition"> | |
230 <param name="selector" value="sequence"/> | |
231 </conditional> | |
232 </conditional> | |
233 <output name="stats" value="test_03_stats.tabular" ftype="tabular"/> | |
234 </test> | |
235 <!--Test 04 --> | |
236 <test expect_num_outputs="1"> | |
237 <param name="input_file" value="dataset_03.fasta"/> | |
238 <conditional name="mode_condition"> | |
239 <param name="selector" value="statistics"/> | |
240 <conditional name="statistics_condition"> | |
241 <param name="selector" value="assembly"/> | |
242 <param name="expected_genomesize" value="600000"/> | |
243 </conditional> | |
244 </conditional> | |
245 <output name="stats" value="test_04_stats.tabular" ftype="tabular"/> | |
246 </test> | |
247 <!--Test 05 --> | |
248 <test expect_num_outputs="1"> | |
249 <param name="input_file" value="dataset_04.gfa"/> | |
250 <conditional name="mode_condition"> | |
251 <param name="selector" value="statistics"/> | |
252 <conditional name="statistics_condition"> | |
253 <param name="selector" value="coordinates"/> | |
254 <param name="out_coord" value="a"/> | |
255 </conditional> | |
256 </conditional> | |
257 <output name="stats" value="test_05_stats.tabular" ftype="tabular"/> | |
258 </test> | |
259 <!--Test 06 --> | |
260 <test expect_num_outputs="1"> | |
261 <param name="input_file" value="dataset_04.gfa"/> | |
262 <conditional name="mode_condition"> | |
263 <param name="selector" value="manipulation"/> | |
264 <conditional name="output_condition"> | |
265 <param name="out_format" value="fasta.gz"/> | |
266 </conditional> | |
267 </conditional> | |
268 <output name="output" value="test_06.fasta.gz" ftype="fasta.gz"/> | |
269 </test> | |
270 <!--Test 07 --> | |
271 <test expect_num_outputs="1"> | |
272 <param name="input_file" value="dataset_03.fasta"/> | |
273 <conditional name="mode_condition"> | |
274 <param name="selector" value="statistics"/> | |
275 <conditional name="statistics_condition"> | |
276 <param name="selector" value="assembly"/> | |
277 </conditional> | |
278 <param name="tabular" value="false"/> | |
279 </conditional> | |
280 <output name="stats" value="test_07_stats.tabular" ftype="txt"/> <!-- tabular output is switched off here, so the stats dataset is plain text --> | |
281 </test> | |
282 <!--Test 08 --> | |
283 <test expect_num_outputs="1"> | |
284 <param name="input_file" value="dataset_01.fastq.gz"/> | |
285 <conditional name="mode_condition"> | |
286 <param name="selector" value="manipulation"/> | |
287 <conditional name="output_condition"> | |
288 <param name="out_format" value="fasta.gz"/> | |
289 </conditional> | |
290 <param name="sort" value="ascending"/> | |
291 <param name="homopolymer_compress" value="true"/> | |
292 </conditional> | |
293 <output name="output" value="test_08.fasta.gz" ftype="fasta.gz"/> | |
294 </test> | |
295 </tests> | |
296 <help><![CDATA[ | |
297 | |
298 .. class:: infomark | |
299 | |
300 **Purpose** | |
301 | |
302 gfastats is a single fast and exhaustive tool for summary statistics and simultaneous genome assembly file manipulation. gfastats also allows seamless format conversion. | |
303 | |
304 | |
305 .. class:: infomark | |
306 | |
307 **Metrics details** | |
308 | |
309 Typical fast* metrics include: | |
310 | |
311 - Scaffold, contig and gap size | |
312 - Number of scaffolds, contigs and gaps | |
313 - Total length of scaffolds, contigs and gaps | |
314 - Scaffold, contig, gap N50 and statistics (full N*/NG* statistics with the --nstar-report flag) | |
315 - Area under the curve (AuN/AuNG) values for scaffolds, contigs and gaps | |
316 - Average scaffold, contig, gap size | |
317 - Largest scaffold, contig and gap | |
318 - Base composition and GC content | |
319 - Soft-masked base counts (lower case bases) | |
320 | |
321 | |
322 Typical gfa metrics include: | |
323 | |
324 - Number of nodes and edges | |
325 - Average degree | |
326 - Number of connected components, and length of the largest connected component | |
327 - Number of dead ends | |
328 - Number of disconnected components, and their total length | |
329 | |
330 | |
331 .. class:: infomark | |
332 | |
333 **Assembly manipulation** | |
334 | |
335 gfastats allows extensive assembly manipulation at the sequence level. Manipulation is achieved using a set of instructions provided as an ordered list in a file to the option **swiss army knife**. See the `instruction wiki <https://github.com/vgl-hub/gfastats/tree/main/instructions>`_ for a full list of instructions. | |
336 | |
337 ]]></help> | |
338 <expand macro="citations" /> | |
339 </tool> |