comparison meryl.xml @ 0:068920e730f4 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meryl commit f94b7a1d482e29e10c33205ae5394c4c4a1e74e0"
author iuc
date Mon, 05 Apr 2021 15:04:36 +0000
parents
children e5b18909f73d
comparison
equal deleted inserted replaced
-1:000000000000 0:068920e730f4
1 <tool id='meryl' name='Meryl' version='@TOOL_VERSION@+@GALAXY_TOOL_VERSION@' profile='20.01'>
2 <description>a genomic k-mer counter and sequence utility</description>
3 <macros>
4 <import>macros.xml</import> <!-- NOTE(review): macros.xml is not part of this listing; presumably it defines the macros expanded in this file and the @TOOL_VERSION@ / @GALAXY_TOOL_VERSION@ tokens. Confirm. -->
5 </macros>
6 <expand macro='edam_ontology' />
7 <expand macro='requirements' />
8 <version_command>meryl --version</version_command> <!-- command Galaxy runs to record the version of the underlying meryl binary -->
9 <command detect_errors='exit_code'><![CDATA[
10 ## Resolve the k-mer size once for the two modes that expose 'options_kmer_size': estimate it from genome size and collision rate, or take the user-supplied value.
11 #if $operation_type.command_type == 'count-kmers' or $operation_type.command_type == 'trio-mode'
12 #if $operation_type.options_kmer_size.kmer_size == 'estimate'
13 #from math import log
14 #set $size = int(log(int($operation_type.options_kmer_size.genome_size)*(1-float($operation_type.options_kmer_size.collision_rate))/float($operation_type.options_kmer_size.collision_rate))/log(4))
15 #else
16 #set $size = $operation_type.options_kmer_size.input_kmer_size
17 #end if
18 #end if
19 #if $operation_type.command_type == 'count-kmers'
20 meryl $operation_type.count_operations k=${size}
21 '$operation_type.input_reads'
22 output read-db.meryl &&
23 echo 'K-mer size: ${size}' &&
24 tar -zcf read-db.meryldb read-db.meryl
25 #elif $operation_type.command_type == 'filter-kmers'
26 mkdir -p ./temp_db/ &&
27 tar -zxf '$operation_type.input_meryldb_02' -C ./temp_db/ &&
28 mv ./temp_db/* tmp.meryl &&
29 meryl
30 $operation_type.filter_operations
31 $operation_type.N
32 tmp.meryl
33 output read-db.meryl &&
34 tar -zcf read-db.meryldb read-db.meryl
35 #elif $operation_type.command_type == 'arithmetic-kmers'
36 mkdir -p ./temp_db/ &&
37 tar -zxf '$operation_type.input_meryldb_02' -C ./temp_db/ &&
38 mv ./temp_db/* tmp.meryl &&
39 meryl
40 $operation_type.arithmetic_operations
41 $operation_type.X
42 tmp.meryl
43 output read-db.meryl &&
44 tar -zcf read-db.meryldb read-db.meryl
45 #elif $operation_type.command_type == 'groups-kmers'
46 mkdir -p ./temp_db/ &&
47 tar -zxf '$operation_type.input_meryldb_02' -C ./temp_db/ &&
48 mv ./temp_db/* tmp_01.meryl &&
49 tar -zxf '$operation_type.input_meryldb_03' -C ./temp_db/ &&
50 mv ./temp_db/* tmp_02.meryl &&
51 meryl
52 $operation_type.groups_operations
53 tmp_01.meryl
54 tmp_02.meryl
55 output read-db.meryl &&
56 tar -zcf read-db.meryldb read-db.meryl
57 #elif $operation_type.command_type == 'histogram-kmers'
58 mkdir -p ./temp_db/ &&
59 tar -zxf '$operation_type.input_meryldb_02' -C ./temp_db/ &&
60 mv ./temp_db/* tmp.meryl &&
61 meryl histogram tmp.meryl > read-db.hist
62 #elif $operation_type.command_type == 'trio-mode'
63 export MERQURY=\$(dirname \$(command -v merqury.sh))/../share/merqury/ &&
64 #for $i, $read in enumerate($operation_type.paternal_reads):
65 mkdir 'paternal${i}.meryl' && meryl count k=${size} '${read}' output 'paternal${i}.meryl' &&
66 #end for
67 meryl union-sum paternal*.meryl output pat.meryl &&
68 #for $i, $read in enumerate($operation_type.maternal_reads):
69 mkdir 'maternal${i}.meryl' && meryl count k=${size} '${read}' output 'maternal${i}.meryl' &&
70 #end for
71 meryl union-sum maternal*.meryl output mat.meryl &&
72 #for $i, $read in enumerate($operation_type.child_reads):
73 mkdir 'child${i}.meryl' && meryl count k=${size} '${read}' output 'child${i}.meryl' &&
74 #end for
75 meryl union-sum child*.meryl output child.meryl &&
76 ## mat specific kmers
77 meryl difference mat.meryl pat.meryl output mat.only.meryl &&
78 meryl histogram mat.only.meryl > mat.only.hist &&
79 java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar mat.only.hist > mat.only.ploidy &&
80 VAR=`sed -n 2p mat.only.ploidy | awk '{print \$NF}'` &&
81 meryl greater-than \$VAR output mat.only.filt.meryl mat.only.meryl &&
82
83 ## pat specific kmers
84 meryl difference pat.meryl mat.meryl output pat.only.meryl &&
85 meryl histogram pat.only.meryl > pat.only.hist &&
86 java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar pat.only.hist > pat.only.ploidy &&
87 VAR=`sed -n 2p pat.only.ploidy | awk '{print \$NF}'` &&
88 meryl greater-than \$VAR output pat.only.filt.meryl pat.only.meryl &&
89
90 ## shared kmers
91 meryl intersect output shared.meryl mat.meryl pat.meryl &&
92
93 ## mat hapmers
94 meryl intersect output mat.inherited.meryl child.meryl mat.only.filt.meryl &&
95 meryl histogram mat.inherited.meryl > mat.inherited.hist &&
96 java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar mat.inherited.hist > mat.inherited.ploidy &&
97 VAR=`sed -n 2p mat.inherited.ploidy | awk '{print \$NF}'` &&
98 meryl greater-than \$VAR output mat.hapmer.meryl mat.inherited.meryl &&
99 tar -czf 'mat.meryldb' mat.hapmer.meryl &&
100
101 ## pat hapmers
102 meryl intersect output pat.inherited.meryl child.meryl pat.only.filt.meryl &&
103 meryl histogram pat.inherited.meryl > pat.inherited.hist &&
104 java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar pat.inherited.hist > pat.inherited.ploidy &&
105 VAR=`sed -n 2p pat.inherited.ploidy | awk '{print \$NF}'` &&
106 meryl greater-than \$VAR output pat.hapmer.meryl pat.inherited.meryl &&
107 tar -czf 'pat.meryldb' pat.hapmer.meryl &&
108
109 ## shared hapmers
110 meryl intersect output shared.inherited.meryl child.meryl shared.meryl &&
111 meryl histogram shared.inherited.meryl > shared.inherited.hist &&
112 java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar shared.inherited.hist > shared.inherited.ploidy &&
113 VAR=`sed -n 2p shared.inherited.ploidy | awk '{print \$NF}'` &&
114 meryl greater-than \$VAR output shared.filt.meryl shared.inherited.meryl &&
115
116 ## child hapmers
117 meryl union-sum output child.inherited.meryl mat.inherited.meryl pat.inherited.meryl shared.inherited.meryl &&
118 meryl difference output read.only.meryl child.meryl child.inherited.meryl &&
119 tar -czf 'read-db.meryldb' read.only.meryl &&
120 echo 'K-mer size: ${size}'
121 #end if
122 ]]> </command>
123 <inputs> <!-- one top-level conditional selects the meryl mode; each 'when' branch declares only the parameters that mode's command line reads -->
124 <conditional name="operation_type">
125 <param name="command_type" type="select" label="Operation type selector" help="Select a type of operation">
126 <option value="count-kmers">Count operations</option>
127 <option value="filter-kmers">Filter operations</option>
128 <option value="arithmetic-kmers">Arithmetic operations on kmer counts</option>
129 <option value="groups-kmers">Operations on sets of k-mers</option>
130 <option value="histogram-kmers">Generate histogram dataset</option>
131 <option value="trio-mode">Build hap-mer dbs for trios</option>
132 </param>
133 <when value="count-kmers">
134 <param name="count_operations" type="select" label="Count operations" help="Select an operation to be executed">
135 <option value="count">Count: count the occurrences of canonical k-mers</option>
136 <option value="count-forward">Count-forward: count the occurrences of forward k-mers</option>
137 <option value="count-reverse">Count-reverse: count the occurrences of reverse k-mers</option>
138 </param>
139 <param name="input_reads" type="data" format="fastq,fasta" label="Input sequences" help="Select your reads in FASTA/FASTQ format." />
140 <conditional name="options_kmer_size">
141 <param name="kmer_size" type="select" label="K-mer size selector">
142 <option value="provide">Set a k-mer size</option>
143 <option value="estimate">Estimate the best k-mer size</option>
144 </param>
145 <when value="provide">
146 <param name="input_kmer_size" type="integer" min="1" max="50" value="" label="K-mer size" help="Length of the k-mers to count (an integer between 1 and 50)." />
147 </when>
148 <when value="estimate">
149 <param name="genome_size" type="integer" min="1000" max="70000000000" value="1000" label="Genome size" help="Haploid genome size or diploid genome size, depending on what we evaluate. In bp. Only required if the k-mer size is not provided." />
150 <param name="collision_rate" type="float" min="0.0001" max="0.01" value="0.001" label="Tolerable collision rate" help="Tolerable collision rate. By default is 0.001." />
151 </when>
152 </conditional>
153 </when>
154 <when value="filter-kmers">
155 <param name="filter_operations" type="select" label="Filter operations" help="Select an operation to be executed">
156 <option value="less-than">Less-than: return k-mers that occur fewer than N times in the input</option>
157 <option value="greater-than">Greater-than: return k-mers that occur more than N times in the input</option>
158 <option value="equal-to">Equal-to: return k-mers that occur exactly N times in the input</option>
159 <option value="not-equal-to">Not-equal-to: return k-mers that do not occur exactly N times in the input</option>
160 </param>
161 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" help="Select a meryldb dataset" />
162 <param name="N" type="integer" min="0" max="50000000000" value="" optional="true" label="Filter value" />
163 </when>
164 <when value="arithmetic-kmers">
165 <param name="arithmetic_operations" type="select" label="Arithmetic operations" help="Select an operation to be executed">
166 <option value="increase">Increase: add x to the count of each k-mer</option>
167 <option value="decrease">Decrease: subtract x from the count of each k-mer</option>
168 <option value="multiply">Multiply: multiply the count of each k-mer by x</option>
169 <option value="divide">Divide: divide the count of each k-mer by x</option>
170 <option value="divide-round">Divide-round: divide the count of each k-mer by x and round the result</option>
171 <option value="modulo">Modulo: set the count of each k-mer to the remainder of the count divided by x</option>
172 </param>
173 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" help="Select a meryldb dataset" />
174 <param name="X" type="integer" min="1" max="1000000" value="" optional="true" label="Operand" />
175 </when>
176 <when value="groups-kmers">
177 <param name="groups_operations" type="select" label="Operations on sets of kmers" help="Select an operation to be executed">
178 <option value="union">Union: return k-mers that occur in any input</option>
179 <option value="union-min">Union-min: return k-mers that occur in any input, set the count to the minimum count</option>
180 <option value="union-max">Union-max: return k-mers that occur in any input, set the count to the maximum count</option>
181 <option value="union-sum">Union-sum: return k-mers that occur in any input, set the count to the sum of the counts</option>
182 <option value="intersect">Intersect: return k-mers that occur in all inputs, set the count to the count in the first input</option>
183 <option value="intersect-min">Intersect-min: return k-mers that occur in all inputs, set the count to the minimum count</option>
184 <option value="intersect-max">Intersect-max: return k-mers that occur in all inputs, set the count to the maximum count</option>
185 <option value="intersect-sum">Intersect-sum: return k-mers that occur in all inputs, set the count to the sum of the counts</option>
186 <option value="subtract">Subtract: return k-mers that occur in the first input, subtracting counts from the other inputs</option>
187 <option value="difference">Difference: return k-mers that occur in the first input, but none of the other inputs</option>
188 <option value="symmetric-difference">Symmetric-difference: return k-mers that occur in exactly one input</option>
189 </param>
190 <param name="input_meryldb_02" type="data" format="meryldb" label="First input meryldb" help="Passed to meryl as the first input; this matters for the order-dependent operations (intersect, subtract, difference)." />
191 <param name="input_meryldb_03" type="data" format="meryldb" label="Second input meryldb" />
192 </when>
193 <when value="histogram-kmers">
194 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" />
195 </when>
196 <when value="trio-mode">
197 <param name="child_reads" type="data" format="fastq,fasta" multiple="true" label="F1 reads" help="Select F1 reads used for generating the assembly" />
198 <param name="paternal_reads" type="data" format="fastq,fasta" multiple="true" optional="true" label="Paternal reads" help="Select the paternal reads used for generating the assembly" />
199 <param name="maternal_reads" type="data" format="fastq,fasta" multiple="true" optional="true" label="Maternal reads" help="Select the maternal reads used for generating the assembly" />
200 <conditional name="options_kmer_size">
201 <param name="kmer_size" type="select" label="K-mer size selector">
202 <option value="provide">Set a k-mer size</option>
203 <option value="estimate">Estimate the best k-mer size</option>
204 </param>
205 <when value="provide">
206 <param name="input_kmer_size" type="integer" min="1" max="50" value="" label="K-mer size" help="Length of the k-mers to count (an integer between 1 and 50)." />
207 </when>
208 <when value="estimate">
209 <param name="genome_size" type="integer" min="1000" max="70000000000" value="1000" label="Genome size" help="Haploid genome size or diploid genome size, depending on what we evaluate. In bp. Only required if the k-mer size is not provided." />
210 <param name="collision_rate" type="float" min="0.0001" max="0.01" value="0.001" label="Tolerable collision rate" help="Tolerable collision rate. By default is 0.001." />
211 </when>
212 </conditional>
213 </when>
214 </conditional>
215 </inputs>
216 <outputs> <!-- every mode except histogram-kmers collects read-db.meryldb; trio-mode additionally collects the parental hap-mer databases and their inherited histograms -->
217 <data name="read_db" format="meryldb" from_work_dir="read-db.meryldb" label="${tool.name} on ${on_string}: read-db.meryldb">
218 <filter>operation_type["command_type"] != "histogram-kmers"</filter>
219 </data>
220 <data name="read_db_hist" format="tabular" from_work_dir="read-db.hist" label="${tool.name} on ${on_string}: read-db histogram">
221 <filter>operation_type["command_type"] == "histogram-kmers" or operation_type["command_type"] == "trio-mode"</filter> <!-- NOTE(review): only the histogram-kmers command line writes read-db.hist; no trio-mode command does. Confirm this output is populated in trio-mode. -->
222 </data>
223 <data name="pat_db" format="meryldb" from_work_dir="pat.meryldb" label="${tool.name} on ${on_string}: pat.meryl">
224 <filter>operation_type["command_type"] == "trio-mode"</filter>
225 </data>
226 <data name="pat_db_hist" format="tabular" from_work_dir="pat.inherited.hist" label="${tool.name} on ${on_string}: paternal inherited histogram">
227 <filter>operation_type["command_type"] == "trio-mode"</filter>
228 </data>
229 <data name="mat_db" format="meryldb" from_work_dir="mat.meryldb" label="${tool.name} on ${on_string}: mat.meryl">
230 <filter>operation_type["command_type"] == "trio-mode"</filter>
231 </data>
232 <data name="mat_db_hist" format="tabular" from_work_dir="mat.inherited.hist" label="${tool.name} on ${on_string}: maternal inherited histogram">
233 <filter>operation_type["command_type"] == "trio-mode"</filter>
234 </data>
235 </outputs>
236 <tests>
237 <test expect_num_outputs="1"> <!-- test 1: count-kmers, canonical count with a user-supplied k of 7; checks archive size and the echoed k-mer size -->
238 <conditional name="operation_type">
239 <param name="command_type" value="count-kmers" />
240 <param name="count_operations" value="count" />
241 <param name="input_reads" value="child.fasta" />
242 <conditional name="options_kmer_size">
243 <param name="kmer_size" value="provide" />
244 <param name="input_kmer_size" value="7" />
245 </conditional>
246 </conditional>
247 <output name="read_db" ftype="meryldb">
248 <assert_contents>
249 <has_size value="22152" delta="300" />
250 </assert_contents>
251 </output>
252 <assert_stdout>
253 <has_line line="K-mer size: 7" />
254 </assert_stdout>
255 </test>
256 <test expect_num_outputs="1"> <!-- test 2: filter-kmers, less-than with N=100; checks only the output archive size -->
257 <conditional name="operation_type">
258 <param name="command_type" value="filter-kmers" />
259 <param name="filter_operations" value="less-than" />
260 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
261 <param name="N" value="100" />
262 </conditional>
263 <output name="read_db" ftype="meryldb">
264 <assert_contents>
265 <has_size value="32077" delta="300" />
266 </assert_contents>
267 </output>
268 </test>
269 <test expect_num_outputs="1"> <!-- test 3: filter-kmers, greater-than with N=80; checks only the output archive size -->
270 <conditional name="operation_type">
271 <param name="command_type" value="filter-kmers" />
272 <param name="filter_operations" value="greater-than" />
273 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
274 <param name="N" value="80" />
275 </conditional>
276 <output name="read_db" ftype="meryldb">
277 <assert_contents>
278 <has_size value="49951" delta="300" />
279 </assert_contents>
280 </output>
281 </test>
282 <test expect_num_outputs="1"> <!-- test 4: filter-kmers, equal-to with N=100; checks only the output archive size -->
283 <conditional name="operation_type">
284 <param name="command_type" value="filter-kmers" />
285 <param name="filter_operations" value="equal-to" />
286 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
287 <param name="N" value="100" />
288 </conditional>
289 <output name="read_db" ftype="meryldb">
290 <assert_contents>
291 <has_size value="2621" delta="300" />
292 </assert_contents>
293 </output>
294 </test>
295 <test expect_num_outputs="1"> <!-- test 5: filter-kmers, not-equal-to with N=100; checks only the output archive size -->
296 <conditional name="operation_type">
297 <param name="command_type" value="filter-kmers" />
298 <param name="filter_operations" value="not-equal-to" />
299 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
300 <param name="N" value="100" />
301 </conditional>
302 <output name="read_db" ftype="meryldb">
303 <assert_contents>
304 <has_size value="59378" delta="300" />
305 </assert_contents>
306 </output>
307 </test>
308 <test expect_num_outputs="1"> <!-- test 6: arithmetic-kmers, increase with X=100000; checks only the output archive size -->
309 <conditional name="operation_type">
310 <param name="command_type" value="arithmetic-kmers" />
311 <param name="arithmetic_operations" value="increase" />
312 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
313 <param name="X" value="100000" />
314 </conditional>
315 <output name="read_db" ftype="meryldb">
316 <assert_contents>
317 <has_size value="59822" delta="300" />
318 </assert_contents>
319 </output>
320 </test>
321 <test expect_num_outputs="1"> <!-- test 7: arithmetic-kmers, decrease with X=100; checks only the output archive size -->
322 <conditional name="operation_type">
323 <param name="command_type" value="arithmetic-kmers" />
324 <param name="arithmetic_operations" value="decrease" />
325 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
326 <param name="X" value="100" />
327 </conditional>
328 <output name="read_db" ftype="meryldb">
329 <assert_contents>
330 <has_size value="42625" delta="300" />
331 </assert_contents>
332 </output>
333 </test>
334 <test expect_num_outputs="1"> <!-- test 8: arithmetic-kmers, multiply with X=3; checks only the output archive size -->
335 <conditional name="operation_type">
336 <param name="command_type" value="arithmetic-kmers" />
337 <param name="arithmetic_operations" value="multiply" />
338 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
339 <param name="X" value="3" />
340 </conditional>
341 <output name="read_db" ftype="meryldb">
342 <assert_contents>
343 <has_size value="60832" delta="300" />
344 </assert_contents>
345 </output>
346 </test>
347 <test expect_num_outputs="1"> <!-- test 9: arithmetic-kmers, divide with X=2; checks only the output archive size -->
348 <conditional name="operation_type">
349 <param name="command_type" value="arithmetic-kmers" />
350 <param name="arithmetic_operations" value="divide" />
351 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
352 <param name="X" value="2" />
353 </conditional>
354 <output name="read_db" ftype="meryldb">
355 <assert_contents>
356 <has_size value="56569" delta="300" />
357 </assert_contents>
358 </output>
359 </test>
360 <test expect_num_outputs="1"> <!-- test 10: arithmetic-kmers, divide-round with X=2; checks only the output archive size -->
361 <conditional name="operation_type">
362 <param name="command_type" value="arithmetic-kmers" />
363 <param name="arithmetic_operations" value="divide-round" />
364 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
365 <param name="X" value="2" />
366 </conditional>
367 <output name="read_db" ftype="meryldb">
368 <assert_contents>
369 <has_size value="56539" delta="300" />
370 </assert_contents>
371 </output>
372 </test>
373 <!-- test 11 -->
374 <test expect_num_outputs="1"> <!-- test 11: arithmetic-kmers, modulo with X=3; checks only the output archive size -->
375 <conditional name="operation_type">
376 <param name="command_type" value="arithmetic-kmers" />
377 <param name="arithmetic_operations" value="modulo" />
378 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
379 <param name="X" value="3" />
380 </conditional>
381 <output name="read_db" ftype="meryldb">
382 <assert_contents>
383 <has_size value="37542" delta="300" />
384 </assert_contents>
385 </output>
386 </test>
387 <test expect_num_outputs="1"> <!-- test 12: groups-kmers, union of two databases; checks only the output archive size -->
388 <conditional name="operation_type">
389 <param name="command_type" value="groups-kmers" />
390 <param name="groups_operations" value="union" />
391 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
392 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
393 </conditional>
394 <output name="read_db" ftype="meryldb">
395 <assert_contents>
396 <has_size value="36417" delta="300" />
397 </assert_contents>
398 </output>
399 </test>
400 <test expect_num_outputs="1"> <!-- test 13: groups-kmers, union-min of two databases; checks only the output archive size -->
401 <conditional name="operation_type">
402 <param name="command_type" value="groups-kmers" />
403 <param name="groups_operations" value="union-min" />
404 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
405 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
406 </conditional>
407 <output name="read_db" ftype="meryldb">
408 <assert_contents>
409 <has_size value="59226" delta="300" />
410 </assert_contents>
411 </output>
412 </test>
413 <test expect_num_outputs="1"> <!-- test 14: groups-kmers, union-max of two databases; checks only the output archive size -->
414 <conditional name="operation_type">
415 <param name="command_type" value="groups-kmers" />
416 <param name="groups_operations" value="union-max" />
417 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
418 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
419 </conditional>
420 <output name="read_db" ftype="meryldb">
421 <assert_contents>
422 <has_size value="59234" delta="300" />
423 </assert_contents>
424 </output>
425 </test>
426 <test expect_num_outputs="1"> <!-- test 15: groups-kmers, union-sum of two databases; checks only the output archive size -->
427 <conditional name="operation_type">
428 <param name="command_type" value="groups-kmers" />
429 <param name="groups_operations" value="union-sum" />
430 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
431 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
432 </conditional>
433 <output name="read_db" ftype="meryldb">
434 <assert_contents>
435 <has_size value="59004" delta="300" />
436 </assert_contents>
437 </output>
438 </test>
439 <test expect_num_outputs="1"> <!-- test 16: groups-kmers, intersect of two databases; checks only the output archive size -->
440 <conditional name="operation_type">
441 <param name="command_type" value="groups-kmers" />
442 <param name="groups_operations" value="intersect" />
443 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
444 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
445 </conditional>
446 <output name="read_db" ftype="meryldb">
447 <assert_contents>
448 <has_size value="14951" delta="300" />
449 </assert_contents>
450 </output>
451 </test>
452 <test expect_num_outputs="1"> <!-- test 17: groups-kmers, intersect-min of two databases; checks only the output archive size -->
453 <conditional name="operation_type">
454 <param name="command_type" value="groups-kmers" />
455 <param name="groups_operations" value="intersect-min" />
456 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
457 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
458 </conditional>
459 <output name="read_db" ftype="meryldb">
460 <assert_contents>
461 <has_size value="14957" delta="300" />
462 </assert_contents>
463 </output>
464 </test>
465 <test expect_num_outputs="1"> <!-- test 18: groups-kmers, intersect-max of two databases; checks only the output archive size -->
466 <conditional name="operation_type">
467 <param name="command_type" value="groups-kmers" />
468 <param name="groups_operations" value="intersect-max" />
469 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
470 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
471 </conditional>
472 <output name="read_db" ftype="meryldb">
473 <assert_contents>
474 <has_size value="14956" delta="300" />
475 </assert_contents>
476 </output>
477 </test>
478 <test expect_num_outputs="1"> <!-- test 19: groups-kmers, intersect-sum of two databases; checks only the output archive size -->
479 <conditional name="operation_type">
480 <param name="command_type" value="groups-kmers" />
481 <param name="groups_operations" value="intersect-sum" />
482 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
483 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
484 </conditional>
485 <output name="read_db" ftype="meryldb">
486 <assert_contents>
487 <has_size value="14953" delta="300" />
488 </assert_contents>
489 </output>
490 </test>
491 <!-- test 20 -->
492 <test expect_num_outputs="1"> <!-- test 20: groups-kmers, subtract (second database from the first); checks only the output archive size -->
493 <conditional name="operation_type">
494 <param name="command_type" value="groups-kmers" />
495 <param name="groups_operations" value="subtract" />
496 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
497 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
498 </conditional>
499 <output name="read_db" ftype="meryldb">
500 <assert_contents>
501 <has_size value="23999" delta="300" />
502 </assert_contents>
503 </output>
504 </test>
505 <test expect_num_outputs="1"> <!-- test 21: groups-kmers, difference (first database minus the second); checks only the output archive size -->
506 <conditional name="operation_type">
507 <param name="command_type" value="groups-kmers" />
508 <param name="groups_operations" value="difference" />
509 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
510 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
511 </conditional>
512 <output name="read_db" ftype="meryldb">
513 <assert_contents>
514 <has_size value="24016" delta="300" />
515 </assert_contents>
516 </output>
517 </test>
518 <test expect_num_outputs="1"> <!-- test 22: groups-kmers, symmetric-difference of two databases; checks only the output archive size -->
519 <conditional name="operation_type">
520 <param name="command_type" value="groups-kmers" />
521 <param name="groups_operations" value="symmetric-difference" />
522 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" />
523 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" />
524 </conditional>
525 <output name="read_db" ftype="meryldb">
526 <assert_contents>
527 <has_size value="57455" delta="300" />
528 </assert_contents>
529 </output>
530 </test>
531 <test expect_num_outputs="6"> <!-- test 23: trio-mode with one child, one paternal and one maternal FASTA and k=7; the three databases are checked by size, the three histograms by file comparison -->
532 <conditional name="operation_type">
533 <param name="command_type" value="trio-mode" />
534 <param name="child_reads" value="child.fasta" />
535 <param name="paternal_reads" value="paternal.fasta" />
536 <param name="maternal_reads" value="maternal.fasta" />
537 <conditional name="options_kmer_size">
538 <param name="kmer_size" value="provide" />
539 <param name="input_kmer_size" value="7" />
540 </conditional>
541 </conditional>
542 <output name="read_db" ftype="meryldb">
543 <assert_contents>
544 <has_size value="1573" delta="300" />
545 </assert_contents>
546 </output>
547 <output name="read_db_hist" file="output_23.read-db.hist" />
548 <output name="pat_db" ftype="meryldb">
549 <assert_contents>
550 <has_size value="1779" delta="300" />
551 </assert_contents>
552 </output>
553 <output name="pat_db_hist" file="output_23.pat.hist" />
554 <output name="mat_db" ftype="meryldb">
555 <assert_contents>
556 <has_size value="1569" delta="300" />
557 </assert_contents>
558 </output>
559 <output name="mat_db_hist" file="output_23.mat.hist" />
560 </test>
561 <test expect_num_outputs="1"> <!-- test 24: histogram-kmers on an existing database; the histogram is compared against a stored file -->
562 <conditional name="operation_type">
563 <param name="command_type" value="histogram-kmers" />
564 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
565 </conditional>
566 <output name="read_db_hist" file="output_24.read-db.hist" />
567 </test>
568 </tests>
569 <help><![CDATA[
570
571 .. class:: infomark
572
573 **Purpose**
574
575 Meryl is a k-mer counter. It is built into the Celera assembler and is also available as a stand-alone application.
576 Meryl uses a sorting-based approach that sorts the k-mers in lexicographical order.
577
578 In addition to generating count databases, meryl can perform simple operations on them.
579
580 -----
581
582 .. class:: infomark
583
584 **Basic functions**
585
586 The functions that meryl includes are described below:
587
588 ::
589
590 COUNT OPERATIONS
591 - Count: count the occurrences of canonical k-mers
592 - Count-forward: count the occurrences of forward k-mers
593 - Count-reverse: count the occurrences of reverse k-mers
594 FILTERING OPERATIONS
595 - Less-than: return k-mers that occur fewer than N times in the input
596 - Greater-than: return k-mers that occur more than N times in the input
597 - Equal-to: return k-mers that occur exactly N times in the input
598 - Not-equal-to: return k-mers that do not occur exactly N times in the input
599 ARITHMETIC OPERATIONS
600 - Increase: add x to the count of each k-mer
601 - Decrease: subtract x from the count of each k-mer
602 - Multiply: multiply the count of each k-mer by x
603 - Divide: divide the count of each k-mer by x
604 - Divide-round: divide the count of each k-mer by x and round the result
605 - Modulo: set the count of each k-mer to the remainder of the count divided by x
606 OPERATIONS ON SETS
607 - Union-min: return k-mers that occur in any input, set the count to the minimum count
608 - Union-max: return k-mers that occur in any input, set the count to the maximum count
609 - Union-sum: return k-mers that occur in any input, set the count to the sum of the counts
610 - Intersect: return k-mers that occur in all inputs, set the count to the count in the first input
611 - Intersect-min: return k-mers that occur in all inputs, set the count to the minimum count
612 - Intersect-max: return k-mers that occur in all inputs, set the count to the maximum count
613 - Intersect-sum: return k-mers that occur in all inputs, set the count to the sum of the counts
614 - Subtract: return k-mers that occur in the first input, subtracting counts from the other inputs
615 - Difference: return k-mers that occur in the first input, but none of the other inputs
616 - Symmetric-difference: return k-mers that occur in exactly one input
617
618 -----
619
620 .. class:: infomark
621
622 **Additional function: build hap-mers dbs for trios**
623
624 In addition to the basic operations, this wrapper can build the hap-mer databases for trios, in accordance
625 with `merqury's recommended guidelines. <https://github.com/marbl/merqury/wiki/1.-Prepare-meryl-dbs#3-build-hap-mer-dbs-for-trios>`_
626
627 ]]> </help>
628 <expand macro="citations" />
629 </tool>