Mercurial > repos > iuc > meryl
comparison meryl.xml @ 0:068920e730f4 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meryl commit f94b7a1d482e29e10c33205ae5394c4c4a1e74e0"
author | iuc |
---|---|
date | Mon, 05 Apr 2021 15:04:36 +0000 |
parents | |
children | e5b18909f73d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:068920e730f4 |
---|---|
1 <tool id='meryl' name='Meryl' version='@TOOL_VERSION@+@GALAXY_TOOL_VERSION@' profile='20.01'> | |
2 <description>a genomic k-mer counter and sequence utility</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro='edam_ontology' /> | |
7 <expand macro='requirements' /> | |
8 <version_command>meryl --version</version_command> | |
9 <command detect_errors='exit_code'><![CDATA[ | |
10 #if $operation_type.command_type == 'count-kmers' | |
11 meryl | |
12 ## Resolve k once into $kmer_size: estimated from genome size and collision rate, or taken from the user-supplied value. | |
13 #if $operation_type.options_kmer_size.kmer_size == 'estimate' | |
14 #from math import log | |
15 #set $kmer_size = int(log(int($operation_type.options_kmer_size.genome_size)*(1-float($operation_type.options_kmer_size.collision_rate))/float($operation_type.options_kmer_size.collision_rate))/log(4)) | |
16 #else | |
17 #set $kmer_size = $operation_type.options_kmer_size.input_kmer_size | |
18 #end if | |
19 $operation_type.count_operations k=$kmer_size '$operation_type.input_reads' output read-db.meryl && | |
20 echo 'K-mer size: $kmer_size' && | |
21 tar -zcf read-db.meryldb read-db.meryl | |
22 #elif $operation_type.command_type == 'filter-kmers' | |
23 mkdir -p ./temp_db/ && | |
24 tar -zxf $operation_type.input_meryldb_02 -C ./temp_db/ && | |
25 mv ./temp_db/* tmp.meryl && | |
26 meryl | |
27 $operation_type.filter_operations | |
28 $operation_type.N | |
29 tmp.meryl | |
30 output read-db.meryl && | |
31 tar -zcf read-db.meryldb read-db.meryl | |
32 #elif $operation_type.command_type == 'arithmetic-kmers' | |
33 mkdir -p ./temp_db/ && | |
34 tar -zxf $operation_type.input_meryldb_02 -C ./temp_db/ && | |
35 mv ./temp_db/* tmp.meryl && | |
36 meryl | |
37 $operation_type.arithmetic_operations | |
38 $operation_type.X | |
39 tmp.meryl | |
40 output read-db.meryl && | |
41 tar -zcf read-db.meryldb read-db.meryl | |
42 #elif $operation_type.command_type == 'groups-kmers' | |
43 mkdir -p ./temp_db/ && | |
44 tar -zxf $operation_type.input_meryldb_02 -C ./temp_db/ && | |
45 mv ./temp_db/* tmp_01.meryl && | |
46 tar -zxf $operation_type.input_meryldb_03 -C ./temp_db/ && | |
47 mv ./temp_db/* tmp_02.meryl && | |
48 meryl | |
49 $operation_type.groups_operations | |
50 tmp_01.meryl | |
51 tmp_02.meryl | |
52 output read-db.meryl && | |
53 tar -zcf read-db.meryldb read-db.meryl | |
54 #elif $operation_type.command_type == 'histogram-kmers' | |
55 mkdir -p ./temp_db/ && | |
56 tar -zxf $operation_type.input_meryldb_02 -C ./temp_db/ && | |
57 mv ./temp_db/* tmp.meryl && | |
58 meryl histogram tmp.meryl > read-db.hist | |
59 #elif $operation_type.command_type == 'trio-mode' | |
60 export MERQURY=\$(dirname \$(command -v merqury.sh))/../share/merqury/ && | |
61 #for $i, $read in enumerate($paternal_reads): | |
62 mkdir 'paternal{$i}.meryl' && | |
63 meryl count k=$operation_type.options_kmer_size.input_kmer_size '${read}' output 'paternal{$i}.meryl' && | |
64 #end for | |
65 meryl union-sum paternal*.meryl output pat.meryl && | |
66 #for $i, $read in enumerate($maternal_reads): | |
67 mkdir 'maternal{$i}.meryl' && | |
68 meryl count k=$operation_type.options_kmer_size.input_kmer_size '${read}' output 'maternal{$i}.meryl' && | |
69 #end for | |
70 meryl union-sum maternal*.meryl output mat.meryl && | |
71 #for $i, $read in enumerate($child_reads): | |
72 mkdir 'child{$i}.meryl' && | |
73 meryl count k=$operation_type.options_kmer_size.input_kmer_size '${read}' output 'child{$i}.meryl' && | |
74 #end for | |
75 meryl union-sum child*.meryl output child.meryl && | |
76 ## mat specific kmers | |
77 meryl difference mat.meryl pat.meryl output mat.only.meryl && | |
78 meryl histogram mat.only.meryl > mat.only.hist && | |
79 java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar mat.only.hist > mat.only.ploidy && | |
80 VAR=`sed -n 2p mat.only.ploidy | awk '{print \$NF}'` && | |
81 meryl greater-than \$VAR output mat.only.filt.meryl mat.only.meryl && | |
82 | |
83 ## pat specific kmers | |
84 meryl difference pat.meryl mat.meryl output pat.only.meryl && | |
85 meryl histogram pat.only.meryl > pat.only.hist && | |
86 java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar pat.only.hist > pat.only.ploidy && | |
87 VAR=`sed -n 2p pat.only.ploidy | awk '{print \$NF}'` && | |
88 meryl greater-than \$VAR output pat.only.filt.meryl pat.only.meryl && | |
89 | |
90 ## shared kmers | |
91 meryl intersect output shared.meryl mat.meryl pat.meryl && | |
92 | |
93 ## mat hapmers | |
94 meryl intersect output mat.inherited.meryl child.meryl mat.only.filt.meryl && | |
95 meryl histogram mat.inherited.meryl > mat.inherited.hist && | |
96 java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar mat.inherited.hist > mat.inherited.ploidy && | |
97 VAR=`sed -n 2p mat.inherited.ploidy | awk '{print \$NF}'` && | |
98 meryl greater-than \$VAR output mat.hapmer.meryl mat.inherited.meryl && | |
99 tar -czf 'mat.meryldb' mat.hapmer.meryl && | |
100 | |
101 ## pat hapmers | |
102 meryl intersect output pat.inherited.meryl child.meryl pat.only.filt.meryl && | |
103 meryl histogram pat.inherited.meryl > pat.inherited.hist && | |
104 java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar pat.inherited.hist > pat.inherited.ploidy && | |
105 VAR=`sed -n 2p pat.inherited.ploidy | awk '{print \$NF}'` && | |
106 meryl greater-than \$VAR output pat.hapmer.meryl pat.inherited.meryl && | |
107 tar -czf 'pat.meryldb' pat.hapmer.meryl && | |
108 | |
109 ## shared hapmers | |
110 meryl intersect output shared.inherited.meryl child.meryl shared.meryl && | |
111 meryl histogram shared.inherited.meryl > shared.inherited.hist && | |
112 java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar shared.inherited.hist > shared.inherited.ploidy && | |
113 VAR=`sed -n 2p shared.inherited.ploidy | awk '{print \$NF}'` && | |
114 meryl greater-than \$VAR output shared.filt.meryl shared.inherited.meryl && | |
115 | |
116 ## child hapmers | |
117 meryl union-sum output child.inherited.meryl mat.inherited.meryl pat.inherited.meryl shared.inherited.meryl && | |
118 meryl difference output read.only.meryl child.meryl child.inherited.meryl && | |
119 tar -czf 'read-db.meryldb' read.only.meryl && | |
120 echo 'K-mer size: ${$operation_type.options_kmer_size.input_kmer_size}' | |
121 #end if | |
122 ]]> </command> | |
123 <inputs> | |
124 <conditional name="operation_type"> | |
125 <param name="command_type" type="select" label="Operation type selector" help="Select a type of operation"> | |
126 <option value="count-kmers">Count operations</option> | |
127 <option value="filter-kmers">Filter operations</option> | |
128 <option value="arithmetic-kmers">Arithmetic operations on kmer counts</option> | |
129 <option value="groups-kmers">Operations on sets of k-mers</option> | |
130 <option value="histogram-kmers">Generate histogram dataset</option> | |
131 <option value="trio-mode">Build hap-mer dbs for trios</option> | |
132 </param> | |
133 <when value="count-kmers"> | |
134 <param name="count_operations" type="select" label="Count operations" help="Select an operation to be executed"> | |
135 <option value="count">Count: count the occurrences of canonical k-mers</option> | |
136 <option value="count-forward">Count-forward: count the occurrences of forward k-mers</option> | |
137 <option value="count-reverse">Count-reverse: count the occurrences of reverse k-mers</option> | |
138 </param> | |
139 <param name="input_reads" type="data" format="fastq,fasta" label="Input sequences" help="Select your reads in FASTA/FASTQ format." /> | |
140 <conditional name="options_kmer_size"> | |
141 <param name="kmer_size" type="select" label="K-mer size selector"> | |
142 <option value="provide">Set a k-mer size</option> | |
143 <option value="estimate">Estimate the best k-mer size</option> | |
144 </param> | |
145 <when value="provide"> | |
146 <param name="input_kmer_size" type="integer" min="1" max="50" value="" label="K-mer size" help="Length k of the k-mers to count (between 1 and 50)." /> | |
147 </when> | |
148 <when value="estimate"> | |
149 <param name="genome_size" type="integer" min="1000" max="70000000000" value="1000" label="Genome size" help="Haploid genome size or diploid genome size, depending on what we evaluate. In bp. Only required if the k-mer size is not provided." /> | |
150 <param name="collision_rate" type="float" min="0.0001" max="0.01" value="0.001" label="Tolerable collision rate" help="Tolerable collision rate. By default is 0.001." /> | |
151 </when> | |
152 </conditional> | |
153 </when> | |
154 <when value="filter-kmers"> | |
155 <param name="filter_operations" type="select" label="Filter operations" help="Select an operation to be executed"> | |
156 <option value="less-than">Less-than: return k-mers that occur fewer than N times in the input</option> | |
157 <option value="greater-than">Greater-than: return k-mers that occur more than N times in the input</option> | |
158 <option value="equal-to">Equal-to: return k-mers that occur exactly N times in the input</option> | |
159 <option value="not-equal-to">Not-equal-to: return k-mers that do not occur exactly N times in the input</option> | |
160 </param> | |
161 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" help="Select a meryldb dataset" /> | |
162 <param name="N" type="integer" min="0" max="50000000000" value="" optional="true" label="Filter value" /> | |
163 </when> | |
164 <when value="arithmetic-kmers"> | |
165 <param name="arithmetic_operations" type="select" label="Arithmetic operations" help="Select an operation to be executed"> | |
166 <option value="increase">Increase: add x to the count of each k-mer</option> | |
167 <option value="decrease">Decrease: subtract x from the count of each k-mer</option> | |
168 <option value="multiply">Multiply: multiply the count of each k-mer by x</option> | |
169 <option value="divide">Divide: divide the count of each k-mer by x</option> | |
170 <option value="divide-round">Divide-round: divide the count of each k-mer by x and round the results</option> | |
171 <option value="modulo">Modulo: set the count of each k-mer to the remainder of the count divided by x</option> | |
172 </param> | |
173 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" help="Select a meryldb dataset" /> | |
174 <param name="X" type="integer" min="1" max="1000000" value="" optional="true" label="Operand" /> | |
175 </when> | |
176 <when value="groups-kmers"> | |
177 <param name="groups_operations" type="select" label="Operations on sets of kmers" help="Select an operation to be executed"> | |
178 <option value="union">Union: return k-mers that occur in any input</option> | |
179 <option value="union-min">Union-min: return k-mers that occur in any input, set the count to the minimum count</option> | |
180 <option value="union-max">Union-max: return k-mers that occur in any input, set the count to the maximum count</option> | |
181 <option value="union-sum">Union-sum: return k-mers that occur in any input, set the count to the sum of the counts</option> | |
182 <option value="intersect">Intersect: return k-mers that occur in all inputs, set the count to the count in the first input</option> | |
183 <option value="intersect-min">Intersect-min: return k-mers that occur in all inputs, set the count to the minimum count</option> | |
184 <option value="intersect-max">Intersect-max: return k-mers that occur in all inputs, set the count to the maximum count</option> | |
185 <option value="intersect-sum">Intersect-sum: return k-mers that occur in all inputs, set the count to the sum of the counts</option> | |
186 <option value="subtract">Subtract: return k-mers that occur in the first input, subtracting counts from the other inputs</option> | |
187 <option value="difference">Difference: return k-mers that occur in the first input, but none of the other inputs</option> | |
188 <option value="symmetric-difference">Symmetric-difference: return k-mers that occur in exactly one input</option> | |
189 </param> | |
190 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" /> | |
191 <param name="input_meryldb_03" type="data" format="meryldb" label="Input meryldb" /> | |
192 </when> | |
193 <when value="histogram-kmers"> | |
194 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" /> | |
195 </when> | |
196 <when value="trio-mode"> | |
197 <param name="child_reads" type="data" format="fastq,fasta" multiple="true" label="F1 reads" help="Select F1 reads used for generating the assembly" /> | |
198 <param name="paternal_reads" type="data" format="fastq,fasta" multiple="true" optional="true" label="Paternal reads" help="Select the paternal reads used for generating the assembly" /> | |
199 <param name="maternal_reads" type="data" format="fastq,fasta" multiple="true" optional="true" label="Maternal reads" help="Select the maternal reads used for generating the assembly" /> | |
200 <conditional name="options_kmer_size"> | |
201 <param name="kmer_size" type="select" label="K-mer size selector"> | |
202 <option value="provide">Set a k-mer size</option> | |
203 <option value="estimate">Estimate the best k-mer size</option> | |
204 </param> | |
205 <when value="provide"> | |
206 <param name="input_kmer_size" type="integer" min="1" max="50" value="" label="K-mer size" help="Length k of the k-mers to count (between 1 and 50)." /> | |
207 </when> | |
208 <when value="estimate"> | |
209 <param name="genome_size" type="integer" min="1000" max="70000000000" value="1000" label="Genome size" help="Haploid genome size or diploid genome size, depending on what we evaluate. In bp. Only required if the k-mer size is not provided." /> | |
210 <param name="collision_rate" type="float" min="0.0001" max="0.01" value="0.001" label="Tolerable collision rate" help="Tolerable collision rate. By default is 0.001." /> | |
211 </when> | |
212 </conditional> | |
213 </when> | |
214 </conditional> | |
215 </inputs> | |
216 <outputs> | |
217 <data name="read_db" format="meryldb" from_work_dir="read-db.meryldb" label="${tool.name} on ${on_string}: read-db.meryldb"> | |
218 <filter>operation_type["command_type"] != "histogram-kmers"</filter> | |
219 </data> | |
220 <data name="read_db_hist" format="tabular" from_work_dir="read-db.hist" label="${tool.name} on ${on_string}: read-db histogram"> | |
221 <filter>operation_type["command_type"] == "histogram-kmers" or operation_type["command_type"] == "trio-mode"</filter> | |
222 </data> | |
223 <data name="pat_db" format="meryldb" from_work_dir="pat.meryldb" label="${tool.name} on ${on_string}: pat.meryl"> | |
224 <filter>operation_type["command_type"] == "trio-mode"</filter> | |
225 </data> | |
226 <data name="pat_db_hist" format="tabular" from_work_dir="pat.inherited.hist" label="${tool.name} on ${on_string}: paternal inherited histogram"> | |
227 <filter>operation_type["command_type"] == "trio-mode"</filter> | |
228 </data> | |
229 <data name="mat_db" format="meryldb" from_work_dir="mat.meryldb" label="${tool.name} on ${on_string}: mat.meryl"> | |
230 <filter>operation_type["command_type"] == "trio-mode"</filter> | |
231 </data> | |
232 <data name="mat_db_hist" format="tabular" from_work_dir="mat.inherited.hist" label="${tool.name} on ${on_string}: maternal inherited histogram"> | |
233 <filter>operation_type["command_type"] == "trio-mode"</filter> | |
234 </data> | |
235 </outputs> | |
236 <tests> | |
237 <test expect_num_outputs="1"> | |
238 <conditional name="operation_type"> | |
239 <param name="command_type" value="count-kmers" /> | |
240 <param name="count_operations" value="count" /> | |
241 <param name="input_reads" value="child.fasta" /> | |
242 <conditional name="options_kmer_size"> | |
243 <param name="kmer_size" value="provide" /> | |
244 <param name="input_kmer_size" value="7" /> | |
245 </conditional> | |
246 </conditional> | |
247 <output name="read_db" ftype="meryldb"> | |
248 <assert_contents> | |
249 <has_size value="22152" delta="300" /> | |
250 </assert_contents> | |
251 </output> | |
252 <assert_stdout> | |
253 <has_line line="K-mer size: 7" /> | |
254 </assert_stdout> | |
255 </test> | |
256 <test expect_num_outputs="1"> | |
257 <conditional name="operation_type"> | |
258 <param name="command_type" value="filter-kmers" /> | |
259 <param name="filter_operations" value="less-than" /> | |
260 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
261 <param name="N" value="100" /> | |
262 </conditional> | |
263 <output name="read_db" ftype="meryldb"> | |
264 <assert_contents> | |
265 <has_size value="32077" delta="300" /> | |
266 </assert_contents> | |
267 </output> | |
268 </test> | |
269 <test expect_num_outputs="1"> | |
270 <conditional name="operation_type"> | |
271 <param name="command_type" value="filter-kmers" /> | |
272 <param name="filter_operations" value="greater-than" /> | |
273 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
274 <param name="N" value="80" /> | |
275 </conditional> | |
276 <output name="read_db" ftype="meryldb"> | |
277 <assert_contents> | |
278 <has_size value="49951" delta="300" /> | |
279 </assert_contents> | |
280 </output> | |
281 </test> | |
282 <test expect_num_outputs="1"> | |
283 <conditional name="operation_type"> | |
284 <param name="command_type" value="filter-kmers" /> | |
285 <param name="filter_operations" value="equal-to" /> | |
286 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
287 <param name="N" value="100" /> | |
288 </conditional> | |
289 <output name="read_db" ftype="meryldb"> | |
290 <assert_contents> | |
291 <has_size value="2621" delta="300" /> | |
292 </assert_contents> | |
293 </output> | |
294 </test> | |
295 <test expect_num_outputs="1"> | |
296 <conditional name="operation_type"> | |
297 <param name="command_type" value="filter-kmers" /> | |
298 <param name="filter_operations" value="not-equal-to" /> | |
299 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
300 <param name="N" value="100" /> | |
301 </conditional> | |
302 <output name="read_db" ftype="meryldb"> | |
303 <assert_contents> | |
304 <has_size value="59378" delta="300" /> | |
305 </assert_contents> | |
306 </output> | |
307 </test> | |
308 <test expect_num_outputs="1"> | |
309 <conditional name="operation_type"> | |
310 <param name="command_type" value="arithmetic-kmers" /> | |
311 <param name="arithmetic_operations" value="increase" /> | |
312 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
313 <param name="X" value="100000" /> | |
314 </conditional> | |
315 <output name="read_db" ftype="meryldb"> | |
316 <assert_contents> | |
317 <has_size value="59822" delta="300" /> | |
318 </assert_contents> | |
319 </output> | |
320 </test> | |
321 <test expect_num_outputs="1"> | |
322 <conditional name="operation_type"> | |
323 <param name="command_type" value="arithmetic-kmers" /> | |
324 <param name="arithmetic_operations" value="decrease" /> | |
325 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
326 <param name="X" value="100" /> | |
327 </conditional> | |
328 <output name="read_db" ftype="meryldb"> | |
329 <assert_contents> | |
330 <has_size value="42625" delta="300" /> | |
331 </assert_contents> | |
332 </output> | |
333 </test> | |
334 <test expect_num_outputs="1"> | |
335 <conditional name="operation_type"> | |
336 <param name="command_type" value="arithmetic-kmers" /> | |
337 <param name="arithmetic_operations" value="multiply" /> | |
338 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
339 <param name="X" value="3" /> | |
340 </conditional> | |
341 <output name="read_db" ftype="meryldb"> | |
342 <assert_contents> | |
343 <has_size value="60832" delta="300" /> | |
344 </assert_contents> | |
345 </output> | |
346 </test> | |
347 <test expect_num_outputs="1"> | |
348 <conditional name="operation_type"> | |
349 <param name="command_type" value="arithmetic-kmers" /> | |
350 <param name="arithmetic_operations" value="divide" /> | |
351 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
352 <param name="X" value="2" /> | |
353 </conditional> | |
354 <output name="read_db" ftype="meryldb"> | |
355 <assert_contents> | |
356 <has_size value="56569" delta="300" /> | |
357 </assert_contents> | |
358 </output> | |
359 </test> | |
360 <test expect_num_outputs="1"> | |
361 <conditional name="operation_type"> | |
362 <param name="command_type" value="arithmetic-kmers" /> | |
363 <param name="arithmetic_operations" value="divide-round" /> | |
364 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
365 <param name="X" value="2" /> | |
366 </conditional> | |
367 <output name="read_db" ftype="meryldb"> | |
368 <assert_contents> | |
369 <has_size value="56539" delta="300" /> | |
370 </assert_contents> | |
371 </output> | |
372 </test> | |
373 <!-- test 11 --> | |
374 <test expect_num_outputs="1"> | |
375 <conditional name="operation_type"> | |
376 <param name="command_type" value="arithmetic-kmers" /> | |
377 <param name="arithmetic_operations" value="modulo" /> | |
378 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
379 <param name="X" value="3" /> | |
380 </conditional> | |
381 <output name="read_db" ftype="meryldb"> | |
382 <assert_contents> | |
383 <has_size value="37542" delta="300" /> | |
384 </assert_contents> | |
385 </output> | |
386 </test> | |
387 <test expect_num_outputs="1"> | |
388 <conditional name="operation_type"> | |
389 <param name="command_type" value="groups-kmers" /> | |
390 <param name="groups_operations" value="union" /> | |
391 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
392 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
393 </conditional> | |
394 <output name="read_db" ftype="meryldb"> | |
395 <assert_contents> | |
396 <has_size value="36417" delta="300" /> | |
397 </assert_contents> | |
398 </output> | |
399 </test> | |
400 <test expect_num_outputs="1"> | |
401 <conditional name="operation_type"> | |
402 <param name="command_type" value="groups-kmers" /> | |
403 <param name="groups_operations" value="union-min" /> | |
404 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
405 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
406 </conditional> | |
407 <output name="read_db" ftype="meryldb"> | |
408 <assert_contents> | |
409 <has_size value="59226" delta="300" /> | |
410 </assert_contents> | |
411 </output> | |
412 </test> | |
413 <test expect_num_outputs="1"> | |
414 <conditional name="operation_type"> | |
415 <param name="command_type" value="groups-kmers" /> | |
416 <param name="groups_operations" value="union-max" /> | |
417 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
418 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
419 </conditional> | |
420 <output name="read_db" ftype="meryldb"> | |
421 <assert_contents> | |
422 <has_size value="59234" delta="300" /> | |
423 </assert_contents> | |
424 </output> | |
425 </test> | |
426 <test expect_num_outputs="1"> | |
427 <conditional name="operation_type"> | |
428 <param name="command_type" value="groups-kmers" /> | |
429 <param name="groups_operations" value="union-sum" /> | |
430 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
431 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
432 </conditional> | |
433 <output name="read_db" ftype="meryldb"> | |
434 <assert_contents> | |
435 <has_size value="59004" delta="300" /> | |
436 </assert_contents> | |
437 </output> | |
438 </test> | |
439 <test expect_num_outputs="1"> | |
440 <conditional name="operation_type"> | |
441 <param name="command_type" value="groups-kmers" /> | |
442 <param name="groups_operations" value="intersect" /> | |
443 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
444 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
445 </conditional> | |
446 <output name="read_db" ftype="meryldb"> | |
447 <assert_contents> | |
448 <has_size value="14951" delta="300" /> | |
449 </assert_contents> | |
450 </output> | |
451 </test> | |
452 <test expect_num_outputs="1"> | |
453 <conditional name="operation_type"> | |
454 <param name="command_type" value="groups-kmers" /> | |
455 <param name="groups_operations" value="intersect-min" /> | |
456 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
457 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
458 </conditional> | |
459 <output name="read_db" ftype="meryldb"> | |
460 <assert_contents> | |
461 <has_size value="14957" delta="300" /> | |
462 </assert_contents> | |
463 </output> | |
464 </test> | |
465 <test expect_num_outputs="1"> | |
466 <conditional name="operation_type"> | |
467 <param name="command_type" value="groups-kmers" /> | |
468 <param name="groups_operations" value="intersect-max" /> | |
469 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
470 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
471 </conditional> | |
472 <output name="read_db" ftype="meryldb"> | |
473 <assert_contents> | |
474 <has_size value="14956" delta="300" /> | |
475 </assert_contents> | |
476 </output> | |
477 </test> | |
478 <test expect_num_outputs="1"> | |
479 <conditional name="operation_type"> | |
480 <param name="command_type" value="groups-kmers" /> | |
481 <param name="groups_operations" value="intersect-sum" /> | |
482 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
483 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
484 </conditional> | |
485 <output name="read_db" ftype="meryldb"> | |
486 <assert_contents> | |
487 <has_size value="14953" delta="300" /> | |
488 </assert_contents> | |
489 </output> | |
490 </test> | |
491 <!-- test 20 --> | |
492 <test expect_num_outputs="1"> | |
493 <conditional name="operation_type"> | |
494 <param name="command_type" value="groups-kmers" /> | |
495 <param name="groups_operations" value="subtract" /> | |
496 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
497 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
498 </conditional> | |
499 <output name="read_db" ftype="meryldb"> | |
500 <assert_contents> | |
501 <has_size value="23999" delta="300" /> | |
502 </assert_contents> | |
503 </output> | |
504 </test> | |
505 <test expect_num_outputs="1"> | |
506 <conditional name="operation_type"> | |
507 <param name="command_type" value="groups-kmers" /> | |
508 <param name="groups_operations" value="difference" /> | |
509 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
510 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
511 </conditional> | |
512 <output name="read_db" ftype="meryldb"> | |
513 <assert_contents> | |
514 <has_size value="24016" delta="300" /> | |
515 </assert_contents> | |
516 </output> | |
517 </test> | |
518 <test expect_num_outputs="1"> | |
519 <conditional name="operation_type"> | |
520 <param name="command_type" value="groups-kmers" /> | |
521 <param name="groups_operations" value="symmetric-difference" /> | |
522 <param name="input_meryldb_02" value="output_02.read-db.meryldb" ftype="meryldb" /> | |
523 <param name="input_meryldb_03" value="output_03.read-db.meryldb" ftype="meryldb" /> | |
524 </conditional> | |
525 <output name="read_db" ftype="meryldb"> | |
526 <assert_contents> | |
527 <has_size value="57455" delta="300" /> | |
528 </assert_contents> | |
529 </output> | |
530 </test> | |
531 <test expect_num_outputs="6"> | |
532 <conditional name="operation_type"> | |
533 <param name="command_type" value="trio-mode" /> | |
534 <param name="child_reads" value="child.fasta" /> | |
535 <param name="paternal_reads" value="paternal.fasta" /> | |
536 <param name="maternal_reads" value="maternal.fasta" /> | |
537 <conditional name="options_kmer_size"> | |
538 <param name="kmer_size" value="provide" /> | |
539 <param name="input_kmer_size" value="7" /> | |
540 </conditional> | |
541 </conditional> | |
542 <output name="read_db" ftype="meryldb"> | |
543 <assert_contents> | |
544 <has_size value="1573" delta="300" /> | |
545 </assert_contents> | |
546 </output> | |
547 <output name="read_db_hist" file="output_23.read-db.hist" /> | |
548 <output name="pat_db" ftype="meryldb"> | |
549 <assert_contents> | |
550 <has_size value="1779" delta="300" /> | |
551 </assert_contents> | |
552 </output> | |
553 <output name="pat_db_hist" file="output_23.pat.hist" /> | |
554 <output name="mat_db" ftype="meryldb"> | |
555 <assert_contents> | |
556 <has_size value="1569" delta="300" /> | |
557 </assert_contents> | |
558 </output> | |
559 <output name="mat_db_hist" file="output_23.mat.hist" /> | |
560 </test> | |
561 <test expect_num_outputs="1"> | |
562 <conditional name="operation_type"> | |
563 <param name="command_type" value="histogram-kmers" /> | |
564 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> | |
565 </conditional> | |
566 <output name="read_db_hist" file="output_24.read-db.hist" /> | |
567 </test> | |
568 </tests> | |
569 <help><![CDATA[ | |
570 | |
571 .. class:: infomark | |
572 | |
573 **Purpose** | |
574 | |
575 Meryl is a k-mer counter. It is built into the Celera assembler and is also available as a stand-alone application. | |
576 Meryl uses a sorting-based approach that sorts the k-mers in lexicographical order. | |
577 | |
578 In addition to generating count databases, meryl can perform simple operations on them. | |
579 | |
580 ----- | |
581 | |
582 .. class:: infomark | |
583 | |
584 **Basic functions** | |
585 | |
586 The functions that meryl includes are described below: | |
587 | |
588 :: | |
589 | |
590 COUNT OPERATIONS | |
591 - Count: count the occurrences of canonical k-mers | |
592 - Count-forward: count the occurrences of forward k-mers | |
593 - Count-reverse: count the occurrences of reverse k-mers | |
594 FILTERING OPERATIONS | |
595 - Less-than: return k-mers that occur fewer than N times in the input | |
596 - Greater-than: return k-mers that occur more than N times in the input | |
597 - Equal-to: return k-mers that occur exactly N times in the input | |
598 - Not-equal-to: return k-mers that do not occur exactly N times in the input | |
599 ARITHMETIC OPERATIONS | |
600 - Increase: add x to the count of each k-mer | |
601 - Decrease: subtract x from the count of each k-mer | |
602 - Multiply: multiply the count of each k-mer by x | |
603 - Divide: divide the count of each k-mer by x | |
604 - Divide-round: divide the count of each k-mer by x and round the results | |
605 - Modulo: set the count of each k-mer to the remainder of the count divided by x | |
606 OPERATIONS ON SETS | |
607 - Union-min: return k-mers that occur in any input, set the count to the minimum count | |
608 - Union-max: return k-mers that occur in any input, set the count to the maximum count | |
609 - Union-sum: return k-mers that occur in any input, set the count to the sum of the counts | |
610 - Intersect: return k-mers that occur in all inputs, set the count to the count in the first input | |
611 - Intersect-min: return k-mers that occur in all inputs, set the count to the minimum count | |
612 - Intersect-max: return k-mers that occur in all inputs, set the count to the maximum count | |
613 - Intersect-sum: return k-mers that occur in all inputs, set the count to the sum of the counts | |
614 - Subtract: return k-mers that occur in the first input, subtracting counts from the other inputs | |
615 - Difference: return k-mers that occur in the first input, but none of the other inputs | |
616 - Symmetric-difference: return k-mers that occur in exactly one input | |
617 | |
618 ----- | |
619 | |
620 .. class:: infomark | |
621 | |
622 **Additional function: build hap-mers dbs for trios** | |
623 | |
624 In addition to the basic operations, this wrapper can build the hap-mer databases for trios, in accordance | |
625 with `merqury's recommended guidelines. <https://github.com/marbl/merqury/wiki/1.-Prepare-meryl-dbs#3-build-hap-mer-dbs-for-trios>`_ | |
626 | |
627 ]]> </help> | |
628 <expand macro="citations" /> | |
629 </tool> |