comparison meryl.xml @ 4:d5d68989caec draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meryl commit 51618128e9d1a23f1f3a8c9df7f2b23a2ba3f3c2"
author iuc
date Fri, 30 Apr 2021 06:40:29 +0000
parents 9f7a227436d6
children d63739222f68
comparison
equal deleted inserted replaced
3:9f7a227436d6 4:d5d68989caec
27 mkdir -p ./temp_db/ && 27 mkdir -p ./temp_db/ &&
28 tar -zxf $operation_type.input_meryldb_02 -C ./temp_db/ && 28 tar -zxf $operation_type.input_meryldb_02 -C ./temp_db/ &&
29 mv ./temp_db/* tmp.meryl && 29 mv ./temp_db/* tmp.meryl &&
30 meryl 30 meryl
31 $operation_type.filter_operations 31 $operation_type.filter_operations
32 $operation_type.N 32 #if $operation_type.filter_type.type == 'times'
33 $operation_type.filter_type.N
34 #elif $operation_type.filter_type.type == 'frequency'
35 distinct=${operation_type.filter_type.distinct}
36 #end if
33 tmp.meryl 37 tmp.meryl
34 output read-db.meryl && 38 output read-db.meryl &&
35 tar -zcf read-db.meryldb read-db.meryl 39 tar -zcf read-db.meryldb read-db.meryl
36 #elif $operation_type.command_type == 'arithmetic-kmers' 40 #elif $operation_type.command_type == 'arithmetic-kmers'
37 mkdir -p ./temp_db/ && 41 mkdir -p ./temp_db/ &&
58 #elif $operation_type.command_type == 'histogram-kmers' 62 #elif $operation_type.command_type == 'histogram-kmers'
59 mkdir -p ./temp_db/ && 63 mkdir -p ./temp_db/ &&
60 tar -zxf $operation_type.input_meryldb_02 -C ./temp_db/ && 64 tar -zxf $operation_type.input_meryldb_02 -C ./temp_db/ &&
61 mv ./temp_db/* tmp.meryl && 65 mv ./temp_db/* tmp.meryl &&
62 meryl histogram tmp.meryl > read-db.hist 66 meryl histogram tmp.meryl > read-db.hist
67 #elif $operation_type.command_type == 'print'
68 mkdir -p ./temp_db/ &&
69 tar -zxf $operation_type.input_meryldb_02 -C ./temp_db/ &&
70 mv ./temp_db/* tmp.meryl &&
71 meryl print tmp.meryl > read-db.tabular
63 #elif $operation_type.command_type == 'trio-mode' 72 #elif $operation_type.command_type == 'trio-mode'
64 export MERQURY=\$(dirname \$(command -v merqury.sh))/../share/merqury/ && 73 export MERQURY=\$(dirname \$(command -v merqury.sh))/../share/merqury/ &&
65 #if $operation_type.options_kmer_size.kmer_size == 'estimate' 74 #if $operation_type.options_kmer_size.kmer_size == 'estimate'
66 #from math import log 75 #from math import log
67 #set size=int(log(int($operation_type.options_kmer_size.genome_size)*(1-float($operation_type.options_kmer_size.collision_rate))/float($operation_type.options_kmer_size.collision_rate))/log(4)) 76 #set size=int(log(int($operation_type.options_kmer_size.genome_size)*(1-float($operation_type.options_kmer_size.collision_rate))/float($operation_type.options_kmer_size.collision_rate))/log(4))
136 <option value="count-kmers">Count operations</option> 145 <option value="count-kmers">Count operations</option>
137 <option value="filter-kmers">Filter operations</option> 146 <option value="filter-kmers">Filter operations</option>
138 <option value="arithmetic-kmers">Arithmetic operations on kmer counts</option> 147 <option value="arithmetic-kmers">Arithmetic operations on kmer counts</option>
139 <option value="groups-kmers">Operations on sets of k-mers</option> 148 <option value="groups-kmers">Operations on sets of k-mers</option>
140 <option value="histogram-kmers">Generate histogram dataset</option> 149 <option value="histogram-kmers">Generate histogram dataset</option>
150 <option value="print">Print kmer counts to a tabular file</option>
141 <option value="trio-mode">Build hap-mer dbs for trios</option> 151 <option value="trio-mode">Build hap-mer dbs for trios</option>
142 </param> 152 </param>
143 <when value="count-kmers"> 153 <when value="count-kmers">
144 <param name="count_operations" type="select" label="Count operations" help="Select an operation to be executed"> 154 <param name="count_operations" type="select" label="Count operations" help="Select an operation to be executed">
145 <option value="count">Count: count the occurrences of canonical k-mers</option> 155 <option value="count">Count: count the occurrences of canonical k-mers</option>
161 </when> 171 </when>
162 </conditional> 172 </conditional>
163 </when> 173 </when>
164 <when value="filter-kmers"> 174 <when value="filter-kmers">
165 <param name="filter_operations" type="select" label="Filter operations" help="Select an operation to be executed"> 175 <param name="filter_operations" type="select" label="Filter operations" help="Select an operation to be executed">
166 <option value="less-than">Less-than: return k-mers that occur fewer than N times in the input</option> 176 <option value="less-than">Less-than: return k-mers that occur fewer than a threshold value</option>
167 <option value="greater-than">Greater-than: return k-mers that occur more than N times in the input</option> 177 <option value="greater-than">Greater-than: return k-mers that occur more than a threshold value</option>
168 <option value="equal-to">Equal-to: return k-mers that occur exactly N times in the input</option> 178 <option value="equal-to">Equal-to: return k-mers that occur exactly a threshold value</option>
169 <option value="not-equal-to">Not-equal-to: return k-mers that do not occur exactly N times in the input</option> 179 <option value="not-equal-to">Not-equal-to: return k-mers that do not occur exactly a threshold value</option>
170 </param> 180 </param>
171 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" help="Select a meryldb dataset" /> 181 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" help="Select a meryldb dataset" />
172 <param name="N" type="integer" min="0" max="50000000000" value="" optional="true" label="Filter value" /> 182
183 <conditional name="filter_type">
184 <param name="type" type="select" label="Type of filtering">
185 <option value="times">Return kmers that occur N times in the input</option>
186 <option value="frequency">Return kmers occurring at specific frequencies</option>
187 </param>
188 <when value="times">
189 <param name="N" type="integer" min="0" max="50000000000" value="" optional="true" label="Number of times in the input" help="Return kmers that occur N times in the input."/>
190 </when>
191 <when value="frequency">
192 <param name="distinct" type="float" min="0" max="1" value="0.9998" optional="true" label="Frequency" help="Return kmers that at specific frequency (e.g. frequency = 0.9998 returns top 0.02% most frequent)"/>
193 </when>
194 </conditional>
173 </when> 195 </when>
174 <when value="arithmetic-kmers"> 196 <when value="arithmetic-kmers">
175 <param name="arithmetic_operations" type="select" label="Arithmetic operations" help="Select an operation to be executed"> 197 <param name="arithmetic_operations" type="select" label="Arithmetic operations" help="Select an operation to be executed">
176 <option value="increase">Increase: add x to the count of each k-mer</option> 198 <option value="increase">Increase: add x to the count of each k-mer</option>
177 <option value="decrease">Decrease: subsctract x from the count of each k-mer</option> 199 <option value="decrease">Decrease: subsctract x from the count of each k-mer</option>
201 <param name="input_meryldb_03" type="data" format="meryldb" label="Input meryldb" /> 223 <param name="input_meryldb_03" type="data" format="meryldb" label="Input meryldb" />
202 </when> 224 </when>
203 <when value="histogram-kmers"> 225 <when value="histogram-kmers">
204 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" /> 226 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" />
205 </when> 227 </when>
228 <when value="print">
229 <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" />
230 </when>
206 <when value="trio-mode"> 231 <when value="trio-mode">
207 <param name="child_reads" type="data" format="fastq,fasta" multiple="true" label="F1 reads" help="Select F1 reads used for generating the assembly" /> 232 <param name="child_reads" type="data" format="fastq,fasta" multiple="true" label="F1 reads" help="Select F1 reads used for generating the assembly" />
208 <param name="paternal_reads" type="data" format="fastq,fasta" multiple="true" optional="true" label="Paternal reads" help="Select the paternal reads used for generating the assembly" /> 233 <param name="paternal_reads" type="data" format="fastq,fasta" multiple="true" optional="true" label="Paternal reads" help="Select the paternal reads used for generating the assembly" />
209 <param name="maternal_reads" type="data" format="fastq,fasta" multiple="true" optional="true" label="Maternal reads" help="Select the maternal reads used for generating the assembly" /> 234 <param name="maternal_reads" type="data" format="fastq,fasta" multiple="true" optional="true" label="Maternal reads" help="Select the maternal reads used for generating the assembly" />
210 <conditional name="options_kmer_size"> 235 <conditional name="options_kmer_size">
223 </when> 248 </when>
224 </conditional> 249 </conditional>
225 </inputs> 250 </inputs>
226 <outputs> 251 <outputs>
227 <data name="read_db" format="meryldb" from_work_dir="read-db.meryldb" label="${tool.name} on ${on_string}: read-db.meryldb"> 252 <data name="read_db" format="meryldb" from_work_dir="read-db.meryldb" label="${tool.name} on ${on_string}: read-db.meryldb">
228 <filter>operation_type["command_type"] != "histogram-kmers"</filter> 253 <filter>operation_type["command_type"] != "histogram-kmers" and operation_type["command_type"] != "print"</filter>
229 </data> 254 </data>
230 <data name="read_db_hist" format="tabular" from_work_dir="read-db.hist" label="${tool.name} on ${on_string}: read-db histogram"> 255 <data name="read_db_hist" format="tabular" from_work_dir="read-db.hist" label="${tool.name} on ${on_string}: read-db histogram">
231 <filter>operation_type["command_type"] == "histogram-kmers" or operation_type["command_type"] == "trio-mode"</filter> 256 <filter>operation_type["command_type"] == "histogram-kmers" or operation_type["command_type"] == "trio-mode"</filter>
257 </data>
258 <data name="read_db_print" format="tabular" from_work_dir="read-db.tabular" label="${tool.name} on ${on_string}: kmer counts">
259 <filter>operation_type["command_type"] == "print"</filter>
232 </data> 260 </data>
233 <data name="pat_db" format="meryldb" from_work_dir="pat.meryldb" label="${tool.name} on ${on_string}: pat.meryldb"> 261 <data name="pat_db" format="meryldb" from_work_dir="pat.meryldb" label="${tool.name} on ${on_string}: pat.meryldb">
234 <filter>operation_type["command_type"] == "trio-mode"</filter> 262 <filter>operation_type["command_type"] == "trio-mode"</filter>
235 </data> 263 </data>
236 <data name="pat_db_hist" format="tabular" from_work_dir="pat.inherited.hist" label="${tool.name} on ${on_string}: paternal inherited histogram"> 264 <data name="pat_db_hist" format="tabular" from_work_dir="pat.inherited.hist" label="${tool.name} on ${on_string}: paternal inherited histogram">
266 <test expect_num_outputs="1"> 294 <test expect_num_outputs="1">
267 <conditional name="operation_type"> 295 <conditional name="operation_type">
268 <param name="command_type" value="filter-kmers" /> 296 <param name="command_type" value="filter-kmers" />
269 <param name="filter_operations" value="less-than" /> 297 <param name="filter_operations" value="less-than" />
270 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> 298 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
271 <param name="N" value="100" /> 299 <conditional name="filter_type">
300 <param name="N" value="100" />
301 </conditional>
272 </conditional> 302 </conditional>
273 <output name="read_db" ftype="meryldb"> 303 <output name="read_db" ftype="meryldb">
274 <assert_contents> 304 <assert_contents>
275 <has_size value="32077" delta="300" /> 305 <has_size value="32077" delta="300" />
276 </assert_contents> 306 </assert_contents>
279 <test expect_num_outputs="1"> 309 <test expect_num_outputs="1">
280 <conditional name="operation_type"> 310 <conditional name="operation_type">
281 <param name="command_type" value="filter-kmers" /> 311 <param name="command_type" value="filter-kmers" />
282 <param name="filter_operations" value="greater-than" /> 312 <param name="filter_operations" value="greater-than" />
283 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> 313 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
284 <param name="N" value="80" /> 314 <conditional name="filter_type">
315 <param name="N" value="80" />
316 </conditional>
285 </conditional> 317 </conditional>
286 <output name="read_db" ftype="meryldb"> 318 <output name="read_db" ftype="meryldb">
287 <assert_contents> 319 <assert_contents>
288 <has_size value="49643" delta="300" /> 320 <has_size value="49643" delta="300" />
321 </assert_contents>
322 </output>
323 </test>
324 <test expect_num_outputs="1">
325 <conditional name="operation_type">
326 <param name="command_type" value="filter-kmers" />
327 <param name="filter_operations" value="greater-than" />
328 <param name="input_meryldb_02" value="maternal.meryldb" ftype="meryldb" />
329 <conditional name="filter_type">
330 <param name="distinct" value="0.9998" />
331 </conditional>
332 </conditional>
333 <output name="read_db" ftype="meryldb" >
334 <assert_contents>
335 <has_size value="1634" delta="300" />
289 </assert_contents> 336 </assert_contents>
290 </output> 337 </output>
291 </test> 338 </test>
292 <test expect_num_outputs="1"> 339 <test expect_num_outputs="1">
293 <conditional name="operation_type"> 340 <conditional name="operation_type">
294 <param name="command_type" value="filter-kmers" /> 341 <param name="command_type" value="filter-kmers" />
295 <param name="filter_operations" value="equal-to" /> 342 <param name="filter_operations" value="equal-to" />
296 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> 343 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
297 <param name="N" value="100" /> 344 <conditional name="filter_type">
345 <param name="N" value="100" />
346 </conditional>
298 </conditional> 347 </conditional>
299 <output name="read_db" ftype="meryldb"> 348 <output name="read_db" ftype="meryldb">
300 <assert_contents> 349 <assert_contents>
301 <has_size value="2621" delta="300"/> 350 <has_size value="2621" delta="300"/>
302 </assert_contents> 351 </assert_contents>
305 <test expect_num_outputs="1"> 354 <test expect_num_outputs="1">
306 <conditional name="operation_type"> 355 <conditional name="operation_type">
307 <param name="command_type" value="filter-kmers" /> 356 <param name="command_type" value="filter-kmers" />
308 <param name="filter_operations" value="not-equal-to" /> 357 <param name="filter_operations" value="not-equal-to" />
309 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> 358 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
310 <param name="N" value="100" /> 359 <conditional name="filter_type">
360 <param name="N" value="100" />
361 </conditional>
311 </conditional> 362 </conditional>
312 <output name="read_db" ftype="meryldb"> 363 <output name="read_db" ftype="meryldb">
313 <assert_contents> 364 <assert_contents>
314 <has_size value="59378" delta="300" /> 365 <has_size value="59378" delta="300" />
315 </assert_contents> 366 </assert_contents>
573 <param name="command_type" value="histogram-kmers" /> 624 <param name="command_type" value="histogram-kmers" />
574 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" /> 625 <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
575 </conditional> 626 </conditional>
576 <output name="read_db_hist" file="output_24.read-db.hist" /> 627 <output name="read_db_hist" file="output_24.read-db.hist" />
577 </test> 628 </test>
629 <test expect_num_outputs="1">
630 <conditional name="operation_type">
631 <param name="command_type" value="print" />
632 <param name="input_meryldb_02" value="maternal.meryldb" ftype="meryldb" />
633 </conditional>
634 <output name="read_db_print" file="output_25.read-db.tabular" sort="True"/>
635 </test>
578 </tests> 636 </tests>
579 <help><![CDATA[ 637 <help><![CDATA[
580 638
581 .. class:: infomark 639 .. class:: infomark
582 640
586 Meryl uses a sorting-based approach that sorts the k-mers in lexicographical order. 644 Meryl uses a sorting-based approach that sorts the k-mers in lexicographical order.
587 645
588 In addition to generating count-databases, meryl can perform simple operations on them. 646 In addition to generating count-databases, meryl can perform simple operations on them.
589 647
590 ----- 648 -----
591 649
592 .. class:: infomark 650 .. class:: infomark
593 651
594 **Basic functions** 652 **Basic functions**
595 653
596 The functions that meryl includes are described below: 654 The functions that meryl includes are described below:
597 655
598 :: 656 ::
624 - Subtract: return k-mers that occur in the first input, subtracting counts from the other inputs 682 - Subtract: return k-mers that occur in the first input, subtracting counts from the other inputs
625 - Difference: return k-mers that occur in the first input, but none of the other inputs 683 - Difference: return k-mers that occur in the first input, but none of the other inputs
626 - Symmetric-difference: return k-mers that occur in exactly one input 684 - Symmetric-difference: return k-mers that occur in exactly one input
627 685
628 ----- 686 -----
629 687
630 .. class:: infomark 688 .. class:: infomark
631 689
632 **Additional function: build hap-mers dbs for trios** 690 **Additional function: build hap-mers dbs for trios**
633 691
634 In addition to the basic operations, this wrapper can build the hap-mer databases for trios, in accordance 692 In addition to the basic operations, this wrapper can build the hap-mer databases for trios, in accordance
635 with `merqury's recommended guidelines. <https://github.com/marbl/merqury/wiki/1.-Prepare-meryl-dbs#3-build-hap-mer-dbs-for-trios>`_ 693 with `merqury's recommended guidelines. <https://github.com/marbl/merqury/wiki/1.-Prepare-meryl-dbs#3-build-hap-mer-dbs-for-trios>`_
636 694