Mercurial > repos > iuc > meryl
view meryl.xml @ 8:5ae3496f9ca3 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meryl commit 0625ea0c1df66a724d0ab3769fcaee7a07810a20"
author | iuc |
---|---|
date | Thu, 12 Aug 2021 13:20:54 +0000 |
parents | 51c2360aa807 |
children | eadfd71dde37 |
line wrap: on
line source
<!--
    Galaxy wrapper for meryl, a genomic k-mer counter.

    A single `operation_type` conditional selects one of seven modes: counting,
    filtering, arithmetic, set operations, histogram, print and trio hap-mer
    building. Meryl databases are directories, so they are exchanged between
    Galaxy jobs as gzip-compressed tarballs (datatype `meryldb`): every mode that
    reads a database first untars it into the working directory, and every mode
    that writes one tars the result into `*.meryldb`.
-->
<tool id='meryl' name='Meryl' version='@TOOL_VERSION@+@GALAXY_TOOL_VERSION@@SUFFIX_VERSION@' profile='20.01'>
    <description>a genomic k-mer counter and sequence utility</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro='edam_ontology' />
    <expand macro='requirements' />
    <version_command>meryl --version</version_command>
    <command detect_errors='exit_code'><![CDATA[
        ## Convert the memory granted by Galaxy (MB, default 8192) to whole GB for meryl.
        export GALAXY_MEMORY_GB=\$((\${GALAXY_MEMORY_MB:-8192}/1024)) &&
        #if $operation_type.command_type == 'count-kmers'
            #if $operation_type.options_kmer_size.kmer_size == 'estimate'
                ## k = log4( genome_size * (1 - collision_rate) / collision_rate ), truncated to an integer
                #from math import log
                #set size=int(log(int($operation_type.options_kmer_size.genome_size)*(1-float($operation_type.options_kmer_size.collision_rate))/float($operation_type.options_kmer_size.collision_rate))/log(4))
            #elif $operation_type.options_kmer_size.kmer_size == 'provide'
                #set size=$operation_type.options_kmer_size.input_kmer_size
            #end if
            ## Link the dataset under a name carrying its Galaxy extension.
            ln -s '$operation_type.input_reads' ./input.${operation_type.input_reads.ext} &&
            meryl $operation_type.count_operations k=$size memory=\$GALAXY_MEMORY_GB threads=\${GALAXY_SLOTS:-1} ./input.${operation_type.input_reads.ext} output read-db.meryl &&
            echo 'K-mer size: ${size}' &&
            tar -zcf read-db.meryldb read-db.meryl
        #elif $operation_type.command_type == 'filter-kmers'
            mkdir -p ./temp_db/ &&
            tar -zxf '$operation_type.input_meryldb_02' -C ./temp_db/ &&
            mv ./temp_db/* tmp.meryl &&
            meryl $operation_type.filter_operations
            #if $operation_type.filter_type.type == 'times'
                $operation_type.filter_type.N
            #elif $operation_type.filter_type.type == 'frequency'
                distinct=${operation_type.filter_type.distinct}
            #end if
            tmp.meryl output read-db.meryl &&
            tar -zcf read-db.meryldb read-db.meryl
        #elif $operation_type.command_type == 'arithmetic-kmers'
            mkdir -p ./temp_db/ &&
            tar -zxf '$operation_type.input_meryldb_02' -C ./temp_db/ &&
            mv ./temp_db/* tmp.meryl &&
            meryl $operation_type.arithmetic_operations $operation_type.X tmp.meryl output read-db.meryl &&
            tar -zcf read-db.meryldb read-db.meryl
        #elif $operation_type.command_type == 'groups-kmers'
            ## Unpack every input database into its own db_<index>.meryl directory.
            #for $i,$mdb in enumerate($operation_type.input_meryldb_02)
                mkdir -p ./tmp_folder_$i/ &&
                tar -zxf '$mdb' -C ./tmp_folder_$i &&
                mv ./tmp_folder_$i/* db_'${i}'.meryl &&
            #end for
            meryl $operation_type.groups_operations output read-db.meryl db_* &&
            tar -zcf read-db.meryldb read-db.meryl
        #elif $operation_type.command_type == 'histogram-kmers'
            mkdir -p ./temp_db/ &&
            tar -zxf '$operation_type.input_meryldb_02' -C ./temp_db/ &&
            mv ./temp_db/* tmp.meryl &&
            meryl histogram tmp.meryl > read-db.hist
        #elif $operation_type.command_type == 'print'
            mkdir -p ./temp_db/ &&
            tar -zxf '$operation_type.input_meryldb_02' -C ./temp_db/ &&
            mv ./temp_db/* tmp.meryl &&
            meryl print tmp.meryl > read-db.tabular
        #elif $operation_type.command_type == 'trio-mode'
            ## Locate the merqury share directory relative to the merqury.sh executable.
            export MERQURY=\$(dirname \$(command -v merqury.sh))/../share/merqury/ &&
            #if $operation_type.options_kmer_size.kmer_size == 'estimate'
                #from math import log
                #set size=int(log(int($operation_type.options_kmer_size.genome_size)*(1-float($operation_type.options_kmer_size.collision_rate))/float($operation_type.options_kmer_size.collision_rate))/log(4))
            #elif $operation_type.options_kmer_size.kmer_size == 'provide'
                #set size=$operation_type.options_kmer_size.input_kmer_size
            #end if
            ## Count each read set separately, then merge the per-file databases.
            ## NOTE(review): paternal_reads and maternal_reads are declared optional,
            ## but these loops iterate over them unconditionally - confirm intent.
            #for $i, $read in enumerate($operation_type.paternal_reads):
                mkdir 'paternal${i}.meryl' &&
                meryl count k=$size memory=\$GALAXY_MEMORY_GB threads=\${GALAXY_SLOTS:-1} '${read}' output 'paternal${i}.meryl' &&
            #end for
            meryl union-sum paternal*.meryl output pat.meryl &&
            #for $i, $read in enumerate($operation_type.maternal_reads):
                mkdir 'maternal${i}.meryl' &&
                meryl count k=$size memory=\$GALAXY_MEMORY_GB threads=\${GALAXY_SLOTS:-1} '${read}' output 'maternal${i}.meryl' &&
            #end for
            meryl union-sum maternal*.meryl output mat.meryl &&
            #for $i, $read in enumerate($operation_type.child_reads):
                mkdir 'child${i}.meryl' &&
                meryl count k=$size memory=\$GALAXY_MEMORY_GB threads=\${GALAXY_SLOTS:-1} '${read}' output 'child${i}.meryl' &&
            #end for
            meryl union-sum child*.meryl output child.meryl &&

            ## mat specific kmers
            meryl difference mat.meryl pat.meryl output mat.only.meryl &&
            meryl histogram mat.only.meryl > mat.only.hist &&
            java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar mat.only.hist > mat.only.ploidy &&
            ## The filter threshold is the last field of the second line of the ploidy report.
            VAR=`sed -n 2p mat.only.ploidy | awk '{print \$NF}'` &&
            meryl greater-than \$VAR output mat.only.filt.meryl mat.only.meryl &&

            ## pat specific kmers
            meryl difference pat.meryl mat.meryl output pat.only.meryl &&
            meryl histogram pat.only.meryl > pat.only.hist &&
            java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar pat.only.hist > pat.only.ploidy &&
            VAR=`sed -n 2p pat.only.ploidy | awk '{print \$NF}'` &&
            meryl greater-than \$VAR output pat.only.filt.meryl pat.only.meryl &&

            ## shared kmers
            meryl intersect output shared.meryl mat.meryl pat.meryl &&

            ## mat hapmers
            meryl intersect output mat.inherited.meryl child.meryl mat.only.filt.meryl &&
            meryl histogram mat.inherited.meryl > mat.inherited.hist &&
            java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar mat.inherited.hist > mat.inherited.ploidy &&
            VAR=`sed -n 2p mat.inherited.ploidy | awk '{print \$NF}'` &&
            meryl greater-than \$VAR output mat.hapmer.meryl mat.inherited.meryl &&
            tar -czf 'mat.meryldb' mat.hapmer.meryl &&

            ## pat hapmers
            meryl intersect output pat.inherited.meryl child.meryl pat.only.filt.meryl &&
            meryl histogram pat.inherited.meryl > pat.inherited.hist &&
            java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar pat.inherited.hist > pat.inherited.ploidy &&
            VAR=`sed -n 2p pat.inherited.ploidy | awk '{print \$NF}'` &&
            meryl greater-than \$VAR output pat.hapmer.meryl pat.inherited.meryl &&
            tar -czf 'pat.meryldb' pat.hapmer.meryl &&

            ## shared hapmers
            meryl intersect output shared.inherited.meryl child.meryl shared.meryl &&
            meryl histogram shared.inherited.meryl > shared.inherited.hist &&
            java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar shared.inherited.hist > shared.inherited.ploidy &&
            VAR=`sed -n 2p shared.inherited.ploidy | awk '{print \$NF}'` &&
            meryl greater-than \$VAR output shared.filt.meryl shared.inherited.meryl &&

            ## child hapmers
            meryl union-sum output child.inherited.meryl mat.inherited.meryl pat.inherited.meryl shared.inherited.meryl &&
            meryl difference output read.only.meryl child.meryl child.inherited.meryl &&
            ## The read_db_hist output is collected from read-db.hist in trio mode as well,
            ## so the histogram of the read-only database has to be written here.
            meryl histogram read.only.meryl > read-db.hist &&
            tar -czf 'read-db.meryldb' read.only.meryl &&
            echo 'K-mer size: ${size}'
        #end if
    ]]>    </command>
    <inputs>
        <conditional name="operation_type">
            <param name="command_type" type="select" label="Operation type selector" help="Select a type of operation">
                <option value="count-kmers">Count operations</option>
                <option value="filter-kmers">Filter operations</option>
                <option value="arithmetic-kmers">Arithmetic operations on k-mer counts</option>
                <option value="groups-kmers">Operations on sets of k-mers</option>
                <option value="histogram-kmers">Generate histogram dataset</option>
                <option value="print">Print k-mer counts to a tabular file</option>
                <option value="trio-mode">Build hap-mer dbs for trios</option>
            </param>
            <when value="count-kmers">
                <param name="count_operations" type="select" label="Count operations" help="Select an operation to be executed">
                    <option value="count">Count: count the occurrences of canonical k-mers</option>
                    <option value="count-forward">Count-forward: count the occurrences of forward k-mers</option>
                    <option value="count-reverse">Count-reverse: count the occurrences of reverse k-mers</option>
                </param>
                <param name="input_reads" type="data" format="fastq,fastq.gz,fasta,fasta.gz" label="Input sequences" help="Select your reads in FASTA/FASTQ format." />
                <conditional name="options_kmer_size">
                    <param name="kmer_size" type="select" label="K-mer size selector">
                        <option value="provide">Set a k-mer size</option>
                        <option value="estimate">Estimate the best k-mer size</option>
                    </param>
                    <when value="provide">
                        <param name="input_kmer_size" type="integer" min="1" max="50" value="" label="K-mer size" help="For a human genome, the best k-mer size is k=21 for both haploid (3.1G) or diploid (6.2G)." />
                    </when>
                    <when value="estimate">
                        <param name="genome_size" type="integer" min="1000" max="70000000000" value="1000" label="Genome size" help="Haploid genome size or diploid genome size, depending on what we evaluate. In bp. Only required if the k-mer size is not provided." />
                        <param name="collision_rate" type="float" min="0.0001" max="0.01" value="0.001" label="Tolerable collision rate" help="Tolerable collision rate. By default is 0.001." />
                    </when>
                </conditional>
            </when>
            <when value="filter-kmers">
                <param name="filter_operations" type="select" label="Filter operations" help="Select an operation to be executed">
                    <option value="less-than">Less-than: return k-mers that occur fewer than a threshold value</option>
                    <option value="greater-than">Greater-than: return k-mers that occur more than a threshold value</option>
                    <option value="equal-to">Equal-to: return k-mers that occur exactly a threshold value</option>
                    <option value="not-equal-to">Not-equal-to: return k-mers that do not occur exactly a threshold value</option>
                </param>
                <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" help="Select a meryldb dataset" />
                <conditional name="filter_type">
                    <param name="type" type="select" label="Type of filtering">
                        <option value="times">Return k-mers that occur N times in the input</option>
                        <option value="frequency">Return k-mers occurring at specific frequencies</option>
                    </param>
                    <when value="times">
                        <param name="N" type="integer" min="0" max="50000000000" value="" optional="true" label="Number of times in the input" help="Return k-mers that occur N times in the input." />
                    </when>
                    <when value="frequency">
                        <param name="distinct" type="float" min="0" max="1" value="0.9998" optional="true" label="Frequency" help="Return k-mers that occur at a specific frequency (e.g. frequency = 0.9998 returns top 0.02% most frequent)" />
                    </when>
                </conditional>
            </when>
            <when value="arithmetic-kmers">
                <param name="arithmetic_operations" type="select" label="Arithmetic operations" help="Select an operation to be executed">
                    <option value="increase">Increase: add x to the count of each k-mer</option>
                    <option value="decrease">Decrease: subtract x from the count of each k-mer</option>
                    <option value="multiply">Multiply: multiply the count of each k-mer by x</option>
                    <option value="divide">Divide: divide the count of each k-mer by x</option>
                    <option value="divide-round">Divide-round: divide the count of each k-mer by x and round the results</option>
                    <option value="modulo">Modulo: set the count of each k-mer to the remainder of the count divided by x</option>
                </param>
                <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" help="Select a meryldb dataset" />
                <param name="X" type="integer" min="1" max="1000000" value="" optional="true" label="Operand" />
            </when>
            <when value="groups-kmers">
                <param name="groups_operations" type="select" label="Operations on sets of k-mers" help="Select an operation to be executed">
                    <option value="union">Union: return k-mers that occur in any input</option>
                    <option value="union-min">Union-min: return k-mers that occur in any input, set the count to the minimum count</option>
                    <option value="union-max">Union-max: return k-mers that occur in any input, set the count to the maximum count</option>
                    <option value="union-sum">Union-sum: return k-mers that occur in any input, set the count to the sum of the counts</option>
                    <option value="intersect">Intersect: return k-mers that occur in all inputs, set the count to the count in the first input</option>
                    <option value="intersect-min">Intersect-min: return k-mers that occur in all inputs, set the count to the minimum count</option>
                    <option value="intersect-max">Intersect-max: return k-mers that occur in all inputs, set the count to the maximum count</option>
                    <option value="intersect-sum">Intersect-sum: return k-mers that occur in all inputs, set the count to the sum of the counts</option>
                    <option value="subtract">Subtract: return k-mers that occur in the first input, subtracting counts from the other inputs</option>
                    <option value="difference">Difference: return k-mers that occur in the first input, but none of the other inputs</option>
                    <option value="symmetric-difference">Symmetric-difference: return k-mers that occur in exactly one input</option>
                </param>
                <param name="input_meryldb_02" type="data" multiple="true" format="meryldb" label="Input meryldb" />
            </when>
            <when value="histogram-kmers">
                <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" />
            </when>
            <when value="print">
                <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" />
            </when>
            <when value="trio-mode">
                <param name="child_reads" type="data" format="fastq,fasta,fastq.gz,fasta.gz" multiple="true" label="F1 reads" help="Select F1 reads used for generating the assembly" />
                <param name="paternal_reads" type="data" format="fastq,fasta,fastq.gz,fasta.gz" multiple="true" optional="true" label="Paternal reads" help="Select the paternal reads used for generating the assembly" />
                <param name="maternal_reads" type="data" format="fastq,fasta,fastq.gz,fasta.gz" multiple="true" optional="true" label="Maternal reads" help="Select the maternal reads used for generating the assembly" />
                <conditional name="options_kmer_size">
                    <param name="kmer_size" type="select" label="K-mer size selector">
                        <option value="provide">Set a k-mer size</option>
                        <option value="estimate">Estimate the best k-mer size</option>
                    </param>
                    <when value="provide">
                        <param name="input_kmer_size" type="integer" min="1" max="50" value="" label="K-mer size" help="For a human genome, the best k-mer size is k=21 for both haploid (3.1G) or diploid (6.2G)." />
                    </when>
                    <when value="estimate">
                        <param name="genome_size" type="integer" min="1000" max="70000000000" value="1000" label="Genome size" help="Haploid genome size or diploid genome size, depending on what we evaluate. In bp. Only required if the k-mer size is not provided." />
                        <param name="collision_rate" type="float" min="0.0001" max="0.01" value="0.001" label="Tolerable collision rate" help="Tolerable collision rate. By default is 0.001." />
                    </when>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Every mode except histogram and print produces a (tarred) meryl database. -->
        <data name="read_db" format="meryldb" from_work_dir="read-db.meryldb" label="${tool.name} on ${on_string}: read-db.meryldb">
            <filter>operation_type["command_type"] != "histogram-kmers" and operation_type["command_type"] != "print"</filter>
        </data>
        <data name="read_db_hist" format="tabular" from_work_dir="read-db.hist" label="${tool.name} on ${on_string}: read-db histogram">
            <filter>operation_type["command_type"] == "histogram-kmers" or operation_type["command_type"] == "trio-mode"</filter>
        </data>
        <data name="read_db_print" format="tabular" from_work_dir="read-db.tabular" label="${tool.name} on ${on_string}: kmer counts">
            <filter>operation_type["command_type"] == "print"</filter>
        </data>
        <data name="pat_db" format="meryldb" from_work_dir="pat.meryldb" label="${tool.name} on ${on_string}: pat.meryldb">
            <filter>operation_type["command_type"] == "trio-mode"</filter>
        </data>
        <data name="pat_db_hist" format="tabular" from_work_dir="pat.inherited.hist" label="${tool.name} on ${on_string}: paternal inherited histogram">
            <filter>operation_type["command_type"] == "trio-mode"</filter>
        </data>
        <data name="mat_db" format="meryldb" from_work_dir="mat.meryldb" label="${tool.name} on ${on_string}: mat.meryldb">
            <filter>operation_type["command_type"] == "trio-mode"</filter>
        </data>
        <data name="mat_db_hist" format="tabular" from_work_dir="mat.inherited.hist" label="${tool.name} on ${on_string}: maternal inherited histogram">
            <filter>operation_type["command_type"] == "trio-mode"</filter>
        </data>
    </outputs>
    <tests>
        <!-- test 1: count -->
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="count-kmers" />
                <param name="count_operations" value="count" />
                <param name="input_reads" value="child.fasta" />
                <conditional name="options_kmer_size">
                    <param name="kmer_size" value="provide" />
                    <param name="input_kmer_size" value="7" />
                </conditional>
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="22152" delta="300" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_line line="K-mer size: 7" />
            </assert_stdout>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="filter-kmers" />
                <param name="filter_operations" value="less-than" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
                <conditional name="filter_type">
                    <param name="type" value="times" />
                    <param name="N" value="100" />
                </conditional>
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="32077" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="filter-kmers" />
                <param name="filter_operations" value="greater-than" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
                <conditional name="filter_type">
                    <param name="type" value="times" />
                    <param name="N" value="80" />
                </conditional>
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="49643" delta="300" />
                </assert_contents>
            </output>
        </test>
        <!-- The frequency branch is only exercised when the selector is set explicitly;
             NOTE(review): re-check the expected size after this change. -->
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="filter-kmers" />
                <param name="filter_operations" value="greater-than" />
                <param name="input_meryldb_02" value="maternal.meryldb" ftype="meryldb" />
                <conditional name="filter_type">
                    <param name="type" value="frequency" />
                    <param name="distinct" value="0.9998" />
                </conditional>
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="1634" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="filter-kmers" />
                <param name="filter_operations" value="equal-to" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
                <conditional name="filter_type">
                    <param name="type" value="times" />
                    <param name="N" value="100" />
                </conditional>
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="2621" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="filter-kmers" />
                <param name="filter_operations" value="not-equal-to" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
                <conditional name="filter_type">
                    <param name="type" value="times" />
                    <param name="N" value="100" />
                </conditional>
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="59100" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="arithmetic-kmers" />
                <param name="arithmetic_operations" value="increase" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
                <param name="X" value="100000" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="59500" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="arithmetic-kmers" />
                <param name="arithmetic_operations" value="decrease" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
                <param name="X" value="100" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="42313" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="arithmetic-kmers" />
                <param name="arithmetic_operations" value="multiply" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
                <param name="X" value="3" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="60530" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="arithmetic-kmers" />
                <param name="arithmetic_operations" value="divide" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
                <param name="X" value="2" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="56200" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="arithmetic-kmers" />
                <param name="arithmetic_operations" value="divide-round" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
                <param name="X" value="2" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="56100" delta="300" />
                </assert_contents>
            </output>
        </test>
        <!-- test 12 -->
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="arithmetic-kmers" />
                <param name="arithmetic_operations" value="modulo" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
                <param name="X" value="3" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="37501" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="union" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="36100" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="union-min" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="58925" delta="350" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="union-max" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="58930" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="union-sum" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="58600" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="intersect" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="14951" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="intersect-min" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="14957" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="intersect-max" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="14956" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="intersect-sum" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="14953" delta="300" />
                </assert_contents>
            </output>
        </test>
        <!-- test 21 -->
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="subtract" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="23999" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="difference" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="24016" delta="300" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="groups-kmers" />
                <param name="groups_operations" value="symmetric-difference" />
                <param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="57455" delta="300" />
                </assert_contents>
            </output>
        </test>
        <!-- Trio mode. NOTE(review): read-db.hist is now generated by the command, so
             output_23.read-db.hist may have to be regenerated. -->
        <test expect_num_outputs="6">
            <conditional name="operation_type">
                <param name="command_type" value="trio-mode" />
                <param name="child_reads" value="child.fasta" />
                <param name="paternal_reads" value="paternal.fasta" />
                <param name="maternal_reads" value="maternal.fasta" />
                <conditional name="options_kmer_size">
                    <param name="kmer_size" value="provide" />
                    <param name="input_kmer_size" value="7" />
                </conditional>
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="1573" delta="300" />
                </assert_contents>
            </output>
            <output name="read_db_hist" file="output_23.read-db.hist" />
            <output name="pat_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="1779" delta="300" />
                </assert_contents>
            </output>
            <output name="pat_db_hist" file="output_23.pat.hist" />
            <output name="mat_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="1569" delta="300" />
                </assert_contents>
            </output>
            <output name="mat_db_hist" file="output_23.mat.hist" />
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="histogram-kmers" />
                <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db_hist" file="output_24.read-db.hist" />
        </test>
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="print" />
                <param name="input_meryldb_02" value="maternal.meryldb" ftype="meryldb" />
            </conditional>
            <output name="read_db_print" file="output_25.read-db.tabular" sort="true" />
        </test>
        <!--Test compressed files-->
        <test expect_num_outputs="1">
            <conditional name="operation_type">
                <param name="command_type" value="count-kmers" />
                <param name="count_operations" value="count" />
                <param name="input_reads" value="child.fasta.gz" ftype="fasta.gz" />
                <conditional name="options_kmer_size">
                    <param name="kmer_size" value="provide" />
                    <param name="input_kmer_size" value="7" />
                </conditional>
            </conditional>
            <output name="read_db" ftype="meryldb">
                <assert_contents>
                    <has_size value="22200" delta="300" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_line line="K-mer size: 7" />
            </assert_stdout>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**Purpose**

Meryl is the k-mer counter. It is built into the Celera assembler and is also available as a stand-alone application. Meryl uses a sorting-based approach that sorts the k-mers in lexicographical order. In addition to generating count databases, meryl can perform simple operations on them.

-----

.. class:: infomark

**Basic functions**

The functions that meryl includes are described below:

::

    COUNT OPERATIONS
        - Count: count the occurrences of canonical k-mers
        - Count-forward: count the occurrences of forward k-mers
        - Count-reverse: count the occurrences of reverse k-mers

    FILTERING OPERATIONS
        - Less-than: return k-mers that occur fewer than N times in the input
        - Greater-than: return k-mers that occur more than N times in the input
        - Equal-to: return k-mers that occur exactly N times in the input
        - Not-equal-to: return k-mers that do not occur exactly N times in the input

    ARITHMETIC OPERATIONS
        - Increase: add x to the count of each k-mer
        - Decrease: subtract x from the count of each k-mer
        - Multiply: multiply the count of each k-mer by x
        - Divide: divide the count of each k-mer by x
        - Divide-round: divide the count of each k-mer by x and round the results
        - Modulo: set the count of each k-mer to the remainder of the count divided by x

    OPERATIONS ON SETS
        - Union: return k-mers that occur in any input
        - Union-min: return k-mers that occur in any input, set the count to the minimum count
        - Union-max: return k-mers that occur in any input, set the count to the maximum count
        - Union-sum: return k-mers that occur in any input, set the count to the sum of the counts
        - Intersect: return k-mers that occur in all inputs, set the count to the count in the first input
        - Intersect-min: return k-mers that occur in all inputs, set the count to the minimum count
        - Intersect-max: return k-mers that occur in all inputs, set the count to the maximum count
        - Intersect-sum: return k-mers that occur in all inputs, set the count to the sum of the counts
        - Subtract: return k-mers that occur in the first input, subtracting counts from the other inputs
        - Difference: return k-mers that occur in the first input, but none of the other inputs
        - Symmetric-difference: return k-mers that occur in exactly one input

-----

.. class:: infomark

**Additional function: build hap-mers dbs for trios**

In addition to the basic operations, this wrapper allows building the hap-mer databases for trios, in accordance with `merqury's recommended guidelines. <https://github.com/marbl/merqury/wiki/1.-Prepare-meryl-dbs#3-build-hap-mer-dbs-for-trios>`_

    ]]>    </help>
    <expand macro="citations" />
</tool>