diff trio-mode.xml @ 0:c85aec7804a4 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meryl commit a95f5b04b1219489a327a622184633a561fe5ac0
author iuc
date Thu, 25 Apr 2024 21:01:24 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trio-mode.xml	Thu Apr 25 21:01:24 2024 +0000
@@ -0,0 +1,146 @@
+<tool id="meryl_trio_mode" name="Meryl" version="@TOOL_VERSION@+@GALAXY_TOOL_VERSION@@SUFFIX_VERSION@" profile="@PROFILE@">
+    <description>build hap-mers databases for trios</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <xrefs>
+        <xref type="bio.tools">meryl</xref>
+    </xrefs>
+    <expand macro="requirements"/>
+    <version_command>meryl --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+    export GALAXY_MEMORY_GB=\$((\${GALAXY_MEMORY_MB:-8192}/1024)) &&
+    export MERQURY=\$(dirname \$(command -v merqury.sh))/../share/merqury/ &&
+    #if $options_kmer_size.kmer_size == 'estimate'
+        #from math import log
+        #set size=int(log(int($options_kmer_size.genome_size)*(1-float($options_kmer_size.collision_rate))/float($options_kmer_size.collision_rate))/log(4))
+    #elif $options_kmer_size.kmer_size == 'provide'
+        #set size=$options_kmer_size.input_kmer_size
+    #end if
+    #for $i, $read in enumerate($paternal_reads):
+        mkdir 'paternal${i}.meryl' &&
+        ln -s '$read' ./input_paternal_${i}.${read.ext} &&
+        meryl count k=$size memory=\$GALAXY_MEMORY_GB threads=\${GALAXY_SLOTS:-1} ./input_paternal_${i}.${read.ext} output 'paternal${i}.meryl' &&
+    #end for
+    meryl union-sum paternal*.meryl output pat.meryl &&
+    #for $i, $read in enumerate($maternal_reads):
+        mkdir 'maternal${i}.meryl' &&
+        ln -s '$read' ./input_maternal_${i}.${read.ext} &&
+        meryl count k=$size memory=\$GALAXY_MEMORY_GB threads=\${GALAXY_SLOTS:-1} ./input_maternal_${i}.${read.ext} output 'maternal${i}.meryl' &&
+    #end for
+    meryl union-sum maternal*.meryl output mat.meryl &&
+    #for $i, $read in enumerate($child_reads):
+        mkdir 'child${i}.meryl' &&
+        ln -s '$read' ./input_child_${i}.${read.ext} &&
+        meryl count k=$size memory=\$GALAXY_MEMORY_GB threads=\${GALAXY_SLOTS:-1} ./input_child_${i}.${read.ext} output 'child${i}.meryl' &&
+    #end for
+    meryl union-sum child*.meryl output child.meryl &&
+    meryl histogram child.meryl > read-db.hist &&                                          
+    tar -czf 'read-db.meryldb' child.meryl &&
+    
+    ## mat specific kmers
+    meryl difference mat.meryl pat.meryl output mat.only.meryl &&
+    meryl histogram mat.only.meryl > mat.only.hist &&
+    java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar mat.only.hist > mat.only.ploidy &&
+    VAR=`sed -n 2p mat.only.ploidy | awk '{print \$NF}'` &&
+    meryl greater-than \$VAR output mat.only.filt.meryl mat.only.meryl &&
+
+    ## pat specific kmers
+    meryl difference pat.meryl mat.meryl output pat.only.meryl &&
+    meryl histogram pat.only.meryl > pat.only.hist &&
+    java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar pat.only.hist > pat.only.ploidy &&
+    VAR=`sed -n 2p pat.only.ploidy | awk '{print \$NF}'` &&
+    meryl greater-than \$VAR output pat.only.filt.meryl pat.only.meryl &&
+
+    ## shared kmers
+    meryl intersect output shared.meryl mat.meryl pat.meryl &&
+
+    ## mat hapmers
+    meryl intersect output mat.inherited.meryl child.meryl mat.only.filt.meryl &&
+    meryl histogram mat.inherited.meryl > mat.inherited.hist &&
+    java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar mat.inherited.hist > mat.inherited.ploidy &&
+    VAR=`sed -n 2p mat.inherited.ploidy | awk '{print \$NF}'` &&
+    meryl greater-than \$VAR output mat.hapmer.meryl mat.inherited.meryl &&
+    tar -czf 'mat.meryldb' mat.hapmer.meryl &&
+
+    ## pat hapmers
+    meryl intersect output pat.inherited.meryl child.meryl pat.only.filt.meryl &&
+    meryl histogram pat.inherited.meryl > pat.inherited.hist &&
+    java -jar -Xmx1g \$MERQURY/eval/kmerHistToPloidyDepth.jar pat.inherited.hist > pat.inherited.ploidy &&
+    VAR=`sed -n 2p pat.inherited.ploidy | awk '{print \$NF}'` &&
+    meryl greater-than \$VAR output pat.hapmer.meryl pat.inherited.meryl &&
+    tar -czf 'pat.meryldb' pat.hapmer.meryl &&
+
+    echo 'K-mer size: ${size}'
+    ]]></command>
+    <inputs>
+        <param name="child_reads" type="data" format="fastq,fasta,fastq.gz,fasta.gz" multiple="true" label="F1 reads" help="Select F1 reads used for generating the assembly"/>
+        <param name="paternal_reads" type="data" format="fastq,fasta,fastq.gz,fasta.gz" multiple="true" optional="true" label="Paternal reads" help="Select the paternal reads used for generating the assembly"/>
+        <param name="maternal_reads" type="data" format="fastq,fasta,fastq.gz,fasta.gz" multiple="true" optional="true" label="Maternal reads" help="Select the maternal reads used for generating the assembly"/>
+        <conditional name="options_kmer_size">
+            <param name="kmer_size" type="select" label="K-mer size selector">
+                <option value="provide">Set a k-mer size</option>
+                <option value="estimate">Estimate the best k-mer size</option>
+            </param>
+            <when value="provide">
+                <param name="input_kmer_size" type="integer" min="1" max="50" value="" label="K-mer size" help="For a human genome, the best k-mer size is k=21 for both haploid (3.1G) or diploid (6.2G)."/>
+            </when>
+            <when value="estimate">
+                <param name="genome_size" type="integer" min="1000" max="70000000000" value="1000" label="Genome size" help="Haploid genome size or diploid genome size, depending on what we evaluate. In bp. Only required if the k-mer size is not provided."/>
+                <param name="collision_rate" type="float" min="0.0001" max="0.01" value="0.001" label="Tolerable collision rate" help="Tolerable collision rate. By default is 0.001."/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="read_db" format="meryldb" from_work_dir="read-db.meryldb" label="${tool.name} on ${on_string}: read-db.meryldb"/>
+        <data name="read_db_hist" format="tabular" from_work_dir="read-db.hist" label="${tool.name} on ${on_string}: read-db histogram"/>
+        <data name="pat_db" format="meryldb" from_work_dir="pat.meryldb" label="${tool.name} on ${on_string}: pat.meryldb"/>
+        <data name="pat_db_hist" format="tabular" from_work_dir="pat.inherited.hist" label="${tool.name} on ${on_string}: paternal inherited histogram"/>
+        <data name="mat_db" format="meryldb" from_work_dir="mat.meryldb" label="${tool.name} on ${on_string}: mat.meryldb"/>
+        <data name="mat_db_hist" format="tabular" from_work_dir="mat.inherited.hist" label="${tool.name} on ${on_string}: maternal inherited histogram"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="6">
+            <conditional name="options_kmer_size">
+                <param name="kmer_size" value="provide"/>
+                <param name="input_kmer_size" value="21"/>
+            </conditional>
+            <param name="maternal_reads" value="maternal.fasta"/>
+            <param name="paternal_reads" value="paternal.fasta"/>
+            <param name="child_reads" value="child.fasta"/>
+            <output name="read_db" ftype="meryldb">
+                <assert_contents>
+                    <has_size value="3362942" delta="2000"/>
+                    <expand macro="meryldb_archive_assumptions"/>
+                </assert_contents>
+            </output>
+            <output name="read_db_hist" file="output_23.read-db.hist"/>
+            <output name="pat_db" ftype="meryldb">
+                <assert_contents>
+                    <has_size value="120610" delta="400"/>
+                    <expand macro="meryldb_archive_assumptions"/>
+                </assert_contents>
+            </output>
+            <output name="pat_db_hist" file="output_23.pat.hist"/>
+            <output name="mat_db" ftype="meryldb">
+                <assert_contents>
+                    <has_size value="67883" delta="300"/>
+                    <expand macro="meryldb_archive_assumptions"/>
+                </assert_contents>
+            </output>
+            <output name="mat_db_hist" file="output_23.mat.hist"/>
+        </test>
+    </tests>
+    <help>
+
+.. class:: infomark
+
+**Purpose**
+
+Meryl is the k-mer counter. This tool builds hap-mer databases  for trios, in accordance
+with `merqury's recommended guidelines. &lt;https://github.com/marbl/merqury/wiki/1.-Prepare-meryl-dbs#3-build-hap-mer-dbs-for-trios&gt;`_
+
+        </help>
+    <expand macro="citations"/>
+</tool>