changeset 0:b9a78a7ca2f3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/verkko commit 82321b91d95e2edf0e91a90d9aa3d96d63603141
author iuc
date Mon, 01 May 2023 19:20:11 +0000
parents
children cf54dafa0403
files test-data/1.meryldb test-data/2.meryldb test-data/test1.fastq.gz test-data/test2.fastq.gz test-data/test3.fastq.gz test-data/test4.fastq.gz test-data/test5.fastq verkko.xml
diffstat 3 files changed, 269 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file test-data/1.meryldb has changed
Binary file test-data/2.meryldb has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/verkko.xml	Mon May 01 19:20:11 2023 +0000
@@ -0,0 +1,269 @@
+<tool id="verkko" name="verkko" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
+    <description>hybrid genome assembly pipeline</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.2</token>
+        <token name="@VERSION_SUFFIX@">1</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">verkko</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        #set $hififiles = []
+        #for $i, $datafile in enumerate($hifi):
+            #set $filename_gz = str($i) + "_hifi.fastq.gz"
+            #set $filename = str($i) + "_hifi.fastq"
+            #if $datafile.is_of_type("fastq") or $datafile.is_of_type("fastqsanger"):
+                ln -s '$datafile' '$filename' &&
+                #silent $hififiles.append($filename)
+            #else:
+                ln -s '$datafile' '$filename_gz' &&
+                #silent $hififiles.append($filename_gz)
+            #end if
+        #end for
+        #if $nano:
+            #set $nanofiles = []
+            #for $j, $ontfile in enumerate($nano):
+                #set $filename_gz = str($j) +  "_nano.fastq.gz"
+                #set $filename = str($j) +  "_nano.fastq"
+                #if $ontfile.is_of_type("fastq") or $ontfile.is_of_type("fastqsanger"):
+                    ln -s '${ontfile}' '$filename' &&
+                    #silent $nanofiles.append($filename)
+                #else:
+                    ln -s '$ontfile' '$filename_gz' &&
+                    #silent $nanofiles.append($filename_gz)
+                #end if
+            #end for
+        #end if
+        #if $algorithm.hap_kmer.hap_kmers == "yes":
+            mkdir 'h1.meryl' &&
+            tar -xzmf '$algorithm.hap_kmer.h1' -C 'h1.meryl' --strip-components 1 &&
+            mkdir h2.meryl &&
+            tar -xzmf '$algorithm.hap_kmer.h2' -C 'h2.meryl' --strip-components 1 &&
+        #end if
+        verkko --hifi
+        #for $k, $hifi_data in enumerate($hififiles):
+            '$hifi_data'
+        #end for
+        #if $nano:
+            --nano 
+            #for $k, $nano_data in enumerate($nanofiles):
+                '$nano_data'
+            #end for
+        #else:
+            --no-nano
+        #end if
+        $algorithm.no_correction
+        #if $algorithm.hap_kmer.hap_kmers == "yes":
+            --hap-kmers 'h1.meryl' 'h2.meryl' $algorithm.hap_kmer.type
+        #end if
+        #if str($algorithm.base_k):
+            --base-k $algorithm.base_k
+        #end if
+        #if str($algorithm.max_k):
+            --max-k $algorithm.max_k
+        #end if
+        #if str($algorithm.window):
+            --window $algorithm.window
+        #end if
+        #if str($algorithm.split_bases):
+            --split-bases $algorithm.split_bases
+        #end if
+        #if str($algorithm.split_reads):
+            --split-reads $algorithm.split_reads
+        #end if
+        #if str($algorithm.min_ont_length):
+             --min-ont-length $algorithm.min_ont_length
+        #end if
+        #if str($algorithm.correct_k_mer_size):
+            --correct-k-mer-size $algorithm.correct_k_mer_size
+        #end if
+        #if str($algorithm.correct_mer_threshold):
+            --correct-mer-threshold $algorithm.correct_mer_threshold
+        #end if
+        #if str($algorithm.correct_min_read_length):
+           --correct-min-read-length $algorithm.correct_min_read_length
+        #end if
+        #if str($algorithm.correct_min_overlap_length):
+            --correct-min-overlap-length $algorithm.correct_min_overlap_length
+        #end if
+        #if str($algorithm.correct_hash_bits):
+            --correct-hash-bits $algorithm.correct_hash_bits
+        #end if
+        #if str($algorithm.seed_min_length):
+            --seed-min-length $algorithm.seed_min_length
+        #end if
+        #if str($algorithm.seed_max_length):
+            --seed-max-length $algorithm.seed_max_length
+        #end if
+        #if str($algorithm.align_bandwidth):
+            --align-bandwidth $algorithm.align_bandwidth
+        #end if
+        #if str($algorithm.score_fraction):
+            --score-fraction $algorithm.score_fraction
+        #end if
+        #if str($algorithm.min_identity):
+            --min-identity $algorithm.min_identity
+        #end if
+        #if str($algorithm.min_score):
+            --min-score $algorithm.min_score
+        #end if
+        #if str($algorithm.end_clipping):
+            --end-clipping $algorithm.end_clipping
+        #end if
+        #if str($algorithm.incompatible_cutoff):
+            --incompatible-cutoff $algorithm.incompatible_cutoff
+        #end if
+        -d asm
+        ##This limits the amount of memory available to a given step 10 90% of maximum. This means 1 CPU, 
+        ##90% of max available memory, and max time 12 hrs for a given step
+        --ovs-run 1 \$((\${GALAXY_MEMORY_MB:-8192} * 9 / 10)) 12
+    ]]></command>
+    <inputs>
+        <param name="hifi" type="data" multiple="true" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="HiFi Reads Files" />
+        <param name="nano" type="data" multiple="true" optional="true" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Oxford Nanopore Reads Files" />
+        <section name="algorithm" title="Algorithm parameters">
+            <param argument="--no-correction" type="boolean" truevalue="--no-correction" falsevalue="" label="Do not perform Canu correction on HiFi reads"/>
+            <conditional name="hap_kmer">
+                <param argument="--hap-kmers" type="select" label="Assign haplotypes from meryl databases">
+                    <option value="no">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+                <when value="no"/>
+                <when value="yes">
+                    <param name="h1" type="data" format="meryldb" label="h1"/>
+                    <param name="h2" type="data" format="meryldb" label="h2"/>
+                    <param name="type" type="select" label="Type">
+                        <option value="trio">Trio</option>
+                        <option value="hic">HiC</option>
+                        <option value="strandseq">Strandseq</option>
+                    </param>
+                </when>
+            </conditional>
+            <param argument="--base-k" type="integer" label="Base k" min="0" optional="true"/>
+            <param argument="--max-k" type="integer" label="Max k" min="0" optional="true"/>
+            <param argument="--window" type="integer" label="Window" min="0" optional="true"/>
+            <param argument="--split-bases" type="integer" label="Split bases" min="0" optional="true"/>
+            <param argument="--split-reads" type="integer" label="Split reads" min="0" optional="true"/>
+            <param argument="--min-ont-length" type="integer" label="Min ont length" min="0" optional="true"/>
+            <param argument="--correct-k-mer-size" type="integer" label="Correct k mer size" min="0" optional="true"/>
+            <param argument="--correct-mer-threshold" type="integer" label="Correct mer threshold" min="0" optional="true"/>
+            <param argument="--correct-min-read-length" type="integer" label="Correct min read length" min="0" optional="true"/>
+            <param argument="--correct-min-overlap-length" type="integer" label=" Correct min overlap length" min="0" optional="true"/>
+            <param argument="--correct-hash-bits" type="integer" label="Correct hash bits" min="0" optional="true"/>
+            <param argument="--seed-min-length" type="integer" label="Seed min length" min="0" optional="true"/>
+            <param argument="--seed-max-length" type="integer" label="Seed max length" min="0" optional="true"/>
+            <param argument="--align-bandwidth" type="integer" label="Align bandwidth" min="0" optional="true"/>
+            <param argument="--score-fraction" type="float" label="Score fraction" min="0" optional="true"/>
+            <param argument="--min-identity" type="float" label="Min identity" min="0" optional="true"/>
+            <param argument="--min-score" type="integer" label="Min score" min="0" optional="true"/>
+            <param argument="--end-clipping" type="integer" label="End clipping" min="0" optional="true"/>
+            <param argument="--incompatible-cutoff" type="float" label="Incompatible cutoff" min="0" optional="true"/>
+        </section>
+    </inputs>
+    <outputs>
+        <!-- BASIC OUTFILES -->
+        <data name="assembly_fasta" format="fasta" label="Verkko on ${on_string}: assembly fasta" from_work_dir="asm/assembly.fasta"/>
+        <data name="hifi_coverage" format="tabular" label="Verkko on ${on_string}: hifi coverage" from_work_dir="asm/assembly.hifi-coverage.csv"/>
+        <data name="homo_gfa" format="gfa1" label="Verkko on ${on_string}: Final graph" from_work_dir="asm/assembly.homopolymer-compressed.gfa"/>
+        <data name="homo_layout" format="tabular" label="Verkko on ${on_string}: Homopolymer layout" from_work_dir="asm/assembly.homopolymer-compressed.layout"/>
+        <data name="homo_noseq" format="gfa1" label="Verkko on ${on_string}: Homopolymer noseq" from_work_dir="asm/assembly.homopolymer-compressed.noseq.gfa"/>
+        <data name="ont_coverage" format="tabular" label="Verkko on ${on_string}: ONT coverage" from_work_dir="asm/assembly.ont-coverage.csv"/>
+        <data name="hifi_corrected" format="fasta.gz" label="Verkko on ${on_string}: Hifi corrected reads" from_work_dir="asm/hifi-corrected.fasta.gz">
+            <filter>algorithm['no_correction'] == False</filter>
+        </data>
+        <!-- HAPLOTYPES OUT -->
+        <data name="unassigned" format="fasta" label="Verkko on ${on_string}: Unassigned sequences" from_work_dir="asm/assembly.unassigned.fasta">
+            <filter>algorithm['hap_kmer']['hap_kmers'] == "yes"</filter>
+        </data>
+        <data name="haplo_fasta_1" format="fasta" label="Verkko on ${on_string}: assembly haplotype fasta 1" from_work_dir="asm/assembly.haplotype1.fasta">
+            <filter>algorithm['hap_kmer']['hap_kmers'] == "yes"</filter>
+        </data>
+        <data name="haplo_fasta_2" format="fasta" label="Verkko on ${on_string}: assembly haplotype fasta 2" from_work_dir="asm/assembly.haplotype2.fasta">
+            <filter>algorithm['hap_kmer']['hap_kmers'] == "yes"</filter>
+        </data>
+    </outputs>
+    <tests>
+    <!-- ALL TESTS SHOULD FAIL, there is no sample data small enough to run in our tests, so it's checking the command itself -->
+        <test expect_exit_code="1" expect_failure="true">
+            <!-- Use nano, no haplotypes -->
+            <param name="hifi" value="test1.fastq.gz,test2.fastq.gz"/>
+            <param name="nano" value="test3.fastq.gz,test4.fastq.gz"/>
+            <section name="algorithm">
+                <param name="base_k" value="1003"/>
+                <param name="max_k" value="15001"/>
+                <param name="window" value="99"/>
+                <param name="split_bases" value="3000000001"/>
+                <param name="split_reads" value="150001"/>
+                <param name="min_ont_length" value="2"/>
+                <param name="correct_k_mer_size" value="29"/>
+                <param name="correct_mer_threshold" value="19"/>
+                <param name="correct_min_read_length" value="4001"/>
+                <param name="correct_min_overlap_length" value="2001"/>
+                <param name="correct_hash_bits" value="24"/>
+                <param name="seed_min_length" value="29"/>
+                <param name="seed_max_length" value="9999"/>
+                <param name="align_bandwidth" value="14"/>
+                <param name="score_fraction" value=".98"/>
+                <param name="min_identity" value=".84"/>
+                <param name="min_score" value="5001"/>
+                <param name="end_clipping" value="99"/>
+                <param name="incompatible_cutoff" value=".14"/>
+            </section>
+            <assert_command>
+                <has_text_matching expression="verkko --hifi"/>
+                <has_text_matching expression="--nano"/>
+                <has_text_matching expression="--base-k 1003"/>
+                <has_text_matching expression="--max-k 15001"/>
+                <has_text_matching expression="--window 99"/>
+                <has_text_matching expression="--split-bases 3000000001"/>
+                <has_text_matching expression="--split-reads 150001"/>
+                <has_text_matching expression="--min-ont-length 2"/>
+                <has_text_matching expression="--correct-k-mer-size 29"/>
+                <has_text_matching expression="--correct-mer-threshold 19"/>
+                <has_text_matching expression="--correct-min-read-length 4001"/>
+                <has_text_matching expression="--correct-min-overlap-length 2001"/>
+                <has_text_matching expression="--correct-hash-bits 24"/>
+                <has_text_matching expression="--seed-min-length 29"/>
+                <has_text_matching expression="--seed-max-length 9999"/>
+                <has_text_matching expression="--align-bandwidth 14 "/>
+                <has_text_matching expression="--score-fraction 0.98"/>
+                <has_text_matching expression="--min-identity 0.84"/>
+                <has_text_matching expression="--min-score 5001"/>
+                <has_text_matching expression="--end-clipping 99"/>
+                <has_text_matching expression=" --incompatible-cutoff 0.14"/>
+            </assert_command>
+            <assert_stderr>
+                <has_text text="job                            count    min threads    max threads"/>
+            </assert_stderr>
+        </test>
+        <test expect_exit_code="1" expect_failure="true">
+            <!-- No nano, use haplo, no canu correction -->
+            <param name="hifi" value="test5.fastq,test1.fastq.gz"/>
+            <section name="algorithm">
+                <conditional name="hap_kmer">
+                    <param name="hap_kmers" value="yes"/>
+                    <param name="h1" value="1.meryldb"/>
+                    <param name="h2" value="2.meryldb"/>
+                    <param name="type" value="trio"/>
+                </conditional>
+                <param name="no-correction" value=""/>
+            </section>
+            <assert_command>
+                <has_text_matching expression="verkko --hifi"/>
+                <has_text_matching expression="--no-nano"/>
+                <has_text_matching expression="--hap-kmers 'h1.meryl' 'h2.meryl' trio"/>
+            </assert_command>
+            <assert_stderr>
+                <has_text text="job                            count    min threads    max threads"/>
+            </assert_stderr>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Verkko is a hybrid genome assembly pipeline developed for telomere-to-telomere assembly of PacBio HiFi and Oxford Nanopore reads. Verkko is Finnish for net, mesh and graph.
+
+        Verkko uses Canu to correct remaining errors in the HiFi reads, builds a multiplex de Bruijn graph using MBG, aligns the Oxford Nanopore reads to the graph using GraphAligner, progressively resolves loops and tangles first with the HiFi reads then with the aligned Oxford Nanopore reads, and finally creates contig consensus sequences using Canu's consensus module. 
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1101/2022.06.24.497523</citation>
+    </citations>
+</tool>