Mercurial > repos > iuc > unicycler
changeset 9:6e26c9afd301 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/unicycler commit d95fed0458171a099928d51e363865783f89717f
author | iuc |
---|---|
date | Sat, 24 Sep 2022 21:14:02 +0000 |
parents | 9e3e80cc4ad4 |
children | d10bdad2fd17 |
files | unicycler.xml |
diffstat | 1 files changed, 144 insertions(+), 63 deletions(-) [+] |
line wrap: on
line diff
--- a/unicycler.xml Wed Nov 18 20:26:04 2020 +0000 +++ b/unicycler.xml Sat Sep 24 21:14:02 2022 +0000 @@ -1,7 +1,11 @@ -<tool id="unicycler" name="Create assemblies with Unicycler" version="@VERSION@.0" profile="20.09"> +<tool id="unicycler" name="Create assemblies with Unicycler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09"> <macros> - <token name="@VERSION@">0.4.8</token> + <token name="@TOOL_VERSION@">0.5.0</token> + <token name="@VERSION_SUFFIX@">0</token> </macros> + <xrefs> + <xref type="bio.tools">unicycler</xref> + </xrefs> <edam_topics> <edam_topic>topic_0196</edam_topic> </edam_topics> @@ -9,7 +13,8 @@ <edam_operation>operation_0525</edam_operation> </edam_operations> <requirements> - <requirement type="package" version="@VERSION@">unicycler</requirement> + <requirement type="package" version="@TOOL_VERSION@">unicycler</requirement> + <requirement type="package" version="1.15.1">samtools</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ ## Preparing files @@ -57,13 +62,10 @@ #end if ln -s '${long}' '$lr' && #end if -## Get location for pilon installation -pilon=`pilon --jar_dir` && ## Running Unicycler unicycler -t "\${GALAXY_SLOTS:-4}" -o ./ --verbosity 3 ---pilon_path \$pilon #if str( $paired_unpaired.fastq_input_selector ) == "paired" -1 '$fq1' -2 '$fq2' @@ -86,7 +88,6 @@ #end if ## Spades Options section ## ---------------------------------------------------------- -$spades.no_correct --min_kmer_frac '$spades.min_kmer_frac' --max_kmer_frac '$spades.max_kmer_frac' #if str($spades.kmers) != '' @@ -105,12 +106,6 @@ #end if --start_gene_id '$rotation.start_gene_id' --start_gene_cov '$rotation.start_gene_cov' -## Pilon Options section -## ---------------------------------------------------------- -$pilon.no_pilon -#if str($pilon.min_polish_size) != '' - --min_polish_size '$pilon.min_polish_size' -#end if ## Graph cleaning Options section ## ---------------------------------------------------------- 
--min_component_size '$graph_clean.min_component_size' @@ -124,6 +119,15 @@ #if str($lr_align.low_score) != '' --low_score '$lr_align.low_score' #end if +$lr_align.no_simple_bridges +--keep $keep +#if $keep != '0' + && mkdir 'spades_graphs' + && mv 00*gfa './spades_graphs/' +#end if +#if $keep == '2' and $long + && samtools view -@ "\${GALAXY_SLOTS:-4}" -u 'read_alignment/long_read_alignments.sam' | samtools sort -@ "\${GALAXY_SLOTS:-4}" -o 'read_alignment/long_read_alignments.bam' +#end if ]]></command> <inputs> <conditional name="paired_unpaired"> @@ -160,8 +164,6 @@ <param argument="--min_anchor_seg_len" type="integer" min="0" optional="true" label="Unicycler will not use segments shorter than this as scaffolding anchors"/> <section name="spades" expanded="False" title="SPAdes options" help="Unicycler uses SPAdes to construct assembly graphs. You can modify some of the SPAdes settings here. Use this ONLY if you know what you are doing!"> - <param argument="--no_correct" type="boolean" checked="false" truevalue="--no_correct" falsevalue="" - label="Skip SPAdes error correction step" help="This option turns off SPAdes error correction. 
Generally it is highly recommended to use correction."/> <param argument="--min_kmer_frac" type="float" min="0" max="1" value="0.2" label="Lowest k-mer size for SPAdes assembly, expressed as a fraction of the read length"/> <param argument="--max_kmer_frac" type="float" min="0" max="1" value="0.95" @@ -183,11 +185,6 @@ <param argument="--start_gene_id" type="float" min="0" max="100" value="90" label="The minimum required BLAST percent identity for a start gene search"/> <param argument="--start_gene_cov" type="float" min="0" max="100" value="95" label="The minimum required BLAST percent coverage for a start gene search"/> </section> - <section name="pilon" title="Pilon options" expanded="false"> - <param argument="--no_pilon" type="boolean" checked="false" truevalue="--no_pilon" falsevalue="" - label="Do not use Pilon to polish the final assembly." help="Unicycler uses Pilon tool for polishing final assembly."/> - <param argument="--min_polish_size" type="integer" min="0" value="1000" label="Contigs shorter than this value (bp) will not be polished using Pilon"/> - </section> <section name="graph_clean" expanded="false" title="Graph cleaning options" help="These options control the removal of small leftover sequences after bridging is complete."> <param argument="--min_component_size" type="integer" min="0" value="1000" @@ -201,14 +198,27 @@ <param argument="--scores" type="text" value="3,-6,-5,-2" label="Comma-delimited string of alignment scores: match, mismatch, gap open, gap extend"/> <param argument="--low_score" optional="true" type="integer" value="" label="Score threshold - alignments below this are considered poor" help="default = set automatically"/> + <param argument="--no_simple_bridges" type="boolean" truevalue="--no_simple_bridges" falsevalue="" checked="false" label="Simple long-read bridging" help="Default: No" /> </section> + <param argument="--keep" type="select" label="Outputs to keep" help="Level of file retention. 
Default: 1"> + <option value="0">0: only keep final files</option> + <option value="1" selected="true">1: save graphs at main checkpoints</option> + <option value="2">2: also keep SAM</option> + </param> </inputs> <outputs> <data name="assembly_graph" format="gfa1" from_work_dir="assembly.gfa" label="${tool.name} on ${on_string}: Final Assembly Graph" /> <data name="assembly" format="fasta" from_work_dir="assembly.fasta" label="${tool.name} on ${on_string}: Final Assembly"/> + <collection name="spades_collection" type="list" label="${tool.name} on ${on_string}: SPAdes graphs"> + <discover_datasets pattern="__designation_and_ext__" format="gfa1" directory="spades_graphs"/> + <filter>keep != "0"</filter> + </collection> + <data name="bam_file" format="bam" from_work_dir="read_alignment/long_read_alignments.bam" label="${tool.name} on ${on_string}: Long read alignments BAM"> + <filter>keep == "2" and long</filter> + </data> </outputs> <tests> - <test> + <test expect_num_outputs="2"> <conditional name="paired_unpaired"> <param name="fastq_input_selector" value="paired" /> <param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger" /> @@ -218,7 +228,6 @@ <param name="min_fasta_length" value="100"/> <param name="linear_seqs" value="0"/> <section name="spades"> - <param name="no_correct" value="true"/> <param name="min_kmer_frac" value="0.2"/> <param name="max_kmer_frac" value="0.95"/> <param name="kmer_count" value="10"/> @@ -229,10 +238,6 @@ <param name="start_gene_id" value="90"/> <param name="start_gene_cov" value="95"/> </section> - <section name="pilon"> - <param name="no_pilon" value=""/> - <param name="min_polish_size" value="1000"/> - </section> <section name="graph_clean"> <param name="min_component_size" value="1000"/> <param name="min_dead_end_size" value="1000"/> @@ -240,6 +245,7 @@ <section name="lr_align"> <param name="scores" value="3,-6,-5,-2"/> </section> + <param name="keep" value="0"/> <output name="assembly_graph" ftype="gfa1"> 
<assert_contents> <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/> @@ -262,7 +268,7 @@ during the minimap step (which seems to be compiled C code). A gist of the log can be found at: https://gist.github.com/jmchilton/b411b695170c1daea6589f5d76e326cb. --> - <test> + <test expect_num_outputs="2"> <conditional name="paired_unpaired"> <param name="fastq_input_selector" value="paired" /> <param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger.gz" /> @@ -273,7 +279,6 @@ <param name="min_fasta_length" value="100"/> <param name="linear_seqs" value="0"/> <section name="spades"> - <param name="no_correct" value="true"/> <param name="min_kmer_frac" value="0.2"/> <param name="max_kmer_frac" value="0.95"/> <param name="kmer_count" value="10"/> @@ -284,10 +289,6 @@ <param name="start_gene_id" value="90"/> <param name="start_gene_cov" value="95"/> </section> - <section name="pilon"> - <param name="no_pilon" value=""/> - <param name="min_polish_size" value="1000"/> - </section> <section name="graph_clean"> <param name="min_component_size" value="1000"/> <param name="min_dead_end_size" value="1000"/> @@ -295,6 +296,7 @@ <section name="lr_align"> <param name="scores" value="3,-6,-5,-2"/> </section> + <param name="keep" value="0"/> <output name="assembly_graph" ftype="gfa1"> <assert_contents> <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/> @@ -306,7 +308,7 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="2"> <conditional name="paired_unpaired"> <param name="fastq_input_selector" value="paired_collection"/> <param name="fastq_input1"> @@ -320,7 +322,6 @@ <param name="min_fasta_length" value="100"/> <param name="linear_seqs" value="0"/> <section name="spades"> - <param name="no_correct" value="true"/> <param name="min_kmer_frac" value="0.2"/> <param name="max_kmer_frac" value="0.95"/> <param name="kmer_count" value="10"/> @@ -331,10 +332,6 @@ <param name="start_gene_id" value="90"/> 
<param name="start_gene_cov" value="95"/> </section> - <section name="pilon"> - <param name="no_pilon" value="true"/> - <param name="min_polish_size" value="1000"/> - </section> <section name="graph_clean"> <param name="min_component_size" value="1000"/> <param name="min_dead_end_size" value="1000"/> @@ -342,6 +339,112 @@ <section name="lr_align"> <param name="scores" value="3,-6,-5,-2"/> </section> + <param name="keep" value="0"/> + <output name="assembly_graph" ftype="gfa1"> + <assert_contents> + <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/> + </assert_contents> + </output> + <output name="assembly" ftype="fasta"> + <assert_contents> + <has_text text="length=5386" /> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <conditional name="paired_unpaired"> + <param name="fastq_input_selector" value="none"/> + </conditional> + <param name="min_anchor_seg_len" value="10"/> + <section name="spades"> + <param name="kmers" value="21,23"/> + </section> + <param name="long" value="only_long.fasta" ftype="fasta" /> + <param name="keep" value="0"/> + <output name="assembly_graph" ftype="gfa1"> + <assert_contents> + <has_text text="S" /> + </assert_contents> + </output> + <output name="assembly" ftype="fasta"> + <assert_contents> + <has_text text=">1" /> + </assert_contents> + </output> + </test> + <!-- Test keep value = 1 --> + <test expect_num_outputs="3"> + <conditional name="paired_unpaired"> + <param name="fastq_input_selector" value="paired" /> + <param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger" /> + <param name="fastq_input2" value="phix_r.fq.gz" ftype="fastqsanger" /> + </conditional> + <param name="mode" value="normal" /> + <param name="keep" value="1"/> + <output name="assembly_graph" ftype="gfa1"> + <assert_contents> + <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/> + </assert_contents> + </output> + <output name="assembly" ftype="fasta"> + <assert_contents> 
+ <has_text text="length=5386" /> + </assert_contents> + </output> + <output_collection name="spades_collection" type="list" count="14"> + <element name="001_spades_graph_k027"> + <assert_contents> + <has_text text="TTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAAC"/> + </assert_contents> + </element> + </output_collection> + </test> + <!-- Test keep value = 2 --> + <test expect_num_outputs="4"> + <conditional name="paired_unpaired"> + <param name="fastq_input_selector" value="paired" /> + <param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger" /> + <param name="fastq_input2" value="phix_r.fq.gz" ftype="fastqsanger" /> + </conditional> + <param name="long" value="onp.fa" ftype="fasta" /> + <param name="mode" value="normal" /> + <param name="keep" value="2"/> + <output name="assembly_graph" ftype="gfa1"> + <assert_contents> + <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/> + </assert_contents> + </output> + <output name="assembly" ftype="fasta"> + <assert_contents> + <has_text text="length=5386" /> + </assert_contents> + </output> + <output_collection name="spades_collection" type="list" count="14"> + <element name="001_spades_graph_k027"> + <assert_contents> + <has_text text="TTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAAC"/> + </assert_contents> + </element> + </output_collection> + <output name="bam_file" ftype="bam"> + <assert_contents> + <has_size value="2084" delta="100"/> + </assert_contents> + </output> + </test> + <!-- Test no simple bridges option --> + <test expect_num_outputs="2"> + <conditional name="paired_unpaired"> + <param name="fastq_input_selector" value="paired" /> + <param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger" /> + <param name="fastq_input2" value="phix_r.fq.gz" ftype="fastqsanger" /> + </conditional> + <param name="long" value="onp.fa" ftype="fasta" /> + <param name="mode" value="normal" /> + <param name="keep" value="0"/> + <section name="lr_align"> + <param name="no_simple_bridges" 
value="true"/> + </section> <output name="assembly_graph" ftype="gfa1"> <assert_contents> <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/> @@ -352,26 +455,9 @@ <has_text text="length=5386" /> </assert_contents> </output> - </test> - <test> - <conditional name="paired_unpaired"> - <param name="fastq_input_selector" value="none"/> - </conditional> - <param name="min_anchor_seg_len" value="10"/> - <section name="spades"> - <param name="kmers" value="21,23"/> - </section> - <param name="long" value="only_long.fasta" ftype="fasta" /> - <output name="assembly_graph" ftype="gfa1"> - <assert_contents> - <has_text text="S" /> - </assert_contents> - </output> - <output name="assembly" ftype="fasta"> - <assert_contents> - <has_text text=">1" /> - </assert_contents> - </output> + <assert_command> + <has_text text="--no_simple_bridges" /> + </assert_command> </test> </tests> <help><![CDATA[ @@ -436,11 +522,6 @@ .. _`Hamming graph`: https://en.wikipedia.org/wiki/Hamming_graph .. _`Hamming distance`: https://en.wikipedia.org/wiki/Hamming_distance -This following option turns error correction on and off:: - - --no_correct - Skip SPAdes error correction step - (default: conduct SPAdes error correction) -----