necat: necat.xml comparison

comparison necat.xml @ 0:6ee7eb5821f0 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/necat commit 6946d81de9419c90e9bc4ea2f7bd5e4168dd6dd6

author	iuc
date	Fri, 25 Nov 2022 14:24:27 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:6ee7eb5821f0
+<tool id="necat" name="necat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
+<!-- Short summary of what the tool does -->
+<description>Error correction and de-novo assembly for ONT Nanopore reads</description>
+<!-- Shared macro definitions are imported from macros.xml -->
+<macros>
+    <import>macros.xml</import>
+</macros>
+<!-- Cross-reference to the bio.tools entry for necat -->
+<xrefs>
+    <xref type="bio.tools">necat</xref>
+</xrefs>
+<!-- The necat package, at the version held in the TOOL_VERSION token -->
+<requirements>
+    <requirement type="package" version="@TOOL_VERSION@">necat</requirement>
+</requirements>
+<command detect_errors="exit_code"><![CDATA[
+## Helper: build the working-directory file name "reads_<i>.<ext>" for the i-th input.
+## The Galaxy extension "fastqsanger" is mapped to the plain "fastq" suffix.
+## NOTE(review): compressed variants such as "fastqsanger.gz" pass through unchanged;
+## confirm that necat accepts that suffix.
+#def make_filename($i, $input_param)
+    #set ext = $input_param.extension
+    #if $ext == "fastqsanger"
+        #set $ext = "fastq"
+    #end if
+    #set filename = "reads_" + str($i) + "." + $ext
+    #return $filename
+#end def
+## Expose every selected input in the working directory under its generated name
+## and append that name to read_list.txt. Symlinks are used instead of copies so
+## that large read sets are not duplicated on disk.
+#set i = 1
+#for input in $input_fastqs
+    #set filename = $make_filename($i, $input)
+    ln -s '$input' $filename
+    && echo $filename >> read_list.txt &&
+    #set i = $i + 1
+#end for
+## necat commands: correction always runs; assembly and bridging only when requested
+necat correct '${job_configfile}'
+#if $assembly.should_assemble == "yes":
+    && necat assemble '${job_configfile}'
+    && necat bridge '${job_configfile}'
+#end if
+]]></command>
+<!-- Expands the job_conf macro; presumably this defines the job_configfile handed to necat in the command (confirm in macros.xml) -->
+<configfiles>
+    <expand macro="job_conf" />
+</configfiles>
+<inputs>
+    <!-- Read datasets to process; multiple="true" allows several to be selected at once -->
+    <param name="input_fastqs" type="data" format="fastq,fastq.gz,fasta,fasta.gz" multiple="true" label="Input reads" help="Input read files (FASTQ or FASTA). To select more than one file or collection from your history, use the 'ctrl' key" />
+    <!-- The value attribute is left empty, so no genome size estimate is preset -->
+    <param name="genome_size" type="integer" value="" min="1" max="100000000000" label="Genome size" help="Estimated size of genome (bp)" />
+    <param name="min_read_length" type="integer" value="1000" min="1" max="10000000" label="Min read length" help="Minimum length for input reads" />
+    <param name="correction_coverage" type="integer" value="40" min="1" max="10000" label="Correction coverage" help="Number of reads to correct in terms of genome coverage. For a 4Gb genome, setting correction coverage = 10 will correct the longest 40Gb worth of reads from the input fastq. " />
+    <!-- Assembly is opt-in; its settings are only offered when "yes" is selected -->
+    <conditional name="assembly">
+        <param name="should_assemble" type="select" label="Assembly">
+            <option value="no" selected="true">Don't perform assembly</option>
+            <option value="yes">Perform assembly on corrected reads</option>
+        </param>
+        <when value="no" />
+        <when value="yes">
+            <param name="assembly_coverage" type="integer" value="30" min="1" max="10000" label="Assembly coverage" help="Number of reads to use in genome assembly in terms of genome coverage" />
+            <param name="polish_contigs" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Polish contigs" help="Polish contigs as final step after bridging" />
+        </when>
+    </conditional>
+    <!-- Advanced options; each group below is expanded from a macro -->
+    <section name="adv" title="Advanced options" expanded="false" help="Warning: only change these if you really know what you are doing">
+        <expand macro="overlap_sensitive_options" />
+        <expand macro="consensus_sensitive_options" />
+        <expand macro="overlap_fast_options" />
+        <expand macro="consensus_fast_options" />
+        <expand macro="trimming_overlap_options" />
+        <expand macro="assembly_overlap_options" />
+        <expand macro="assembly_overlap_filtering" />
+        <expand macro="contig_assembly" />
+        <expand macro="contig_bridging" />
+    </section>
+</inputs>
+<outputs>
+    <!-- Corrected reads: the only output without a filter, so it is always collected -->
+    <data name="out_reads" format="fasta.gz" from_work_dir="project/1-consensus/cns_final.fasta.gz" label="${tool.name} on ${on_string}: corrected reads" />
+    <!-- Assembly requested without polishing: collect the bridged (unpolished) contigs -->
+    <data name="out_assembly" format="fasta" from_work_dir="project/6-bridge_contigs/bridged_contigs.fasta" label="${tool.name} on ${on_string}: bridged assembly">
+        <filter>assembly['should_assemble'] == 'yes' and not assembly['polish_contigs']</filter>
+    </data>
+    <!-- Assembly requested with polishing: collect the polished contigs -->
+    <data name="out_polished_assembly" format="fasta" from_work_dir="project/6-bridge_contigs/polished_contigs.fasta" label="${tool.name} on ${on_string}: polished assembly">
+        <filter>assembly['should_assemble'] == 'yes' and assembly['polish_contigs']</filter>
+    </data>
+</outputs>
+<tests>
+<!-- single input file (test1.fa); assembly and polishing enabled -->
+<test expect_num_outputs="2">
+    <param name="input_fastqs" value="test1.fa" />
+    <param name="genome_size" value="13000" />
+    <param name="min_read_length" value="1000" />
+    <param name="correction_coverage" value="40" />
+    <conditional name="assembly">
+        <param name="should_assemble" value="yes" />
+        <param name="assembly_coverage" value="30" />
+        <param name="polish_contigs" value="true" />
+    </conditional>
+    <output name="out_reads" ftype="fasta.gz">
+        <assert_contents>
+            <has_size value="75000" delta="2000" />
+        </assert_contents>
+    </output>
+    <output name="out_polished_assembly" ftype="fasta">
+        <assert_contents>
+            <has_line line="&#62;bctg00000000 000000F" />
+            <has_size value="13000" delta="1000" />
+        </assert_contents>
+    </output>
+</test>
+<!-- two input files with different extensions (one .fastq, one .fasta) -->
+<test expect_num_outputs="2">
+    <param name="input_fastqs" value="test1_head.fastq,test1_tail.fasta" />
+    <param name="genome_size" value="13000" />
+    <param name="min_read_length" value="1000" />
+    <param name="correction_coverage" value="40" />
+    <conditional name="assembly">
+        <param name="should_assemble" value="yes" />
+        <param name="assembly_coverage" value="30" />
+        <param name="polish_contigs" value="true" />
+    </conditional>
+    <output name="out_reads" ftype="fasta.gz">
+        <assert_contents>
+            <has_size value="29000" delta="2000" />
+        </assert_contents>
+    </output>
+    <output name="out_polished_assembly" ftype="fasta">
+        <assert_contents>
+            <has_line line="&#62;bctg00000000 000000F" />
+            <has_size value="13000" delta="1000" />
+        </assert_contents>
+    </output>
+</test>
+<!-- advanced params 1: values set in the adv/ovs section -->
+<test expect_num_outputs="2">
+    <param name="input_fastqs" value="test1.fa" />
+    <param name="genome_size" value="13000" />
+    <param name="min_read_length" value="1000" />
+    <param name="correction_coverage" value="40" />
+    <conditional name="assembly">
+        <param name="should_assemble" value="yes" />
+        <param name="assembly_coverage" value="30" />
+        <param name="polish_contigs" value="true" />
+    </conditional>
+    <section name="adv">
+        <section name="ovs">
+            <param name="n" value="600" />
+            <param name="k" value="14" />
+            <param name="q" value="600" />
+            <param name="z" value="15" />
+            <param name="b" value="2500" />
+            <param name="a" value="800" />
+            <param name="d" value="0.25" />
+            <param name="e" value="0.4" />
+            <param name="m" value="600" />
+        </section>
+    </section>
+    <output name="out_reads" ftype="fasta.gz">
+        <assert_contents>
+            <has_size value="75000" delta="2000" />
+        </assert_contents>
+    </output>
+    <output name="out_polished_assembly" ftype="fasta">
+        <assert_contents>
+            <has_line line="&#62;bctg00000000 000000F" />
+            <has_size value="13000" delta="1000" />
+        </assert_contents>
+    </output>
+</test>
+<!-- advanced params 2: values set in the adv/fol section -->
+<test expect_num_outputs="2">
+    <param name="input_fastqs" value="test1.fa" />
+    <param name="genome_size" value="13000" />
+    <param name="min_read_length" value="1000" />
+    <param name="correction_coverage" value="40" />
+    <conditional name="assembly">
+        <param name="should_assemble" value="yes" />
+        <param name="assembly_coverage" value="30" />
+        <param name="polish_contigs" value="true" />
+    </conditional>
+    <section name="adv">
+        <section name="fol">
+            <param name="min_length" value="2000" />
+            <param name="max_length" value="200000" />
+            <param name="min_aligned_length" value="2000" />
+            <param name="max_overhang" value="20000" />
+            <param name="min_coverage" value="5" />
+            <param name="bestn" value="5" />
+            <param name="overhang_local_deviation1" value="5" />
+        </section>
+    </section>
+    <output name="out_reads" ftype="fasta.gz">
+        <assert_contents>
+            <has_size value="75000" delta="2000" />
+        </assert_contents>
+    </output>
+    <output name="out_polished_assembly" ftype="fasta">
+        <assert_contents>
+            <has_line line="&#62;bctg00000000 000000F" />
+            <has_size value="13000" delta="1000" />
+        </assert_contents>
+    </output>
+</test>
+<!-- advanced params 3: values set in the adv/fa section -->
+<test expect_num_outputs="2">
+    <param name="input_fastqs" value="test1.fa" />
+    <param name="genome_size" value="13000" />
+    <param name="min_read_length" value="1000" />
+    <param name="correction_coverage" value="40" />
+    <conditional name="assembly">
+        <param name="should_assemble" value="yes" />
+        <param name="assembly_coverage" value="30" />
+        <param name="polish_contigs" value="true" />
+    </conditional>
+    <section name="adv">
+        <section name="fa">
+            <param name="min_length" value="1000" />
+            <param name="min_identity" value="40" />
+            <param name="min_contig_length" value="600" />
+            <param name="select_branch" value="true" />
+        </section>
+    </section>
+    <output name="out_reads" ftype="fasta.gz">
+        <assert_contents>
+            <has_size value="75000" delta="2000" />
+        </assert_contents>
+    </output>
+    <output name="out_polished_assembly" ftype="fasta">
+        <assert_contents>
+            <has_line line="&#62;bctg00000000 000000F" />
+            <has_size value="13000" delta="1000" />
+        </assert_contents>
+    </output>
+</test>
+<!-- advanced params 4: values set in the adv/fcb section -->
+<test expect_num_outputs="2">
+    <param name="input_fastqs" value="test1.fa" />
+    <param name="genome_size" value="13000" />
+    <param name="min_read_length" value="1000" />
+    <param name="correction_coverage" value="40" />
+    <conditional name="assembly">
+        <param name="should_assemble" value="yes" />
+        <param name="assembly_coverage" value="30" />
+        <param name="polish_contigs" value="true" />
+    </conditional>
+    <section name="adv">
+        <section name="fcb">
+            <param name="read_min_length" value="4000" />
+            <param name="ctg_min_length" value="1000" />
+            <param name="ctg2ctg_min_identity" value="90" />
+            <param name="read2ctg_min_identity" value="60" />
+            <param name="min_contig_length" value="1000" />
+        </section>
+    </section>
+    <output name="out_reads" ftype="fasta.gz">
+        <assert_contents>
+            <has_size value="75000" delta="2000" />
+        </assert_contents>
+    </output>
+    <output name="out_polished_assembly" ftype="fasta">
+        <assert_contents>
+            <has_line line="&#62;bctg00000000 000000F" />
+            <has_size value="13000" delta="1000" />
+        </assert_contents>
+    </output>
+</test>
+</tests>
+<help><![CDATA[
+NECAT
+.....
+**What it does**
+| NECAT performs error correction to remove complex errors in nanopore reads. It can also optionally perform de novo assembly.
+| After assembly it is recommended to use MEDAKA for long-read polishing, then NextPolish for short-read polishing.
+|
+| Github: https://github.com/xiaochuanle/NECAT
+|
+**Input**
+- One or more files or collections containing sequence reads (fastq / fasta)
+**Output**
+- Corrected reads (fasta)
+- Genome assembly (fasta) (Optional)
+|
+**Advanced Settings**
+| Necat runs multiple subprograms in an assembly pipeline to create its final output.
+| Each subprogram does a specific task, then hands its output to the next.
+| The subprograms are listed in order below, alongside the settings which can be configured:
+|
+*oc2pmov*
+| Finds overlaps between raw-reads
+| *Overlap Sensitive Options & Overlap Fast Options*
+|
+-k <Integer>    kmer size
+-z <Integer>    scan window size
+-q <Integer>    kmer occurs > q times will be ignored
+-b <Integer>    block size
+-n <Integer>    number of candidates
+-a <Integer>    min align length
+-d <Real>       ddf score cutoff
+-e <Real>       sequencing error
+-m <Integer>    number of output
+|
+| DEFAULT OPTIONS:
+| -k 15 -z 10 -q 500 -b 2000 -s 3 -n 500 -a 500 -d 0.250000 -e 0.500000 -m 500 -t 1
+|
+|
+*oc2cns*
+| Creates consensus reads from raw-read overlaps
+| *Consensus Sensitive Options & Consensus Fast Options*
+|
+-a <Integer>    align length cutoff
+-x <Integer>    minimal coverage
+-y <Integer>    maximal coverage
+-l <Integer>    minimal length of corrected reads.
+-f <0 or 1>     full consensus or not: 1 = yes, 0 = no
+-e <Real>       sequencing error
+-p <Real>       minimal mapping ratio
+-r <0 or 1>     rescue long indels or not: 1 = yes, 0 = no
+-u <0 or 1>     use dynamic or fixed ident cutoff: 1 = fixed, 0 = dynamic
+|
+| DEFAULT OPTIONS:
+| -a 400 -x 4 -y 12 -l 500 -f 0 -e 0.500000 -p 0.800000 -t 1 -r 0 -u 0 -s 0
+|
+|
+*oc2asmpm*
+| Identifies corrected-read overlaps for assembly
+| *Trimming Overlap Options & Assembly Overlap Options*
+|
+-k <Integer>    kmer size
+-z <Integer>    scan window size
+-q <Integer>    kmer occurs > q times will be ignored
+-b <Integer>    block size
+-n <Integer>    number of candidates
+-a <Integer>    min align length
+-d <Real>       ddf score cutoff
+-e <Real>       sequencing error
+-m <Integer>    number of output
+|
+|
+*fsa_ol_filter*
+| Filters out low-quality corrected-read overlaps for assembly
+| *Assembly Overlap Filtering Options*
+|
+--min_length=INT                      minimum length of reads. default: 2500
+--max_length=INT                      maximum length of reads. default: 2147483647
+--min_identity=DOUBLE                 minimum identity of overlaps default: -1
+--min_aligned_length=INT              minimum aligned length of overlaps default: 2500
+--max_overhang=INT                    maximum overhang of overlaps, negative number = determined by the program. default: -1
+--min_coverage=INT                    minimum base coverage, negative number = determined by the program. default: -1
+--max_coverage=INT                    maximum base coverage, negative number = determined by the program default: -1
+--max_diff_coverage=INT               maximum difference of base coverage, negative number = determined by the program default: -1
+--coverage_discard=DOUBLE             discard ratio of base coverage. If max_coverage or max_diff_coverage is negative, it will be reset to (100-coverage_discard)th percentile. default: 0.01
+--bestn=INT                           output best n overlaps on 5' or 3' end for each read.  default: 10
+--genome_size=INT                     genome size. It determines the maximum length of reads with coverage together default: 0
+--coverage=INT                        coverage. It determines the maximum length of reads with genome_size together default: 40
+--identity_global_deviation1=DOUBLE   If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 98
+--identity_global_deviation2=DOUBLE   If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 6
+--overhang_global_deviation1=DOUBLE   If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2 default: 30
+--overhang_global_deviation2=DOUBLE   If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2 default: 6
+--identity_local_deviation1=DOUBLE    The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 99
+--identity_local_deviation2=DOUBLE    The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 6
+--overhang_local_deviation1=DOUBLE    The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2 default: 10
+--overhang_local_deviation2=DOUBLE    The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2 default: 6
+--identity_local_condition=INT        Local filtering conditions. 0 = overlap identity < threshold, 1 = overlap identity < threshold and query identity >= target identity default: 0
+--local_low_coverage=INT              If the coverage of reads is less than local_low_coverage, min_identity and max_overhang are used to filter out low-quality overlaps. Otherwise, the local threshold is used. default: 25
+|
+|
+*fsa_assemble*
+| Constructs contigs from filtered overlaps
+| *Contig Assembly Options*
+|
+--min_length=INT            minimum length of reads default: 0
+--min_identity=DOUBLE       minimum identity of overlaps default: 0
+--min_aligned_length=INT    minimum aligned length of overlaps default: 0
+--min_contig_length=INT     minimum length of contigs default: 500
+--select_branch=BOOL        select the most probable branch default: "no"
+--max_spur_length=INT       branches shorter than the threshold are treated as spurs default: 50000
+|
+|
+*fsa_ctg_bridge*
+| Bridges contigs using input long raw-reads
+| *Contig Bridging Options*
+|
+--read_min_length=INT               minimum rawread length default: 5000
+--ctg_min_length=INT                minimum contig length default: 500
+--ctg2ctg_min_identity=DOUBLE       minimum identity of overlaps between contigs default: 95
+--ctg2ctg_max_overhang=INT          maximum overhang of overlaps between contigs default: 100
+--ctg2ctg_min_aligned_length=INT    minimum aligned length of overlaps between contigs default: 2000
+--read2ctg_min_identity=DOUBLE      minimum identity of overlaps between rawreads and contigs default: 80
+--read2ctg_max_overhang=INT         maximum overhang of overlaps between rawreads and contigs default: 500
+--read2ctg_min_aligned_length=INT   minimum aligned length of overlaps between rawreads and contigs default: 5000
+--read2ctg_min_coverage=INT         minimum coverage of links between rawreads and contigs default: 3
+--min_contig_length=INT             minimum length of bridged contig default: 500
+--select_branch=BOOL                select the most probable branch default: "no"
+--window_size=INT                   threshold is used to group rawreads that bridge contigs default: 1000
+|
+]]></help>
+<expand macro="citations" />
+</tool>

Mercurial > repos > iuc > necat

comparison necat.xml @ 0:6ee7eb5821f0 draft default tip