Mercurial > repos > iuc > necat
diff necat.xml @ 0:6ee7eb5821f0 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/necat commit 6946d81de9419c90e9bc4ea2f7bd5e4168dd6dd6
author | iuc |
---|---|
date | Fri, 25 Nov 2022 14:24:27 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/necat.xml Fri Nov 25 14:24:27 2022 +0000 @@ -0,0 +1,440 @@ +<tool id="necat" name="necat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT"> + <description>Error correction and de-novo assembly for ONT Nanopore reads</description> + <macros> + <import>macros.xml</import> + </macros> + <xrefs> + <xref type="bio.tools">necat</xref> + </xrefs> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">necat</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + ## helper function + #def make_filename($i, $input_param) + #set ext = $input_param.extension + #if $ext == "fastqsanger" + #set $ext = "fastq" + #end if + #set filename = "reads_" + str($i) + "." + $ext + #return $filename + #end def + + ## push each input file and everything in input collections into read_list.txt + #set i = 1 + #for input in $input_fastqs + #set filename = $make_filename($i, $input) + cp '$input' $filename + && echo $filename >> read_list.txt && + #set i = $i + 1 + #end for + + ## #for $i, $input in enumerate($input_fastqs): + ## #set filename = 'reads_${i}.$input.ext' + ## ln -s '$input' $filename && + ## echo $filename >> read_list.txt && + ## #end for + + ## necat commands + necat correct '${job_configfile}' + #if $assembly.should_assemble == "yes": + && necat assemble '${job_configfile}' + && necat bridge '${job_configfile}' + #end if + ]]></command> + <configfiles> + <expand macro="job_conf" /> + </configfiles> + <inputs> + <param name="input_fastqs" type="data" format="fastq,fastq.gz,fasta,fasta.gz" multiple="true" label="Input reads" help="Input read files (FASTQ or FASTA). 
To select more than one file or collection from your history, use the 'ctrl' key" /> + + <param name="genome_size" type="integer" value="" min="1" max="100000000000" label="Genome size" help="Estimated size of genome (bp)" /> + <param name="min_read_length" type="integer" value="1000" min="1" max="10000000" label="Min read length" help="Minimum length for input reads" /> + <param name="correction_coverage" type="integer" value="40" min="1" max="10000" label="Correction coverage" help="Number of reads to correct in terms of genome coverage. For a 4Gb genome, setting correction coverage = 10 will correct the longest 40Gb worth of reads from the input fastq. " /> + <conditional name="assembly"> + <param name="should_assemble" type="select" label="Assembly"> + <option value="no" selected="true">Don't perform assembly</option> + <option value="yes">Perform assembly on corrected reads</option> + </param> + <when value="no" /> + <when value="yes"> + <param name="assembly_coverage" type="integer" value="30" min="1" max="10000" label="Assembly coverage" help="Number of reads to use in genome assembly in terms of genome coverage" /> + <param name="polish_contigs" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Polish contigs" help="Polish contigs as final step after bridging" /> + </when> + </conditional> + + <section name="adv" title="Advanced options" expanded="false" help="Warning: only change these if you really know what you are doing"> + <expand macro="overlap_sensitive_options" /> + <expand macro="consensus_sensitive_options" /> + <expand macro="overlap_fast_options" /> + <expand macro="consensus_fast_options" /> + <expand macro="trimming_overlap_options" /> + <expand macro="assembly_overlap_options" /> + <expand macro="assembly_overlap_filtering" /> + <expand macro="contig_assembly" /> + <expand macro="contig_bridging" /> + </section> + </inputs> + <outputs> + <data name="out_reads" format="fasta.gz" 
from_work_dir="project/1-consensus/cns_final.fasta.gz" label="${tool.name} on ${on_string}: corrected reads" /> + <!-- unpolished bridging result; only exposed when assembly runs with polishing switched off --> + <data name="out_assembly" format="fasta" from_work_dir="project/6-bridge_contigs/bridged_contigs.fasta" label="${tool.name} on ${on_string}: bridged assembly"> + <filter>assembly['should_assemble'] == 'yes' and not assembly['polish_contigs']</filter> + </data> + <!-- polished contigs; only exposed when assembly runs with polishing switched on --> + <data name="out_polished_assembly" format="fasta" from_work_dir="project/6-bridge_contigs/polished_contigs.fasta" label="${tool.name} on ${on_string}: polished assembly"> + <filter>assembly['should_assemble'] == 'yes' and assembly['polish_contigs']</filter> + </data> + </outputs> + <tests> + <!-- single input fastq --> + <test expect_num_outputs="2"> + <param name="input_fastqs" value="test1.fa" /> + <param name="genome_size" value="13000" /> + <param name="min_read_length" value="1000" /> + <param name="correction_coverage" value="40" /> + <conditional name="assembly"> + <param name="should_assemble" value="yes" /> + <param name="assembly_coverage" value="30"/> + <param name="polish_contigs" value="true"/> + </conditional> + <output name="out_reads" ftype="fasta.gz"> + <assert_contents> + <has_size value="75000" delta="2000" /> + </assert_contents> + </output> + <output name="out_polished_assembly" ftype="fasta"> + <assert_contents> + <has_line line=">bctg00000000 000000F" /> + <has_size value="13000" delta="1000" /> + </assert_contents> + </output> + </test> + <!-- multiple input files of different format --> + <test expect_num_outputs="2"> + <param name="input_fastqs" value="test1_head.fastq,test1_tail.fasta" /> + <param name="genome_size" value="13000" /> + <param name="min_read_length" value="1000" /> + <param name="correction_coverage" value="40" /> + <conditional name="assembly"> + <param name="should_assemble" value="yes" /> + <param name="assembly_coverage" value="30"/> + <param name="polish_contigs" value="true"/> + </conditional> + <output name="out_reads" ftype="fasta.gz"> + 
<assert_contents> + <has_size value="29000" delta="2000" /> + </assert_contents> + </output> + <output name="out_polished_assembly" ftype="fasta"> + <assert_contents> + <has_line line=">bctg00000000 000000F" /> + <has_size value="13000" delta="1000" /> + </assert_contents> + </output> + </test> + <!-- advanced params 1 --> + <test expect_num_outputs="2"> + <param name="input_fastqs" value="test1.fa" /> + <param name="genome_size" value="13000" /> + <param name="min_read_length" value="1000" /> + <param name="correction_coverage" value="40" /> + <conditional name="assembly"> + <param name="should_assemble" value="yes" /> + <param name="assembly_coverage" value="30"/> + <param name="polish_contigs" value="true"/> + </conditional> + <section name="adv"> + <section name="ovs"> + <param name="n" value="600" /> + <param name="k" value="14" /> + <param name="q" value="600" /> + <param name="z" value="15" /> + <param name="b" value="2500" /> + <param name="a" value="800" /> + <param name="d" value="0.25" /> + <param name="e" value="0.4" /> + <param name="m" value="600" /> + </section> + </section> + <output name="out_reads" ftype="fasta.gz"> + <assert_contents> + <has_size value="75000" delta="2000" /> + </assert_contents> + </output> + <output name="out_polished_assembly" ftype="fasta"> + <assert_contents> + <has_line line=">bctg00000000 000000F" /> + <has_size value="13000" delta="1000" /> + </assert_contents> + </output> + </test> + <!-- advanced params 2 --> + <test expect_num_outputs="2"> + <param name="input_fastqs" value="test1.fa" /> + <param name="genome_size" value="13000" /> + <param name="min_read_length" value="1000" /> + <param name="correction_coverage" value="40" /> + <conditional name="assembly"> + <param name="should_assemble" value="yes" /> + <param name="assembly_coverage" value="30"/> + <param name="polish_contigs" value="true"/> + </conditional> + <section name="adv"> + <section name="fol"> + <param name="min_length" value="2000" /> + <param 
name="max_length" value="200000" /> + <param name="min_aligned_length" value="2000" /> + <param name="max_overhang" value="20000" /> + <param name="min_coverage" value="5" /> + <param name="bestn" value="5" /> + <param name="overhang_local_deviation1" value="5" /> + </section> + </section> + <output name="out_reads" ftype="fasta.gz"> + <assert_contents> + <has_size value="75000" delta="2000" /> + </assert_contents> + </output> + <output name="out_polished_assembly" ftype="fasta"> + <assert_contents> + <has_line line=">bctg00000000 000000F" /> + <has_size value="13000" delta="1000" /> + </assert_contents> + </output> + </test> + <!-- advanced params 3 --> + <test expect_num_outputs="2"> + <param name="input_fastqs" value="test1.fa" /> + <param name="genome_size" value="13000" /> + <param name="min_read_length" value="1000" /> + <param name="correction_coverage" value="40" /> + <conditional name="assembly"> + <param name="should_assemble" value="yes" /> + <param name="assembly_coverage" value="30"/> + <param name="polish_contigs" value="true"/> + </conditional> + <section name="adv"> + <section name="fa"> + <param name="min_length" value="1000" /> + <param name="min_identity" value="40" /> + <param name="min_contig_length" value="600" /> + <param name="select_branch" value="true" /> + </section> + </section> + <output name="out_reads" ftype="fasta.gz"> + <assert_contents> + <has_size value="75000" delta="2000" /> + </assert_contents> + </output> + <output name="out_polished_assembly" ftype="fasta"> + <assert_contents> + <has_line line=">bctg00000000 000000F" /> + <has_size value="13000" delta="1000" /> + </assert_contents> + </output> + </test> + <!-- advanced params 4 --> + <test expect_num_outputs="2"> + <param name="input_fastqs" value="test1.fa" /> + <param name="genome_size" value="13000" /> + <param name="min_read_length" value="1000" /> + <param name="correction_coverage" value="40" /> + <conditional name="assembly"> + <param name="should_assemble" value="yes" 
/> + <param name="assembly_coverage" value="30"/> + <param name="polish_contigs" value="true"/> + </conditional> + <section name="adv"> + <section name="fcb"> + <param name="read_min_length" value="4000" /> + <param name="ctg_min_length" value="1000" /> + <param name="ctg2ctg_min_identity" value="90" /> + <param name="read2ctg_min_identity" value="60" /> + <param name="min_contig_length" value="1000" /> + </section> + </section> + <output name="out_reads" ftype="fasta.gz"> + <assert_contents> + <has_size value="75000" delta="2000" /> + </assert_contents> + </output> + <output name="out_polished_assembly" ftype="fasta"> + <assert_contents> + <has_line line=">bctg00000000 000000F" /> + <has_size value="13000" delta="1000" /> + </assert_contents> + </output> + </test> + </tests> + + <help><![CDATA[ + +NECAT +..... + +**What it does** + +| NECAT performs error correction to remove complex errors in nanopore reads. It can also optionally perform de novo assembly. +| After assembly it is recommended to use MEDAKA for long-read polishing, then NextPolish for short-read polishing. +| +| GitHub: https://github.com/xiaochuanle/NECAT +| + +**Input** + +- One or more files or collections containing sequence reads (fastq / fasta) + +**Output** + +- Corrected reads (fasta) +- Genome assembly (fasta) (Optional) + +| + +**Advanced Settings** + +| NECAT runs multiple subprograms in an assembly pipeline to create its final output. +| Each subprogram does a specific task, then hands its output to the next. 
+| The subprograms are listed in order below, alongside the settings which can be configured: +| + +*oc2pmov* + +| Finds overlaps between raw-reads +| *Overlap Sensitive Options & Overlap Fast Options* +| + +-k <Integer> kmer size +-z <Integer> scan window size +-q <Integer> kmer occurs > q times will be ignored +-b <Integer> block size +-n <Integer> number of candidates +-a <Integer> min align length +-d <Real> ddf score cutoff +-e <Real> sequencing error +-m <Integer> number of output + +| + +| DEFAULT OPTIONS: +| -k 15 -z 10 -q 500 -b 2000 -s 3 -n 500 -a 500 -d 0.250000 -e 0.500000 -m 500 -t 1 + +| +| + +*oc2cns* + +| Creates consensus reads from raw-read overlaps +| *Consensus Sensitive Options & Consensus Fast Options* +| + +-a <Integer> align length cutoff +-x <Integer> minimal coverage +-y <Integer> maximal coverage +-l <Integer> minimal length of corrected reads. +-f <0 or 1> full consensus or not: 1 = yes, 0 = no +-e <Real> sequencing error +-p <Real> minimal mapping ratio +-r <0 or 1> rescue long indels or not: 1 = yes, 0 = no +-u <0 or 1> use dynamic or fixed ident cutoff: 1 = fixed, 0 = dynamic + +| + +| DEFAULT OPTIONS: +| -a 400 -x 4 -y 12 -l 500 -f 0 -e 0.500000 -p 0.800000 -t 1 -r 0 -u 0 -s 0 + +| +| + +*oc2asmpm* + +| Identifies corrected-read overlaps for assembly +| *Trimming Overlap Options & Assembly Overlap Options* +| + + +-k <Integer> kmer size +-z <Integer> scan window size +-q <Integer> kmer occurs > q times will be ignored +-b <Integer> block size +-n <Integer> number of candidates +-a <Integer> min align length +-d <Real> ddf score cutoff +-e <Real> sequencing error +-m <Integer> number of output + +| +| + +*fsa_ol_filter* + +| Filters out low-quality corrected-read overlaps for assembly +| *Assembly Overlap Filtering Options* +| + +--min_length=INT minimum length of reads. default: 2500 +--max_length=INT maximum length of reads. 
default: 2147483647 +--min_identity=DOUBLE minimum identity of overlaps default: -1 +--min_aligned_length=INT minimum aligned length of overlaps default: 2500 +--max_overhang=INT maximum overhang of overlaps, negative number = determined by the program. default: -1 +--min_coverage=INT minimum base coverage, negative number = determined by the program. default: -1 +--max_coverage=INT maximum base coverage, negative number = determined by the program default: -1 +--max_diff_coverage=INT maximum difference of base coverage, negative number = determined by the program default: -1 +--coverage_discard=DOUBLE discard ratio of base coverage. If max_coverage or max_diff_coverage is negative, it will be reset to (100-coverage_discard)th percentile. default: 0.01 +--bestn=INT output best n overlaps on 5' or 3' end for each read. default: 10 +--genome_size=INT genome size. It determines the maximum length of reads with coverage together default: 0 +--coverage=INT coverage. It determines the maximum length of reads with genome_size together default: 40 +--identity_global_deviation1=DOUBLE If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 98 +--identity_global_deviation2=DOUBLE If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 6 +--overhang_global_deviation1=DOUBLE If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2 default: 30 +--overhang_global_deviation2=DOUBLE If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2 default: 6 +--identity_local_deviation1=DOUBLE The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 99 +--identity_local_deviation2=DOUBLE The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 6 +--overhang_local_deviation1=DOUBLE The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2 default: 10 
+--overhang_local_deviation2=DOUBLE The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2 default: 6 +--identity_local_condition=INT Local filtering conditions. 0 = overlap identity < threshold, 1 = overlap identity < threshold and query identity >= target identity default: 0 +--local_low_coverage=INT If the coverage of reads is less than local_low_coverage, min_identity and max_overhang are used to filter out low-quality overlaps. Otherwise, the local threshold is used. default: 25 + +| +| + +*fsa_assemble* + +| Constructs contigs from filtered overlaps +| *Contig Assembly Options* +| + +--min_length=INT minimum length of reads default: 0 +--min_identity=DOUBLE minimum identity of overlaps default: 0 +--min_aligned_length=INT minimum aligned length of overlaps default: 0 +--min_contig_length=INT minimum length of contigs default: 500 +--select_branch=BOOL select the most probable branch default: "no" +--max_spur_length=INT branches shorter than the threshold are treated as spurs default: 50000 + +| +| + +*fsa_ctg_bridge* + +| Bridges contigs using input long raw-reads +| *Contig Bridging Options* +| + +--read_min_length=INT minimum rawread length default: 5000 +--ctg_min_length=INT minimum contig length default: 500 +--ctg2ctg_min_identity=DOUBLE minimum identity of overlaps between contigs default: 95 +--ctg2ctg_max_overhang=INT maximum overhang of overlaps between contigs default: 100 +--ctg2ctg_min_aligned_length=INT minimum aligned length of overlaps between contigs default: 2000 +--read2ctg_min_identity=DOUBLE minimum identity of overlaps between rawreads and contigs default: 80 +--read2ctg_max_overhang=INT maximum overhang of overlaps between rawreads and contigs default: 500 +--read2ctg_min_aligned_length=INT minimum aligned length of overlaps between rawreads and contigs default: 5000 +--read2ctg_min_coverage=INT minimum coverage of links between rawreads and contigs default: 3 +--min_contig_length=INT minimum length of bridged 
contig default: 500 +--select_branch=BOOL select the most probable branch default: "no" +--window_size=INT threshold is used to group rawreads that bridge contigs default: 1000 + +| + + + ]]></help> + <expand macro="citations" /> +</tool> \ No newline at end of file