Mercurial > repos > iuc > necat
view necat.xml @ 0:6ee7eb5821f0 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/necat commit 6946d81de9419c90e9bc4ea2f7bd5e4168dd6dd6
author | iuc |
---|---|
date | Fri, 25 Nov 2022 14:24:27 +0000 |
parents | |
children |
line wrap: on
line source
<!--
    Galaxy wrapper for NECAT: error correction and (optionally) de novo assembly
    of Oxford Nanopore reads.

    The @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens and every <expand macro="..."/>
    used below come from macros.xml, which is imported but not part of this file.
-->
<tool id="necat" name="necat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
    <description>Error correction and de-novo assembly for ONT Nanopore reads</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <xrefs>
        <xref type="bio.tools">necat</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">necat</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## helper function: build the staged file name "reads_<i>.<ext>" for one input dataset
        #def make_filename($i, $input_param)
            #set ext = $input_param.extension
            ## Galaxy's "fastqsanger" / "fastqsanger.gz" datatypes are staged with a
            ## plain "fastq" / "fastq.gz" suffix; all other extensions are kept as-is
            #set ext = $ext.replace("fastqsanger", "fastq")
            #set filename = "reads_" + str($i) + "." + $ext
            #return $filename
        #end def

        ## push each input file and everything in input collections into read_list.txt
        ## (files are numbered from 1 in the order Galaxy supplies them)
        #set i = 1
        #for input in $input_fastqs
            #set filename = $make_filename($i, $input)
            cp '$input' '$filename' &&
            echo '$filename' >> read_list.txt &&
            #set i = $i + 1
        #end for

        ## disabled alternative (symlink-based staging), kept for reference:
        ## #for $i, $input in enumerate($input_fastqs):
        ##     #set filename = 'reads_${i}.$input.ext'
        ##     ln -s '$input' $filename &&
        ##     echo $filename >> read_list.txt &&
        ## #end for

        ## necat commands: correction always runs; assembly and bridging only on request
        necat correct '${job_configfile}'
        #if $assembly.should_assemble == "yes":
            && necat assemble '${job_configfile}'
            && necat bridge '${job_configfile}'
        #end if
    ]]></command>
    <configfiles>
        <!-- Expected to define the `job_configfile` config file referenced by the
             command above; the macro body lives in macros.xml (not visible here). -->
        <expand macro="job_conf" />
    </configfiles>
    <inputs>
        <param name="input_fastqs" type="data" format="fastq,fastq.gz,fasta,fasta.gz" multiple="true" label="Input reads" help="Input read files (FASTQ or FASTA). To select more than one file or collection from your history, use the 'ctrl' key" />
        <!-- No default value: the user has to supply the genome size explicitly. -->
        <param name="genome_size" type="integer" value="" min="1" max="100000000000" label="Genome size" help="Estimated size of genome (bp)" />
        <param name="min_read_length" type="integer" value="1000" min="1" max="10000000" label="Min read length" help="Minimum length for input reads" />
        <param name="correction_coverage" type="integer" value="40" min="1" max="10000" label="Correction coverage" help="Number of reads to correct in terms of genome coverage. For a 4Gb genome, setting correction coverage = 10 will correct the longest 40Gb worth of reads from the input fastq." />
        <conditional name="assembly">
            <param name="should_assemble" type="select" label="Assembly">
                <option value="no" selected="true">Don't perform assembly</option>
                <option value="yes">Perform assembly on corrected reads</option>
            </param>
            <when value="no" />
            <when value="yes">
                <param name="assembly_coverage" type="integer" value="30" min="1" max="10000" label="Assembly coverage" help="Number of reads to use in genome assembly in terms of genome coverage" />
                <param name="polish_contigs" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Polish contigs" help="Polish contigs as final step after bridging" />
            </when>
        </conditional>
        <section name="adv" title="Advanced options" expanded="false" help="Warning: only change these if you really know what you are doing">
            <expand macro="overlap_sensitive_options" />
            <expand macro="consensus_sensitive_options" />
            <expand macro="overlap_fast_options" />
            <expand macro="consensus_fast_options" />
            <expand macro="trimming_overlap_options" />
            <expand macro="assembly_overlap_options" />
            <expand macro="assembly_overlap_filtering" />
            <expand macro="contig_assembly" />
            <expand macro="contig_bridging" />
        </section>
    </inputs>
    <outputs>
        <data name="out_reads" format="fasta.gz" from_work_dir="project/1-consensus/cns_final.fasta.gz" label="${tool.name} on ${on_string}: corrected reads" />
        <!-- Exactly one of the two assembly outputs is produced when assembly is requested.
             Without polishing, the bridged contigs are collected; with polishing, the
             polished contigs are collected (file names as given in the NECAT README). -->
        <data name="out_assembly" format="fasta" from_work_dir="project/6-bridge_contigs/bridged_contigs.fasta" label="${tool.name} on ${on_string}: bridged assembly">
            <filter>assembly['should_assemble'] == 'yes' and not assembly['polish_contigs']</filter>
        </data>
        <data name="out_polished_assembly" format="fasta" from_work_dir="project/6-bridge_contigs/polished_contigs.fasta" label="${tool.name} on ${on_string}: polished assembly">
            <filter>assembly['should_assemble'] == 'yes' and assembly['polish_contigs']</filter>
        </data>
    </outputs>
    <tests>
        <!-- single input file -->
        <test expect_num_outputs="2">
            <param name="input_fastqs" value="test1.fa" />
            <param name="genome_size" value="13000" />
            <param name="min_read_length" value="1000" />
            <param name="correction_coverage" value="40" />
            <conditional name="assembly">
                <param name="should_assemble" value="yes" />
                <param name="assembly_coverage" value="30"/>
                <param name="polish_contigs" value="true"/>
            </conditional>
            <output name="out_reads" ftype="fasta.gz">
                <assert_contents>
                    <has_size value="75000" delta="2000" />
                </assert_contents>
            </output>
            <output name="out_polished_assembly" ftype="fasta">
                <assert_contents>
                    <has_line line=">bctg00000000 000000F" />
                    <has_size value="13000" delta="1000" />
                </assert_contents>
            </output>
        </test>
        <!-- multiple input files of different format -->
        <test expect_num_outputs="2">
            <param name="input_fastqs" value="test1_head.fastq,test1_tail.fasta" />
            <param name="genome_size" value="13000" />
            <param name="min_read_length" value="1000" />
            <param name="correction_coverage" value="40" />
            <conditional name="assembly">
                <param name="should_assemble" value="yes" />
                <param name="assembly_coverage" value="30"/>
                <param name="polish_contigs" value="true"/>
            </conditional>
            <output name="out_reads" ftype="fasta.gz">
                <assert_contents>
                    <has_size value="29000" delta="2000" />
                </assert_contents>
            </output>
            <output name="out_polished_assembly" ftype="fasta">
                <assert_contents>
                    <has_line line=">bctg00000000 000000F" />
                    <has_size value="13000" delta="1000" />
                </assert_contents>
            </output>
        </test>
        <!-- advanced params 1 -->
        <test expect_num_outputs="2">
            <param name="input_fastqs" value="test1.fa" />
            <param name="genome_size" value="13000" />
            <param name="min_read_length" value="1000" />
            <param name="correction_coverage" value="40" />
            <conditional name="assembly">
                <param name="should_assemble" value="yes" />
                <param name="assembly_coverage" value="30"/>
                <param name="polish_contigs" value="true"/>
            </conditional>
            <section name="adv">
                <section name="ovs">
                    <param name="n" value="600" />
                    <param name="k" value="14" />
                    <param name="q" value="600" />
                    <param name="z" value="15" />
                    <param name="b" value="2500" />
                    <param name="a" value="800" />
                    <param name="d" value="0.25" />
                    <param name="e" value="0.4" />
                    <param name="m" value="600" />
                </section>
            </section>
            <output name="out_reads" ftype="fasta.gz">
                <assert_contents>
                    <has_size value="75000" delta="2000" />
                </assert_contents>
            </output>
            <output name="out_polished_assembly" ftype="fasta">
                <assert_contents>
                    <has_line line=">bctg00000000 000000F" />
                    <has_size value="13000" delta="1000" />
                </assert_contents>
            </output>
        </test>
        <!-- advanced params 2 -->
        <test expect_num_outputs="2">
            <param name="input_fastqs" value="test1.fa" />
            <param name="genome_size" value="13000" />
            <param name="min_read_length" value="1000" />
            <param name="correction_coverage" value="40" />
            <conditional name="assembly">
                <param name="should_assemble" value="yes" />
                <param name="assembly_coverage" value="30"/>
                <param name="polish_contigs" value="true"/>
            </conditional>
            <section name="adv">
                <section name="fol">
                    <param name="min_length" value="2000" />
                    <param name="max_length" value="200000" />
                    <param name="min_aligned_length" value="2000" />
                    <param name="max_overhang" value="20000" />
                    <param name="min_coverage" value="5" />
                    <param name="bestn" value="5" />
                    <param name="overhang_local_deviation1" value="5" />
                </section>
            </section>
            <output name="out_reads" ftype="fasta.gz">
                <assert_contents>
                    <has_size value="75000" delta="2000" />
                </assert_contents>
            </output>
            <output name="out_polished_assembly" ftype="fasta">
                <assert_contents>
                    <has_line line=">bctg00000000 000000F" />
                    <has_size value="13000" delta="1000" />
                </assert_contents>
            </output>
        </test>
        <!-- advanced params 3 -->
        <test expect_num_outputs="2">
            <param name="input_fastqs" value="test1.fa" />
            <param name="genome_size" value="13000" />
            <param name="min_read_length" value="1000" />
            <param name="correction_coverage" value="40" />
            <conditional name="assembly">
                <param name="should_assemble" value="yes" />
                <param name="assembly_coverage" value="30"/>
                <param name="polish_contigs" value="true"/>
            </conditional>
            <section name="adv">
                <section name="fa">
                    <param name="min_length" value="1000" />
                    <param name="min_identity" value="40" />
                    <param name="min_contig_length" value="600" />
                    <param name="select_branch" value="true" />
                </section>
            </section>
            <output name="out_reads" ftype="fasta.gz">
                <assert_contents>
                    <has_size value="75000" delta="2000" />
                </assert_contents>
            </output>
            <output name="out_polished_assembly" ftype="fasta">
                <assert_contents>
                    <has_line line=">bctg00000000 000000F" />
                    <has_size value="13000" delta="1000" />
                </assert_contents>
            </output>
        </test>
        <!-- advanced params 4 -->
        <test expect_num_outputs="2">
            <param name="input_fastqs" value="test1.fa" />
            <param name="genome_size" value="13000" />
            <param name="min_read_length" value="1000" />
            <param name="correction_coverage" value="40" />
            <conditional name="assembly">
                <param name="should_assemble" value="yes" />
                <param name="assembly_coverage" value="30"/>
                <param name="polish_contigs" value="true"/>
            </conditional>
            <section name="adv">
                <section name="fcb">
                    <param name="read_min_length" value="4000" />
                    <param name="ctg_min_length" value="1000" />
                    <param name="ctg2ctg_min_identity" value="90" />
                    <param name="read2ctg_min_identity" value="60" />
                    <param name="min_contig_length" value="1000" />
                </section>
            </section>
            <output name="out_reads" ftype="fasta.gz">
                <assert_contents>
                    <has_size value="75000" delta="2000" />
                </assert_contents>
            </output>
            <output name="out_polished_assembly" ftype="fasta">
                <assert_contents>
                    <has_line line=">bctg00000000 000000F" />
                    <has_size value="13000" delta="1000" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
NECAT
.....

**What it does**

| NECAT performs error correction to remove complex errors in nanopore reads. It can also optionally perform de novo assembly.
| After assembly it is recommended to use MEDAKA for long-read polishing, then NextPolish for short-read polishing.
|
| Github: https://github.com/xiaochuanle/NECAT
|

**Input**

- One or more files or collections containing sequence reads (fastq / fasta)

**Output**

- Corrected reads (fasta)
- Genome assembly (fasta) (Optional)

|

**Advanced Settings**

| Necat runs multiple subprograms in an assembly pipeline to create its final output.
| Each subprogram does a specific task, then hands its output to the next.
| The subprograms are listed in order below, alongside the settings which can be configured:
|
| *oc2pmov*
| Finds overlaps between raw-reads
| *Overlap Sensitive Options & Overlap Fast Options*
|

::

    -k <Integer>    kmer size
    -z <Integer>    scan window size
    -q <Integer>    kmer occurs > q times will be ignored
    -b <Integer>    block size
    -n <Integer>    number of candidates
    -a <Integer>    min align length
    -d <Real>       ddf score cutoff
    -e <Real>       sequencing error
    -m <Integer>    number of output

|
| DEFAULT OPTIONS:
| -k 15 -z 10 -q 500 -b 2000 -s 3 -n 500 -a 500 -d 0.250000 -e 0.500000 -m 500 -t 1
|
| *oc2cns*
| Creates consensus reads from raw-read overlaps
| *Consensus Sensitive Options & Consensus Fast Options*
|

::

    -a <Integer>    align length cutoff
    -x <Integer>    minimal coverage
    -y <Integer>    maximal coverage
    -l <Integer>    minimal length of corrected reads.
    -f <0 or 1>     full consensus or not: 1 = yes, 0 = no
    -e <Real>       sequencing error
    -p <Real>       minimal mapping ratio
    -r <0 or 1>     rescue long indels or not: 1 = yes, 0 = no
    -u <0 or 1>     use dynamic or fixed ident cutoff: 1 = fixed, 0 = dynamic

|
| DEFAULT OPTIONS:
| -a 400 -x 4 -y 12 -l 500 -f 0 -e 0.500000 -p 0.800000 -t 1 -r 0 -u 0 -s 0
|
| *oc2asmpm*
| Identifies corrected-read overlaps for assembly
| *Trimming Overlap Options & Assembly Overlap Options*
|

::

    -k <Integer>    kmer size
    -z <Integer>    scan window size
    -q <Integer>    kmer occurs > q times will be ignored
    -b <Integer>    block size
    -n <Integer>    number of candidates
    -a <Integer>    min align length
    -d <Real>       ddf score cutoff
    -e <Real>       sequencing error
    -m <Integer>    number of output

|
| *fsa_ol_filter*
| Filters out low-quality corrected-read overlaps for assembly
| *Assembly Overlap Filtering Options*
|

::

    --min_length=INT
        minimum length of reads.
        default: 2500
    --max_length=INT
        maximum length of reads.
        default: 2147483647
    --min_identity=DOUBLE
        minimum identity of overlaps
        default: -1
    --min_aligned_length=INT
        minimum aligned length of overlaps
        default: 2500
    --max_overhang=INT
        maximum overhang of overlaps, negative number = determined by the program.
        default: -1
    --min_coverage=INT
        minimum base coverage, negative number = determined by the program.
        default: -1
    --max_coverage=INT
        maximum base coverage, negative number = determined by the program
        default: -1
    --max_diff_coverage=INT
        maximum difference of base coverage, negative number = determined by the program
        default: -1
    --coverage_discard=DOUBLE
        discard ratio of base coverage. If max_coverage or max_diff_coverage is negative, it will be reset to (100-coverage_discard)th percentile.
        default: 0.01
    --bestn=INT
        output best n overlaps on 5' or 3' end for each read.
        default: 10
    --genome_size=INT
        genome size. It determines the maximum length of reads with coverage together
        default: 0
    --coverage=INT
        coverage. It determines the maximum length of reads with genome_size together
        default: 40
    --identity_global_deviation1=DOUBLE
        If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2
        default: 98
    --identity_global_deviation2=DOUBLE
        If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2
        default: 6
    --overhang_global_deviation1=DOUBLE
        If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2
        default: 30
    --overhang_global_deviation2=DOUBLE
        If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2
        default: 6
    --identity_local_deviation1=DOUBLE
        The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2
        default: 99
    --identity_local_deviation2=DOUBLE
        The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2
        default: 6
    --overhang_local_deviation1=DOUBLE
        The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2
        default: 10
    --overhang_local_deviation2=DOUBLE
        The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2
        default: 6
    --identity_local_condition=INT
        Local filtering conditions. 0 = overlap identity < threshold, 1 = overlap identity < threshold and query identity >= target identity
        default: 0
    --local_low_coverage=INT
        If the coverage of reads is less than local_low_coverage, min_identity and max_overhang are used to filter out low-quality overlaps. Otherwise, the local threshold is used.
        default: 25

|
| *fsa_assemble*
| Constructs contigs from filtered overlaps
| *Contig Assembly Options*
|

::

    --min_length=INT
        minimum length of reads
        default: 0
    --min_identity=DOUBLE
        minimum identity of overlaps
        default: 0
    --min_aligned_length=INT
        minimum aligned length of overlaps
        default: 0
    --min_contig_length=INT
        minimum length of contigs
        default: 500
    --select_branch=BOOL
        select the most probable branch
        default: "no"
    --max_spur_length=INT
        branches less than the threshold are treated as spurs
        default: 50000

|
| *fsa_ctg_bridge*
| Bridges contigs using input long raw-reads
| *Contig Bridging Options*
|

::

    --read_min_length=INT
        minimum rawread length
        default: 5000
    --ctg_min_length=INT
        minimum contig length
        default: 500
    --ctg2ctg_min_identity=DOUBLE
        minimum identity of overlaps between contigs
        default: 95
    --ctg2ctg_max_overhang=INT
        maximum overhang of overlaps between contigs
        default: 100
    --ctg2ctg_min_aligned_length=INT
        minimum aligned length of overlaps between contigs
        default: 2000
    --read2ctg_min_identity=DOUBLE
        minimum identity of overlaps between rawreads and contigs
        default: 80
    --read2ctg_max_overhang=INT
        maximum overhang of overlaps between rawreads and contigs
        default: 500
    --read2ctg_min_aligned_length=INT
        minimum aligned length of overlaps between rawreads and contigs
        default: 5000
    --read2ctg_min_coverage=INT
        minimum coverage of links between rawreads and contigs
        default: 3
    --min_contig_length=INT
        minimum length of bridged contig
        default: 500
    --select_branch=BOOL
        select the most probable branch
        default: "no"
    --window_size=INT
        threshold is used to group rawreads that bridge contigs
        default: 1000

|
    ]]></help>
    <expand macro="citations" />
</tool>