Mercurial > repos > iuc > valet
view valet.xml @ 0:56236acaad45 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/valet commit 30ecbd2ebd336d7002ca11abd69d600a24986156
author | iuc |
---|---|
date | Thu, 16 Nov 2017 08:54:46 -0500 |
parents | |
children |
line wrap: on
line source
<!--
    Galaxy wrapper for VALET (valet.py).

    Inputs : one or more candidate assemblies (FASTA + a name each) and the
             reads used to build them (single, paired, or paired collections).
    Outputs: three list collections (flagged regions, suspicious regions and
             summary tables, one element per assembly name) plus a PDF
             comparison plot.
-->
<tool id="valet" name="VALET" version="@WRAPPER_VERSION@.0">
    <description>to detect mis-assemblies in metagenomic assemblies</description>
    <macros>
        <token name="@WRAPPER_VERSION@">1.0</token>
        <!-- NOTE(review): this token is not referenced anywhere in the visible wrapper. -->
        <token name="@INPUT_@"> 1.0 </token>
        <!-- Insert size bounds, shared by every paired-read repeat. -->
        <xml name="insert_size">
            <param argument="--minins" type="integer" min="0" value="0" label="Min insert sizes for mate pairs" />
            <param argument="--maxins" type="integer" min="0" value="500" label="Max insert sizes for mate pairs" />
        </xml>
        <!-- Mate orientation, shared by both paired input modes. -->
        <xml name="orientation">
            <param argument="--orientation" type="select" label="Orientation of the mates" >
                <option value="fr">fr: mate 1 appears upstream of the reverse complement of mate 2 or mate 2 appears upstream of the reverse complement of mate 1</option>
                <option value="rf">rf: reverse-complemented mate 1 is upstream and forward-oriented mate 2 is downstream</option>
                <option value="ff">ff: both upstream mate 1 and downstream mate 2 are forward-oriented</option>
            </param>
        </xml>
    </macros>
    <requirements>
        <requirement type="package" version="@WRAPPER_VERSION@">valet</requirement>
    </requirements>
    <version_command>echo @WRAPPER_VERSION@</version_command>
    <command detect_errors="exit_code"><![CDATA[
## Gather the assemblies: valet.py takes comma-separated lists of files and names.
#set fasta_list = []
#set name_list = []
#for $repeat in $assembly
    #silent $fasta_list.append(str($repeat.assembly_fasta))
    #silent $name_list.append(str($repeat.assembly_names))
#end for

valet.py
    --assembly-fasta '${",".join($fasta_list)}'
    --assembly-names '${",".join($name_list)}'

#if $input_reads.type == 'single'
    ## The select value is the format flag itself (--fasta / --fastq).
    $input_reads.single_input_reads.type
    --reads '$input_reads.single_input_reads.reads'
#else if $input_reads.type == 'paired'
    #set mate_1 = []
    #set mate_2 = []
    #set minins = []
    #set maxins = []
    #for $repeat in $input_reads.paired_input_reads.paired_reads
        #silent $mate_1.append(str($repeat.mate_1))
        #silent $mate_2.append(str($repeat.mate_2))
        #silent $minins.append(str($repeat.minins))
        #silent $maxins.append(str($repeat.maxins))
    #end for
    --1 '${",".join($mate_1)}'
    --2 '${",".join($mate_2)}'
    --minins '${",".join($minins)}'
    --maxins '${",".join($maxins)}'
    $input_reads.paired_input_reads.type
    --orientation '$input_reads.orientation'
#else if $input_reads.type == 'paired_collection'
    #set mate_1 = []
    #set mate_2 = []
    #set minins = []
    #set maxins = []
    #for $repeat in $input_reads.paired_coll_input_reads.paired_collection_reads
        #silent $mate_1.append(str($repeat.input.forward))
        #silent $mate_2.append(str($repeat.input.reverse))
        #silent $minins.append(str($repeat.minins))
        #silent $maxins.append(str($repeat.maxins))
    #end for
    --1 '${",".join($mate_1)}'
    --2 '${",".join($mate_2)}'
    --minins '${",".join($minins)}'
    --maxins '${",".join($maxins)}'
    $input_reads.paired_coll_input_reads.type
    --orientation '$input_reads.orientation'
#end if

    --output-dir output
    --window-size '$window_size'
    --threads \${GALAXY_SLOTS:-4}
    --max-alignments '$max_alignments'
    --min-coverage '$min_coverage'
    --coverage-multiplier '$coverage_multiplier'
    --min-suspicious '$min_suspicious'
    --suspicious-flank-size '$suspicious_flank_size'
    --min-contig-length '$min_contig_length'
    --ignore-ends '$ignore_ends'
    --breakpoint-bin '$breakpoint_bin'
#if $orf_file
    --orf-file '$orf_file'
#end if
#if $coverage_file
    --coverage-file '$coverage_file'
#end if
    --kmer '$kmer'
    --skip-reapr

## Move each assembly's result files up into output/ under a
## "<name>_<file>" name so the discover_datasets patterns below can pick them up.
#for $repeat in $assembly
    && mv 'output/${repeat.assembly_names}/summary.bed' 'output/${repeat.assembly_names}_summary.bed'
    && mv 'output/${repeat.assembly_names}/summary.tsv' 'output/${repeat.assembly_names}_summary.tsv'
    && mv 'output/${repeat.assembly_names}/suspicious.bed' 'output/${repeat.assembly_names}_suspicious.bed'
#end for
    ]]></command>
    <inputs>
        <!-- At least one assembly is required, otherwise the list arguments would be empty. -->
        <repeat name="assembly" title="Candidate assemblies" min="1">
            <param name="assembly_fasta" argument="--assembly-fasta" type="data" format="fasta" label="Candidate assembly file" />
            <param name="assembly_names" argument="--assembly-names" type="text" value="" label="Name of the assembly">
                <validator type="empty_field" message="A name is required"/>
                <!-- The name is joined into a comma-separated list and used as a directory name. -->
                <validator type="regex" message="The name must not contain commas or slashes">^[^,/]+$</validator>
            </param>
        </repeat>
        <conditional name="input_reads">
            <param name="type" type="select" label="Type of input reads used for the assembly">
                <option value="single">Single</option>
                <option value="paired">Paired</option>
                <option value="paired_collection">Paired-collection</option>
            </param>
            <when value="single">
                <conditional name="single_input_reads">
                    <param name="type" type="select" label="Input format">
                        <option value="--fasta">Fasta</option>
                        <option value="--fastq">FastQ</option>
                    </param>
                    <when value="--fasta">
                        <param argument="--reads" type="data" format="fasta" label="Assembly input reads" />
                    </when>
                    <when value="--fastq">
                        <param argument="--reads" type="data" format="fastq" label="Assembly input reads" />
                    </when>
                </conditional>
            </when>
            <when value="paired">
                <conditional name="paired_input_reads">
                    <param name="type" type="select" label="Input format">
                        <option value="--fasta">Fasta</option>
                        <option value="--fastq">FastQ</option>
                    </param>
                    <when value="--fasta">
                        <repeat name="paired_reads" title="Mate pair reads" min="1">
                            <param name="mate_1" argument="--1" type="data" format="fasta" label="Assembly input first mate reads" />
                            <param name="mate_2" argument="--2" type="data" format="fasta" label="Assembly input second mate reads" />
                            <expand macro="insert_size"/>
                        </repeat>
                    </when>
                    <when value="--fastq">
                        <repeat name="paired_reads" title="Mate pair reads" min="1">
                            <param name="mate_1" argument="--1" type="data" format="fastq" label="Assembly input first mate reads" />
                            <param name="mate_2" argument="--2" type="data" format="fastq" label="Assembly input second mate reads" />
                            <expand macro="insert_size"/>
                        </repeat>
                    </when>
                </conditional>
                <expand macro="orientation"/>
            </when>
            <when value="paired_collection">
                <conditional name="paired_coll_input_reads">
                    <param name="type" type="select" label="Input format">
                        <option value="--fasta">Fasta</option>
                        <option value="--fastq">FastQ</option>
                    </param>
                    <when value="--fasta">
                        <repeat name="paired_collection_reads" title="Mate paired read collections" min="1">
                            <param name="input" format="fasta" type="data_collection" collection_type="paired" label="Assembly input reads" />
                            <expand macro="insert_size"/>
                        </repeat>
                    </when>
                    <when value="--fastq">
                        <repeat name="paired_collection_reads" title="Mate paired read collections" min="1">
                            <param name="input" format="fastq" type="data_collection" collection_type="paired" label="Assembly input reads" />
                            <expand macro="insert_size"/>
                        </repeat>
                    </when>
                </conditional>
                <expand macro="orientation"/>
            </when>
        </conditional>
        <param name="window_size" argument="--window-size" type="integer" min="0" value="501" label="Sliding window size when determining misassemblies" />
        <param name="max_alignments" argument="--max-alignments" type="integer" min="0" value="10000" label="Bowtie2 parameter to set the max number of alignments" />
        <param name="min_coverage" argument="--min-coverage" type="integer" min="0" value="0" label="Minimum average coverage to run misassembly detection" />
        <param name="coverage_multiplier" argument="--coverage-multiplier" type="float" min="0" value="0" label="When binning by coverage, the new high = high + high * multiplier" />
        <param name="min_suspicious" argument="--min-suspicious" type="integer" min="0" value="2" label="Minimum number of overlapping flagged misassemblies to mark region as suspicious" />
        <param name="suspicious_flank_size" argument="--suspicious-flank-size" type="integer" min="0" value="2000" label="Mark region as suspicious if multiple signatures occur within this window size" />
        <param name="min_contig_length" argument="--min-contig-length" type="integer" min="0" value="1000" label="Ignore contigs smaller than this length" />
        <param name="ignore_ends" argument="--ignore-ends" type="integer" min="0" value="0" label="Ignore flagged regions within b bps from the ends of the contigs" />
        <param name="breakpoint_bin" argument="--breakpoint-bin" type="integer" min="0" value="50" label="Bin size used to find breakpoints" />
        <param name="kmer" argument="--kmer" type="integer" min="0" value="15" label="Kmer length used for abundance estimation" />
        <param name="coverage_file" argument="--coverage-file" type="data" format="tabular,txt" optional="true" label="Assembly created per-contig coverage file" />
        <param name="orf_file" argument="--orf-file" type="data" format="gff,gtf" optional="true" label="File containing ORFs" />
    </inputs>
    <outputs>
        <!-- The named group "designation" (the assembly name) becomes the element identifier. -->
        <collection name="flagged" type="list" label="${tool.name} on ${on_string}: Flagged regions">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_summary\.bed" format="bed" directory="output"/>
        </collection>
        <collection name="suspicious" type="list" label="${tool.name} on ${on_string}: Suspicious regions">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_suspicious\.bed" format="bed" directory="output"/>
        </collection>
        <collection name="summary" type="list" label="${tool.name} on ${on_string}: Summary">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_summary\.tsv" format="tabular" directory="output"/>
        </collection>
        <data name="comparison_plot" format="pdf" from_work_dir="output/comparison_plots.pdf" label="${tool.name} on ${on_string}: Comparison plot" />
    </outputs>
    <tests>
        <!-- Four assemblies, paired FASTQ reads given as individual datasets. -->
        <test>
            <repeat name="assembly">
                <param name="assembly_fasta" value="c_rudii_reference.fna"/>
                <param name="assembly_names" value="reference"/>
            </repeat>
            <repeat name="assembly">
                <param name="assembly_fasta" value="c_rudii_dup.fna"/>
                <param name="assembly_names" value="duplication"/>
            </repeat>
            <repeat name="assembly">
                <param name="assembly_fasta" value="c_rudii_relocation.fna"/>
                <param name="assembly_names" value="relocation"/>
            </repeat>
            <repeat name="assembly">
                <param name="assembly_fasta" value="c_rudii_reloc_dup.fna"/>
                <param name="assembly_names" value="reloc-dup"/>
            </repeat>
            <conditional name="input_reads">
                <param name="type" value="paired"/>
                <conditional name="paired_input_reads">
                    <param name="type" value="--fastq"/>
                    <repeat name="paired_reads">
                        <param name="mate_1" value="lib1.1.fastq" />
                        <param name="mate_2" value="lib1.2.fastq" />
                        <param name="minins" value="0"/>
                        <param name="maxins" value="500" />
                    </repeat>
                </conditional>
                <param name="orientation" value="fr" />
            </conditional>
            <param name="window_size" value="501"/>
            <param name="max_alignments" value="10000"/>
            <param name="min_coverage" value="0" />
            <param name="coverage_multiplier" value="0"/>
            <param name="min_suspicious" value="2" />
            <param name="suspicious_flank_size" value="2000" />
            <param name="min_contig_length" value="1000"/>
            <param name="ignore_ends" value="0"/>
            <param name="breakpoint_bin" value="50" />
            <param name="kmer" value="15" />
            <param name="coverage_file" value="carsonella_asm.cvg" />
            <output_collection name="flagged" type="list">
                <element name="reference" ftype="bed" file="flagged_reference.bed"/>
                <element name="duplication" ftype="bed" file="flagged_duplication.bed"/>
                <element name="relocation" ftype="bed" file="flagged_relocation.bed"/>
                <element name="reloc-dup" ftype="bed" file="flagged_reloc-dup.bed"/>
            </output_collection>
            <output_collection name="suspicious" type="list">
                <element name="reference" ftype="bed" file="suspicious_reference.bed"/>
                <element name="duplication" ftype="bed" file="suspicious_duplication.bed"/>
                <element name="relocation" ftype="bed" file="suspicious_relocation.bed"/>
                <element name="reloc-dup" ftype="bed" file="suspicious_reloc-dup.bed"/>
            </output_collection>
            <output_collection name="summary" type="list">
                <element name="reference" ftype="tabular" file="summary_reference.tabular"/>
                <element name="duplication" ftype="tabular" file="summary_duplication.tabular"/>
                <element name="relocation" ftype="tabular" file="summary_relocation.tabular"/>
                <element name="reloc-dup" ftype="tabular" file="summary_reloc-dup.tabular"/>
            </output_collection>
            <output name="comparison_plot" file="test1_comparison_plot.pdf" compare="sim_size"/>
        </test>
        <!-- One assembly, paired FASTQ reads given as a paired collection. -->
        <test>
            <repeat name="assembly">
                <param name="assembly_fasta" value="c_rudii_dup.fna"/>
                <param name="assembly_names" value="duplication"/>
            </repeat>
            <conditional name="input_reads">
                <param name="type" value="paired_collection"/>
                <conditional name="paired_coll_input_reads">
                    <param name="type" value="--fastq"/>
                    <repeat name="paired_collection_reads">
                        <param name="input">
                            <collection type="paired">
                                <element name="forward" value="lib1.1.fastq" ftype="fastq" />
                                <element name="reverse" value="lib1.2.fastq" ftype="fastq" />
                            </collection>
                        </param>
                        <param name="minins" value="0"/>
                        <param name="maxins" value="500" />
                    </repeat>
                </conditional>
                <param name="orientation" value="fr" />
            </conditional>
            <param name="window_size" value="501"/>
            <param name="max_alignments" value="10000"/>
            <param name="min_coverage" value="0" />
            <param name="coverage_multiplier" value="0"/>
            <param name="min_suspicious" value="2" />
            <param name="suspicious_flank_size" value="2000" />
            <param name="min_contig_length" value="1000"/>
            <param name="ignore_ends" value="0"/>
            <param name="breakpoint_bin" value="50" />
            <param name="kmer" value="15" />
            <output_collection name="flagged" type="list">
                <element name="duplication" ftype="bed" file="flagged_duplication.bed"/>
            </output_collection>
            <output_collection name="suspicious" type="list">
                <element name="duplication" ftype="bed" file="suspicious_duplication.bed"/>
            </output_collection>
            <output_collection name="summary" type="list">
                <element name="duplication" ftype="tabular" file="summary_duplication.tabular"/>
            </output_collection>
            <output name="comparison_plot" file="test2_comparison_plot.pdf" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

VALET is a de novo pipeline for detecting all types of mis-assemblies in metagenomic data sets.

It primarily adapts the approaches developed in the context of isolate genomes. To avoid false positives and false negatives because of uneven depth of coverage, VALET bins contigs by coverage before applying these methods.

Possible break points in the assembly are found by examining regions where a large number of parts of the reads are unable to align. To identify break points, VALET uses the first and last third of each unaligned read, called sister reads. The sister reads are aligned independently to the reference genome, and then regions where the sister reads align to nonadjacent segments of the genome are flagged as mis-assemblies.

For more details about the tool, please check: https://github.com/marbl/VALET
    ]]></help>
    <citations/>
</tool>