comparison valet.xml @ 0:56236acaad45 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/valet commit 30ecbd2ebd336d7002ca11abd69d600a24986156
author iuc
date Thu, 16 Nov 2017 08:54:46 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:56236acaad45
1 <tool id="valet" name="VALET" version="@WRAPPER_VERSION@.0">
2 <description>to detect mis-assemblies in metagenomic assemblies</description>
3 <macros>
4 <token name="@WRAPPER_VERSION@">1.0</token> <!-- Substituted into the tool version, the package requirement version and the version_command -->
5 <token name="@INPUT_@">
6 1.0
7 </token> <!-- NOTE(review): @INPUT_@ is not referenced anywhere else in the visible file; confirm whether it can be dropped -->
8 <xml name="insert_size">
9 <param argument="--minins" type="integer" min="0" value="0" label="Min insert sizes for mate pairs" />
10 <param argument="--maxins" type="integer" min="0" value="500" label="Max insert sizes for mate pairs" />
11 </xml> <!-- insert_size: minins/maxins pair expanded inside every paired-read repeat -->
12 <xml name="orientation">
13 <param argument="--orientation" type="select" label="Orientation of the mates" >
14 <option value="fr">fr: mate 1 appears upstream of the reverse complement of mate 2 or mate 2 appears upstream of the reverse
15 complement of mate 1</option>
16 <option value="rf">rf: reverse-complemented mate 1 is upstream and forward-oriented mate 2 is downstream</option>
17 <option value="ff">ff: both upstream mate 1 and downstream mate 2 are forward-oriented</option>
18 </param>
19 </xml> <!-- orientation: mate orientation select expanded in both the paired and paired_collection branches -->
20 </macros>
21 <requirements>
22 <requirement type="package" version="@WRAPPER_VERSION@">valet</requirement> <!-- Package version is taken from the wrapper version token -->
23 </requirements>
24 <version_command>echo @WRAPPER_VERSION@</version_command> <!-- NOTE(review): echoes the wrapper token rather than querying the installed valet; confirm this is intended -->
25 <command detect_errors="exit_code">
26 <![CDATA[
27 valet.py ## every repeat (assemblies, read libraries) is collapsed into one comma-separated argument
28 #set assembly_fasta = []
29 #set assembly_names = []
30 #for $repeat in $assembly
31 #silent $assembly_fasta.append(str($repeat.assembly_fasta))
32 #silent $assembly_names.append(str($repeat.assembly_names))
33 #end for
34 --assembly-fasta '${",".join($assembly_fasta)}'
35 --assembly-names '${",".join($assembly_names)}'
36 #if $input_reads.type == 'single'
37 $input_reads.single_input_reads.type
38 --reads '$input_reads.single_input_reads.reads'
39 #else if $input_reads.type == 'paired'
40 #set mate_1 = []
41 #set mate_2 = []
42 #set minins = []
43 #set maxins = []
44 #for $repeat in $input_reads.paired_input_reads.paired_reads
45 #silent $mate_1.append(str($repeat.mate_1))
46 #silent $mate_2.append(str($repeat.mate_2))
47 #silent $minins.append(str($repeat.minins))
48 #silent $maxins.append(str($repeat.maxins))
49 #end for
50 --1 '${",".join($mate_1)}'
51 --2 '${",".join($mate_2)}'
52 --minins '${",".join($minins)}'
53 --maxins '${",".join($maxins)}'
54 $input_reads.paired_input_reads.type
55 --orientation '$input_reads.orientation'
56 #else if $input_reads.type == 'paired_collection'
57 #set mate_1 = []
58 #set mate_2 = []
59 #set minins = []
60 #set maxins = []
61 #for $repeat in $input_reads.paired_coll_input_reads.paired_collection_reads
62 #silent $mate_1.append(str($repeat.input.forward))
63 #silent $mate_2.append(str($repeat.input.reverse))
64 #silent $minins.append(str($repeat.minins))
65 #silent $maxins.append(str($repeat.maxins))
66 #end for
67 --1 '${",".join($mate_1)}'
68 --2 '${",".join($mate_2)}'
69 --minins '${",".join($minins)}'
70 --maxins '${",".join($maxins)}'
71 $input_reads.paired_coll_input_reads.type
72 --orientation '$input_reads.orientation'
73 #end if
74 --output-dir output ## per-assembly results are written to output/NAME/ and flattened by the mv calls at the end
75 --window-size '$window_size'
76 --threads \${GALAXY_SLOTS:-4}
77 --max-alignments '$max_alignments'
78 --min-coverage '$min_coverage'
79 --coverage-multiplier '$coverage_multiplier'
80 --min-suspicious '$min_suspicious'
81 --suspicious-flank-size '$suspicious_flank_size'
82 --min-contig-length '$min_contig_length'
83 --ignore-ends '$ignore_ends'
84 --breakpoint-bin '$breakpoint_bin'
85 #if $orf_file
86 --orf-file '$orf_file'
87 #end if
88 #if $coverage_file
89 --coverage-file '$coverage_file'
90 #end if
91 --kmer '$kmer'
92 --skip-reapr
93 #for $repeat in $assembly
94 && mv 'output/${repeat.assembly_names}/summary.bed' 'output/${repeat.assembly_names}_summary.bed' ## paths quoted so a name with spaces stays one shell word
95 && mv 'output/${repeat.assembly_names}/summary.tsv' 'output/${repeat.assembly_names}_summary.tsv'
96 && mv 'output/${repeat.assembly_names}/suspicious.bed' 'output/${repeat.assembly_names}_suspicious.bed'
97 #end for
98 ]]></command>
99 <inputs>
100 <repeat name="assembly" title="Candidate assemblies" min="1">
101 <param name="assembly_fasta" argument="--assembly-fasta" type="data" format="fasta" label="Candidate assembly file" />
102 <param name="assembly_names" argument="--assembly-names" type="text" value="" label="Name of the assembly">
103 <validator type="empty_field" message="A name is required"/>
104 </param>
105 </repeat> <!-- assembly: at least one instance is required, because the command joins all instances into --assembly-fasta / --assembly-names and would otherwise pass empty values -->
106 <conditional name="input_reads">
107 <param name="type" type="select" label="Type of input reads used for the assembly">
108 <option value="single">Single</option>
109 <option value="paired">Paired</option>
110 <option value="paired_collection">Paired-collection</option>
111 </param>
112 <when value="single">
113 <conditional name="single_input_reads">
114 <param name="type" type="select" label="Input format">
115 <option value="--fasta">Fasta</option>
116 <option value="--fastq">FastQ</option>
117 </param>
118 <when value="--fasta">
119 <param argument="--reads" type="data" format="fasta" label="Assembly input reads" />
120 </when>
121 <when value="--fastq">
122 <param argument="--reads" type="data" format="fastq" label="Assembly input reads" />
123 </when>
124 </conditional>
125 </when>
126 <when value="paired">
127 <conditional name="paired_input_reads">
128 <param name="type" type="select" label="Input format">
129 <option value="--fasta">Fasta</option>
130 <option value="--fastq">FastQ</option>
131 </param>
132 <when value="--fasta">
133 <repeat name="paired_reads" title="Mate pair reads" min="1">
134 <param name="mate_1" argument="--1" type="data" format="fasta" label="Assembly input first mate reads" />
135 <param name="mate_2" argument="--2" type="data" format="fasta" label="Assembly input second mate reads" />
136 <expand macro="insert_size"/>
137 </repeat>
138 </when>
139 <when value="--fastq">
140 <repeat name="paired_reads" title="Mate pair reads" min="1">
141 <param name="mate_1" argument="--1" type="data" format="fastq" label="Assembly input first mate reads" />
142 <param name="mate_2" argument="--2" type="data" format="fastq" label="Assembly input second mate reads" />
143 <expand macro="insert_size"/>
144 </repeat>
145 </when>
146 </conditional>
147 <expand macro="orientation"/>
148 </when>
149 <when value="paired_collection">
150 <conditional name="paired_coll_input_reads">
151 <param name="type" type="select" label="Input format">
152 <option value="--fasta">Fasta</option>
153 <option value="--fastq">FastQ</option>
154 </param>
155 <when value="--fasta">
156 <repeat name="paired_collection_reads" title="Mate paired read collections" min="1">
157 <param name="input" format="fasta" type="data_collection" collection_type="paired" label="Assembly input reads" />
158 <expand macro="insert_size"/>
159 </repeat>
160 </when>
161 <when value="--fastq">
162 <repeat name="paired_collection_reads" title="Mate paired read collections" min="1">
163 <param name="input" format="fastq" type="data_collection" collection_type="paired" label="Assembly input reads" />
164 <expand macro="insert_size"/>
165 </repeat>
166 </when>
167 </conditional>
168 <expand macro="orientation"/>
169 </when>
170 </conditional> <!-- input_reads: the inner "type" select value (--fasta / --fastq) is emitted verbatim as a command-line flag; the paired repeats require at least one library so that --1/--2/--minins/--maxins are never empty -->
171 <param name="window_size" argument="--window-size" type="integer" min="0" value="501" label="Sliding window size when determining misassemblies" /> <!-- Each parameter below is forwarded to the valet.py option named in its "argument" attribute -->
172 <param name="max_alignments" argument="--max-alignments" type="integer" min="0" value="10000" label="Bowtie2 parameter to set the max number of alignments" />
173 <param name="min_coverage" argument="--min-coverage" type="integer" min="0" value="0" label="Minimum average coverage to run misassembly detection" />
174 <param name="coverage_multiplier" argument="--coverage-multiplier" type="float" min="0" value="0" label="When binning by coverage, the new high = high + high * multiplier" />
175 <param name="min_suspicious" argument="--min-suspicious" type="integer" min="0" value="2" label="Minimum number of overlapping flagged misassemblies to mark region as suspicious" />
176 <param name="suspicious_flank_size" argument="--suspicious-flank-size" type="integer" min="0" value="2000" label="Mark region as suspicious if multiple signatures occur within this window size" />
177 <param name="min_contig_length" argument="--min-contig-length" type="integer" min="0" value="1000" label="Ignore contigs smaller than this length" />
178 <param name="ignore_ends" argument="--ignore-ends" type="integer" min="0" value="0" label="Ignore flagged regions within b bps from the ends of the contigs" />
179 <param name="breakpoint_bin" argument="--breakpoint-bin" type="integer" min="0" value="50" label="Bin size used to find breakpoints" />
180 <param name="kmer" argument="--kmer" type="integer" min="0" value="15" label="Kmer length used for abundance estimation" />
181 <param name="coverage_file" argument="--coverage-file" type="data" format="tabular,txt" optional="true" label="Assembly created per-contig coverage file" /> <!-- Optional: only added to the command line when a dataset is selected -->
182 <param name="orf_file" argument="--orf-file" type="data" format="gff,gtf" optional="true" label="File containing ORFs" />
183 </inputs>
184 <outputs>
185 <collection name="flagged" type="list" label="${tool.name} on ${on_string}: Flagged regions">
186 <discover_datasets pattern="(?P&lt;designation&gt;.+)_summary\.bed" format="bed" directory="output"/>
187 </collection> <!-- Collections pick up the NAME_* files that the command moves into output/; the dot before each extension is escaped so it only matches a literal "." -->
188 <collection name="suspicious" type="list" label="${tool.name} on ${on_string}: Suspicious regions">
189 <discover_datasets pattern="(?P&lt;designation&gt;.+)_suspicious\.bed" format="bed" directory="output"/>
190 </collection>
191 <collection name="summary" type="list" label="${tool.name} on ${on_string}: Summary">
192 <discover_datasets pattern="(?P&lt;designation&gt;.+)_summary\.tsv" format="tabular" directory="output"/>
193 </collection>
194 <data name="comparison_plot" format="pdf" from_work_dir="output/comparison_plots.pdf" label="${tool.name} on ${on_string}: Comparison plot" />
195 </outputs>
196 <tests>
197 <test>
198 <repeat name="assembly">
199 <param name="assembly_fasta" value="c_rudii_reference.fna"/>
200 <param name="assembly_names" value="reference"/>
201 </repeat>
202 <repeat name="assembly">
203 <param name="assembly_fasta" value="c_rudii_dup.fna"/>
204 <param name="assembly_names" value="duplication"/>
205 </repeat>
206 <repeat name="assembly">
207 <param name="assembly_fasta" value="c_rudii_relocation.fna"/>
208 <param name="assembly_names" value="relocation"/>
209 </repeat>
210 <repeat name="assembly">
211 <param name="assembly_fasta" value="c_rudii_reloc_dup.fna"/>
212 <param name="assembly_names" value="reloc-dup"/>
213 </repeat>
214 <conditional name="input_reads">
215 <param name="type" value="paired"/>
216 <conditional name="paired_input_reads">
217 <param name="type" value="--fastq"/>
218 <repeat name="paired_reads">
219 <param name="mate_1" value="lib1.1.fastq" />
220 <param name="mate_2" value="lib1.2.fastq" />
221 <param name="minins" value="0"/>
222 <param name="maxins" value="500" />
223 </repeat>
224 </conditional>
225 <param name="orientation" value="fr" />
226 </conditional>
227 <param name="window_size" value="501"/>
228 <param name="max_alignments" value="10000"/>
229 <param name="min_coverage" value="0" />
230 <param name="coverage_multiplier" value="0"/>
231 <param name="min_suspicious" value="2" />
232 <param name="suspicious_flank_size" value="2000" />
233 <param name="min_contig_length" value="1000"/>
234 <param name="ignore_ends" value="0"/>
235 <param name="breakpoint_bin" value="50" />
236 <param name="kmer" value="15" />
237 <param name="coverage_file" value="carsonella_asm.cvg" />
238 <output_collection name="flagged" type="list">
239 <element name="reference" ftype="bed" file="flagged_reference.bed"/>
240 <element name="duplication" ftype="bed" file="flagged_duplication.bed"/>
241 <element name="relocation" ftype="bed" file="flagged_relocation.bed"/>
242 <element name="reloc-dup" ftype="bed" file="flagged_reloc-dup.bed"/>
243 </output_collection>
244 <output_collection name="suspicious" type="list">
245 <element name="reference" ftype="bed" file="suspicious_reference.bed"/>
246 <element name="duplication" ftype="bed" file="suspicious_duplication.bed"/>
247 <element name="relocation" ftype="bed" file="suspicious_relocation.bed"/>
248 <element name="reloc-dup" ftype="bed" file="suspicious_reloc-dup.bed"/>
249 </output_collection>
250 <output_collection name="summary" type="list">
251 <element name="reference" ftype="tabular" file="summary_reference.tabular"/>
252 <element name="duplication" ftype="tabular" file="summary_duplication.tabular"/>
253 <element name="relocation" ftype="tabular" file="summary_relocation.tabular"/>
254 <element name="reloc-dup" ftype="tabular" file="summary_reloc-dup.tabular"/>
255 </output_collection>
256 <output name="comparison_plot" file="test1_comparison_plot.pdf" compare="sim_size"/>
257 </test> <!-- Test 1: four named assemblies, one paired FASTQ library given as two separate datasets, plus a coverage file; checks all three collections element by element and the plot by size -->
258 <test>
259 <repeat name="assembly">
260 <param name="assembly_fasta" value="c_rudii_dup.fna"/>
261 <param name="assembly_names" value="duplication"/>
262 </repeat>
263 <conditional name="input_reads">
264 <param name="type" value="paired_collection"/>
265 <conditional name="paired_coll_input_reads">
266 <param name="type" value="--fastq"/>
267 <repeat name="paired_collection_reads">
268 <param name="input">
269 <collection type="paired">
270 <element name="forward" value="lib1.1.fastq" ftype="fastq" />
271 <element name="reverse" value="lib1.2.fastq" ftype="fastq" />
272 </collection>
273 </param>
274 <param name="minins" value="0"/>
275 <param name="maxins" value="500" />
276 </repeat>
277 </conditional>
278 <param name="orientation" value="fr" />
279 </conditional>
280 <param name="window_size" value="501"/>
281 <param name="max_alignments" value="10000"/>
282 <param name="min_coverage" value="0" />
283 <param name="coverage_multiplier" value="0"/>
284 <param name="min_suspicious" value="2" />
285 <param name="suspicious_flank_size" value="2000" />
286 <param name="min_contig_length" value="1000"/>
287 <param name="ignore_ends" value="0"/>
288 <param name="breakpoint_bin" value="50" />
289 <param name="kmer" value="15" />
290 <output_collection name="flagged" type="list">
291 <element name="duplication" ftype="bed" file="flagged_duplication.bed"/>
292 </output_collection>
293 <output_collection name="suspicious" type="list">
294 <element name="duplication" ftype="bed" file="suspicious_duplication.bed"/>
295 </output_collection>
296 <output_collection name="summary" type="list">
297 <element name="duplication" ftype="tabular" file="summary_duplication.tabular"/>
298 </output_collection>
299 <output name="comparison_plot" file="test2_comparison_plot.pdf" compare="sim_size"/>
300 </test> <!-- Test 2: a single assembly with the same FASTQ mates supplied as one paired collection; no coverage file is set -->
301 </tests>
302 <help><![CDATA[
303 **What it does**
304
305 VALET is a de novo pipeline for detecting all types of mis-assemblies in metagenomic data sets.
306
307 It primarily adapts the approaches developed in the context of isolate genomes. To avoid false positives and false
308 negatives because of uneven depth of coverage, VALET bins contigs by coverage before applying these methods.
309
310 Possible break points in the assembly are found by examining regions, where a large number of parts of the reads are
311 unable to align. To identify break points, VALET uses the first and last third of each unaligned read, called sister
312 reads. The sister reads are aligned independently to the reference genome, and then regions where the sister reads
313 align to nonadjacent segments of the genome are flagged as mis-assemblies.
314
315 For more details about the tool, please check: https://github.com/marbl/VALET
316 ]]></help>
317 <citations/>
318 </tool>