Mercurial > repos > iuc > ragtag
comparison ragtag.xml @ 0:a04e64efa43a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ragtag commit 4c4b2a548b4ce46da88810992459b3ac8581d035"
author | iuc |
---|---|
date | Wed, 10 Nov 2021 23:33:13 +0000 |
parents | |
children | d110a4141898 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a04e64efa43a |
---|---|
1 <tool id='ragtag' name='RagTag' version='@TOOL_VERSION@+galaxy@VERSION_SUFFIX@' profile='20.01'> | |
2 <description>reference-guided scaffolding of draft genomes</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro='xrefs' /> | |
7 <expand macro='requirements' /> | |
8 <command detect_errors='exit_code'><![CDATA[ | |
9 #if $mode_conditional.mode_option != 'merge' | |
10 #if $mode_conditional.advanced_options.mapping_conditional.mapping_option == 'nucmer' | |
11 #set $nucmer_params = '%s -l %s -c %s' % ($mode_conditional.advanced_options.mapping_conditional.anchor_mode, | |
12 $mode_conditional.advanced_options.mapping_conditional.l, | |
13 $mode_conditional.advanced_options.mapping_conditional.c) | |
14 #end if | |
15 #end if | |
16 #if $mode_conditional.mode_option == 'merge' | |
17 #set $input_files = list() | |
18 mkdir merge_files && ## stage every AGP input under a predictable scaffold_N.agp name | |
19 #for $i, $j in enumerate($mode_conditional.scaffold_files) | |
20 #set $out_file = './merge_files/scaffold_%s.agp' % $i | |
21 ln -s '${j}' '${out_file}' && | |
22 #silent $input_files.append($out_file) | |
23 #end for | |
24 #set $merge_files = " ".join($input_files) | |
25 #end if | |
26 ragtag.py $mode_conditional.mode_option -u | |
27 #if $mode_conditional.mode_option == 'correct' | |
28 @INPUTS@ | |
29 @COMMON_PARAMETERS@ | |
30 #if $mode_conditional.validation_conditional.validation_option == 'true' | |
31 -R '${mode_conditional.validation_conditional.R}' | |
32 -T $mode_conditional.validation_conditional.read_type | |
33 -v $mode_conditional.validation_conditional.v | |
34 #if $mode_conditional.validation_conditional.max_cov | |
35 --max-cov $mode_conditional.validation_conditional.max_cov | |
36 #end if | |
37 #if $mode_conditional.validation_conditional.min_cov | |
38 --min-cov $mode_conditional.validation_conditional.min_cov | |
39 #end if | |
40 #end if | |
41 -b $mode_conditional.advanced_options.b | |
42 #if $mode_conditional.advanced_options.missasembly_break | |
43 $mode_conditional.advanced_options.missasembly_break | |
44 #end if | |
45 #if $mode_conditional.advanced_options.gff | |
46 --gff '${mode_conditional.advanced_options.gff}' | |
47 #end if | |
48 --read-aligner 'minimap2' ## it is the only allowed | |
49 #else if $mode_conditional.mode_option == 'scaffold' | |
50 @INPUTS@ | |
51 @COMMON_PARAMETERS@ | |
52 -i $mode_conditional.advanced_options.i | |
53 -a $mode_conditional.advanced_options.a | |
54 -s $mode_conditional.advanced_options.s | |
55 #if $mode_conditional.advanced_options.gap_conditional.gap_option == 'true' | |
56 -r | |
57 -g '${mode_conditional.advanced_options.gap_conditional.g}' | |
58 -m '${mode_conditional.advanced_options.gap_conditional.m}' | |
59 #end if | |
60 #if $mode_conditional.advanced_options.unplaced_conditional.unplaced_option == 'true' | |
61 -C | |
62 #if $mode_conditional.advanced_options.unplaced_conditional.J | |
63 -J '${mode_conditional.advanced_options.unplaced_conditional.J}' | |
64 #end if | |
65 #end if | |
66 #else if $mode_conditional.mode_option == 'patch' | |
67 @INPUTS@ | |
68 @COMMON_PARAMETERS@ | |
69 -s $mode_conditional.advanced_options.s | |
70 -i $mode_conditional.advanced_options.i | |
71 #if $mode_conditional.advanced_options.patching_mode | |
72 $mode_conditional.advanced_options.patching_mode | |
73 #end if | |
74 #else | |
75 '${assembly_fasta}' | |
76 #if $mode_conditional.scaffold_files | |
77 $merge_files | |
78 #end if | |
79 #if $mode_conditional.merging_options.j | |
80 -j '${mode_conditional.merging_options.j}' | |
81 #end if | |
82 -l $mode_conditional.merging_options.l | |
83 -e $mode_conditional.merging_options.e | |
84 --gap-func $mode_conditional.merging_options.function_merging | |
85 #if $mode_conditional.hic_options.b | |
86 -b '${mode_conditional.hic_options.b}' | |
87 -r '${mode_conditional.hic_options.r}' ## quoted: sites such as GA[ATCG]TC contain shell glob characters | |
88 -p $mode_conditional.hic_options.p | |
89 #end if | |
90 #end if | |
91 -o ./ | |
92 #if $mode_conditional.mode_option != 'merge' | |
93 -t \${GALAXY_SLOTS:-2} | |
94 #end if | |
95 #if $mode_conditional.mode_option == 'patch' | |
96 && mv ragtag.patch.asm.paf.log ragtag.patch.log | |
97 #end if | |
98 ]]> </command> | |
99 <inputs> | |
100 <conditional name="mode_conditional"> | |
101 <param name="mode_option" type="select" label="Operation mode"> <!-- selected value is passed verbatim as the ragtag.py sub-command --> | |
102 <option value="correct">Correct: homology-based misassembly correction</option> | |
103 <option value="scaffold">Scaffold: homology-based assembly scaffolding</option> | |
104 <option value="patch">Patch: homology-based assembly patching</option> | |
105 <option value="merge">Merge: scaffolding merging</option> | |
106 </param> | |
107 <when value="correct"> | |
108 <expand macro="input_options"/> | |
109 <conditional name="validation_conditional"> | |
110 <param name="validation_option" type="select" label="Use validation reads"> | |
111 <option value="true">Enabled</option> | |
112 <option value="false" selected="true">Disabled</option> | |
113 </param> | |
114 <when value="true"> | |
115 <param argument="-R" type="data" format="fastq,fastqsanger" label="Validation reads" | |
116 help="Without validation, the module will break at any point of reference discordance as defined by the 'correction options'. | |
117 With validation, RagTag maps reads to the query assembly and verifies putative break points if they are near regions of | |
118 exceptionally low or high coverage. The reads used for validation should come from the same genotype as the query | |
119 assembly to ensure that coverage abnormalities don't arise from true biological variation" /> | |
120 <param name="read_type" type="select" label="Read type"> | |
121 <option value="sr">Illumina</option> | |
122 <option value="ont">Nanopore</option> | |
123 <option value="corr">Error corrected long-reads</option> | |
124 </param> | |
125 <param argument="-v" type="integer" min="0" value="10000" label="Coverage validation window size" | |
126 help="This parameter specifies the window around the putative misassembly break point that RagTag examines | |
127 for exceptionally low or high read coverage. The larger this window size, the more likely | |
128 it is to find an unrelated coverage abnormality"/> | |
129 <param argument="--max-cov" type="integer" min="0" value="" optional="true" label="Break sequences at regions at or above this coverage level"/> | |
130 <param argument="--min-cov" type="integer" min="0" value="" optional="true" label="Break sequences at regions at or below this coverage level"/> | |
131 </when> | |
132 <when value="false"/> | |
133 </conditional> | |
134 <section name="advanced_options" title="Advanced options"> | |
135 <expand macro="common_parameters"/> | |
136 <param argument="-b" type="integer" min="0" value="5000" label="Minimum break distance from contig ends" | |
137 help="Breaks will not be made within -b bp of query sequence termini"/> | |
138 <param name="missasembly_break" type="select" optional="true" label="Break misassembly option" | |
139 help="One can also direct RagTag to only break misassemblies between (--inter, query maps to >1 reference sequence) or within | |
140 (--intra, query maps discordantly to 1 reference sequence) reference sequences"> | |
141 <option value="--inter">Only break misassemblies between reference sequences (--inter)</option> | |
142 <option value="--intra">Only break misassemblies within reference sequences (--intra)</option> | |
143 </param> | |
144 <param argument="--gff" type="data" format="gff" optional="true" label="Don't break sequences within GFF intervals" | |
145 help=" If one has annotations associated with the query assembly, provide them with the --gff option to ensure that the query assembly | |
146 is never broken within annotation intervals. "/> | |
147 </section> | |
148 <param name="output_correct" type="select" multiple="true" label="Output files"> | |
149 <option value="fasta" selected="true">The corrected query assembly in FASTA format</option> | |
150 <option value="agp" selected="true">The AGP file defining the exact coordinates of query sequence breaks</option> | |
151 <option value="paf">The description of the approximate mapping positions between two set of sequences in PAF format</option> | |
152 <option value="log">Log file</option> | |
153 </param> | |
154 </when> | |
155 <when value="scaffold"> | |
156 <expand macro="input_options"/> | |
157 <section name="advanced_options" title="advanced options"> | |
158 <expand macro="common_parameters"/> | |
159 <param argument="-i" type="float" min="0" max="1" value="0.2" label="Minimum grouping confidence score" | |
160 help="The grouping confidence score is the number of base pairs a contig covered in its assigned reference chromosome | |
161 divided by the total number of covered base pairs in the entire reference genome"/> | |
162 <param argument="-a" type="float" min="0" max="1" value="0" label="Minimum location confidence score" | |
163 help="To create a metric associated with contig ordering confidence, RagTag defines a location confidence. First, the smallest | |
164 and largest alignment positions, with respect to the reference, between a contig and its assigned reference chromosome are found. | |
165 The location confidence is then calculated as the number of covered base pairs in this range divided by the total number of | |
166 base pairs in the range"/> | |
167 <param argument="-s" type="float" min="0" max="1" value="0" label="Minimum orientation confidence score" | |
168 help="To calculate the orientation confidence, each base pair in each alignment between a contig and its assigned reference chromosome | |
169 casts a vote for the orientation of its alignment. The orientation confidence is the number of votes for the assigned orientation of | |
170 the contig divided by the total number of votes"/> | |
171 <conditional name="gap_conditional"> | |
172 <param name="gap_option" type="select" label="Infer gap sizes" help="When disabled, all gaps are 100 bp (-r)"> | |
173 <option value="true" selected="true">Enabled</option> | |
174 <option value="false">Disabled</option> | |
175 </param> | |
176 <when value="true"> | |
177 <param argument="-g" type="integer" min="0" value="100" label="Minimum inferred gap size" /> | |
178 <param argument="-m" type="integer" min="0" value="100000" label="Maximum inferred gap size"/> | |
179 </when> | |
180 <when value="false"/> | |
181 </conditional> | |
182 <conditional name="unplaced_conditional"> | |
183 <param name="unplaced_option" type="select" label="Concatenate unplaced contigs and make 'chr0' (-C)"> | |
184 <option value="true">Enabled</option> | |
185 <option value="false" selected="true">Disabled</option> | |
186 </param> | |
187 <when value="true"> | |
188 <param argument="-J" type="data" format="txt" optional="true" label="List of query headers to leave unplaced and exclude from 'chr0'"/> | |
189 </when> | |
190 <when value="false"/> | |
191 </conditional> | |
192 </section> | |
193 <param name="output_scaffold" type="select" multiple="true" label="Output files"> | |
194 <option value="fasta" selected="true">The scaffolds in FASTA format, defined by the ordering and orientations of the sequences contained in the AGP file</option> | |
195 <option value="agp" selected="true">The ordering and orientations of query sequences in AGP format</option> | |
196 <option value="paf">The description of the approximate mapping positions between two set of sequences in PAF format</option> | |
197 <option value="confidence">Confidence score values</option> | |
198 <option value="stats">Summary statistics for the scaffolding process</option> | |
199 <option value="log">Log file</option> | |
200 </param> | |
201 </when> | |
202 <when value="patch"> | |
203 <expand macro="input_options"/> | |
204 <section name="advanced_options" title="advanced options"> | |
205 <expand macro="common_parameters"/> | |
206 <param argument="-s" type="integer" min="0" value="50000" label="Minimum merged alignment length" | |
207 help="After merging, alignments less than -s bp long will be removed"/> | |
208 <param argument="-i" type="float" min="0" max="1" value="0.05" label="Maximum merged alignment distance" | |
209 help="Maximum merged alignment distance from sequence terminus as fraction of the sequence length. Alignments must | |
210 be within -i bp of a target sequence terminus or gap to be considered for patching"/> | |
211 <param name="patching_mode" type="select" optional="true" label="Patching mode"> <!-- the selected value is emitted verbatim as a ragtag.py patch flag --> | |
212 <option value="--fill-only">Only fill existing target gaps. Do not join target sequences</option> | |
213 <option value="--join-only">Only join and patch target sequences. Do not fill existing gaps</option> | |
214 </param> | |
215 </section> | |
216 <param name="output_patch" type="select" multiple="true" label="Output files"> | |
217 <option value="final_fasta" selected="true">The final FASTA file containing the patched assembly</option> | |
218 <option value="final_agp" selected="true">The final AGP file defining how final FASTA is built</option> | |
219 <option value="assembly_file" selected="true">Assembly alignment files</option> | |
220 <option value="split_assembly">The split target assembly and the renamed query assembly combined into one FASTA file</option> | |
221 <option value="split_description">An AGP file defining how the target assembly was split at gaps</option> | |
222 <option value="target_gaps">The target assembly split at gaps</option> | |
223 <option value="agp_renamed">An AGP file defining the new names for query sequences</option> | |
224 <option value="fasta_renamed">A FASTA file with the original query sequence, but with new names</option> | |
225 <option value="log">Log file</option> | |
226 </param> | |
227 </when> | |
228 <when value="merge"> | |
229 <param name="assembly_fasta" type="data" format="fasta" label="Assembly FASTA file"/> | |
230 <param name="scaffold_files" type="data" format="agp" multiple="true" optional="true" label="Scaffold AGP files"/> | |
231 <section name="merging_options" title="Merging options"> | |
232 <param argument="-j" type="data" format="txt" optional="true" label="List of query headers to leave unplaced"/> | |
233 <param argument="-l" type="integer" min="0" value="100000" label="Minimum assembly sequence length" | |
234 help="Assembly sequences shorter than -l will also be left unplaced."/> | |
235 <param argument="-e" type="float" min="0" value="0" label="Minimum edge weight" | |
236 help="The edges in the merging graph represent scaffolding adjacencies. If an AGP file supports a particular adjacency, | |
237 its weight is added to the edge weight. Any edges with a weight lower than the minimum edge weight will be removed from the graph"/> | |
238 <param name="function_merging" type="select" label="Function for merging gap lengths" | |
239 help="Scaffold gaps can differ between input AGP files. For example, a Hi-C derived AGP file might place 100 bp gaps between sequences | |
240 while a reference-guided AGP file might infer gap sizes based on a reference genome. Use this parameter to specify how gap sizes | |
241 should be computed from the supporting AGP files (--gap-func)"> | |
242 <option value="min" selected="true">Min</option> | |
243 <option value="max">Max</option> | |
244 <option value="mean">Mean</option> | |
245 </param> | |
246 </section> | |
247 <section name="hic_options" title="HI-C options"> | |
248 <param argument="-b" type="data" format="bam" optional="true" label="Hi-C alignments" help="Sorted by read name"/> | |
249 <param argument="-r" type="text" value="" optional="true" label="Restriction enzymes/sites or 'DNase'" help="List of restriction enzymes/sites or 'DNase', separated by comma. E.g. GATC,GACC"> | |
250 <sanitizer invalid_char=""> | |
251 <valid initial="string.letters,string.digits"> | |
252 <add value="," /> | |
253 <add value="[" /> | |
254 <add value="]" /> | |
255 </valid> | |
256 </sanitizer> | |
257 <validator type="regex">[0-9a-zA-Z,\]\[]+</validator> | |
258 </param> | |
259 <param argument="-p" type="float" min="0" max="1" value="1" optional="true" label="Portion of the sequence termini to consider for links"/> | |
260 </section> | |
261 </when> | |
262 </conditional> | |
263 </inputs> | |
264 <outputs> | |
265 <!--Correct mode outputs--> | |
266 <data format="paf" name="correct_paf" from_work_dir="ragtag.correct.asm.paf" label="${tool.name} on ${on_string}: PAF"> | |
267 <filter>mode_conditional["mode_option"] == "correct" and "paf" in mode_conditional["output_correct"]</filter> | |
268 </data> | |
269 <data format="agp" name="correct_agp" from_work_dir="ragtag.correct.agp" label="${tool.name} on ${on_string}: AGP"> | |
270 <filter>mode_conditional["mode_option"] == "correct" and "agp" in mode_conditional["output_correct"]</filter> | |
271 </data> | |
272 <data format="fasta" name="correct_fasta" from_work_dir="ragtag.correct.fasta" label="${tool.name} on ${on_string}: FASTA"> | |
273 <filter>mode_conditional["mode_option"] == "correct" and "fasta" in mode_conditional["output_correct"]</filter> | |
274 </data> | |
275 <data format="txt" name="correct_log" from_work_dir="ragtag.correct.asm.paf.log" label="${tool.name} on ${on_string}: log"> | |
276 <filter>mode_conditional["mode_option"] == "correct" and "log" in mode_conditional["output_correct"]</filter> | |
277 </data> | |
278 <!--Scaffold mode outputs--> | |
279 <data format="paf" name="scaffold_paf" from_work_dir="ragtag.scaffold.asm.paf" label="${tool.name} on ${on_string}: PAF"> | |
280 <filter>mode_conditional["mode_option"] == "scaffold" and "paf" in mode_conditional["output_scaffold"]</filter> | |
281 </data> | |
282 <data format="agp" name="scaffold_agp" from_work_dir="ragtag.scaffold.agp" label="${tool.name} on ${on_string}: AGP"> | |
283 <filter>mode_conditional["mode_option"] == "scaffold" and "agp" in mode_conditional["output_scaffold"]</filter> | |
284 </data> | |
285 <data format="fasta" name="scaffold_fasta" from_work_dir="ragtag.scaffold.fasta" label="${tool.name} on ${on_string}: FASTA"> | |
286 <filter>mode_conditional["mode_option"] == "scaffold" and "fasta" in mode_conditional["output_scaffold"]</filter> | |
287 </data> | |
288 <data format="txt" name="scaffold_log" from_work_dir="ragtag.scaffold.asm.paf.log" label="${tool.name} on ${on_string}: log"> | |
289 <filter>mode_conditional["mode_option"] == "scaffold" and "log" in mode_conditional["output_scaffold"]</filter> | |
290 </data> | |
291 <data format="tabular" name="scaffold_stats" from_work_dir="ragtag.scaffold.stats" label="${tool.name} on ${on_string}: stats"> | |
292 <filter>mode_conditional["mode_option"] == "scaffold" and "stats" in mode_conditional["output_scaffold"]</filter> | |
293 </data> | |
294 <data format="tabular" name="scaffold_confidence" from_work_dir="ragtag.scaffold.confidence.txt" label="${tool.name} on ${on_string}: confidence"> | |
295 <filter>mode_conditional["mode_option"] == "scaffold" and "confidence" in mode_conditional["output_scaffold"]</filter> | |
296 </data> | |
297 <!--Patch mode outputs--> | |
298 <data format="agp" name="patch_agp" from_work_dir="ragtag.patch.agp" label="${tool.name} on ${on_string}: final AGP"> | |
299 <filter>mode_conditional["mode_option"] == "patch" and "final_agp" in mode_conditional["output_patch"]</filter> | |
300 </data> | |
301 <data format="paf" name="patch_paf" from_work_dir="ragtag.patch.asm.paf" label="${tool.name} on ${on_string}: final PAF"> | |
302 <filter>mode_conditional["mode_option"] == "patch" and "assembly_file" in mode_conditional["output_patch"]</filter> | |
303 </data> | |
304 <data format="txt" name="patch_log" from_work_dir="ragtag.patch.log" label="${tool.name} on ${on_string}: log"> | |
305 <filter>mode_conditional["mode_option"] == "patch" and "log" in mode_conditional["output_patch"]</filter> | |
306 </data> | |
307 <data format="fasta" name="patch_comps_fasta" from_work_dir="ragtag.patch.comps.fasta" label="${tool.name} on ${on_string}: components FASTA"> | |
308 <filter>mode_conditional["mode_option"] == "patch" and "split_assembly" in mode_conditional["output_patch"]</filter> | |
309 </data> | |
310 <data format="agp" name="patch_ctg_agp" from_work_dir="ragtag.patch.ctg.agp" label="${tool.name} on ${on_string}: contigs AGP"> | |
311 <filter>mode_conditional["mode_option"] == "patch" and "split_description" in mode_conditional["output_patch"]</filter> | |
312 </data> | |
313 <data format="fasta" name="patch_ctg_fasta" from_work_dir="ragtag.patch.ctg.fasta" label="${tool.name} on ${on_string}: contigs FASTA"> | |
314 <filter>mode_conditional["mode_option"] == "patch" and "target_gaps" in mode_conditional["output_patch"]</filter> | |
315 </data> | |
316 <data format="fasta" name="patch_fasta" from_work_dir="ragtag.patch.fasta" label="${tool.name} on ${on_string}: final FASTA"> | |
317 <filter>mode_conditional["mode_option"] == "patch" and "final_fasta" in mode_conditional["output_patch"]</filter> | |
318 </data> | |
319 <data format="agp" name="patch_rename_agp" from_work_dir="ragtag.patch.rename.agp" label="${tool.name} on ${on_string}: renamed AGP"> | |
320 <filter>mode_conditional["mode_option"] == "patch" and "agp_renamed" in mode_conditional["output_patch"]</filter> | |
321 </data> | |
322 <data format="fasta" name="patch_rename_fasta" from_work_dir="ragtag.patch.rename.fasta" label="${tool.name} on ${on_string}: renamed FASTA"> | |
323 <filter>mode_conditional["mode_option"] == "patch" and "fasta_renamed" in mode_conditional["output_patch"]</filter> | |
324 </data> | |
325 <!-- Merge mode outputs--> | |
326 <data format="agp" name="merge_agp" from_work_dir="ragtag.merge.agp" label="${tool.name} on ${on_string}: merged AGP"> | |
327 <filter>mode_conditional["mode_option"] == "merge"</filter> | |
328 </data> | |
329 <data format="fasta" name="merge_fasta" from_work_dir="ragtag.merge.fasta" label="${tool.name} on ${on_string}: merged FASTA"> | |
330 <filter>mode_conditional["mode_option"] == "merge"</filter> | |
331 </data> | |
332 </outputs> | |
333 <tests> | |
334 <test expect_num_outputs="4"> | |
335 <!--Test 01 correct mode minimap2--> | |
336 <conditional name="mode_conditional"> | |
337 <param name="mode_option" value="correct"/> | |
338 <param name="reference" value="genome.fna"/> | |
339 <param name="query" value="contigs.fna"/> | |
340 <param name="output_correct" value="fasta,agp,paf,log"/> | |
341 <section name="advanced_options"> | |
342 <param name="e" value="reference_headers_skip.txt"/> | |
343 <param name="j" value="query_headers_skip.txt"/> | |
344 <param name="f" value="1000"/> | |
345 <conditional name="mapping_conditional"> | |
346 <param name="mapping_option" value="minimap2"/> | |
347 <param name="mm2_params" value="asm5"/> | |
348 </conditional> | |
349 <param name="remove_small" value="false"/> | |
350 <param name="q" value="10"/> | |
351 <param name="d" value="100000"/> | |
352 <param name="b" value="5000"/> | |
353 <param name="missasembly_break" value="--inter"/> | |
354 <param name="gff" value="annotation.gff"/> | |
355 </section> | |
356 </conditional> | |
357 <output name="correct_paf" file="correct_paf_01.paf" ftype="paf"/> | |
358 <output name="correct_agp" file="correct_agp_01.agp" ftype="agp"/> | |
359 <output name="correct_fasta" file="correct_fasta_01.fasta" ftype="fasta"/> | |
360 <output name="correct_log" file="correct_log_01.txt" ftype="txt" lines_diff="20"/> | |
361 </test> | |
362 <!--Test 02 correct mode nucmer--> | |
363 <test expect_num_outputs="2"> | |
364 <conditional name="mode_conditional"> | |
365 <param name="mode_option" value="correct"/> | |
366 <param name="reference" value="genome.fna"/> | |
367 <param name="query" value="contigs.fna"/> | |
368 <param name="output_correct" value="fasta,agp"/> | |
369 <section name="advanced_options"> | |
370 <param name="f" value="1000"/> | |
371 <conditional name="mapping_conditional"> | |
372 <param name="mapping_option" value="nucmer"/> | |
373 </conditional> | |
374 <param name="remove_small" value="true"/> | |
375 <param name="q" value="10"/> | |
376 <param name="d" value="100000"/> | |
377 <param name="b" value="5000"/> | |
378 <param name="missasembly_break" value="--inter"/> | |
379 </section> | |
380 </conditional> | |
381 <output name="correct_fasta" file="correct_fasta_02.fasta" ftype="fasta"/> | |
382 <output name="correct_agp" file="correct_agp_02.agp" ftype="agp"/> | |
383 | |
384 </test> | |
385 <!--Test 03 scaffold mode--> | |
386 <test expect_num_outputs="6"> | |
387 <conditional name="mode_conditional"> | |
388 <param name="mode_option" value="scaffold"/> | |
389 <param name="reference" value="genome.fna"/> | |
390 <param name="query" value="contigs.fna"/> | |
391 <param name="output_scaffold" value="fasta,agp,paf,confidence,log,stats"/> | |
392 <section name="advanced_options"> | |
393 <param name="f" value="1000"/> | |
394 <param name="remove_small" value="true"/> | |
395 <param name="q" value="10"/> | |
396 <param name="d" value="100000"/> | |
397 <param name="i" value="0.2"/> | |
398 <param name="a" value="0"/> | |
399 <param name="s" value="0"/> | |
400 </section> | |
401 </conditional> | |
402 <output name="scaffold_paf" file="scaffold_paf_03.paf" ftype="paf"/> | |
403 <output name="scaffold_agp" file="scaffold_apg.03.agp" ftype="agp"/> | |
404 <output name="scaffold_fasta" file="scaffold_fasta_03.fasta" ftype="fasta"/> | |
405 <output name="scaffold_log" file="scaffold_log_03.txt" ftype="txt" lines_diff="20"/> | |
406 <output name="scaffold_stats" file="scaffold_stats_03.tabular" ftype="tabular"/> | |
407 <output name="scaffold_confidence" file="scaffold_confidence_03.tabular" ftype="tabular"/> | |
408 </test> | |
409 <!--Test 04 patch mode--> | |
410 <test expect_num_outputs="9"> | |
411 <conditional name="mode_conditional"> | |
412 <param name="mode_option" value="patch"/> | |
413 <param name="reference" value="genome.fna"/> | |
414 <param name="query" value="contigs.fna"/> | |
415 <param name="output_patch" value="final_fasta,final_agp,assembly_file,split_assembly,split_description,target_gaps,agp_renamed,fasta_renamed,log"/> | |
416 <section name="advanced_options"> | |
417 <param name="s" value="50000"/> | |
418 <param name="i" value="0.05"/> | |
419 </section> | |
420 </conditional> | |
421 <output name="patch_agp" file="patch_agp_04.agp" ftype="agp"/> | |
422 <output name="patch_paf" file="patch_paf_04.paf" ftype="paf"/> | |
423 <output name="patch_log" file="patch_log_04.txt" ftype="txt" lines_diff="20"/> | |
424 <output name="patch_comps_fasta" ftype="fasta"> | |
425 <assert_contents> | |
426 <has_size value="603691" delta="100" /> | |
427 </assert_contents> | |
428 </output> | |
429 <output name="patch_ctg_fasta" file="patch_ctg_fasta_04.fasta" ftype="fasta"/> | |
430 <output name="patch_ctg_agp" file="patch_ctg_fasta_04.agp" ftype="agp"/> | |
431 <output name="patch_fasta" file="patch_fasta_04.fasta" ftype="fasta"/> | |
432 <output name="patch_rename_agp" file="patch_rename_agp.agp" ftype="agp"/> | |
433 <output name="patch_rename_fasta" file="patch_rename_fasta.fasta" ftype="fasta"/> | |
434 </test> | |
435 <test expect_num_outputs="2"> | |
436 <!-- Test 05 merge mode--> | |
437 <conditional name="mode_conditional"> | |
438 <param name="mode_option" value="merge"/> | |
439 <param name="assembly_fasta" value="correct_fasta_01.fasta"/> | |
440 <param name="scaffold_files" value="correct_agp_01.agp,correct_agp_02.agp"/> | |
441 <section name="merging_options"> | |
442 <param name="l" value="100000"/> | |
443 <param name="e" value="0"/> | |
444 <param name="function_merging" value="min"/> | |
445 </section> | |
446 </conditional> | |
447 <output name="merge_agp" file="merge_agp_05.agp" ftype="agp"/> | |
448 <output name="merge_fasta" file="merge_fasta_05.fasta" ftype="fasta"/> | |
449 </test> | |
450 </tests> | |
451 <help><![CDATA[ | |
452 .. class:: infomark | |
453 | |
454 **Purpose** | |
455 | |
456 RagTag is a collection of software tools for scaffolding and improving modern genome assemblies. Tasks include: | |
457 | |
458 - Homology-based misassembly correction | |
459 - Homology-based assembly scaffolding and patching | |
460 - Scaffold merging | |
461 | |
462 ---- | |
463 | |
464 .. class:: infomark | |
465 | |
466 **Correct mode** | |
467 | |
468 RagTag offers a correction module that uses a reference genome to identify and correct potential misassemblies in a query assembly. | |
469 RagTag also provides the option to verify putative misassemblies by aligning reads (from the same genotype) to the query assembly | |
470 and observing read coverage near misassembly break points. In all cases, sequence is never added or subtracted. Query sequences | |
471 are only broken at points of putative misassembly. | |
472 | |
473 *Misassemblies vs true variation* | |
474 | |
475 Reference-guided misassembly signatures are sometimes caused by true biological structural variation if the reference and query assemblies | |
476 represent distinct genotypes (or haplotypes). The read validation feature should help to avoid some of these misassembly false positives, | |
477 and the validation sensitivity can be tuned with command line parameters. However, it is ultimately up to the discretion of the user to decide | |
478 if misassembly correction is appropriate. One should validate all RagTag results with independent data (usually physical, optical, or genetic | |
479 maps), when possible. | |
480 | |
481 ---- | |
482 | |
483 .. class:: infomark | |
484 | |
485 **Scaffold mode** | |
486 | |
487 Scaffolding is the process of ordering and orienting draft assembly (query) sequences into longer sequences. Gaps (stretches of "N" characters) | |
488 are placed between adjacent query sequences to indicate the presence of unknown sequence. RagTag uses whole-genome alignments to a reference | |
489 assembly to scaffold query sequences. RagTag does not alter input query sequence in any way and only orders and orients sequences, joining them with gaps. | |
490 | |
491 ---- | |
492 | |
493 .. class:: infomark | |
494 | |
495 **Patch mode** | |
496 | |
497 This mode uses one genome assembly to *patch* another genome assembly. We define two types of patches: | |
498 | |
499 - Fills are patches that fill assembly gaps. This process is like traditional gap-filling, though it uses an assembly instead of WGS sequencing reads. | |
500 - Joins are patches that join distinct contigs. This is essentially scaffolding and gap-filling in a single step. | |
501 | |
502 ---- | |
503 | |
504 .. class:: infomark | |
505 | |
506 **Merge mode** | |
507 | |
508 Draft genome assemblies are often scaffolded multiple times using different approaches. For example, one might scaffold an assembly using different genome | |
509 maps (physical, linkage, Hi-C, etc.), different methods, or different method parameters. RagTag merge is a tool to merge and reconcile different scaffoldings | |
510 of the same assembly. In this way, one can leverage the advantages of multiple techniques to synergistically improve scaffolding. | |
511 | |
512 Most tools write scaffolding results in the AGP file format, which encodes adjacency and gap information in a plain text file. To run RagTag merge, | |
513 one must supply the assembly in FASTA format and at least two AGP files that define a scaffolding of the assembly. Each AGP file can optionally be | |
514 assigned a weight, allowing users to assign the relative influence of each AGP on the final result. | |
515 | |
516 If available, users can supply Hi-C alignments to the draft assembly to resolve conflicts in the merging graph. In this scenario, the input AGP | |
517 files are used to build the initial graph, but then Hi-C alignments are used to re-weight the graph before computing the scaffolding solution. | |
518 | |
519 | |
520 **List of accepted restriction enzymes** | |
521 | |
522 List of all accepted restriction enzymes and their restriction sites: | |
523 | |
524 - HindIII: AAGCTT | |
525 - Sau3AI: GATC | |
526 - MboI: GATC | |
527 - DpnII: GATC | |
528 - HinfI: GA[ATCG]TC | |
529 - DdeI: CT[ATCG]AG | |
530 - MseI: TTAA | |
531 | |
532 For RagTag, use a comma separated list of enzymes or sites (or a mix). For example: | |
533 | |
534 - Arima Hi-C v1.0: *Sau3AI,HinfI* or *GATC,GA[ATCG]TC* | |
535 - Arima Hi-C v2.0: *Sau3AI,HinfI,DdeI,MseI* or *GATC,GA[ATCG]TC,CT[ATCG]AG,TTAA* | |
536 | |
537 Note that for restriction sites, wildcards are represented with python regex syntax, not IUPAC ambiguity codes. e.g. '[ATCG]' instead of 'N'. | |
538 | |
539 Restriction enzymes are not necessarily the enzyme used for sample prep. Each is only an enzyme that cuts at the corresponding restriction site. | |
540 | |
541 ]]> </help> | |
542 <expand macro="citations" /> | |
543 </tool> |