comparison hifiasm.xml @ 5:045c7c3d8e59 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/hifiasm commit 1e4c6c2e48f5e95beed75bb76134cbf7fa55dc8a"
author bgruening
date Wed, 06 Oct 2021 20:11:11 +0000
parents 3f7be05a1597
children 5bec28269d95
comparison
equal deleted inserted replaced
4:3f7be05a1597 5:045c7c3d8e59
1 <tool id="hifiasm" name="Hifiasm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> 1 <tool id="hifiasm" name="Hifiasm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>haplotype-resolved de novo assembler for PacBio Hifi reads</description> 2 <description>haplotype-resolved de novo assembler for PacBio Hifi reads</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">0.16.1</token> 4 <token name="@TOOL_VERSION@">0.16.1</token>
5 <token name="@VERSION_SUFFIX@">0</token> 5 <token name="@VERSION_SUFFIX@">1</token>
6 <token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token> 6 <token name="@FORMATS@">fasta,fasta.gz,fastq,fastq.gz</token>
7 <xml name="reads"> 7 <xml name="reads">
8 <param name="reads" type="data" format="@FORMATS@" multiple="true" label="Input reads" /> 8 <param name="reads" type="data" format="@FORMATS@" multiple="true" label="Input reads" />
9 </xml> 9 </xml>
10 </macros> 10 </macros>
17 17
18 <![CDATA[ 18 <![CDATA[
19 #set $input_files = list() 19 #set $input_files = list()
20 #set $hap1_inputs = list() 20 #set $hap1_inputs = list()
21 #set $hap2_inputs = list() 21 #set $hap2_inputs = list()
22 #set $hic1_inputs = list()
23 #set $hic2_inputs = list()
22 #for idx, read in enumerate($mode.reads): 24 #for idx, read in enumerate($mode.reads):
23 #set $inputfile = 'input_%d.%s' % ($idx, $read.dataset.extension) 25 #set $inputfile = 'input_%d.%s' % ($idx, $read.dataset.extension)
24 ln -s '$read' $inputfile && 26 ln -s '$read' $inputfile &&
25 $input_files.append($inputfile) 27 $input_files.append($inputfile)
26 #end for 28 #end for
27 #set $input_filenames = ' '.join($input_files) 29 #set $input_filenames = ' '.join($input_files)
30 #if str($hic_partition.hic_partition_selector) == 'set'
31 mkdir HiCF HiCR &&
32 #for idx, read in enumerate($hic_partition.h1):
33 #set $inputfile = './HiCF/input_%d.%s' % ($idx, $read.dataset.extension)
34 ln -s '$read' $inputfile &&
35 $hic1_inputs.append($inputfile)
36 #end for
37 #for idx, read in enumerate($hic_partition.h2):
38 #set $inputfile = './HiCR/input_%d.%s' % ($idx, $read.dataset.extension)
39 ln -s '$read' $inputfile &&
40 $hic2_inputs.append($inputfile)
41 #end for
42 #end if
28 #if str($mode.mode_selector) == 'trio': 43 #if str($mode.mode_selector) == 'trio':
29 #for idx, read in enumerate($mode.hap1_reads): 44 #for idx, read in enumerate($mode.hap1_reads):
30 #set $inputfile = 'hap1_input_%d.%s' % ($idx, $read.dataset.extension) 45 #set $inputfile = 'hap1_input_%d.%s' % ($idx, $read.dataset.extension)
31 ln -s '$read' $inputfile && 46 ln -s '$read' $inputfile &&
32 $hap1_inputs.append($inputfile) 47 $hap1_inputs.append($inputfile)
91 --n-hap $purge_options.n_hap 106 --n-hap $purge_options.n_hap
92 #end if 107 #end if
93 #end if 108 #end if
94 109
95 #if str($hic_partition.hic_partition_selector) == 'set': 110 #if str($hic_partition.hic_partition_selector) == 'set':
96 --h1 '${ ' '.join(["%s" % $x for $x in $hic_partition.h1]) }' 111 --h1 '${ ' '.join(["%s" % $x for $x in $hic1_inputs]) }'
97 --h2 '${ ' '.join(["%s" % $x for $x in $hic_partition.h2]) }' 112 --h2 '${ ' '.join(["%s" % $x for $x in $hic2_inputs]) }'
98 #if $hic_partition.seed: 113 #if $hic_partition.seed:
99 --seed $hic_partition.seed 114 --seed $hic_partition.seed
100 #end if 115 #end if
101 #if $hic_partition.n_weight: 116 #if $hic_partition.n_weight:
102 --n-weight $hic_partition.n_weight 117 --n-weight $hic_partition.n_weight
134 <param name="max_kmers" argument="-c" type="integer" value="2" label="Lower bound of the binned k-mer's frequency" /> 149 <param name="max_kmers" argument="-c" type="integer" value="2" label="Lower bound of the binned k-mer's frequency" />
135 <param name="min_kmers" argument="-d" type="integer" value="5" label="Upper bound of the binned k-mer's frequency" /> 150 <param name="min_kmers" argument="-d" type="integer" value="5" label="Upper bound of the binned k-mer's frequency" />
136 <param name="yak_kmer_length" type="integer" min="0" max="64" value="31" label="Yak counter k-mer length" /> 151 <param name="yak_kmer_length" type="integer" min="0" max="64" value="31" label="Yak counter k-mer length" />
137 </when> 152 </when>
138 </conditional> 153 </conditional>
139 <param name="filter_bits" argument="-f" type="integer" min="0" value="37" label="Bits for bloom filter" help="A value of 0 disables the bloom filter" /> 154 <param name="filter_bits" argument="-f" type="integer" min="0" value="37" label="Bits for bloom filter" help="A value of 0 disables the bloom filter" />
155 <conditional name="assembly_options">
156 <param name="assembly_selector" type="select" label="Assembly options">
157 <option value="blank">Leave default</option>
158 <option value="set">Specify</option>
159 </param>
160 <when value="blank" />
161 <when value="set">
162 <param name="cleaning_rounds" argument="-a" type="integer" value="4" label="Cleaning rounds" />
163 <param name="adapter_length" argument="-z" type="integer" min="0" value="0" label="Length of adapters to be removed" />
164 <param name="pop_contigs" argument="-m" type="integer" value="10000000" label="Minimum contig bubble size" help="Pop contig graph bubbles smaller than this value" />
165 <param name="pop_unitigs" argument="-p" type="integer" value="100000" label="Minimum unitig bubble size" help="Pop unitig graph bubbles smaller than this value" />
166 <param name="remove_tips" argument="-n" type="integer" value="3" label="Tip unitigs" help="Keep only tip unitigs with a number of reads greater than or equal to this value" />
167 <param name="max_overlap" argument="-x" type="float" min="0" max="1" value="0.8" label="Maximum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are below a threshold controlled by -x. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/>
168 <param name="min_overlap" argument="-y" type="float" min="0" max="1" value="0.2" label="Minimum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are over a threshold controlled by -y. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/>
169 <param name="disable_post_join" argument="-u" type="boolean" truevalue="-u" falsevalue="" label="Skip post join contigs step" help="May improve N50" />
170 <param name="ignore_error_corrected" argument="-i" type="boolean" truevalue="-i" falsevalue="" value="False" label="Ignore error corrected reads and overlaps" help="Ignore error corrected reads and overlaps saved in prefix.*.bin files. Apart from assembly graphs, hifiasm also outputs three binary files that save alloverlap information during assembly step. With these files, hifiasm can avoid the time-consuming all-to-all overlap calculation step, and do the assembly directly and quickly. This might be helpful when users want to get an optimized assembly by multiple rounds of experiments with different parameters." />
171 <param argument="--hom-cov" type="integer" optional="True" value="" label="Homozygous read coverage" />
172 </when>
173 </conditional>
174 <conditional name="purge_options">
175 <param name="purge_selector" type="select" label="Options for purging duplicates">
176 <option value="blank">Leave default</option>
177 <option value="set">Specify</option>
178 </param>
179 <when value="blank" />
180 <when value="set">
181 <param name="purge_level" argument="-l" type="select" label="Purge level">
182 <option value="0" selected="true">None (0)</option>
183 <option value="1">Light (1)</option>
184 <option value="2">Aggressive (2)</option>
185 <option value="3">Aggressive - high heterozygosity rate (3)</option>
186 </param>
187 <param name="similarity_threshold" argument="-s" type="float" min="0" max="1" value="0.75" label="Similarity threshold for duplicate haplotigs" />
188 <param name="minimum_overlap" argument="-O" type="integer" value="1" label="Minimum overlapped reads for duplicate haplotigs" />
189 <param argument="--purge-max" type="integer" optional="true" label="Coverage upper bound" help="If not set, this will be determined automatically" />
190 <param argument="--n-hap" type="integer" min="0" value="" optional="true" label="Assumtion of haplotype number" help="A haplotype is defined as the combination of alleles for different polymorphisms that occur on the same chromosome." />
191 </when>
192 </conditional>
193 <conditional name="hic_partition">
194 <param name="hic_partition_selector" type="select" label="Options for Hi-C-partition">
195 <option value="blank">Leave default</option>
196 <option value="set">Specify</option>
197 </param>
198 <when value="blank" />
199 <when value="set">
200 <param argument="--h1" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R1 reads" />
201 <param argument="--h2" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R2 reads" />
202 <param argument="--seed" type="integer" min="1" value="" optional="true" label="RNG seed" />
203 <param argument="--n-weight" type="integer" min="1" value="" optional="true" label="Rounds of reweighting Hi-C links. Increasing this may improves phasing results but takes longer time" />
204 <param argument="--n-perturb" type="integer" min="1" value="" optional="true" label="Rounds of perturbation. Increasing this may improves phasing results but takes longer time" />
205 <param argument="--f-perturb" type="float" min="0" max="1" value="" optional="true" label="Fraction to flip for perturbation. Increasing this may improves phasing results but takes longer time" />
206 <param argument="--l-msjoin" type="integer" min="0" value="500000" label="Detect misjoined unitigs of greater than or equal to specified size" help="A value of 0 disables this filter"/>
207 </when>
208 </conditional>
140 <conditional name="advanced_options"> 209 <conditional name="advanced_options">
141 <param name="advanced_selector" type="select" label="Advanced options"> 210 <param name="advanced_selector" type="select" label="Advanced options">
142 <option value="blank">Leave default</option> 211 <option value="blank">Leave default</option>
143 <option value="set">Specify</option> 212 <option value="set">Specify</option>
144 </param> 213 </param>
164 </sanitizer> 233 </sanitizer>
165 <validator type="regex">[0-9kKmMGg]+</validator> 234 <validator type="regex">[0-9kKmMGg]+</validator>
166 </param> 235 </param>
167 </when> 236 </when>
168 </conditional> 237 </conditional>
169
170 <conditional name="assembly_options">
171 <param name="assembly_selector" type="select" label="Assembly options">
172 <option value="blank">Leave default</option>
173 <option value="set">Specify</option>
174 </param>
175 <when value="blank" />
176 <when value="set">
177 <param name="cleaning_rounds" argument="-a" type="integer" value="4" label="Cleaning rounds" />
178 <param name="adapter_length" argument="-z" type="integer" min="0" value="0" label="Length of adapters to be removed" />
179 <param name="pop_contigs" argument="-m" type="integer" value="10000000" label="Minimum contig bubble size" help="Pop contig graph bubbles smaller than this value" />
180 <param name="pop_unitigs" argument="-p" type="integer" value="100000" label="Minimum unitig bubble size" help="Pop unitig graph bubbles smaller than this value" />
181 <param name="remove_tips" argument="-n" type="integer" value="3" label="Tip unitigs" help="Keep only tip unitigs with a number of reads greater than or equal to this value" />
182 <param name="max_overlap" argument="-x" type="float" min="0" max="1" value="0.8" label="Maximum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are below a threshold controlled by -x. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/>
183 <param name="min_overlap" argument="-y" type="float" min="0" max="1" value="0.2" label="Minimum overlap drop ratio" help="This option is used with -r. Given a node N in the assembly graph, let max(N) be the length of the largest overlap of N. Hifiasm iteratively drops overlaps of N if their length/max(N) are over a threshold controlled by -y. Hifiasm applies -r rounds of short overlap removal with an increasing threshold between -x and -y"/>
184 <param name="disable_post_join" argument="-u" type="boolean" truevalue="-u" falsevalue="" label="Skip post join contigs step" help="May improve N50" />
185 <param name="ignore_error_corrected" argument="-i" type="boolean" truevalue="-i" falsevalue="" value="False" label="Ignore error corrected reads and overlaps" help="Ignore error corrected reads and overlaps saved in prefix.*.bin files. Apart from assembly graphs, hifiasm also outputs three binary files that save alloverlap information during assembly step. With these files, hifiasm can avoid the time-consuming all-to-all overlap calculation step, and do the assembly directly and quickly. This might be helpful when users want to get an optimized assembly by multiple rounds of experiments with different parameters." />
186 <param argument="--hom-cov" type="integer" optional="True" value="" label="Homozygous read coverage" />
187 </when>
188 </conditional>
189
190 <conditional name="purge_options">
191 <param name="purge_selector" type="select" label="Options for purging duplicates">
192 <option value="blank">Leave default</option>
193 <option value="set">Specify</option>
194 </param>
195 <when value="blank" />
196 <when value="set">
197 <param name="purge_level" argument="-l" type="select" label="Purge level">
198 <option value="0" selected="true">None (0)</option>
199 <option value="1">Light (1)</option>
200 <option value="2">Aggressive (2)</option>
201 <option value="3">Aggressive - high heterozygosity rate (3)</option>
202 </param>
203 <param name="similarity_threshold" argument="-s" type="float" min="0" max="1" value="0.75" label="Similarity threshold for duplicate haplotigs" />
204 <param name="minimum_overlap" argument="-O" type="integer" value="1" label="Minimum overlapped reads for duplicate haplotigs" />
205 <param argument="--purge-max" type="integer" optional="true" label="Coverage upper bound" help="If not set, this will be determined automatically" />
206 <param argument="--n-hap" type="integer" min="0" value="" optional="true" label="Assumtion of haplotype number" help="A haplotype is defined as the combination of alleles for different polymorphisms that occur on the same chromosome." />
207 </when>
208 </conditional>
209
210 <conditional name="hic_partition">
211 <param name="hic_partition_selector" type="select" label="Options for Hi-C-partition">
212 <option value="blank">Leave default</option>
213 <option value="set">Specify</option>
214 </param>
215 <when value="blank" />
216 <when value="set">
217 <param argument="--h1" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R1 reads" />
218 <param argument="--h2" type="data" format="fastq,fastq.gz" multiple="true" label="Hi-C R2 reads" />
219 <param argument="--seed" type="integer" min="1" value="" optional="true" label="RNG seed" />
220 <param argument="--n-weight" type="integer" min="1" value="" optional="true" label="Rounds of reweighting Hi-C links" />
221 <param argument="--n-perturb" type="integer" min="1" value="" optional="true" label="Rounds of perturbation" />
222 <param argument="--f-perturb" type="float" min="0" max="1" value="" optional="true" label="Fraction to flip for perturbation" />
223 <param argument="--l-msjoin" type="integer" min="0" value="500000" label="Detect misjoined unitigs of greater than or equal to specified size" help="A value of 0 disables this filter"/>
224 </when>
225 </conditional>
226 <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/> 238 <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/>
227 </inputs> 239 </inputs>
228 <outputs> 240 <outputs>
229 <data name="raw_unitigs" format="gfa1" from_work_dir="output.r_utg.gfa" label="${tool.name} on ${on_string}, haplotype-resolved raw unitig graph"> 241 <!--Standard mode-->
230 <filter>mode['mode_selector'] == 'standard'</filter> 242 <data name="raw_unitigs" format="gfa1" from_work_dir="output.r_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved raw unitig graph">
231 </data> 243 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
232 <data name="raw_unitigs_trio" format="gfa1" from_work_dir="output.dip.r_utg.gfa" label="${tool.name} on ${on_string}, haplotype-resolved raw unitig graph"> 244 </data>
233 <filter>mode['mode_selector'] == 'trio'</filter> 245 <data name="processed_unitigs" format="gfa1" from_work_dir="output.p_utg.gfa" label="${tool.name} on ${on_string}: processed unitig graph">
234 </data> 246 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
235 <data name="processed_unitigs" format="gfa1" from_work_dir="output.p_utg.gfa" label="${tool.name} on ${on_string}, processed unitig graph"> 247 </data>
236 <filter>mode['mode_selector'] == 'standard'</filter> 248 <data name="primary_contig_graph" format="gfa1" from_work_dir="output.p_ctg.gfa" label="${tool.name} on ${on_string}: primary assembly contig graph">
237 </data> 249 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
238 <data name="primary_contig_graph" format="gfa1" from_work_dir="output.p_ctg.gfa" label="${tool.name} on ${on_string}, primary assembly contig graph"> 250 </data>
239 <filter>mode['mode_selector'] == 'standard'</filter> 251 <data name="alternate_contig_graph" format="gfa1" from_work_dir="output.a_ctg.gfa" label="${tool.name} on ${on_string}: alternate assembly contig graph">
240 </data> 252 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'blank'</filter>
241 <data name="alternate_contig_graph" format="gfa1" from_work_dir="output.a_ctg.gfa" label="${tool.name} on ${on_string}, alternate assembly contig graph"> 253 </data>
242 <filter>mode['mode_selector'] == 'standard'</filter> 254 <!--Trio outputs without Hi-c reads-->
243 </data> 255 <data name="hap1_contigs" format="gfa1" from_work_dir="output.dip.hap1.p_ctg.gfa" label="${tool.name} on ${on_string}: hap1.p_ctg contig graph">
244 <data name="hap1_contigs" format="gfa1" from_work_dir="output.hap1.p_ctg.gfa" label="${tool.name} on ${on_string}, hap1.p_ctg contig graph"> 256 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
245 <filter>mode['mode_selector'] == 'trio'</filter> 257 </data>
246 </data> 258 <data name="hap2_contigs" format="gfa1" from_work_dir="output.dip.hap2.p_ctg.gfa" label="${tool.name} on ${on_string}: hap2.p_ctg contig graph">
247 <data name="hap2_contigs" format="gfa1" from_work_dir="output.hap2.p_ctg.gfa" label="${tool.name} on ${on_string}, hap2.p_ctg contig graph"> 259 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
248 <filter>mode['mode_selector'] == 'trio'</filter> 260 </data>
249 </data> 261 <data name="raw_unitigs_trio" format="gfa1" from_work_dir="output.dip.r_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved raw unitig graph">
250 <!-- Hi-C partition outputs --> 262 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
251 <data name="hic_contig_graph" format="gfa1" from_work_dir="output.hic.p_ctg.gfa" label="${tool.name} ${on_string}, HI-C contig graph"> 263 </data>
252 <filter>hic_partition['hic_partition_selector'] == 'set'</filter> 264 <data name="processed_unitigs_trio" format="gfa1" from_work_dir="output.dip.p_utg.gfa" label="${tool.name} on ${on_string}: haplotype-resolved processed unitig graph">
253 </data> 265 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'blank'</filter>
254 <data name="hic_balanced_contig_hap1_graph" format="gfa1" from_work_dir="output.bp.hap1.p_ctg.gfa" label="${tool.name} ${on_string}, HI-C hap1 balanced contig graph hap1"> 266 </data>
255 <filter>hic_partition['hic_partition_selector'] == 'set'</filter> 267 <!-- Stardand mode with Hi-C partition outputs -->
256 </data> 268 <data name="hic_pcontig_graph" format="gfa1" from_work_dir="output.hic.p_ctg.gfa" label="${tool.name} ${on_string}: Hi-C primary contig graph">
257 <data name="hic_balanced_contig_hap2_graph" format="gfa1" from_work_dir="output.bp.hap2.p_ctg.gfa" label="${tool.name} ${on_string}, HI-C hap2 balanced contig graph hap2"> 269 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
258 <filter>hic_partition['hic_partition_selector'] == 'set'</filter> 270 </data>
259 </data> 271 <data name="hic_acontig_graph" format="gfa1" from_work_dir="output.hic.a_ctg.gfa" label="${tool.name} ${on_string}: Hi-C alternate contig graph">
260 <data name="log_file" format="txt" from_work_dir="output.log" label="${tool.name} ${on_string}, log file"> 272 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
273 </data>
274 <data name="hic_balanced_contig_hap1_graph" format="gfa1" from_work_dir="output.bp.hap1.p_ctg.gfa" label="${tool.name} ${on_string}: Hi-C hap1 balanced contig graph hap1">
275 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
276 </data>
277 <data name="hic_balanced_contig_hap2_graph" format="gfa1" from_work_dir="output.bp.hap2.p_ctg.gfa" label="${tool.name} ${on_string}: Hi-C hap2 balanced contig graph hap2">
278 <filter>mode['mode_selector'] == 'standard' and hic_partition['hic_partition_selector'] == 'set'</filter>
279 </data>
280 <!--Trio outputs with Hi-c reads-->
281 <data name="hap1_contigs_hic" format="gfa1" from_work_dir="output.hic.bench.r_utg.gfa" label="${tool.name} on ${on_string}: raw initig graph">
282 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter>
283 </data>
284 <data name="hap2_contigs_hic" format="gfa1" from_work_dir="output.hic.bench.p_utg.gfa" label="${tool.name} on ${on_string}: processsed initig graph">
285 <filter>mode['mode_selector'] == 'trio' and hic_partition['hic_partition_selector'] == 'set'</filter>
286 </data>
287 <!--Log output-->
288 <data name="log_file" format="txt" from_work_dir="output.log" label="${tool.name} ${on_string}: log file">
261 <filter>log_out</filter> 289 <filter>log_out</filter>
262 </data> 290 </data>
263 </outputs> 291 </outputs>
264 <tests> 292 <tests>
265 <test> 293 <test expect_num_outputs="4">
266 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 294 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
267 <param name="filter_bits" value="0" /> 295 <param name="filter_bits" value="0" />
268 <param name="mode_selector" value="standard" /> 296 <param name="mode_selector" value="standard" />
269 <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" /> 297 <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" />
270 <output name="processed_unitigs" file="hifiasm-out1-processed.gfa" ftype="gfa1" /> 298 <output name="processed_unitigs" file="hifiasm-out1-processed.gfa" ftype="gfa1" />
273 <assert_contents> 301 <assert_contents>
274 <has_size value="0"/> 302 <has_size value="0"/>
275 </assert_contents> 303 </assert_contents>
276 </output> 304 </output>
277 </test> 305 </test>
278 <test> 306 <test expect_num_outputs="4">
279 <param name="reads" value="hifiasm-in2-0.fa.gz,hifiasm-in2-1.fa.gz,hifiasm-in2-2.fa.gz,hifiasm-in2-3.fa.gz,hifiasm-in2-4.fa.gz" ftype="fasta.gz" /> 307 <param name="reads" value="hifiasm-in2-0.fa.gz,hifiasm-in2-1.fa.gz,hifiasm-in2-2.fa.gz,hifiasm-in2-3.fa.gz,hifiasm-in2-4.fa.gz" ftype="fasta.gz" />
280 <param name="filter_bits" value="0" /> 308 <param name="filter_bits" value="0" />
281 <param name="mode_selector" value="standard" /> 309 <param name="mode_selector" value="standard" />
282 <output name="raw_unitigs" file="hifiasm-out2-raw.gfa" ftype="gfa1" /> 310 <output name="raw_unitigs" file="hifiasm-out2-raw.gfa" ftype="gfa1" />
283 <output name="processed_unitigs" file="hifiasm-out2-processed.gfa" ftype="gfa1" /> 311 <output name="processed_unitigs" file="hifiasm-out2-processed.gfa" ftype="gfa1" />
287 <has_size value="0"/> 315 <has_size value="0"/>
288 </assert_contents> 316 </assert_contents>
289 </output> 317 </output>
290 </test> 318 </test>
291 <!-- Test logfile out--> 319 <!-- Test logfile out-->
292 <test> 320 <test expect_num_outputs="5">
293 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 321 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
294 <param name="filter_bits" value="0" /> 322 <param name="filter_bits" value="0" />
295 <param name="mode_selector" value="standard" /> 323 <param name="mode_selector" value="standard" />
296 <param name="log_out" value="yes"/> 324 <param name="log_out" value="yes"/>
297 <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" /> 325 <output name="raw_unitigs" file="hifiasm-out1-raw.gfa" ftype="gfa1" />
306 <assert_contents> 334 <assert_contents>
307 <has_line line="[M::main] CMD: hifiasm -t 1 -o output -f 0 --primary input_0.fasta.gz"/> 335 <has_line line="[M::main] CMD: hifiasm -t 1 -o output -f 0 --primary input_0.fasta.gz"/>
308 </assert_contents> 336 </assert_contents>
309 </output> 337 </output>
310 </test> 338 </test>
311 339 <!--Test Hi-C reads-->
312 <!-- Test Hi-C 340 <test expect_num_outputs="4">
313 <test>
314 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 341 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
315 <param name="filter_bits" value="0" /> 342 <param name="filter_bits" value="0" />
316 <param name="mode_selector" value="standard" /> 343 <param name="mode_selector" value="standard" />
317 <conditional name="hic_partition"> 344 <conditional name="hic_partition">
318 <param name="hic_partition_selector" value="set"/> 345 <param name="hic_partition_selector" value="set"/>
319 <param name="h1" value="r1_1.fq"/> 346 <param name="h1" value="hic_1.fastq.gz"/>
320 <param name="h2" value="r2_1.fq"/> 347 <param name="h2" value="hic_2.fastq.gz"/>
321 </conditional> 348 <param name="n_weight" value="1"/>
322 <output name="raw_unitigs" file="hifiasm-out3-raw.gfa" ftype="gfa1" /> 349 <param name="n_perturb" value="1"/>
323 <output name="processed_unitigs" file="hifiasm-out3-processed.gfa" ftype="gfa1" /> 350 <param name="l_perturb" value="0"/>
324 <output name="primary_contig_graph" file="hifiasm-out3-primary.gfa" ftype="gfa1" /> 351 <param name="l_msjoin" value="0"/>
325 <output name="alternate_contig_graph" ftype="gfa1"> 352 </conditional>
326 <assert_contents> 353 <output name="hic_pcontig_graph" file="hifiasm-out-hifi-p.gfa" ftype="gfa1" />
327 <has_size value="0"/> 354 <output name="hic_acontig_graph" file="hifiasm-out-hifi-a.gfa" ftype="gfa1" />
328 </assert_contents> 355 <output name="hic_balanced_contig_hap1_graph" ftype="gfa1">
329 </output> 356 <assert_contents>
330 </test> 357 <has_size value="0"/>
331 --> 358 </assert_contents>
359 </output>
360 <output name="hic_balanced_contig_hap1_graph" ftype="gfa1" >
361 <assert_contents>
362 <has_size value="0"/>
363 </assert_contents>
364 </output>
365 </test>
332 <!-- Test trio mode --> 366 <!-- Test trio mode -->
333 <test> 367 <test expect_num_outputs="4">
334 <param name="filter_bits" value="0"/> 368 <param name="filter_bits" value="0"/>
335 <conditional name="mode"> 369 <conditional name="mode">
336 <param name="mode_selector" value="trio"/> 370 <param name="mode_selector" value="trio"/>
337 <param name="reads" value="child.fasta.gz"/> 371 <param name="reads" value="child.fasta.gz"/>
338 <param name="hap1_reads" value="paternal.fasta.gz"/> 372 <param name="hap1_reads" value="paternal.fasta.gz"/>
343 <output name="raw_unitigs_trio" ftype="gfa1"> 377 <output name="raw_unitigs_trio" ftype="gfa1">
344 <assert_contents> 378 <assert_contents>
345 <has_size value="0"/> 379 <has_size value="0"/>
346 </assert_contents> 380 </assert_contents>
347 </output> 381 </output>
382 <output name="processed_unitigs_trio" ftype="gfa1">
383 <assert_contents>
384 <has_size value="0"/>
385 </assert_contents>
386 </output>
348 <output name="hap1_contigs"> 387 <output name="hap1_contigs">
349 <assert_contents> 388 <assert_contents>
350 <has_size value="0"/> 389 <has_size value="0"/>
351 </assert_contents> 390 </assert_contents>
352 </output> 391 </output>
355 <has_size value="0"/> 394 <has_size value="0"/>
356 </assert_contents> 395 </assert_contents>
357 </output> 396 </output>
358 </test> 397 </test>
359 <!-- Test ignore-error-corrected option --> 398 <!-- Test ignore-error-corrected option -->
360 <test> 399 <test expect_num_outputs="4">
361 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 400 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
362 <param name="filter_bits" value="0" /> 401 <param name="filter_bits" value="0" />
363 <param name="mode_selector" value="standard" /> 402 <param name="mode_selector" value="standard" />
364 <conditional name="assembly_options"> 403 <conditional name="assembly_options">
365 <param name="assembly_selector" value="set"/> 404 <param name="assembly_selector" value="set"/>
373 <has_size value="0"/> 412 <has_size value="0"/>
374 </assert_contents> 413 </assert_contents>
375 </output> 414 </output>
376 </test> 415 </test>
377 <!-- Test expected haplotype number --> 416 <!-- Test expected haplotype number -->
378 <test> 417 <test expect_num_outputs="4">
379 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 418 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
380 <param name="filter_bits" value="0" /> 419 <param name="filter_bits" value="0" />
381 <param name="mode_selector" value="standard" /> 420 <param name="mode_selector" value="standard" />
382 <conditional name="purge_options"> 421 <conditional name="purge_options">
383 <param name="purge_selector" value="set"/> 422 <param name="purge_selector" value="set"/>
391 <has_size value="0"/> 430 <has_size value="0"/>
392 </assert_contents> 431 </assert_contents>
393 </output> 432 </output>
394 </test> 433 </test>
395 <!-- Test min_hist_cnt option --> 434 <!-- Test min_hist_cnt option -->
396 <test> 435 <test expect_num_outputs="4">
397 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 436 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
398 <param name="filter_bits" value="0" /> 437 <param name="filter_bits" value="0" />
399 <param name="mode_selector" value="standard" /> 438 <param name="mode_selector" value="standard" />
400 <conditional name="advanced_options"> 439 <conditional name="advanced_options">
401 <param name="advanced_selector" value="set"/> 440 <param name="advanced_selector" value="set"/>
409 <has_size value="0"/> 448 <has_size value="0"/>
410 </assert_contents> 449 </assert_contents>
411 </output> 450 </output>
412 </test> 451 </test>
413 <!-- Test max_kooc option --> 452 <!-- Test max_kooc option -->
414 <test> 453 <test expect_num_outputs="4">
415 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 454 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
416 <param name="filter_bits" value="0" /> 455 <param name="filter_bits" value="0" />
417 <param name="mode_selector" value="standard" /> 456 <param name="mode_selector" value="standard" />
418 <conditional name="advanced_options"> 457 <conditional name="advanced_options">
419 <param name="advanced_selector" value="set"/> 458 <param name="advanced_selector" value="set"/>
427 <has_size value="0"/> 466 <has_size value="0"/>
428 </assert_contents> 467 </assert_contents>
429 </output> 468 </output>
430 </test> 469 </test>
431 <!-- Test hg-size option --> 470 <!-- Test hg-size option -->
432 <test> 471 <test expect_num_outputs="4">
433 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 472 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
434 <param name="filter_bits" value="0" /> 473 <param name="filter_bits" value="0" />
435 <param name="mode_selector" value="standard" /> 474 <param name="mode_selector" value="standard" />
436 <conditional name="advanced_options"> 475 <conditional name="advanced_options">
437 <param name="advanced_selector" value="set"/> 476 <param name="advanced_selector" value="set"/>
445 <has_size value="0"/> 484 <has_size value="0"/>
446 </assert_contents> 485 </assert_contents>
447 </output> 486 </output>
448 </test> 487 </test>
449 <!-- Test ignore-error-corrected option --> 488 <!-- Test ignore-error-corrected option -->
450 <test> 489 <test expect_num_outputs="4">
451 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" /> 490 <param name="reads" value="hifiasm-in1.fa.gz" ftype="fasta.gz" />
452 <param name="filter_bits" value="0" /> 491 <param name="filter_bits" value="0" />
453 <param name="mode_selector" value="standard" /> 492 <param name="mode_selector" value="standard" />
454 <conditional name="assembly_options"> 493 <conditional name="assembly_options">
455 <param name="assembly_selector" value="set"/> 494 <param name="assembly_selector" value="set"/>
464 </assert_contents> 503 </assert_contents>
465 </output> 504 </output>
466 </test> 505 </test>
467 </tests> 506 </tests>
468 <help><![CDATA[ 507 <help><![CDATA[
469 *********************************** 508 .. class:: infomark
470 HiFiASM - a fast de novo assembler 509
471 *********************************** 510 **HiFiASM - a fast de novo assembler**
511
472 512
473 Hifiasm is a fast haplotype-resolved de novo assembler for PacBio Hifi reads. It can assemble a human genome in several hours and works with the California redwood genome, one of the most complex genomes sequenced so far. Hifiasm can produce primary/alternate assemblies of quality competitive with the best assemblers. It also introduces a new graph binning algorithm and achieves the best haplotype-resolved assembly given trio data. 513 Hifiasm is a fast haplotype-resolved de novo assembler for PacBio Hifi reads. It can assemble a human genome in several hours and works with the California redwood genome, one of the most complex genomes sequenced so far. Hifiasm can produce primary/alternate assemblies of quality competitive with the best assemblers. It also introduces a new graph binning algorithm and achieves the best haplotype-resolved assembly given trio data.
474 514
475 #### Assembly mode 515 ----
516
517 .. class:: infomark
518
519 **Assembly mode**
520
476 - *Standard* 521 - *Standard*
477 - *Trio* When parental short reads are available, hifiasm can generate a pair of haplotype-resolved assemblies with trio binning. 522 - *Trio* When parental short reads are available, hifiasm can generate a pair of haplotype-resolved assemblies with trio binning.
478 523
479 #### Trio Options 524 ----
480 - *Haplotype 1 reads* : list of hap1/paternal read names 525
481 - *Haplotype 2 reads* : list of hap2/maternal read names 526 .. class:: infomark
482 - *Lower bound of the binned k-mer's frequency* 527
483 - *Upper bound of the binned k-mer's frequency* 528 **Outputs**
484 529
485 *Bits for bloom filter* (-f) - A value of 0 disables the bloom filter for small genomes. For genomes much larger than human, applying -f 38 or even -f 39 is preferred to save memory on k-mer counting. 530 Non Trio assembly:
486 531
487 #### Advanced options
488 - *Length of adapters to be removed* Old HiFi reads may contain short adapter sequences at the ends of reads. You can specify 20 to trim both ends of reads by 20bp.
489 - *K-mer length* (must be <64)
490 - *Minimizer window size*
491 - *Drop K-mers* K-mers that occur more than this value multiplied by the coverage will be discarded
492 - *Maximum overlaps to consider* consider up to max(-D*coverage,-N) overlaps for each oriented read
493 - *Correction rounds* number of correction rounds
494
495 #### Assembly options
496 - *Cleaning rounds* number of assembly cleaning rounds
497 - *Minimum contig bubble size* Pop contig graph bubbles smaller than this value
498 - *Minimum unitig bubble size* Pop unitig graph bubbles smaller than this value
499 - *Tip unitigs* Keep only tip unitigs with a number of reads greater than or equal to this value
500 - *Maximum overlap drop ratio*
501 - *Minimum overlap drop ratio*
502 - *Skip post join contigs step* disable post join contigs step which may improve N50
503
504 #### Options for purging duplicates
505 - *Purge level* 0: no purging; 1: light; 2: aggressive [0 for trio; 2 for unzip]
506 - *Similarity threshold for duplicate haplotigs*
507 - *Minimum overlapped reads for duplicate haplotigs*
508 - *Coverage upper bound* If not set, this will be determined automatically
509 - *Experimental high-heterozygosity mode* enable this mode for high-heterozygosity samples. NB: may be unstable
510
511 #### Hi-C-partition options
512
513 - *RNG seed*
514 - *Rounds of reweighting Hi-C links* : increasing this may improve phasing results but takes longer.
515 - *Rounds of perturbation* : increasing this may improve phasing results but takes longer.
516 - *Fraction to flip for perturbation* : increasing this may improve phasing results but takes longer.
517
518 ### Outputs
519
520 Non Trio assembly
521 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information, including somatic mutations and recurrent sequencing errors. 532 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information, including somatic mutations and recurrent sequencing errors.
522 - Haplotype-resolved processed unitig graph without small bubbles : Small bubbles might be caused by somatic mutations or noise in data, which are not the real haplotype information. 533 - Haplotype-resolved processed unitig graph without small bubbles : Small bubbles might be caused by somatic mutations or noise in data, which are not the real haplotype information.
523 - Primary assembly contig graph : This graph collapses different haplotypes. 534 - Primary assembly contig graph : This graph collapses different haplotypes.
524 - Alternate assembly contig graph : This graph consists of all assemblies that are discarded in the primary contig graph. 535 - Alternate assembly contig graph : This graph consists of all assemblies that are discarded in the primary contig graph.
525 536
526 537
527 Trio assembly 538 Trio assembly:
539
528 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information. 540 - Haplotype-resolved raw unitig graph in GFA format. This graph keeps all haplotype information.
529 - Phased paternal/haplotype1 contig graph. This graph keeps the phased paternal/haplotype1 assembly. 541 - Phased paternal/haplotype1 contig graph. This graph keeps the phased paternal/haplotype1 assembly.
530 - Phased maternal/haplotype2 contig graph. This graph keeps the phased maternal/haplotype2 assembly. 542 - Phased maternal/haplotype2 contig graph. This graph keeps the phased maternal/haplotype2 assembly.
531
532 543
533 544
534 ]]></help> 545 ]]></help>
535 <citations> 546 <citations>
536 <citation type="doi">10.1038/s41592-020-01056-5</citation> 547 <citation type="doi">10.1038/s41592-020-01056-5</citation>