comparison preprocess.xml @ 0:59bc96331073 draft default tip

planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/tree/v3.1.0 commit 08296fc88e3e938c482c631bd515b3b7a0499647
author frogs
date Thu, 28 Feb 2019 10:14:49 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:59bc96331073
1 <?xml version="1.0"?>
2 <!--
3 # Copyright (C) 2015 INRA
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 -->
18 <tool id="FROGS_preprocess" name="FROGS Pre-process" version="3.1">
19 <description>merging, denoising and dereplication.</description>
<!-- Runtime dependency: the frogs package, pinned to release 3.1.0. -->
20 <requirements>
21 <requirement version="3.1.0" type="package">frogs</requirement>
22 </requirements>
<!-- Any non-zero exit code (positive or negative) marks the job as failed. -->
23 <stdio>
24 <exit_code level="fatal" range="1:" />
25 <exit_code level="fatal" range=":-1" />
26 </stdio>
<!-- Cheetah template assembling the preprocess.py command line from the sequencer,
     sequencing protocol and input type selected in the inputs section below. -->
27 <command>
28 preprocess.py $sequencer_type.sequencer_selected
29 --output-dereplicated $dereplicated_file --output-count $count_file --summary $summary_file
30 --nb-cpus \${GALAXY_SLOTS:-1}
31 --min-amplicon-size $sequencer_type.min_amplicon_size --max-amplicon-size $sequencer_type.max_amplicon_size
32
## Primers: Illumina reads either carry the PCR primers (standard protocol) or not; 454 always provides them.
33 #if $sequencer_type.sequencer_selected == "illumina"
34 #if $sequencer_type.sequencing_protocol.sequencing_protocol_selected == "standard"
35 --five-prim-primer $sequencer_type.sequencing_protocol.five_prim_primer --three-prim-primer $sequencer_type.sequencing_protocol.three_prim_primer
36 #else
37 --without-primers
38 #end if
39 #else
40 --five-prim-primer $sequencer_type.five_prim_primer --three-prim-primer $sequencer_type.three_prim_primer
41 #end if
42
## Inputs: a single archive, or one entry per sample taken from the "samples" repeat.
43 #if $sequencer_type.input_type.input_type_selected == "archive"
44 --input-archive $sequencer_type.input_type.archive_file
## archive_type only exists in the Illumina branch, hence the sequencer test comes first.
45 #if $sequencer_type.sequencer_selected == "illumina" and $sequencer_type.input_type.archive_type.archive_type_selected == "already_merged"
46 --already-contiged
47 #elif $sequencer_type.sequencer_selected == "illumina"
48 --R1-size $sequencer_type.input_type.archive_type.R1_size --R2-size $sequencer_type.input_type.archive_type.R2_size
49 --mismatch-rate $sequencer_type.input_type.archive_type.mm_rate
50 --merge-software $sequencer_type.input_type.archive_type.merge_software_type.merge_software_selected
51 #if $sequencer_type.input_type.archive_type.merge_software_type.merge_software_selected == "flash"
52 --expected-amplicon-size $sequencer_type.input_type.archive_type.merge_software_type.expected_amplicon_size
53 #end if
54 #if $sequencer_type.input_type.archive_type.keep_unmerged
55 --keep-unmerged
56 #end if
57 #end if
58 #else
## sep is prepended to every list item so the rendered values stay space-separated.
59 #set $sep = ' '
60 #if $sequencer_type.sequencer_selected == "illumina"
61 --samples-names
62 #for $current in $sequencer_type.input_type.files_by_samples_type.samples
63 $sep"${current.name.strip()}"
64 #end for
65 --input-R1
66 #for $current in $sequencer_type.input_type.files_by_samples_type.samples
67 $sep${current.R1_file}
68 #end for
69 #if $sequencer_type.input_type.files_by_samples_type.files_by_samples_type_selected == "already_merged"
70 --already-contiged
71 #else
72 --input-R2
73 #for $current in $sequencer_type.input_type.files_by_samples_type.samples
74 $sep${current.R2_file}
75 #end for
76 --R1-size $sequencer_type.input_type.files_by_samples_type.R1_size --R2-size $sequencer_type.input_type.files_by_samples_type.R2_size
77 --mismatch-rate $sequencer_type.input_type.files_by_samples_type.mm_rate
78 --merge-software $sequencer_type.input_type.files_by_samples_type.merge_software_type.merge_software_selected
79 #if $sequencer_type.input_type.files_by_samples_type.merge_software_type.merge_software_selected == "flash"
## expected_amplicon_size is declared inside the merge_software_type conditional, so it must be addressed through it (as in the archive branch).
80 --expected-amplicon-size $sequencer_type.input_type.files_by_samples_type.merge_software_type.expected_amplicon_size
81 #end if
82 #if $sequencer_type.input_type.files_by_samples_type.keep_unmerged
83 --keep-unmerged
84 #end if
85 #end if
86 #else
## 454: the samples repeat sits directly under input_type (no files_by_samples_type level).
87 --input-R1
88 #for $current in $sequencer_type.input_type.samples
89 $sep${current.R1_file}
90 #end for
91 --samples-names
92 #for $current in $sequencer_type.input_type.samples
93 $sep"${current.name.strip()}"
94 #end for
95 #end if
96 #end if
97 </command>
98 <inputs>
99 <conditional name="sequencer_type">
100 <param name="sequencer_selected" type="select" label="Sequencer" help="Select the sequencing technology used to produce the sequences.">
101 <option value="illumina" selected="true">Illumina</option>
102 <option value="454">454</option>
103 </param>
<!-- Illumina branch: input selection, amplicon size bounds and sequencing protocol (primers). -->
104 <when value="illumina">
105 <!-- Samples: provided either as one tar archive or as FASTQ files listed sample by sample -->
106 <conditional name="input_type">
107 <param name="input_type_selected" type="select" label="Input type" help="Samples files can be provided in single archive or with two files (R1 and R2) by sample.">
108 <option value="files_by_samples" selected="true">Files by samples</option>
109 <option value="archive">Archive</option>
110 </param>
111 <when value="archive">
112 <param name="archive_file" type="data" format="tar" label="Archive file" help="The tar file containing the sequences file(s) for each sample." optional="false" />
113 <conditional name="archive_type">
114 <param name="archive_type_selected" type="select" label="Reads already merged ?" help="The archive contains 1 file by sample : R1 and R2 are already merged by pair.">
115 <option value="paired" selected="true">No</option>
116 <option value="already_merged">Yes</option>
117 </param>
118 <!-- The command template tests $sequencer_type.input_type.archive_type.archive_type_selected == "already_merged" -->
119 <when value="paired">
<!-- Paired reads in the archive: the values below feed the R1-size, R2-size, mismatch-rate,
     merge-software, expected-amplicon-size and keep-unmerged options of the command template. -->
120 <!-- Reads size -->
121 <param name="R1_size" type="integer" label="Reads 1 size" help="The maximum read1 size." value="" optional="false" />
122 <param name="R2_size" type="integer" label="Reads 2 size" help="The maximum read2 size." value="" optional="false" />
123 <param name="mm_rate" type="float" label="mismatch rate." help="The maximum rate of mismatch in the overlap region" value="0.1" optional="false" />
124
<!-- Merge software: only the flash choice exposes an extra parameter (expected amplicon size). -->
125 <conditional name="merge_software_type">
126 <param name="merge_software_selected" type="select" label="Merge software" help="Select the software to merge paired-end reads.">
127 <option value="vsearch" selected="true">Vsearch</option>
128 <option value="flash">Flash</option>
129 </param>
130 <when value="flash">
131 <param name="expected_amplicon_size" type="integer" label="Expected amplicon size" help="Maximum amplicon length expected in approximately 90% of the amplicons." value="" />
132 </when>
133 <when value="vsearch"></when>
134 </conditional>
135 <param name="keep_unmerged" type="boolean" label="Would you like to keep unmerged reads?" help="No : Unmerged reads will be excluded; Yes : unmerged reads will be artificially combined with 100 N. (default No)" />
136 </when>
<!-- Already merged archive: no extra parameter; the command template adds the already-contiged flag. -->
137 <when value="already_merged"></when>
138 </conditional>
139 </when>
140 <when value="files_by_samples">
<!-- Per-sample input: the samples repeat lives inside this conditional, so the command template
     reaches it as input_type.files_by_samples_type.samples. -->
141 <conditional name="files_by_samples_type">
142 <param name="files_by_samples_type_selected" type="select" label="Reads already contiged ?" help="The inputs contain 1 file by sample : R1 and R2 are already merged by pair.">
143 <option value="paired" selected="true">No</option>
144 <option value="already_merged">Yes</option>
145 </param>
146 <when value="paired">
147 <!-- Samples: one name plus one R1 and one R2 FASTQ file per entry, at least one entry -->
148 <repeat name="samples" title="Samples" min="1">
149 <param name="name" type="text" label="Name" help="The sample name." optional="false">
150 <validator type="empty_field" message="This parameter is required." />
151 </param>
152 <param format="fastq" name="R1_file" type="data" label="Reads 1" help="R1 FASTQ file of paired-end reads." />
153 <param format="fastq" name="R2_file" type="data" label="reads 2" help="R2 FASTQ file of paired-end reads." />
154 </repeat>
155 <!-- Reads size -->
156 <param name="R1_size" type="integer" label="Reads 1 size" help="The maximum read1 size." value="" optional="false" />
157 <param name="R2_size" type="integer" label="Reads 2 size" help="The maximum read2 size." value="" optional="false" />
158 <param name="mm_rate" type="float" label="mismatch rate." help="The maximum rate of mismatches in the overlap region" value="0.1" optional="false" />
<!-- Merge software: expected_amplicon_size is nested in this conditional (flash only). -->
159 <conditional name="merge_software_type">
160 <param name="merge_software_selected" type="select" label="Merge software" help="Select the software to merge paired-end reads.">
161 <option value="vsearch" selected="true">Vsearch</option>
162 <option value="flash">Flash</option>
163 </param>
164 <when value="flash">
165 <param name="expected_amplicon_size" type="integer" label="Expected amplicon size" help="Maximum amplicon length expected in approximately 90% of the amplicons." value="" />
166 </when>
167 <when value="vsearch"></when>
168 </conditional>
169 <param name="keep_unmerged" type="boolean" label="Would you like to keep unmerged reads?" help="No : Unmerged reads will be excluded; Yes : unmerged reads will be artificially combined with 100 N. (default No)" />
170 </when>
171 <when value="already_merged">
<!-- Already merged reads: one name and one FASTQ file per sample; the file is still named R1_file
     because the command template passes it through the input-R1 option. -->
172 <repeat name="samples" title="Samples" min="1">
173 <param name="name" type="text" label="Name" help="The sample name." optional="false">
174 <validator type="empty_field" message="This parameter is required." />
175 </param>
176 <param format="fastq" name="R1_file" type="data" label="Sequence file" help="FASTQ file of merged reads." />
177 </repeat>
178 </when>
179 </conditional>
180 </when>
181 </conditional>
182 <!-- Amplicons: size bounds, primers included (see the help attributes) -->
183 <param name="min_amplicon_size" type="integer" label="Minimum amplicon size" help="The minimum size for the amplicons (with primers)." value="" optional="false" />
184 <param name="max_amplicon_size" type="integer" label="Maximum amplicon size" help="The maximum size for the amplicons (with primers)." value="" optional="false" />
185 <!-- Primers: required for the standard protocol only; the custom protocol maps to the without-primers flag -->
186 <conditional name="sequencing_protocol">
187 <param name="sequencing_protocol_selected" type="select" label="Sequencing protocol" help="The protocol used for sequencing step: standard or custom with PCR primers as sequencing primers.">
188 <option value="standard" selected="true">Illumina standard</option>
189 <option value="without_primers">Custom protocol (Kozich et al. 2013)</option>
190 </param>
191 <when value="standard">
192 <param name="five_prim_primer" type="text" size="20" label="5' primer" help="The 5' primer sequence (wildcards are accepted). The orientation is detailed below in 'Primers parameters'." optional="false">
193 <validator type="empty_field" message="This parameter is required." />
194 </param>
195 <param name="three_prim_primer" type="text" size="20" label="3' primer" help="The 3' primer sequence (wildcards are accepted). The orientation is detailed below in 'Primers parameters'." optional="false">
196 <validator type="empty_field" message="This parameter is required." />
197 </param>
198 </when>
199 <when value="without_primers"></when>
200 </conditional>
201 </when>
202
<!-- 454 branch: single-file samples (archive or one FASTQ per sample), amplicon size bounds and
     mandatory primers. The samples repeat sits directly under input_type. -->
203 <when value="454">
204 <!-- Samples -->
205 <conditional name="input_type">
206 <param name="input_type_selected" type="select" label="Input type" help="Samples files can be provided in single archive or with one file by sample.">
207 <option value="files_by_samples" selected="true">One file by sample</option>
208 <option value="archive">Archive</option>
209 </param>
210 <when value="archive">
211 <param name="archive_file" type="data" format="tar" label="Archive file" help="The tar file containing the sequences file for each sample." optional="false" />
212 </when>
213 <when value="files_by_samples">
214 <repeat name="samples" title="Samples" min="1">
<!-- Same empty_field validator as the Illumina sample names: an empty name would otherwise be
     passed as an empty quoted string to the samples-names option. -->
215 <param name="name" type="text" label="Name" help="The sample name." optional="false">
<validator type="empty_field" message="This parameter is required." />
</param>
216 <param format="fastq" name="R1_file" type="data" label="Sequence file" help="FASTQ file of sample." />
217 </repeat>
218 </when>
219 </conditional>
220 <!-- Amplicons: size bounds, primers included (see the help attributes) -->
221 <param name="min_amplicon_size" type="integer" label="Minimum amplicon size" help="The minimum size for the amplicons (with primers)." value="" optional="false" />
222 <param name="max_amplicon_size" type="integer" label="Maximum amplicon size" help="The maximum size for the amplicons (with primers)." value="" optional="false" />
223 <!-- Primers: always required for 454 (no protocol choice in this branch) -->
224 <param name="five_prim_primer" type="text" size="20" label="5' primer" help="The 5' primer sequence (wildcards are accepted). The orientation is detailed below in 'Primers parameters'." optional="false">
225 <validator type="empty_field" message="This parameter is required." />
226 </param>
227 <param name="three_prim_primer" type="text" size="20" label="3' primer" help="The 3' primer sequence (wildcards are accepted). The orientation is detailed below in 'Primers parameters'." optional="false">
228 <validator type="empty_field" message="This parameter is required." />
229 </param>
230 </when>
231 </conditional>
232 </inputs>
<!-- Three datasets produced by every run: dereplicated sequences, per-sample counts and the HTML report. -->
233 <outputs>
234 <data name="dereplicated_file" format="fasta" from_work_dir="dereplicated.fasta" label="${tool.name}: dereplicated.fasta" />
235 <data name="count_file" format="tabular" from_work_dir="count.tsv" label="${tool.name}: count.tsv" />
236 <data name="summary_file" format="html" from_work_dir="report.html" label="${tool.name}: report.html" />
237 </outputs>
238 <tests>
239 <test>
<!-- Illumina paired-end archive, merged with FLASH (expected amplicon size 420), unmerged reads kept,
     standard protocol with both primers supplied. -->
240 <conditional name="sequencer_type">
241 <param name="sequencer_selected" value="illumina"/>
242 <conditional name="input_type">
243 <param name="input_type_selected" value="archive"/>
244 <param name="archive_file" ftype="tar" value="input/test_dataset.tar.gz"/>
245 <conditional name="archive_type">
246 <param name="archive_type_selected" value="paired"/>
247 <param name="R1_size" value="267"/>
248 <param name="R2_size" value="266"/>
249 <param name="mm_rate" value="0.15"/>
250 <conditional name="merge_software_type">
251 <param name="merge_software_selected" value="flash" />
252 <param name="expected_amplicon_size" value="420"/>
253 </conditional>
254 <param name="keep_unmerged" value="true"/>
255 </conditional>
256 </conditional>
257 <param name="min_amplicon_size" value="44"/>
258 <param name="max_amplicon_size" value="490"/>
259 <conditional name="sequencing_protocol">
260 <param name="sequencing_protocol_selected" value="standard"/>
261 <param name="five_prim_primer" value="GGCGVACGGGTGAGTAA"/>
262 <param name="three_prim_primer" value="GTGCCAGCNGCNGCGG"/>
263 </conditional>
264 </conditional>
<!-- FASTA and count outputs are compared against reference files; the HTML report uses
     compare="sim_size" with delta="0". -->
265 <output name="dereplicated_file" file="references/01-prepro-flash.fasta"/>
266 <output name="count_file" file="references/01-prepro-flash.tsv"/>
267 <output name="summary_file" file="references/01-prepro-flash.html" compare="sim_size" delta="0"/>
<!-- Disabled alternative check of the report content, kept for reference: -->
268 <!--output name="summary_file">
269 <assert_contents>
270 <has_text_matching expression="FROGS\sPre-process" />
271 <has_text_matching expression="splA_01" />
272 </assert_contents>
273 </output-->
274 </test>
275 <test>
<!-- Same Illumina paired-end archive and settings as the FLASH test, but merged with vsearch
     (no expected amplicon size parameter in that branch). -->
276 <conditional name="sequencer_type">
277 <param name="sequencer_selected" value="illumina"/>
278 <conditional name="input_type">
279 <param name="input_type_selected" value="archive"/>
280 <param name="archive_file" ftype="tar" value="input/test_dataset.tar.gz"/>
281 <conditional name="archive_type">
282 <param name="archive_type_selected" value="paired"/>
283 <param name="R1_size" value="267"/>
284 <param name="R2_size" value="266"/>
285 <param name="mm_rate" value="0.15"/>
286 <conditional name="merge_software_type">
287 <param name="merge_software_selected" value="vsearch" />
288 </conditional>
289 <param name="keep_unmerged" value="true"/>
290 </conditional>
291 </conditional>
292 <param name="min_amplicon_size" value="44"/>
293 <param name="max_amplicon_size" value="490"/>
294 <conditional name="sequencing_protocol">
295 <param name="sequencing_protocol_selected" value="standard"/>
296 <param name="five_prim_primer" value="GGCGVACGGGTGAGTAA"/>
297 <param name="three_prim_primer" value="GTGCCAGCNGCNGCGG"/>
298 </conditional>
299 </conditional>
<!-- FASTA and count outputs are compared against reference files; the HTML report uses
     compare="sim_size" with delta="0". -->
300 <output name="dereplicated_file" file="references/01-prepro-vsearch.fasta"/>
301 <output name="count_file" file="references/01-prepro-vsearch.tsv"/>
302 <output name="summary_file" file="references/01-prepro-vsearch.html" compare="sim_size" delta="0"/>
<!-- Disabled alternative check of the report content, kept for reference: -->
303 <!--output name="summary_file">
304 <assert_contents>
305 <has_text_matching expression="FROGS\sPre-process" />
306 <has_text_matching expression="splA_01" />
307 </assert_contents>
308 </output-->
309 </test>
310 </tests>
311 <help>
312
313 .. image:: static/images/frogs_images/FROGS_logo.png
314 :height: 144
315 :width: 110
316
317
318 .. class:: infomark page-header h2
319
320 What it does
321
322 FROGS Pre-process filters and dereplicates amplicons for use in diversity analysis.
323
324 .. class:: infomark page-header h2
325
326 Inputs/Outputs
327
328 .. class:: h3
329
330 Inputs
331
332 Sample files are added one after another or provided in an archive file (tar.gz).
333
334 .. container:: row
335
336 .. container:: col-md-6
337
338 **Illumina inputs**
339
340 :Usage: For samples sequenced in paired-end. The amplicon length must be smaller than the combined length of R1 and R2. R1 and R2 are merged on their common region.
341 :Files: One R1 and R2 by sample (format `FASTQ &lt;https://en.wikipedia.org/wiki/FASTQ_format&gt;`_)
342 :Example: splA_R1.fastq.gz, splA_R2.fastq.gz, splB_R1.fastq.gz, splB_R2.fastq.gz
343
344 OR
345
346 :Usage: For samples sequenced in single-end mode or when R1 and R2 reads are already merged.
347 :Files: One sequence file by sample (format `FASTQ &lt;https://en.wikipedia.org/wiki/FASTQ_format&gt;`_).
348 :Example: splA.fastq.gz, splB.fastq.gz
349
350 .. container:: col-md-6
351
352 **454 inputs**
353
354 :Files: One sequence file by sample (format `FASTQ &lt;https://en.wikipedia.org/wiki/FASTQ_format&gt;`_)
355 :Example: splA.fastq.gz, splB.fastq.gz
356
357 Remark: In an archive, if you use R1 and R2 files, their names must end with *_R1* and *_R2*.
358
359 .. class:: h3
360
361 Outputs
362
363 **Sequence file** (dereplicated.fasta):
364
365 Only one file with all samples sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_). These sequences are dereplicated: strictly identical sequences are represented only once and the initial count is kept in the count file.
366
367 **Count file** (count.tsv):
368
369 This file contains the count of all unique sequences in each sample (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
370
371 **Summary file** (report.html):
372
373 This file reports the number of remaining sequences after each filter (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).
374
375 .. image:: static/images/frogs_images/FROGS_preprocess_summary_v3.png
376 :height: 850
377 :width: 831
378
379 It also presents the length distribution of the remaining full amplicon sequences.
380
381 .. image:: static/images/frogs_images/FROGS_preprocess_lengthsSamples_v3.png
382 :height: 379
383 :width: 364
384
385 .. class:: infomark page-header h2
386
387 How it works
388
389 .. csv-table::
390 :header: "Steps", "Illumina", "454"
391 :widths: 5, 150, 150
392 :class: table table-striped
393
394 "1", "For un-merged data: merges R1 and R2 with a maximum of M% mismatch in the overlapped region (`VSEARCH &lt;https://github.com/torognes/vsearch/&gt;`_ or `FLASH &lt;https://ccb.jhu.edu/software/FLASH/&gt;`_ or optionally `PEAR &lt;https://sco.h-its.org/exelixis/web/software/pear/&gt;`_). Resulting un-merged reads may optionally be artificially combined by adding 100 N between the reads", "/"
395 "2", "If sequencing protocol is the illumina standard protocol : Removes sequences where the two primers are not present and then removes primers in the remaining sequences (`cutadapt &lt;http://cutadapt.readthedocs.org/en/latest/guide.html&gt;`_). The primer search accepts 10% of differences", "Removes sequences where the two primers are not present, removes primers sequence and reverse complements the sequences on strand - (`cutadapt &lt;http://cutadapt.readthedocs.org/en/latest/guide.html&gt;`_). The primer search accepts 10% of differences"
396 "3", "Filters sequences with ambiguous nucleotides and for merged sequences filters on their length which must range between 'Minimum amplicon size - primer length' and 'Maximum amplicon size - primer length'", "the tool removes sequences with at least one homopolymer with more than seven nucleotides and with a distance of less than or equal to 10 nucleotides between two poor quality positions, i.e. with a Phred quality score lower than 10"
397 "4", "Dereplicates sequences", "Dereplicates sequences"
398
399
400 .. class:: infomark page-header h2
401
402 Advices/details on parameters
403
404 .. class:: h3
405
406 What is the difference between overlapped sequences and combined sequences?
407
408 - **Case of a sequencing of overlapping sequences: case of 16S V3-V4 amplicon MiSeq sequencing**
409
410 .. image:: static/images/frogs_images/FROGS_preprocess_overlapped_sequence.png
411 :height: 261
412 :width: 531
413
414 - **Case of a sequencing of non-overlapping sequences: case of ITS1 amplicon MiSeq sequencing**
415
416 .. image:: static/images/frogs_images/FROGS_preprocess_combined_sequence1.png
417 :height: 279
418 :width: 797
419
420 .. class:: warningmark
421
422 **“FROGS combined” warning points**
423
424 Read pairs are not merged because:
425
426 - the real amplicon length is greater than the number of sequenced bases (500 bp for MiSeq 2x250bp)
427 - the overlapped region is smaller than 10 (fixed parameter in FROGS).
428
429 Thus, “FROGS combined” sequences are artificial and present particular features especially on size.
430 Imagine a MiSeq sequencing of 2x250 bp with sequences that cannot overlap: the resulting “FROGS combined” sequence length will be 600 bp.
431
432 .. image:: static/images/frogs_images/FROGS_preprocess_combined_sequence2.png
433 :height: 357
434 :width: 798
435
436 .. class:: h3
437
438 Keeping or not un-merged paired reads
439
440 This option is useful when, and only when, the targeted amplicon is longer than what the sequencing technology can provide (ITS amplicon for example). In other cases, be careful: you will only keep noise in your analysis.
441
442
443 .. class:: h3
444
445 Primers parameters
446
447 The (`Kozich et al. 2013 &lt;http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3753973/&gt;`_ ) protocol uses custom sequencing primers which are also the PCR primers. In this case the reads do not contain the PCR primers.
448
449 In case of Illumina standard protocol, the primers must be provided in 5' to 3' orientation.
450
451 .. role:: alert-info
452
453 Example:
454
455 5' :alert-info:`ATGCCC` GTCGTCGTAAAATGC :alert-info:`ATTTCAG` 3'
456
457 Value for parameter 5' primer: ATGCCC
458
459 Value for parameter 3' primer: ATTTCAG
460
461 .. class:: h3
462
463 FLASH : Amplicons sizes parameters
464
465 The two following images show two examples of perfect values for the size parameters.
466
467 .. image:: static/images/frogs_images/FROGS_preprocess_ampliconSize_unimodal_v3.png
468 :height: 415
469 :width: 676
470
471 .. image:: static/images/frogs_images/FROGS_preprocess_ampliconSize_multimodal_v3.png
472 :height: 415
473 :width: 676
474
475 Don't worry: the "Expected amplicon size" does not need to be very accurate, and is only necessary for merging sequences with FLASH.
476
477 .. class:: h3
478
479 If the filter 'merged' drastically reduces the number of sequences:
480
481 In un-merged Illumina data, the reduction of the dataset by the merged filter is typically less than 20%. A loss of more than 20% in all samples can highlight a quality problem.
482
483 If the overlap between R1 and R2 is greater than 50 nucleotides and the quality of the end of the sequences is poor (see `FastQC &lt;http://www.bioinformatics.babraham.ac.uk/projects/fastqc/&gt;`_), you can try to trim the end of your sequences and relaunch the preprocess tool. You can also raise the mismatch rate in the overlapped region, but not too much!
484
485 ----
486
487 **Contact**
488
489 Contacts: frogs@inra.fr
490
491 Repository: https://github.com/geraldinepascal/FROGS
492 website: http://frogs.toulouse.inra.fr/
493
494 Please cite the **FROGS article**: *Escudie F., et al. Bioinformatics, 2018. FROGS: Find, Rapidly, OTUs with Galaxy Solution.*
495
496 </help>
497 <citations>
498 <citation type="doi">10.1093/bioinformatics/btx791</citation>
499 </citations>
500
501 </tool>