comparison pysradb.xml @ 0:1005ffbccd86 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pysradb commit 95f13fef86ee81a617814a386cb371e94cf45577
author iuc
date Fri, 11 Nov 2022 07:35:39 +0000
parents
children f63cf0adfd87
comparison
equal deleted inserted replaced
-1:000000000000 0:1005ffbccd86
1 <tool id='pysradb_search' name='pysradb search' version='@TOOL_VERSION@+galaxy@SUFFIX_VERSION@' profile='20.01'>
2 <description>sequence metadata from SRA/ENA</description> <!-- one-line tagline for the tool -->
3 <macros> <!-- the macro names expanded in this file are not defined here; presumably they come from this import -->
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="bio_tools" />
7 <expand macro="requirements" />
8 <!-- Assembles the pysradb search call from the form inputs; when requested, captures its stdout as stats.txt and converts the SVG histograms in search_plots to JPG files with fixed names. --> <command detect_errors='exit_code'><![CDATA[
9 pysradb search
10 --db $database
11 #if $conditional_platform.instrument
12 --query '${conditional_platform.instrument} ${query}'
13 #else
14 --query '${query}'
15 #end if
16 #if $conditional_mode.source
17 --source '${conditional_mode.source}'
18 #end if
19 #if $conditional_mode.selector == 'false'
20 --organism '${conditional_mode.organism}'
21 #end if
22 #if $conditional_platform.platform == 'illumina'
23 #if $conditional_platform.layout
24 --layout $conditional_platform.layout
25 #end if
26 #end if
27 --platform '$conditional_platform.platform'
28 #if $selection
29 --selection '${selection}'
30 #end if
31 ## --source is emitted once, quoted, by the guarded block above; a second unquoted copy here let multi-word values such as 'genomic single cell' be split by the shell.
32 #if $strategy
33 --strategy '${strategy}'
34 #end if
35 ## Flags below continue the same pysradb invocation; shell post-processing starts after the saveto flag.
36 ## Optional fields
37 --max $output_options.max
38 #if $advanced.mbases
39 --mbases $advanced.mbases
40 #end if
41 #if $advanced.accession
42 --accession '${advanced.accession}'
43 #end if
44 #if $advanced.publication_date
45 --publication-date '${advanced.publication_date}'
46 #end if
47 --verbosity $output_options.verbosity
48 #if 'stats' in $output_options.output_files
49 --stats
50 #end if
51 #if 'graphs' in $output_options.output_files
52 --graphs 'daterange selection basecount'
53 #end if
54 --saveto 'output.tsv'
55 #if 'stats' in $output_options.output_files
56 > stats.txt
57 #if $output_options.verbosity == '2' or $output_options.verbosity == '3'
58 && echo ' Sequencing instrument:' >> stats.txt
59 #if $database == 'sra'
60 && gawk -F '\t' '{print $11}' 'output.tsv' | tail -n +2 | sort | uniq -c | gawk '{ print " ", $0 }' >> stats.txt
61 #else
62 && gawk -F '\t' '{print $12}' 'output.tsv' | tail -n +2 | sort | uniq -c | gawk '{ print " ", $0 }' >> stats.txt
63 #end if
64 #end if
65 && echo $'\n Query keyworkds: ${query}\n' >> stats.txt
66 #end if
67 #if 'graphs' in $output_options.output_files
68 && convert 'search_plots/Histogram*.svg' -set filename:fn '%[basename]' 'search_plots/%[filename:fn].jpg'
69 && mv 'search_plots/Histogram of Base Count'*.jpg 'search_plots/histogram_base_count.jpg'
70 && mv 'search_plots/Histogram of Library'*.jpg 'search_plots/histogram_library.jpg'
71 && mv 'search_plots/Histogram of Publication'*.jpg 'search_plots/histogram_publication.jpg'
72 #end if
73 ]]></command>
74 <inputs>
75 <param name="database" type="select" label="Database to query"
76 help="Sequence Read Archive (SRA) data, is the largest publicly available repository of high throughput sequencing data. The European Nucleotide
77 Archive (ENA) provides a comprehensive record of nucleotide sequencing information."> <!-- The selected value is passed to the db flag and also decides which output.tsv column is tallied as the sequencing instrument in the stats file (11 for sra, 12 otherwise). -->
78 <option value="sra">SRA: Sequence Read Archive</option>
79 <option value="ena">ENA: European Nucleotide Archive</option>
80 </param>
81 <param argument="--query" type="text" value=""
82 label="Query keywords" help="Multiple keywords should be separated by spaces. Example: colorectal cancer"> <!-- Free-text keywords; the command puts the selected instrument name (if any) in front of them inside one single-quoted argument. -->
83 <sanitizer invalid_char=""> <!-- anything outside letters, digits, underscore, hyphen and space is replaced by invalid_char, which is empty here -->
84 <valid initial="string.letters,string.digits">
85 <add value="_" />
86 <add value="-" />
87 <add value=" " />
88 </valid>
89 </sanitizer>
90 <validator type="regex" message="This field is compulsory. Please only use letters, digits, spaces, underscores or hyphens">[0-9a-zA-Z_ -]+</validator>
91 </param>
92 <conditional name="conditional_mode"> <!-- Switches between organism-specific and metagenomic searches; the command adds the organism flag only when the selector is "false". -->
93 <param name="selector" type="select" label="Genetic material recovered directly from environmental or clinical samples" help="Enable this option if you are interested in metagenomic data. Default: disabled">
94 <option value="false" selected="true">Disabled</option>
95 <option value="true">Enabled</option>
96 </param>
97 <when value="false"> <!-- source_macro is not defined in this file (presumably in macros.xml); the options nested inside the expand are handed to it. -->
98 <expand macro="source_macro">
99 <option value="genomic">Genomic</option>
100 <option value="transcriptomic">Transcriptomic</option>
101 <option value="genomic single cell">Genomic single cell</option>
102 <option value="transcriptomic single cell">Transcriptomic single cell</option>
103 <option value="viral rna">Viral RNA</option>
104 <option value="synthetic">Synthetic</option>
105 <option value="other">Other</option>
106 </expand>
107 <param argument="--organism" type="text" value="Homo sapiens" label="Scientific name of the sample organism" help="The scientific name of a species that is set by binomial nomenclature entails two parts: generic name (or genus name) and specific name">
108 <sanitizer invalid_char="">
109 <valid initial="string.letters">
110 <add value=" " />
111 </valid>
112 </sanitizer>
113 <validator type="regex" message="This field is compulsory. Please only use letters or whitespace">[a-zA-Z ]+</validator>
114 </param>
115 </when>
116 <when value="true"> <!-- metagenomic branch: only a source choice, no organism field -->
117 <expand macro="source_macro">
118 <option value="metagenomic">Metagenomic</option>
119 <option value="metatranscriptomic">Metatranscriptomic</option>
120 </expand>
121 </when>
122 </conditional>
123 <conditional name="conditional_platform"> <!-- Platform choice decides which instrument list is offered; the library layout field exists only in the Illumina branch. -->
124 <param argument="--platform" type="select" label="Platform" help="Sequencing platform used for generating the reads">
125 <option value="illumina">Illumina</option>
126 <option value="oxford nanopore">Oxford Nanopore</option>
127 <option value="pacbio smrt">PacBio SMRT</option>
128 </param>
129 <when value="illumina"> <!-- instrument_macro is not defined in this file (presumably in macros.xml); the command reads its value as conditional_platform.instrument and prepends it to the query. -->
130 <expand macro="instrument_macro" >
131 <option value="NovaSeq 6000">NovaSeq 6000</option>
132 <option value="HiSeq X Ten">HiSeq X Ten</option>
133 <option value="NextSeq 2000">NextSeq 2000</option>
134 <option value="NextSeq 550">NextSeq 550</option>
135 <option value="NextSeq 500">NextSeq 500</option>
136 <option value="HiSeq 4000">HiSeq 4000</option>
137 <option value="HiSeq 3000">HiSeq 3000</option>
138 <option value="HiSeq 2500">HiSeq 2500</option>
139 <option value="HiSeq 2000">HiSeq 2000</option>
140 <option value="MiSeq">MiSeq</option>
141 <option value="Genome Analyzer IIx">Genome Analyzer IIx</option>
142 <option value="Genome Analyzer">Genome Analyzer</option>
143 </expand>
144 <param argument="--layout" type="select" optional="true" label="Library layout" help="Paired-end reads improve the ability to identify the relative positions of various reads in the
145 genome, making it much more effective than single-end reading in resolving structural rearrangements such as gene insertions, deletions, or inversions. Note: Only available for the Illumina platform">
146 <option value="single">Single reads</option>
147 <option value="paired">Paired reads</option>
148 </param>
149 </when>
150 <when value="pacbio smrt">
151 <expand macro="instrument_macro">
152 <option value="PacBio RS">PacBio RS</option>
153 <option value="PacBio RS II">PacBio RS II</option>
154 <option value="Sequel">Sequel</option>
155 <option value="Sequel II">Sequel II</option>
156
157 </expand>
158 </when>
159 <when value="oxford nanopore">
160 <expand macro="instrument_macro" >
161 <option value="MinION">MinION</option>
162 <option value="PromethION">PromethION</option>
163 <option value="GridION">GridION</option>
164 </expand>
165 </when>
166 </conditional>
167
168 <param argument="--strategy" type="select" optional="true" label="Library preparation strategy" help="Sequencing technique intended for the library"> <!-- Optional; the command passes the chosen value, single-quoted, only when one is selected. -->
169 <option value="amplicon">Amplicon</option>
170 <option value="atac seq">ATAC-seq</option>
171 <option value="bisulfite seq">Bisulfite-seq</option>
172 <option value="chia pet">Chromatin interaction analysis with paired-end tag (ChIA-PET)</option>
173 <option value="chip">ChIP</option>
174 <option value="chip seq">ChIP-seq</option>
175 <option value="cts">Coding transcriptome sequencing (CTS)</option>
176 <option value="dnase hypersensitivity">DNase-seq</option>
177 <option value="est">EST</option>
178 <option value="faire seq">FAIRE-seq</option>
179 <option value="fl cdna">FL-cDNA</option>
180 <option value="gbs">Genotyping-by-sequencing (GBS)</option>
181 <option value="hi c">Hi-C</option>
182 <option value="medip seq">Methylated DNA immunoprecipitation sequencing (MeDIP-Seq)</option>
183 <option value="mbd seq">Methyl-binding domain sequencing (MBD-seq)</option>
184 <option value="mnase seq">Micrococcal nuclease sequencing (MNase-seq)</option>
185 <option value="mirna seq">miRNA-seq</option>
186 <option value="mre seq">Methylation sensitive restriction enzyme sequencing (MRE-seq)</option>
187 <option value="ncrna seq">ncRNA-seq</option>
188 <option value="other">Other</option>
189 <option value="poolclone">POOLCLONE</option>
190 <option value="rad seq">Restriction site-associated DNA sequencing (RAD-seq)</option>
191 <option value="rip seq">RNA immunoprecipitation sequencing (RIP-seq)</option>
192 <option value="rna seq">RNA-seq</option>
193 <option value="selex">Systematic evolution of ligands by exponential enrichment (SELEX)</option>
194 <option value="synthetic long read">Synthetic long-read sequencing</option>
195 <option value="target capture">Target capture sequencing (TCS)</option> <!-- NOTE(review): the SRA/ENA vocabulary spells this strategy "Targeted-Capture"; confirm pysradb accepts "target capture" -->
196 <option value="tn seq">Tn-seq</option>
197 <option value="wcs">Whole chromosome shotgun (WCS)</option>
198 <option value="wga">Whole genome amplification (WGA)</option>
199 <option value="wgs">Whole genome sequencing (WGS)</option>
200 <option value="wxs">Whole exome sequencing (WXS)</option>
201 </param>
202 <param argument="--selection" type="select" optional="true" label="Library selection" help="Method used to select and/or enrich the material being sequenced"> <!-- Optional; the command passes the chosen value, single-quoted, only when one is selected. Values follow the SRA/ENA library selection vocabulary in lower case. -->
203 <option value="5 methylcytidine antibody">5-methylcytidine antibody</option>
204 <option value="cage">CAGE</option>
205 <option value="cdna">cDNA</option>
206 <option value="chip">ChIP</option>
207 <option value="chip seq">ChIP-seq</option>
208 <option value="dnase">DNAse</option>
209 <option value="hmpr">Hypomethylated partial restriction (HMPR)</option>
210 <option value="hybrid selection">Hybrid selection</option>
211 <option value="inverse rrna">Inverse rRNA</option>
212 <option value="mbd2 protein methyl cpg binding domain">Methylated DNA binding domain protein 2 (MBD2)</option>
213 <option value="mda">Multiple displacement amplification (MDA)</option>
214 <option value="mf">Mechanical fragmentation (MF)</option>
215 <option value="mnase">Micrococcal nuclease (MNase)</option>
216 <option value="msll">MSLL</option> <!-- SRA term is MSLL (letters L), not MSII -->
217 <option value="oligo dt">Oligo(dT)</option>
218 <option value="padlock probes capture method">Padlock probe capture</option> <!-- value previously misspelled "proves" -->
219 <option value="pcr">PCR</option>
220 <option value="polya">PolyA</option>
221 <option value="race">Rapid amplification of cDNA ends (RACE)</option>
222 <option value="random">Random</option>
223 <option value="random pcr">Random PCR</option>
224 <option value="reduced representation">Reduced representation</option>
225 <option value="repeat fractionation">Repeat fractionation</option>
226 <option value="restriction digest">Restriction digest</option>
227 <option value="rt pcr">RT-PCR</option>
228 <option value="size fractionation">Size fractionation</option>
229 </param>
230 <section name="advanced" title="Advanced options"> <!-- All three fields are optional; the command emits each flag only when the value passes its Cheetah truthiness test. -->
231 <param argument="--accession" type="text" value="" optional="true" label="Accession number" help="Relevant study/experiment/sample/run accession number">
232 <sanitizer invalid_char=""> <!-- letters and digits only -->
233 <valid initial="string.letters,string.digits"/>
234 </sanitizer>
235 <validator type="regex">[0-9a-zA-Z]+</validator>
236 </param>
237 <param argument="--mbases" type="integer" min="0" value="" optional="true"
238 label="Sample size" help="Size of the sample rounded to the nearest megabase" />
239 <param argument="--publication-date" type="text" value="" optional="true"
240 label="Publication date" help="Publication date of the run in the format dd-mm-yyyy.
241 If a date range is desired, enter the start date, followed by end date, separated by a colon ':'.
242 Example: 01-01-2010:31-12-2010">
243 <sanitizer invalid_char=""> <!-- digits plus the colon and hyphen separators used by the date / date-range syntax -->
244 <valid initial="string.digits">
245 <add value=":" />
246 <add value="-" />
247 </valid>
248 </sanitizer>
249 <validator type="regex">[0-9:-]+</validator>
250 </param>
251 </section>
252 <section name="output_options" title="Output options"> <!-- verbosity and max are always passed to pysradb; output_files toggles the stats and graphs flags and the matching output filters. -->
253 <param argument="--verbosity" type="select" label="Level of search result details" help="Default: 2">
254 <option value="0">0: run accession only</option>
255 <option value="1">1: run accession and experiment title</option>
256 <option value="2" selected="true">2: accession numbers, titles and sequencing information</option>
257 <option value="3">3: all available metadata</option>
258 </param> <!-- the command appends the per-instrument tally to stats.txt only for verbosity 2 or 3 -->
259 <param argument="--max" type="integer" min="1" max="10000" value="100" label="Maximum number of entries"
260 help="Note: If the maximum number set is large, querying the SRA database will take significantly longer due to API limits" />
261 <param name="output_files" type="select" multiple="true" display="checkboxes" label="Additional output files">
262 <option value="stats" selected="true">Statistics for the search query (--stats)</option>
263 <option value="graphs">Generates graphs to illustrate the search result (--graphs)</option>
264 </param>
265 </section>
266 </inputs>
267 <outputs> <!-- One unconditional metadata table plus two outputs that are kept only when selected in output_options.output_files. -->
268 <data name="metadata_file" from_work_dir="output.tsv" format="tsv" label="${tool.name} on ${on_string}: metadata"/> <!-- output.tsv is the file the command names in its saveto flag -->
269 <collection name="graphs_collection" type="list" label="${tool.name} on ${on_string}: search plots">
270 <discover_datasets pattern="(?P&lt;name&gt;.+)\.jpg" format="jpg" directory="search_plots" /> <!-- picks up the JPG files the command renames inside search_plots; element name is the file name without the extension -->
271 <filter>'graphs' in output_options['output_files']</filter>
272 </collection>
273 <data name="stats" from_work_dir="stats.txt" format="txt" label="${tool.name} on ${on_string}: stats"> <!-- stats.txt is written by the shell redirection in the command -->
274 <filter>'stats' in output_options['output_files']</filter>
275 </data>
276 </outputs>
277 <tests>
278 <!-- Test 01: SRA search with mostly default fields; requests both stats and graphs, hence three outputs -->
279 <test expect_num_outputs="3">
280 <param name="database" value="sra"/>
281 <param name="query" value="cancer"/>
282 <conditional name="conditional_mode">
283 <param name="selector" value="false"/>
284 <param name="organism" value="Homo sapiens"/>
285 <param name="source" value="transcriptomic"/>
286 </conditional>
287 <section name="output_options">
288 <param name="max" value="100"/>
289 <param name="output_files" value="stats,graphs"/>
290 </section>
291 <output name="metadata_file" file="test_01.tabular" ftype="tsv"/>
292 <output name="stats" file="test_01_stats.txt" ftype="txt"/>
293 <output_collection name="graphs_collection" type="list" count="3"> <!-- element names match the fixed file names the command gives the three histograms; images are compared by size only -->
294 <element name="histogram_base_count" file="test_01_histogram1.jpg" ftype="jpg" compare='sim_size'/>
295 <element name="histogram_library" file="test_01_histogram2.jpg" ftype="jpg" compare='sim_size'/>
296 <element name="histogram_publication" file="test_01_histogram3.jpg" ftype="jpg" compare='sim_size'/>
297 </output_collection>
298 </test>
299 <!-- Test 02: Specific search options (instrument, strategy, selection, publication date and accession) -->
300 <test expect_num_outputs="2">
301 <param name="database" value="sra"/>
302 <param name="query" value="colorectal cancer"/>
303 <conditional name="conditional_platform">
304 <param name="platform" value="illumina"/>
305 <param name="instrument" value="NovaSeq 6000"/>
306 </conditional>
307 <param name="strategy" value="wga"/>
308 <param name="selection" value="pcr"/>
309 <conditional name="conditional_mode">
310 <param name="selector" value="false"/>
311 <param name="source" value="genomic single cell"/>
312 <param name="organism" value="Homo sapiens"/>
313 </conditional>
314 <section name="advanced">
315 <param name="publication_date" value="01-11-2022"/>
316 <param name="accession" value="SRX18108950"/>
317 <param name="verbosity" value="3"/> <!-- NOTE(review): verbosity is declared under output_options in the inputs, not under advanced; confirm this value is actually applied, and regenerate the expected files if it is moved -->
318 </section>
319 <output name="metadata_file" file="test_02.tabular" ftype="tsv"/>
320 <output name="stats" file="test_02_stats.txt" ftype="txt"/>
321 </test>
322 <!-- Test 03: ENA database, verbosity 1 and a reduced maximum of 50 entries -->
323 <test expect_num_outputs="2">
324 <param name="database" value="ena"/>
325 <param name="query" value="cancer"/>
326 <param name="strategy" value="wxs"/>
327 <param name="selection" value="random"/>
328 <section name="output_options">
329 <param name="verbosity" value="1"/>
330 <param name="max" value="50"/>
331 </section>
332 <conditional name="conditional_mode">
333 <param name="selector" value="false"/>
334 <param name="source" value="genomic"/>
335 <param name="organism" value="homo sapiens"/>
336 </conditional>
337 <output name="metadata_file" file="test_03.tabular" ftype="tsv"/>
338 <output name="stats" file="test_03.txt" ftype="txt"/>
339 </test>
340 <!-- Test 04: Metagenomic mode (selector enabled, metagenomic source) with a sample-size filter -->
341 <test expect_num_outputs="2">
342 <param name="database" value="sra"/>
343 <param name="query" value="escherichia"/>
344 <section name="advanced">
345 <param name="mbases" value="100"/>
346 </section>
347 <conditional name="conditional_mode">
348 <param name="selector" value="true"/>
349 <param name="source" value="metagenomic"/>
350 </conditional>
351 <output name="metadata_file" file="test_04.tabular" ftype="tsv"/>
352 <output name="stats" file="test_04.txt" ftype="txt"/>
353 </test>
354 <!-- Test 05: Oxford Nanopore platform with the MinION instrument (cDNA RNA-seq) -->
355 <test expect_num_outputs="2">
356 <param name="database" value="sra"/>
357 <param name="query" value="cancer"/>
358 <conditional name="conditional_platform">
359 <param name="platform" value="oxford nanopore"/>
360 <param name="instrument" value="MinION"/>
361 </conditional>
362 <param name="strategy" value="rna seq"/>
363 <param name="selection" value="cdna"/>
364 <conditional name="conditional_mode">
365 <param name="selector" value="false"/>
366 <param name="source" value="transcriptomic"/>
367 <param name="organism" value="Homo sapiens"/>
368 </conditional>
369 <output name="metadata_file" file="test_05.tabular" ftype="tsv"/>
370 <output name="stats" file="test_05_stats.txt" ftype="txt"/>
371 </test>
372 <!-- Test 06: Different species (Arabidopsis thaliana) with verbosity 3 and at most 20 entries -->
373 <test expect_num_outputs="2">
374 <param name="database" value="sra"/>
375 <param name="query" value="stress"/>
376 <conditional name="conditional_platform">
377 <param name="platform" value="illumina"/>
378 <param name="instrument" value="NextSeq 500"/>
379 </conditional>
380 <param name="strategy" value="rna seq"/>
381 <param name="selection" value="random"/>
382 <conditional name="conditional_mode">
383 <param name="selector" value="false"/>
384 <param name="source" value="transcriptomic"/>
385 <param name="organism" value="Arabidopsis thaliana"/>
386 </conditional>
387 <section name="output_options">
388 <param name="verbosity" value="3"/>
389 <param name="max" value="20"/>
390 </section>
391 <output name="metadata_file" file="test_06.tabular" ftype="tsv"/>
392 <output name="stats" file="test_06_stats.txt" ftype="txt"/>
393 </test>
394 </tests>
395 <help><![CDATA[
396 .. class:: infomark
397
398 **Purpose**
399
400 pysradb retrieves metadata, such as run accession numbers, from SRA and ENA based on multiple criteria:
401
402 - Database: SRA or ENA
403 - Query keywords
404 - Accession number: a relevant study/experiment/sample/run accession number
405 - Organism: scientific name of the sample organism
406 - Library layout: paired or single-end reads
407 - Sample size: rounded to the nearest megabase
408 - Publication date
409 - Sequencing platform: Illumina, Nanopore or PacBio
410 - Library selection: method used to select and/or enrich the material being sequenced
411 - Library source: Type of source material that is being sequenced
412 - Library preparation strategy: sequencing technique intended for the library
413
414 ------
415
416 .. class:: infomark
417
418 **Outputs**
419
420 pysradb generates three different output types:
421
422 - Raw metadata file
423 - Statistics for the search query
424 - Graphs to illustrate the search results
425
426 ------
427
428 .. class:: infomark
429
430 **Sequencing instruments**
431
432 **Comparisons between HiSeq instruments**
433
434 HiSeq 3000/4000 provides some improvements with respect to the previous model, HiSeq 2500:
435
436 - HiSeq 3000/4000 generate up to 1.5 Tb of data and 5 billion reads per run.
437 - HiSeq 3000/4000 use patterned flow cell technology originally developed for HiSeq X platforms.
438 - HiSeq 3000/4000 run 3 times faster and yield 65% more reads per lane.
439 - HiSeq 3000/4000 patterned flow cells contain billions of nanowells at fixed, known positions on the flow cell. The structured organization enables clustering at higher densities compared to non-pattern HiSeq designs.
440
441 However, the HiSeq 3000/4000 also have some limitations with respect to HiSeq 2500:
442
443 - HiSeq 3000/4000 are not recommended for low complexity sequencing. Applications such as non-unique amplicons, 16S, are currently not recommended.
444 - Libraries with low complexity within the first 25 bases of a read are not expected to produce high quality data.
445 - Library size restrictions. Libraries that are too long can result in polyclonal clusters that span more than 1 well, these will not pass filter. Smaller libraries will preferentially amplify with Illumina's new kinetic exclusion amplification so tight library distributions ranging from 300-500 bp are recommended.
446 - Very low tolerance for adapter dimers. Even as little as 1% adapter dimer can take up ~6% of sequencing reads, 10% contamination will take up 84% of reads. Illumina recommends you keep adapter contamination below 0.5% of your entire library.
447 - Higher duplication rates as compared to HiSeq 2500.
448 - Low quality read 2 (entire HiSeq 3000 install base is affected).
449
450 HiSeq 3000/4000 support DNA-seq, RNA-seq, ChIP-Seq, mate-pair, small RNA and exome library preparation. Any library preparation where there is enough sequence diversity is currently supported. Amplicon, 16S and applications with low sequencing diversity are currently not supported on the HiSeq 3000 / 4000.
451
452 HiSeq 2500 is considered the most reliable model according to different sources.
453
454 **What type of read quality is expected from the HiSeq 3000/4000 ?**
455
456 - 2 x 50bp ≥85% bases > Q30
457 - 2 x 75bp ≥80% bases > Q30
458 - 2 x 150bp ≥75% of bases >Q30
459
460 **What is the difference between MiSeq and HiSeq?**
461
462 HiSeq and MiSeq are among the most widely used platforms to study microbial communities, but the two platforms differ in the length and number of reads.
463 MiSeq can run 600 cycles to produce 200 million 300 bp reads, on the other hand, HiSeq 2500 can run 500 cycles to produce 120 million 250 bp.
464
465 **What are the differences between HiSeq and NovaSeq?**
466
467 The Illumina NovaSeq provides a massive upgrade in sequencing throughput compared to the HiSeq 4000. It has more stringent library requirements and requires a
468 larger sample size. Due to the vast amount of data produced by the NovaSeq and the known issue of index swapping, unique dual-indexed libraries are required.
469
470 **What are the characteristics of HiSeq X instruments?**
471
472 - HiSeq X is recommended for whole genome sequencing only (including whole bisulfite sequencing). This means that it is not adequate for RNA-seq, exome, ChIP-seq or small RNA-seq applications.
473 - Plant and animal samples can be sequenced on the HiSeq X.
474 - Expect coverate is over 30x or approximately 375 million reads per lane by loading one sample per lane.
475 - Hiseq X Ten generates utilize 2x150 base pair read configurations and has slightly better GC coverage than the HiSeq 2500.
476
477 **What are the differences between MiSeq and Nextseq?**
478
479 The NextSeq Series of systems delivers the power of high-throughput sequencing with the simplicity of a desktop sequencer. NextSeq instruments represent an improvement when compared with MiSeq, despite generating shorter reads (150bp, compared to MiSeq 250bp). NextSeq is recommended in
480 the following applications & methods:
481
482 - Exome & large panel sequencing (enrichment-based)
483 - Single-cell profiling (scRNA-Seq, scDNA-Seq, oligo tagging assays)
484 - Transcriptome sequencing (total RNA-Seq, mRNA-Seq, gene expression profiling)
485 - Methylation sequencing
486 - Metagenomic profiling (shotgun metagenomics, metatranscriptomics)
487 - Cell-free sequencing & liquid biopsy analysis
488
489 Regarding the maximum number of reads per run, MiSeq can generate 25 million, versus 400 million generated by the NextSeq 550 instrument. MiSeq is recommended for sequencing samples of low diversity.
490
491 **What are the differences between HiSeq and NextSeq?**
492
493 The main technical difference between HiSeq and NextSeq is the number of dyes each machine uses. HiSeq uses traditional color coding with four different dyes, while NextSeq uses two dyes. This does not make any practical difference in terms of data quality, but the trend in Illumina sequencers is towards reducing the number of dyes.
494
495 **What is the difference between Nextseq and NovaSeq?**
496
497 The NovaSeq 6000 system offers deep and broad coverage and is recommended for large whole-genome sequencing (human, plant, animal) projects. It generates 250 bp reads,
498 with 20 billion maximum reads per run. NovaSeq 6000 instruments have no application-based restrictions.
499
500 **Illumina maximum read-length summary**
501
502 - MiSeq: between 300 and 600 bp
503 - NextSeq: 300 bp
504 - HiSeq 2500: between 250 and 500 bp (depending on the software)
505 - HiSeq 4000: 150 bp
506 - HiSeq X: 150 bp
507
508 **Nanopore models - single-molecule ultra-long-read sequencing**
509
510 Nanopore sequencing provides the longest read lengths, from 500 bp to the current record of 2.3 Mb, with 10-30-kb genomic libraries being common. Even after error correction, sequencing error rates of corrected nanopore reads (1.5-9%) are still higher than those of corrected PacBio reads (<1%).
511
512 **PacBio SMRT instruments - single-molecule long-read low-error rate sequencing**
513
514 PacBio Sequel II CLR sequencing represents a major advancement in sequencing throughput over previous PacBio platforms with the production of more sequencing data and longer reads versus RS II and the Sequel I.
515 The PacBio HiFi sequencing method yields highly accurate long-read sequencing datasets with read lengths averaging 10-25 kb and accuracies greater than 99.5%.
516
517
518 ]]> </help>
519 <expand macro="citations" />
520 </tool>