comparison dorado.xml @ 0:63d8ecfcfab1 draft

planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/main/tools/dorado commit 0e768f088307f927787041b98504c594c6bcbe0f
author galaxy-australia
date Fri, 28 Jun 2024 03:39:11 +0000
parents
children fc5b6491cf78
comparison
equal deleted inserted replaced
-1:000000000000 0:63d8ecfcfab1
1 <tool id="dorado" name="Dorado" version="@VERSION@+galaxy0" python_template_version="3.5" profile="21.05">
2 <description>basecaller for raw Oxford Nanopore data</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="xrefs"/>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 ## Expose the input dataset under a fixed file name so that the
10 ## basecaller can be pointed at reads.pod5.
11 ln -s '${pod5_file}' ./reads.pod5 &&
12
13 ## Basecall the reads into calls.bam. The kit name is only passed
14 ## when a barcoding kit was selected.
15 dorado basecaller
16 --trim '${trim}'
17 #if $kit_name
18 --kit-name '${kit_name}'
19 #end if
20 '${model.fields.path}'
21 reads.pod5
22 > calls.bam &&
23
24 ## Write the summary table for the basecalled reads; it is collected
25 ## as the second output.
26 dorado summary
27 calls.bam
28 > summary.tsv
29 ]]></command>
30 <inputs>
31 <!-- FIXME: add pod5 datatype to Galaxy and change here.
32 https://github.com/galaxyproject/galaxy/pull/18419 -->
33 <param name="pod5_file" type="data" format="binary" label="Raw pod5 file" help="Only pod5 is supported. You can convert fast5 to pod5 with the fast5 to pod5 tool."/>
34 <param name="model" type="select" label="Basecalling model" help="Only models shipped in this tool's container are listed. See the Help section for info on model names.">
35 <options from_data_table="dorado_models">
36 <!-- only allow models that shipped in this container: column 1 of the dorado_models table must equal @CONTAINER_HASH@ -->
37 <filter type="static_value" column="1" value="@CONTAINER_HASH@"/>
38 </options>
39 </param>
40 <param type="select" argument="--trim" label="DNA adapter and primer trimming" help="Detect and remove any adapter and/or primer sequences from the beginning and end of DNA reads. Note that if you intend to demultiplex the reads, trimming adapters and primers could interfere with correct demultiplexing.">
41 <option value="all" selected="true">Any. Trim any detected adapters or primers.</option>
42 <option value="primers"> Primers. Trim any detected adapters or primers, but if barcoding is enabled the barcode sequences will not be trimmed.</option>
43 <option value="adapters"> Adapters. Trim any detected adapters, but primers will not be trimmed, and if barcoding is enabled then barcodes will not be trimmed either.</option>
44 <option value="none"> None. Nothing will be trimmed.</option>
45 </param>
46 <param type="select" argument="--kit-name" optional="true" label="Enable barcoding with the selected kit name." help="Reads are classified into their barcode groups during basecalling. The classification will be reflected in the read group name as well as in the BC tag of the output record.">
47 <option value="EXP-NBD103">EXP-NBD103</option>
48 <option value="EXP-NBD104">EXP-NBD104</option>
49 <option value="EXP-NBD114">EXP-NBD114</option>
50 <option value="EXP-NBD196">EXP-NBD196</option>
51 <option value="EXP-PBC001">EXP-PBC001</option>
52 <option value="EXP-PBC096">EXP-PBC096</option>
53 <option value="SQK-16S024">SQK-16S024</option>
54 <option value="SQK-16S114-24">SQK-16S114-24</option>
55 <option value="SQK-LWB001">SQK-LWB001</option>
56 <option value="SQK-MLK111-96-XL">SQK-MLK111-96-XL</option>
57 <option value="SQK-MLK114-96-XL">SQK-MLK114-96-XL</option>
58 <option value="SQK-NBD111-24">SQK-NBD111-24</option>
59 <option value="SQK-NBD111-96">SQK-NBD111-96</option>
60 <option value="SQK-NBD114-24">SQK-NBD114-24</option>
61 <option value="SQK-NBD114-96">SQK-NBD114-96</option>
62 <option value="SQK-PBK004">SQK-PBK004</option>
63 <option value="SQK-PCB109">SQK-PCB109</option>
64 <option value="SQK-PCB110">SQK-PCB110</option>
65 <option value="SQK-PCB111-24">SQK-PCB111-24</option>
66 <option value="SQK-PCB114-24">SQK-PCB114-24</option>
67 <option value="SQK-RAB201">SQK-RAB201</option>
68 <option value="SQK-RAB204">SQK-RAB204</option>
69 <option value="SQK-RBK001">SQK-RBK001</option>
70 <option value="SQK-RBK004">SQK-RBK004</option>
71 <option value="SQK-RBK110-96">SQK-RBK110-96</option>
72 <option value="SQK-RBK111-24">SQK-RBK111-24</option>
73 <option value="SQK-RBK111-96">SQK-RBK111-96</option>
74 <option value="SQK-RBK114-24">SQK-RBK114-24</option>
75 <option value="SQK-RBK114-96">SQK-RBK114-96</option>
76 <option value="SQK-RLB001">SQK-RLB001</option>
77 <option value="SQK-RPB004">SQK-RPB004</option>
78 <option value="SQK-RPB114-24">SQK-RPB114-24</option>
79 <option value="TWIST-16-UDI">TWIST-16-UDI</option>
80 <option value="TWIST-96A-UDI">TWIST-96A-UDI</option>
81 <option value="VSK-PTC001">VSK-PTC001</option>
82 <option value="VSK-VMK001">VSK-VMK001</option>
83 <option value="VSK-VMK004">VSK-VMK004</option>
84 <option value="VSK-VPS001">VSK-VPS001</option>
85 </param>
86 </inputs>
87 <outputs>
88 <data format="unsorted.bam" name="out_bam" label="Reads from ${on_string} basecalled by ${tool.name} with model ${model.fields.name}" from_work_dir="calls.bam"/>
89 <data format="tsv" name="out_tsv" label="${tool.name} sequencing summary for ${on_string}" from_work_dir="summary.tsv"/>
90 </outputs>
91 <tests>
92 <!-- test 1 -->
93 <test expect_num_outputs="2">
94 <param name="pod5_file" value="FAL00375_473bf0ed_0.ten_reads.pod5"/>
95 <param name="model" value="dna_r9.4.1_e8_fast@v3.4"/>
96 <param name="trim" value="all"/>
97 <output name="out_bam" ftype="unsorted.bam">
98 <assert_contents>
99 <has_size size="10000" delta="1000"/>
100 </assert_contents>
101 </output>
102 <output name="out_tsv" ftype="tsv">
103 <assert_contents>
104 <has_text text="00777c4b-cbd6-4a79-8647-bbe5f5f3f3bf"/>
105 </assert_contents>
106 </output>
107 </test>
108 <!-- test 2: trim parameter -->
109 <test expect_num_outputs="2">
110 <param name="pod5_file" value="FAL00375_473bf0ed_0.ten_reads.pod5"/>
111 <param name="model" value="dna_r9.4.1_e8_fast@v3.4"/>
112 <param name="trim" value="adapters"/>
113 <output name="out_bam" ftype="unsorted.bam">
114 <assert_contents>
115 <has_size size="10000" delta="1000"/>
116 </assert_contents>
117 </output>
118 <output name="out_tsv" ftype="tsv">
119 <assert_contents>
120 <has_text text="0072b26f-f37c-4517-afa7-621543ac2187"/>
121 </assert_contents>
122 </output>
123 </test>
124 <!-- test 3: barcode detection; the summary must contain the expected read ID and a barcode04 classification -->
125 <test expect_num_outputs="2">
126 <param name="pod5_file" value="SQK-RBK114_BC01_BC04_unclassified.pod5"/>
127 <param name="model" value="dna_r10.4.1_e8.2_400bps_hac@v4.3.0"/>
128 <param name="trim" value="all"/>
129 <param name="kit_name" value="SQK-RBK114-96"/>
130 <output name="out_bam" ftype="unsorted.bam">
131 <assert_contents>
132 <has_size size="10000" delta="1000"/>
133 </assert_contents>
134 </output>
135 <output name="out_tsv" ftype="tsv">
136 <assert_contents>
137 <has_text text="1103e241-dd7f-43bc-ae19-9a3c6326ad83"/>
138 <has_text text="SQK-RBK114-96_barcode04"/>
139 </assert_contents>
140 </output>
141 </test>
142 </tests>
143 <help><![CDATA[
144 Basecall raw Nanopore data using Oxford Nanopore’s open source
145 `dorado <https://github.com/nanoporetech/dorado/>`__ basecaller.
146
147 The input is pod5 format. If you have older data in fast5 format, you
148 can convert it using the ``fast5 to pod5`` tool.
149
150 Basecalling models
151 ------------------
152
153 **TLDR: to decide which model to use, see Oxford Nanopore’s** `table of
154 basecalling
155 models <https://github.com/nanoporetech/dorado/?tab=readme-ov-file#decoding-dorado-model-names>`__.
156
157 The names of Dorado models are structured with each segment
158 corresponding to a different aspect of the model separated by
159 underscores.
160
161 For example, the model ``dna_r10.4.1_e8.2_400bps_hac@v4.3.0`` can be
162 decoded as follows:
163
164 Analyte Type (``dna``):
165 - For DNA sequencing, it is represented as dna. If you are using a
166 Direct RNA Sequencing Kit, this will be rna002 or rna004,
167 depending on the kit.
168 Pore Type (``r10.4.1``):
169 - The type of flow cell used.
170 Chemistry Type (``e8.2``):
171 - The chemistry type, which corresponds to the kit used for
172 sequencing. For example, Kit 14 chemistry is denoted by e8.2 and
173 Kit 10 or Kit 9 are denoted by e8.
174 Translocation Speed (``400bps``):
175 - The speed of translocation selected at run setup in MinKNOW.
176 Model Type (``hac``):
177 - The size of the model, where larger models yield more accurate
178 basecalls but take more time. The three types of models are fast,
179 hac, and sup. The fast model is the quickest, sup is the most
180 accurate, and hac provides a balance between speed and accuracy.
181 Model Version Number (``v4.3.0``):
182 - The version of the model. Model updates are regularly released,
183 and higher version numbers typically signify greater accuracy.
184
185 ]]></help>
186 </tool>