comparison guppy_basecaller.xml @ 0:fb42dde97559 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/guppy commit ebd2091cbe5b34821c7c1192949dbec5f4d2eb03-dirty"
author artbio
date Wed, 18 Nov 2020 23:26:35 +0000
parents
children 93b6cbff5ea4
comparison
equal deleted inserted replaced
-1:000000000000 0:fb42dde97559
1 <tool id="guppy-basecaller" name="Guppy basecaller wrapper" version="0.1.4" python_template_version="3.5">
2 <description>A simple wrapper for guppy basecaller that depends on configuration files</description>
3 <requirements>
4 </requirements>
5 <command detect_errors="exit_code"><![CDATA[
6 ## Link each collection element as <element identifier>.fast5, unpack the model archive, then basecall the working directory; interpolated paths and identifiers are single-quoted so names containing spaces survive the shell.
7 #for $file in $infiles:
8 ln -s '$file' '${file.element_identifier}.fast5' &&
9 #end for
10 tar xf '$config' &&
11 guppy_basecaller -i .
12 --save_path out
13 --data_path .
14 --config *.cfg
15 --num_callers 4
16 --records_per_fastq 0
17 --cpu_threads_per_caller \${GALAXY_SLOTS:-2}
18 --disable_pings
19 --qscore_filtering
20 --calib_detect
21 ]]></command>
22 <inputs>
23 <param name="infiles" type="data_collection" collection_type="list" format="h5" label="Fast5 input (datatype h5)" help="A list collection of fast5 datasets; each element is linked into the working directory under its element identifier plus .fast5"/>
24 <param name="config" type="data" format="tar" label="Guppy basecall configuration model" help="Tar archive containing the model .cfg file and its data files; set the datatype to tar when uploading a .tgz"/>
25 </inputs>
26 <outputs>
27 <data name="guppy_result" format="fastq">
28 <discover_datasets directory="out/pass" ext="fastq" pattern="(?P&lt;designation&gt;.+)\.fastq" visible="true"/>
29 </data>
30 </outputs>
31 <help><![CDATA[
32 A wrapper for guppy basecaller. This expects two types of inputs: a collection of fast5 files,
33 and a configuration in the form of a tar file.
34
35 You can find configurations at https://github.com/nanoporetech/rerio,
36 and in particular in the directory https://github.com/nanoporetech/rerio/tree/master/basecall_models.
37
38 Each file there contains a URL you can download to use, for example
39 https://github.com/nanoporetech/rerio/blob/master/basecall_models/res_rna2_r941_min_flipflop_v001
40 points to 'https://nanoporetech.box.com/shared/static/84e1jeudx8lr8ay7e9u1ebnvx3bk2kjf.tgz'
41
42 When uploading these .tgz files, take care to set the format to 'tar' (Galaxy does not appear to autodetect it).
43
44 The results are fastq files containing only the reads that pass guppy's qscore filter (--qscore_filtering).
45
46 ------
47
48 guppy_basecaller --help
49 : Guppy Basecalling Software, (C) Oxford Nanopore Technologies, Limited. Version 3.6.1+249406c, client-server API version 1.1.0
50
51 **Usage**::
52
53 With config file::
54
55 guppy_basecaller -i <input path> -s <save path> -c <config file> [options]
56
57 With flowcell and kit name::
58
59 guppy_basecaller -i <input path> -s <save path> --flowcell <flowcell name>
60 --kit <kit name>
61
62 List supported flowcells and kits::
63
64 guppy_basecaller --print_workflows
65
66 Use server for basecalling::
67
68 guppy_basecaller -i <input path> -s <save path> -c <config file>
69 --port <server address> [options]
70
71
72 **Command line parameters**::
73
74 --trim_threshold arg Threshold above which data will be trimmed
75 (in standard deviations of current level
76 distribution).
77 --trim_min_events arg Adapter trimmer minimum stride intervals
78 after stall that must be seen.
79 --max_search_len arg Maximum number of samples to search through
80 for the stall
81 --override_scaling Manually provide scaling parameters rather
82 than estimating them from each read.
83 --scaling_med arg Median current value to use for manual
84 scaling.
85 --scaling_mad arg Median absolute deviation to use for manual
86 scaling.
87 --trim_strategy arg Trimming strategy to apply: 'dna' or 'rna'
88 (or 'none' to disable trimming)
89 --dmean_win_size arg Window size for coarse stall event
90 detection
91 --dmean_threshold arg Threshold for coarse stall event detection
92 --jump_threshold arg Threshold level for rna stall detection
93 --pt_scaling Enable polyT/adapter max detection for read
94 scaling.
95 --pt_median_offset arg Set polyT median offset for setting read
96 scaling median (default 2.5)
97 --adapter_pt_range_scale arg Set polyT/adapter range scale for setting
98 read scaling median absolute deviation
99 (default 5.2)
100 --pt_required_adapter_drop arg Set minimum required current drop from
101 adapter max to polyT detection. (default
102 30.0)
103 --pt_minimum_read_start_index arg Set minimum index for read start sample
104 required to attempt polyT scaling. (default
105 30)
106 --as_model_file arg Path to JSON model file for adapter
107 scaling.
108 --as_gpu_runners_per_device arg Number of runners per GPU device for
109 adapter scaling.
110 --as_cpu_threads_per_scaler arg Number of CPU worker threads per adapter
111 scaler
112 --as_reads_per_runner arg Maximum reads per runner for adapter
113 scaling.
114 --as_num_scalers arg Number of parallel scalers for adapter
115 scaling.
116 -m [ --model_file ] arg Path to JSON model file.
117 -k [ --kernel_path ] arg Path to GPU kernel files location (only
118 needed if builtin_scripts is false).
119 -x [ --device ] arg Specify basecalling device: 'auto', or
120 'cuda:<device_id>'.
121 --builtin_scripts arg Whether to use GPU kernels that were
122 included at compile-time.
123 --chunk_size arg Stride intervals per chunk.
124 --chunks_per_runner arg Maximum chunks per runner.
125 --chunks_per_caller arg Soft limit on number of chunks in each
126 caller's queue. New reads will not be
127 queued while this is exceeded.
128 --high_priority_threshold arg Number of high priority chunks to process
129 for each medium priority chunk.
130 --medium_priority_threshold arg Number of medium priority chunks to process
131 for each low priority chunk.
132 --overlap arg Overlap between chunks (in stride
133 intervals).
134 --gpu_runners_per_device arg Number of runners per GPU device.
135 --cpu_threads_per_caller arg Number of CPU worker threads per
136 basecaller.
137 --num_callers arg Number of parallel basecallers to create.
138 --post_out Return full posterior matrix in output
139 fast5 file and/or called read message from
140 server.
141 --stay_penalty arg Scaling factor to apply to stay probability
142 calculation during transducer decode.
143 --qscore_offset arg Qscore calibration offset.
144 --qscore_scale arg Qscore calibration scale factor.
145 --temp_weight arg Temperature adjustment for weight matrix in
146 softmax layer of RNN.
147 --temp_bias arg Temperature adjustment for bias vector in
148 softmax layer of RNN.
149 --qscore_filtering Enable filtering of reads into PASS/FAIL
150 folders based on min qscore.
151 --min_qscore arg Minimum acceptable qscore for a read to be
152 filtered into the PASS folder
153 --reverse_sequence arg Reverse the called sequence (for RNA
154 sequencing).
155 --u_substitution arg Substitute 'U' for 'T' in the called
156 sequence (for RNA sequencing).
157 --log_speed_frequency arg How often to print out basecalling speed.
158 --barcode_kits arg Space separated list of barcoding kit(s) or
159 expansion kit(s) to detect against. Must be
160 in double quotes.
161 --trim_barcodes Trim the barcodes from the output sequences
162 in the FastQ files.
163 --num_extra_bases_trim arg How vigorous to be in trimming the barcode.
164 Default is 0 i.e. the length of the
165 detected barcode. A positive integer means
166 extra bases will be trimmed, a negative
167 number is how many fewer bases (less
168 vigorous) will be trimmed.
169 --arrangements_files arg Files containing arrangements.
170 --score_matrix_filename arg File containing mismatch score matrix.
171 --start_gap1 arg Gap penalty for aligning before the
172 reference.
173 --end_gap1 arg Gap penalty for aligning after the
174 reference.
175 --open_gap1 arg Penalty for opening a new gap in the
176 reference.
177 --extend_gap1 arg Penalty for extending a gap in the
178 reference.
179 --start_gap2 arg Gap penalty for aligning before the query.
180 --end_gap2 arg Gap penalty for aligning after the query.
181 --open_gap2 arg Penalty for opening a new gap in the query.
182 --extend_gap2 arg Penalty for extending a gap in the query.
183 --min_score arg Minimum score to consider a valid
184 alignment.
185 --min_score_rear_override arg Minimum score to consider a valid alignment
186 for the rear barcode only (and min_score
187 will then be used for the front only when
188 this is set).
189 --front_window_size arg Window size for the beginning barcode.
190 --rear_window_size arg Window size for the ending barcode.
191 --require_barcodes_both_ends Reads will only be classified if there is a
192 barcode above the min_score at both ends of
193 the read.
194 --allow_inferior_barcodes Reads will still be classified even if both
195 the barcodes at the front and rear (if
196 applicable) were not the best scoring
197 barcodes above the min_score.
198 --detect_mid_strand_barcodes Search for barcodes through the entire
199 length of the read.
200 --min_score_mid_barcodes arg Minimum score for a barcode to be detected
201 in the middle of a read.
202 --num_barcoding_buffers arg Number of GPU memory buffers to allocate to
203 perform barcoding into. Controls level of
204 parallelism on GPU for barcoding.
205 --num_barcode_threads arg Number of worker threads to use for
206 barcoding.
207 --calib_detect Enable calibration strand detection and
208 filtering.
209 --calib_reference arg Reference FASTA file containing calibration
210 strand.
211 --calib_min_sequence_length arg Minimum sequence length for reads to be
212 considered candidate calibration strands.
213 --calib_max_sequence_length arg Maximum sequence length for reads to be
214 considered candidate calibration strands.
215 --calib_min_coverage arg Minimum reference coverage to pass
216 calibration strand detection.
217 --print_workflows Output available workflows.
218 --flowcell arg Flowcell to find a configuration for
219 --kit arg Kit to find a configuration for
220 -z [ --quiet ] Quiet mode. Nothing will be output to
221 STDOUT if this option is set.
222 --trace_categories_logs arg Enable trace logs - list of strings with
223 the desired names.
224 --verbose_logs Enable verbose logs.
225 --disable_pings Disable the transmission of telemetry
226 pings.
227 --ping_url arg URL to send pings to
228 --ping_segment_duration arg Duration in minutes of each ping segment.
229 -q [ --records_per_fastq ] arg Maximum number of records per fastq file, 0
230 means use a single file (per worker, per
231 run id).
232 --read_batch_size arg Maximum batch size, in reads, for grouping
233 input files.
234 --compress_fastq Compress fastq output files with gzip.
235 -i [ --input_path ] arg Path to input fast5 files.
236 --input_file_list arg Optional file containing list of input
237 fast5 files to process from the input_path.
238 -s [ --save_path ] arg Path to save fastq files.
239 -l [ --read_id_list ] arg File containing list of read ids to filter
240 to
241 -r [ --recursive ] Search for input files recursively.
242 --fast5_out Choice of whether to do fast5 output.
243 --resume Resume a previous basecall run using the
244 same output folder.
245 --progress_stats_frequency arg Frequency in seconds in which to report
246 progress statistics, if supplied will
247 replace the default progress display.
248 --max_block_size arg Maximum block size (in events) of basecall
249 messages to server.
250 -p [ --port ] arg Port for basecalling service.
251 --barcoding_config_file arg Configuration file to use for barcoding.
252 --num_barcode_threads arg Number of worker threads to use for
253 barcoding.
254 --disable_events Disable the transmission of event tables
255 when receiving reads back from the basecall
256 server.
257 --client_id arg Optional unique identifier (non-negative
258 integer) for this instance of the Guppy
259 Client Basecaller, if supplied will form
260 part of the output filenames.
261 --nested_output_folder If flagged output fastq files will be
262 written to a nested folder structure, based
263 on: protocol_group/sample/protocol/qscore_p
264 ass_fail/barcode_arrangement/
265 -h [ --help ] produce help message
266 -v [ --version ] print version number
267 -c [ --config ] arg Config file to use
268 -d [ --data_path ] arg Path to use for loading any data files the
269 application requires.
270
271
272 ------
273 ]]></help>
274 </tool>