Mercurial > repos > bgruening > whisper
comparison whisper.xml @ 0:2706ea308f94 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/whisper commit 65bc65452f0cb44220555d6295106ea525038c70
author | bgruening |
---|---|
date | Wed, 24 Apr 2024 22:10:03 +0000 |
parents | |
children | 02fa7b2245e6 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2706ea308f94 |
---|---|
1 <tool id="whisper" name="Speech to Text" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT"> | |
2 <description> | |
3 Transcribe audio or video files to text using OpenAI Whisper | |
4 </description> | |
5 <macros> <!-- Tokens that are substituted into the version attribute of the tool element. --> | |
6 <token name="@TOOL_VERSION@">20231117</token> <!-- Same value as the tag of the container image listed under requirements. --> | |
7 <token name="@VERSION_SUFFIX@">0</token> <!-- Appended after "+galaxy" in the tool version; presumably bumped for wrapper-only changes (TODO confirm). --> | |
8 </macros> | |
9 <requirements> <!-- Runtime dependency: the Whisper Docker image from quay.io/galaxy. The tag is taken from the TOOL_VERSION token so the wrapper version and the image tag cannot drift apart. --> | |
10 <container type="docker">quay.io/galaxy/whisper:@TOOL_VERSION@</container> | |
11 </requirements> | |
12 <!-- Command template: links the input dataset to ./input.EXT (EXT is the dataset extension), runs whisper so that every output format is written to ./outs, and appends optional flags only when the user supplied a value. All advanced options are read through the "advanced" section prefix. Numeric options for which 0 is a valid setting are tested with str() so that 0 is not mistaken for "unset". --> <command detect_errors="exit_code"><![CDATA[ | |
13 mkdir -p ./outs ./models && | |
14 ln -s '$infile' ./input.${infile.ext} && | |
15 whisper ./input.${infile.ext} | |
16 --model $model | |
17 --output_dir ./outs | |
18 --threads \${GALAXY_SLOTS:-2} | |
19 --task transcribe | |
20 --output_format all | |
21 --verbose False | |
22 --model_dir \${OPENAI_WHISPER_MODEL_DIR:-./models} | |
23 #if str($language).strip(): | |
24 --language '$language' | |
25 #end if | |
26 #if str($advanced.temperature) != '': | |
27 --temperature '$advanced.temperature' | |
28 #end if | |
29 #if $advanced.best_of: | |
30 --best_of '$advanced.best_of' | |
31 #end if | |
32 #if $advanced.beam_size: | |
33 --beam_size '$advanced.beam_size' | |
34 #end if | |
35 #if $advanced.patience: | |
36 --patience '$advanced.patience' | |
37 #end if | |
38 #if str($advanced.length_penalty) != '': | |
39 --length_penalty '$advanced.length_penalty' | |
40 #end if | |
41 #if $advanced.suppress_tokens: | |
42 --suppress_tokens '$advanced.suppress_tokens' | |
43 #end if | |
44 #if str($advanced.initial_prompt).strip(): | |
45 --initial_prompt '$advanced.initial_prompt' | |
46 #end if | |
47 ## Always emitted: an unchecked boolean is falsy, so an if-guard here would drop 'False' | |
48 ## and Whisper would keep its default (True). NOTE(review): re-check tests that set this to False. | |
49 --condition_on_previous_text '$advanced.condition_on_previous_text' | |
50 #if $advanced.temperature_increment_on_fallback: | |
51 --temperature_increment_on_fallback '$advanced.temperature_increment_on_fallback' | |
52 #end if | |
53 #if str($advanced.compression_ratio_threshold) != '': | |
54 --compression_ratio_threshold '$advanced.compression_ratio_threshold' | |
55 #end if | |
56 #if str($advanced.logprob_threshold) != '': | |
57 --logprob_threshold '$advanced.logprob_threshold' | |
58 #end if | |
59 #if str($advanced.no_speech_threshold) != '': | |
60 --no_speech_threshold '$advanced.no_speech_threshold' | |
61 #end if | |
62 #if $advanced.word_timestamps.word_timestamps == "True": | |
63 --word_timestamps '$advanced.word_timestamps.word_timestamps' | |
64 #if $advanced.word_timestamps.highlight_words: | |
65 --highlight_words '$advanced.word_timestamps.highlight_words' | |
66 #end if | |
67 #if $advanced.word_timestamps.max_line_width: | |
68 --max_line_width '$advanced.word_timestamps.max_line_width' | |
69 #end if | |
70 #if $advanced.word_timestamps.max_line_count: | |
71 --max_line_count '$advanced.word_timestamps.max_line_count' | |
72 #end if | |
73 #if $advanced.word_timestamps.max_words_per_line: | |
74 --max_words_per_line '$advanced.word_timestamps.max_words_per_line' | |
75 #end if | |
76 #end if | |
77 ]]> | |
78 </command> | |
79 <environment_variables> | |
80 <!-- TQDM_DISABLE=1 switches off the progress bar, which would otherwise be printed to stderr. --> | |
81 <environment_variable name="TQDM_DISABLE">1</environment_variable> | |
82 </environment_variables> | |
83 <inputs> | |
84 <param name="infile" type="data" format="wav,mp3,mkv,flv,mpg,ogg,wma,mp4" label="Select audio or video file" /> <!-- The command symlinks this dataset to ./input.EXT, where EXT is the dataset extension, and passes that path to whisper. --> | |
85 <param argument="--model" type="select" label="Speech to Text Model"> <!-- The chosen value is passed unchanged as the model option of whisper; "small" is preselected. --> | |
86 <option value="tiny">Tiny (~32x faster than the large model)</option> | |
87 <option value="base">Base (~16x faster than the large model)</option> | |
88 <option value="small" selected="true">Small (~6x faster than the large model)</option> | |
89 <option value="medium">Medium (~2x faster than the large model)</option> | |
90 <option value="large">Large</option> | |
91 </param> | |
92 <param argument="--language" type="select" label="Language"> <!-- Spoken language of the input; the selected value is passed to whisper as the language name. --> | |
93 <option value="">Auto (detect language)</option> <!-- Empty value: the command template then leaves out the language option. --> | |
94 <option value="Afrikaans">Afrikaans</option> | |
95 <option value="Albanian">Albanian</option> | |
96 <option value="Amharic">Amharic</option> | |
97 <option value="Arabic">Arabic</option> | |
98 <option value="Armenian">Armenian</option> | |
99 <option value="Assamese">Assamese</option> | |
100 <option value="Azerbaijani">Azerbaijani</option> | |
101 <option value="Bashkir">Bashkir</option> | |
102 <option value="Basque">Basque</option> | |
103 <option value="Belarusian">Belarusian</option> | |
104 <option value="Bengali">Bengali</option> | |
105 <option value="Bosnian">Bosnian</option> | |
106 <option value="Breton">Breton</option> | |
107 <option value="Bulgarian">Bulgarian</option> | |
108 <option value="Burmese">Burmese</option> | |
109 <option value="Cantonese">Cantonese</option> | |
110 <option value="Castilian">Castilian</option> | |
111 <option value="Catalan">Catalan</option> | |
112 <option value="Chinese">Chinese</option> | |
113 <option value="Croatian">Croatian</option> | |
114 <option value="Czech">Czech</option> | |
115 <option value="Danish">Danish</option> | |
116 <option value="Dutch">Dutch</option> | |
117 <option value="English">English</option> | |
118 <option value="Estonian">Estonian</option> | |
119 <option value="Faroese">Faroese</option> | |
120 <option value="Finnish">Finnish</option> | |
121 <option value="Flemish">Flemish</option> | |
122 <option value="French">French</option> | |
123 <option value="Galician">Galician</option> | |
124 <option value="Georgian">Georgian</option> | |
125 <option value="German">German</option> | |
126 <option value="Greek">Greek</option> | |
127 <option value="Gujarati">Gujarati</option> | |
128 <option value="Haitian">Haitian</option> | |
129 <option value="Haitian Creole">Haitian Creole</option> | |
130 <option value="Hausa">Hausa</option> | |
131 <option value="Hawaiian">Hawaiian</option> | |
132 <option value="Hebrew">Hebrew</option> | |
133 <option value="Hindi">Hindi</option> | |
134 <option value="Hungarian">Hungarian</option> | |
135 <option value="Icelandic">Icelandic</option> | |
136 <option value="Indonesian">Indonesian</option> | |
137 <option value="Italian">Italian</option> | |
138 <option value="Japanese">Japanese</option> | |
139 <option value="Javanese">Javanese</option> | |
140 <option value="Kannada">Kannada</option> | |
141 <option value="Kazakh">Kazakh</option> | |
142 <option value="Khmer">Khmer</option> | |
143 <option value="Korean">Korean</option> | |
144 <option value="Lao">Lao</option> | |
145 <option value="Latin">Latin</option> | |
146 <option value="Latvian">Latvian</option> | |
147 <option value="Letzeburgesch">Letzeburgesch</option> | |
148 <option value="Lingala">Lingala</option> | |
149 <option value="Lithuanian">Lithuanian</option> | |
150 <option value="Luxembourgish">Luxembourgish</option> | |
151 <option value="Macedonian">Macedonian</option> | |
152 <option value="Malagasy">Malagasy</option> | |
153 <option value="Malay">Malay</option> | |
154 <option value="Malayalam">Malayalam</option> | |
155 <option value="Maltese">Maltese</option> | |
156 <option value="Mandarin">Mandarin</option> | |
157 <option value="Maori">Maori</option> | |
158 <option value="Marathi">Marathi</option> | |
159 <option value="Moldavian">Moldavian</option> | |
160 <option value="Moldovan">Moldovan</option> | |
161 <option value="Mongolian">Mongolian</option> | |
162 <option value="Myanmar">Myanmar</option> | |
163 <option value="Nepali">Nepali</option> | |
164 <option value="Norwegian">Norwegian</option> | |
165 <option value="Nynorsk">Nynorsk</option> | |
166 <option value="Occitan">Occitan</option> | |
167 <option value="Panjabi">Panjabi</option> | |
168 <option value="Pashto">Pashto</option> | |
169 <option value="Persian">Persian</option> | |
170 <option value="Polish">Polish</option> | |
171 <option value="Portuguese">Portuguese</option> | |
172 <option value="Punjabi">Punjabi</option> | |
173 <option value="Pushto">Pushto</option> | |
174 <option value="Romanian">Romanian</option> | |
175 <option value="Russian">Russian</option> | |
176 <option value="Sanskrit">Sanskrit</option> | |
177 <option value="Serbian">Serbian</option> | |
178 <option value="Shona">Shona</option> | |
179 <option value="Sindhi">Sindhi</option> | |
180 <option value="Sinhala">Sinhala</option> | |
181 <option value="Sinhalese">Sinhalese</option> | |
182 <option value="Slovak">Slovak</option> | |
183 <option value="Slovenian">Slovenian</option> | |
184 <option value="Somali">Somali</option> | |
185 <option value="Spanish">Spanish</option> | |
186 <option value="Sundanese">Sundanese</option> | |
187 <option value="Swahili">Swahili</option> | |
188 <option value="Swedish">Swedish</option> | |
189 <option value="Tagalog">Tagalog</option> | |
190 <option value="Tajik">Tajik</option> | |
191 <option value="Tamil">Tamil</option> | |
192 <option value="Tatar">Tatar</option> | |
193 <option value="Telugu">Telugu</option> | |
194 <option value="Thai">Thai</option> | |
195 <option value="Tibetan">Tibetan</option> | |
196 <option value="Turkish">Turkish</option> | |
197 <option value="Turkmen">Turkmen</option> | |
198 <option value="Ukrainian">Ukrainian</option> | |
199 <option value="Urdu">Urdu</option> | |
200 <option value="Uzbek">Uzbek</option> | |
201 <option value="Valencian">Valencian</option> | |
202 <option value="Vietnamese">Vietnamese</option> | |
203 <option value="Welsh">Welsh</option> | |
204 <option value="Yiddish">Yiddish</option> | |
205 <option value="Yoruba">Yoruba</option> | |
206 </param> | |
207 <param argument="--output_format" type="select" label="Output Format" multiple="true"> <!-- Only decides which output datasets are kept (see the filter elements under outputs); the command itself always asks whisper for all formats. --> | |
208 <option value="txt" selected="true">Text</option> | |
209 <option value="json">JSON</option> | |
210 <option value="srt">SubRip</option> | |
211 <option value="vtt">WebVTT</option> | |
212 <option value="tsv">Tab-separated values</option> | |
213 </param> | |
214 <section name="advanced" title="Advanced Options"> <!-- Optional decoding controls. The command template reads them as $advanced.NAME and appends the matching whisper option when a value is set. Temperature is a float and suppress_tokens is free text because Whisper accepts fractional temperatures and a comma-separated id list. --> | |
215 <param argument="--temperature" type="float" value="0" min="0" optional="true" label="Temperature" help="Temperature to use for sampling" /> | |
216 <param argument="--best_of" type="integer" value="5" optional="true" label="Best of" help="Number of candidates when sampling with non-zero temperature" /> | |
217 <param argument="--beam_size" type="integer" value="5" optional="true" label="Beam size" help="Number of beams in beam search, only applicable when temperature is zero" /> | |
218 <param argument="--patience" type="float" value="" optional="true" label="Optional patience value to use in beam decoding" help="As in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search" /> | |
219 <param argument="--length_penalty" type="float" value="" optional="true" label="Optional token length penalty coefficient (alpha)" help="As in https://arxiv.org/abs/1609.08144, uses simple length normalization by default" /> | |
220 <param argument="--suppress_tokens" type="text" value="-1" optional="true" label="Suppress tokens" help="Comma-separated list of token ids to suppress during sampling; -1 will suppress most special characters except common punctuations"><validator type="regex" message="Enter integer token ids separated by commas, for example -1 or 50256,50257">^-?[0-9]+(,-?[0-9]+)*$</validator></param> | |
221 <param argument="--initial_prompt" type="text" value="" optional="true" label="Initial prompt" help="Optional text to provide as a prompt for the first window" /> | |
222 <param argument="--condition_on_previous_text" type="boolean" truevalue="True" falsevalue="False" checked="true" optional="true" label="Condition on previous text" help="If True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop" /> | |
223 <param argument="--temperature_increment_on_fallback" type="float" value="0.2" optional="true" label="Temperature increment on fallback" help="Temperature to increase when falling back when the decoding fails to meet either of the thresholds below" /> | |
224 <param argument="--compression_ratio_threshold" type="float" value="2.4" optional="true" label="Compression ratio threshold" help="If the gzip compression ratio is higher than this value, treat the decoding as failed" /> | |
225 <param argument="--logprob_threshold" type="float" value="-1.0" optional="true" label="Logprob threshold" help="If the average log probability is lower than this value, treat the decoding as failed" /> | |
226 <param argument="--no_speech_threshold" type="float" value="0.6" optional="true" label="No speech threshold" help="If the probability of the |nospeech| token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silence" /> | |
227 <conditional name="word_timestamps"> | |
228 <param argument="--word_timestamps" type="select" label="Extract word timestamps?" help="(experimental) Extract word-level timestamps and refine the results based on them"> | |
229 <option value="False">False</option> | |
230 <option value="True">True</option> | |
231 </param> | |
232 <when value="True"> | |
233 <param argument="--highlight_words" type="boolean" truevalue="True" falsevalue="False" checked="false" optional="true" label="Highlight words" help="Underline each word as it is spoken in srt and vtt" /> | |
234 <param argument="--max_line_width" type="integer" value="" optional="true" label="Max line width" help="The maximum number of characters in a line before breaking the line" /> | |
235 <param argument="--max_line_count" type="integer" value="" optional="true" label="Max line count" help="The maximum number of lines in a segment" /> | |
236 <param argument="--max_words_per_line" type="integer" value="" optional="true" label="Max words per line" help="The maximum number of words in a segment. Has no effect when --max_line_width is set." /> | |
237 </when> | |
238 <when value="False"> | |
239 </when> | |
240 </conditional> | |
241 </section> | |
242 </inputs> | |
243 <outputs> <!-- One dataset per whisper output format, each collected from ./outs/input.FORMAT. A dataset is kept only when its format appears in the output_format parameter (see each filter). --> | |
244 <data name="output_txt" format="txt" from_work_dir="./outs/input.txt" label="${tool.name} on ${on_string}.txt"> | |
245 <filter>'txt' in output_format</filter> | |
246 </data> | |
247 <data name="output_json" format="json" from_work_dir="./outs/input.json" label="${tool.name} on ${on_string}.json"> | |
248 <filter>'json' in output_format</filter> | |
249 </data> | |
250 <data name="output_srt" format="txt" from_work_dir="./outs/input.srt" label="${tool.name} on ${on_string}.srt"> | |
251 <filter>'srt' in output_format</filter> | |
252 </data> | |
253 <data name="output_vtt" format="txt" from_work_dir="./outs/input.vtt" label="${tool.name} on ${on_string}.vtt"> | |
254 <filter>'vtt' in output_format</filter> | |
255 </data> | |
256 <data name="output_tsv" format="tabular" from_work_dir="./outs/input.tsv" label="${tool.name} on ${on_string}.tsv"> | |
257 <filter>'tsv' in output_format</filter> | |
258 </data> | |
259 </outputs> | |
260 <tests> | |
261 <test expect_num_outputs="1"> <!-- english.wav, tiny model, language English; the text output is compared with transcribe.txt. --> | |
262 <param name="infile" value="english.wav" ftype="wav"/> | |
263 <param name="model" value="tiny"/> | |
264 <param name="language" value="English"/> | |
265 <param name="output_format" value="txt"/> | |
266 <output name="output_txt" file="transcribe.txt"/> | |
267 </test> | |
268 <test expect_num_outputs="3"> <!-- Same audio with three formats selected: SRT and TSV are compared with reference files, JSON is checked with content assertions. --> | |
269 <param name="infile" value="english.wav" ftype="wav"/> | |
270 <param name="model" value="tiny"/> | |
271 <param name="language" value="English"/> | |
272 <param name="output_format" value="srt,tsv,json"/> | |
273 <output name="output_srt" file="transcribe_english.srt"/> | |
274 <output name="output_tsv" file="transcribe_english.tsv"/> | |
275 <output name="output_json"> | |
276 <assert_contents> | |
277 <has_text text="21-year-old Jesus joined Manchester City last year in January 2017 from"/> | |
278 <has_text text="temperature"/> | |
279 <has_text text="no_speech_prob"/> | |
280 <has_text text="English"/> | |
281 <has_n_lines n="1"/> | |
282 </assert_contents> | |
283 </output> | |
284 </test> | |
285 <test expect_num_outputs="1"> <!-- german.wav, small model, language German; the text output is compared with transcribe_german.txt. --> | |
286 <param name="infile" value="german.wav" ftype="wav"/> | |
287 <param name="model" value="small"/> | |
288 <param name="language" value="German"/> | |
289 <param name="output_format" value="txt"/> | |
290 <output name="output_txt" file="transcribe_german.txt"/> | |
291 </test> | |
292 <test expect_num_outputs="1"> <!-- german_english.mp3, medium model; no language is set in this test. --> | |
293 <param name="infile" value="german_english.mp3" ftype="mp3"/> | |
294 <param name="model" value="medium"/> | |
295 <param name="output_format" value="txt"/> | |
296 <output name="output_txt" file="transcribe_german_english.txt"/> | |
297 </test> | |
298 <test expect_num_outputs="2"> <!-- persian.wav exercising the advanced section: condition_on_previous_text set to False and word timestamps enabled with max_words_per_line 5. --> | |
299 <param name="infile" value="persian.wav" ftype="wav"/> | |
300 <param name="model" value="medium"/> | |
301 <param name="language" value="Persian"/> | |
302 <param name="output_format" value="srt,json"/> | |
303 <section name="advanced"> | |
304 <param name="condition_on_previous_text" value="False"/> | |
305 <conditional name="word_timestamps"> | |
306 <param name="word_timestamps" value="True"/> | |
307 <param name="max_words_per_line" value="5"/> | |
308 </conditional> | |
309 </section> | |
310 <output name="output_srt"> | |
311 <assert_contents> | |
312 <has_n_lines n="32"/> | |
313 </assert_contents> | |
314 </output> | |
315 <output name="output_json"> | |
316 <assert_contents> | |
317 <has_text text="\u0628\u0631\u062e\u06cc \u0627\u0632 \u0627\u06cc\u0646"/> | |
318 <has_text text="temperature"/> | |
319 <has_text text="no_speech_prob"/> | |
320 <has_text text="Persian"/> | |
321 <has_n_lines n="1"/> | |
322 </assert_contents> | |
323 </output> | |
324 </test> | |
325 </tests> | |
326 <help><![CDATA[ | |
327 | |
328 .. class:: infomark | |
329 | |
330 **What it does** | |
331 | |
332 Transcribe audio or video files to text using the `Whisper from OpenAI <https://github.com/openai/whisper>`_. | |
333 | |
334 Usage | |
335 ..... | |
336 | |
337 | |
338 **Input** | |
339 Audio or video file to transcribe, in one of the following formats: wav, mp3, mkv, flv, mpg, ogg, wma, or mp4. | |
340 | |
341 | |
342 **Output** | |
343 Transcribed text in the selected format. The output can be in text, JSON, SubRip, WebVTT, or tab-separated values (tabular) format. | |
344 ]]></help> | |
345 <creator> <!-- Person credited as creator of this tool definition. --> | |
346 <person givenName="Alireza" familyName="Heidari" url="http://github.com/itisalirh"/> | |
347 </creator> | |
348 <citations> <!-- Single BibTeX entry pointing at the upstream Whisper repository. --> | |
349 <citation type="bibtex"> | |
350 @misc{openai2022whisper, | |
351 title={Whisper}, | |
352 author={OpenAI}, | |
353 year={2022}, | |
354 url={https://github.com/openai/whisper} | |
355 } | |
356 </citation> | |
357 </citations> | |
358 </tool> |