comparison rbpbench.xml @ 0:7dd2835ce566 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rbpbench commit 0e21bd630200c1f199db8ba5d83b81d4214fc59f
author rnateam
date Sun, 03 Dec 2023 12:51:54 +0000
parents
children 26c64157456b
comparison
equal deleted inserted replaced
-1:000000000000 0:7dd2835ce566
1 <tool id="rbpbench" name="RBPBench" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
2
3 <description>- Evaluate CLIP-seq and other genomic region data using a comprehensive collection of RBP binding motifs</description>
4 <macros>
5 <import>macros.xml</import>
6 </macros>
7 <expand macro="bio_tools"/>
8 <expand macro="requirements"/>
9
10 <command detect_errors="exit_code"><![CDATA[
11 #if $action_type.action_type_selector == 'search_motifs':
12 @PREPARE_REF@
13 rbpbench search
14 --in '$action_type.search_bed_file'
15 --out ./
16 --genome reference.fa
17 --method-id '$action_type.search_method_id'
18 --data-id '$action_type.search_data_id'
19 --rbps
20 #if str($action_type.select_db.select_db_selector) == "default_db":
21 #if str($action_type.select_db.select_rbps.select_rbps_selector) == "list_db_rbps":
22 #if str($action_type.select_db.select_rbps.database) != "None":
23 #for $rbp_id in str($action_type.select_db.select_rbps.database).split(','):
24 $rbp_id
25 #end for
26 #end if
27 #else:
28 ALL
29 #end if
30 #else:
31 $action_type.select_db.custom_db_rbp_ids_list
32 #end if
33
34 #if str($action_type.user_rbp.user_rbp_selector) == "sequence":
35 USER
36 --user-rbp-id '$action_type.user_rbp.seq_rbp_id'
37 --user-meme-xml '$action_type.user_rbp.meme_motifs_xml'
38 #elif str($action_type.user_rbp.user_rbp_selector) == "structure":
39 USER
40 --user-rbp-id '$action_type.user_rbp.str_rbp_id'
41 --user-cm '$action_type.user_rbp.cm_model_file'
42 #end if
43
44 #if str($action_type.select_db.select_db_selector) == "custom_db":
45 --custom-db-id '$action_type.select_db.custom_db_id'
46 --custom-db-info '$action_type.select_db.custom_db_info_file'
47 #if $action_type.select_db.custom_db_meme_xml_file:
48 --custom-db-meme-xml '$action_type.select_db.custom_db_meme_xml_file'
49 #end if
50 #if $action_type.select_db.custom_db_cm_model_file:
51 --custom-db-cm '$action_type.select_db.custom_db_cm_model_file'
52 #end if
53 #end if
54 @COMMON_PARAMS@
55 #if $action_type.report_plotting_options.set_rbp_id:
56 --set-rbp-id '$action_type.report_plotting_options.set_rbp_id'
57 #end if
58 --motif-distance-plot-range $action_type.report_plotting_options.motif_distance_plot_range
59 --motif-min-pair-count $action_type.report_plotting_options.motif_min_pair_count
60 --rbp-min-pair-count $action_type.report_plotting_options.rbp_min_pair_count
61 #if $action_type.report_plotting_options.gtf_file:
62 --gtf '$action_type.report_plotting_options.gtf_file'
63 #end if
64 #if $action_type.report_plotting_options.tr_list_file:
65 --tr-list '$action_type.report_plotting_options.tr_list_file'
66 #end if
67 #if $action_type.report_plotting_options.list_tr_biotypes:
68 --tr-types '$action_type.report_plotting_options.list_tr_biotypes'
69 #end if
70 --upset-plot-min-degree $action_type.report_plotting_options.upset_plot_min_degree
71 #if $action_type.report_plotting_options.upset_plot_max_degree:
72 --upset-plot-max-degree '$action_type.report_plotting_options.upset_plot_max_degree'
73 #end if
74 --upset-plot-min-subset-size $action_type.report_plotting_options.upset_plot_min_subset_size
75
76 $action_type.search_output_options.search_report
77 $action_type.search_output_options.search_plot_motifs
78
79 #if $action_type.report_plotting_options.store_sort_js_in_html:
80 --sort-js-mode 3
81 #else:
82 --sort-js-mode 2
83 #end if
84 #if $action_type.report_plotting_options.plotly_js_source == 1:
85 #if $action_type.report_plotting_options.plotly_plot_code_in_html:
86 --plotly-js-mode 6
87 #else:
88 --plotly-js-mode 3
89 #end if
90 #elif $action_type.report_plotting_options.plotly_js_source == 2:
91 #if $action_type.report_plotting_options.plotly_plot_code_in_html:
92 --plotly-js-mode 5
93 #else:
94 --plotly-js-mode 1
95 #end if
96 #else:
97 #if $action_type.report_plotting_options.plotly_plot_code_in_html:
98 --plotly-js-mode 7
99 #else:
100 --plotly-js-mode 4
101 #end if
102 #end if
103
104 #if $action_type.search_output_options.search_report:
105 &&
106 sed -i 's/html_report_plots\///g' ./report.rbpbench_search.html
107 &&
108 cp ./report.rbpbench_search.html '$search_report_html_file'
109 &&
110 mkdir -p '$search_report_html_file.extra_files_path'
111 &&
112 cp -r ./html_report_plots/* '$search_report_html_file.extra_files_path'
113 #end if
114
115 #if $action_type.search_output_options.search_plot_motifs:
116 &&
117 sed -i 's/html_motif_plots\///g' ./motif_plots.rbpbench_search.html
118 &&
119 cp ./motif_plots.rbpbench_search.html '$motif_plots_html_file'
120 &&
121 mkdir -p '$motif_plots_html_file.extra_files_path'
122 &&
123 cp -r ./html_motif_plots/* '$motif_plots_html_file.extra_files_path'
124 #end if
125
126
127 #elif $action_type.action_type_selector == 'batch_search_motifs':
128 @PREPARE_REF@
129 rbpbench batch
130 --out ./
131 --genome reference.fa
132 --bed
133 #for $i in $action_type.dataset_inputs:
134 '$i.dataset_bed_file'
135 #end for
136 --rbp-list
137 #for $i in $action_type.dataset_inputs:
138 '$i.dataset_rbp_id'
139 #end for
140 --data-list
141 #for $i in $action_type.dataset_inputs:
142 '$i.dataset_data_id'
143 #end for
144 --method-list
145 #for $i in $action_type.dataset_inputs:
146 '$i.dataset_method_id'
147 #end for
148 @COMMON_PARAMS@
149
150 #elif $action_type.action_type_selector == 'batch_table_search_motifs':
151 @PREPARE_REF@
152 batch_table_wrapper_rbpbench.py
153 --out ./
154 --genome reference.fa
155 --table '$action_type.batch_table_file'
156 --paths
157 #for $i in $action_type.batch_table_bed_collection:
158 '$i'
159 #end for
160 --ids
161 #for $i in $action_type.batch_table_bed_collection:
162 '$i.element_identifier'
163 #end for
164 @COMMON_PARAMS@
165
166 #elif $action_type.action_type_selector == 'plot_nt_dist':
167 @PREPARE_REF@
168 rbpbench dist
169 --in '$action_type.dist_bed_file'
170 --out ./
171 --genome reference.fa
172 --cp-mode $action_type.dist_options.dist_cp_mode
173 --ext $action_type.dist_options.dist_ext
174 $action_type.dist_options.dist_plot_pdf
175
176
177 #elif $action_type.action_type_selector == 'compare_search_results':
178 rbpbench compare
179 --in
180 #for $in_file in $action_type.input_tables:
181 '$in_file'
182 #end for
183 --out ./
184 #if $action_type.compare_output_options.store_sort_js_in_html:
185 --sort-js-mode 3
186 #else:
187 --sort-js-mode 2
188 #end if
189
190 &&
191 sed -i 's/html_report_plots\///g' ./report.rbpbench_compare.html
192 &&
193 cp ./report.rbpbench_compare.html '$compare_report_html_file'
194 &&
195 mkdir -p '$compare_report_html_file.extra_files_path'
196 &&
197 cp -r ./html_report_plots/* '$compare_report_html_file.extra_files_path'
198
199 #end if
200
201 ]]></command>
202
203 <inputs>
204
205 <conditional name="action_type">
206
207 <param name="action_type_selector" type="select" label="Select RBPBench program mode">
208 <option value="search_motifs" selected="true">Search RBP binding motifs in genomic regions</option>
209 <option value="batch_search_motifs">Search RBP binding motifs in genomic regions (multiple inputs)</option>
210 <option value="batch_table_search_motifs">Search RBP binding motifs in genomic regions (data collection input)</option>
211 <option value="plot_nt_dist">Plot nucleotide distribution at genomic positions</option>
212 <option value="compare_search_results">Compare different search results</option>
213 </param>
214
215 <!-- rbpbench search -->
216 <when value="search_motifs">
217 <param name="search_bed_file" type="data" format="bed" label="Genomic regions BED file"
218 help="Genomic regions (e.g. RBP binding sites) in BED format (>= 6-columns) for RBP binding motif search"/>
219 <conditional name="reference_genome">
220 <param label="Select reference genome sequence (according to provided BED file)" name="reference_genome_selector" type="select">
221 <option selected="True" value="builtin">Select built-in genome sequence</option>
222 <option value="history">Select genome sequence from history</option>
223 </param>
224 <when value="builtin">
225 <param label="Select built-in genome sequence" name="builtin_genome" type="select">
226 <options from_data_table="fasta_indexes">
227 <filter column="2" type="sort_by" />
228 <validator message="No genomes are available for the selected input dataset" type="no_options" />
229 </options>
230 </param>
231 </when>
232 <when value="history">
233 <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/>
234 </when>
235 </conditional>
236 <conditional name="select_db">
237 <param name="select_db_selector" type="select" label="Select RBP binding motif database">
238 <option selected="True" value="default_db" >Human motif database with 259 RBPs and 605 RBP binding motifs (catrapid.omics.v2.1.human.6plus)</option>
239 <option value="custom_db" >Provide a custom motif database</option>
240 </param>
241 <when value="default_db">
242
243 <conditional name="select_rbps">
244 <param name="select_rbps_selector" type="select" label="Select RBPs for motif search">
245 <option selected="True" value="list_db_rbps" >Select individual RBPs </option>
246 <option value="all_db_rbps" >Select all 259 RBPs</option>
247 </param>
248 <when value="list_db_rbps">
249 <param name="database" label="Select RBPs" type="select" multiple="true">
250 <options from_data_table='rbp_ids_table' />
251 </param>
252 </when>
253 <when value="all_db_rbps">
254 <!-- do nothing -->
255 </when>
256 </conditional>
257 </when>
258 <when value="custom_db">
259 <param type="text" name="custom_db_id" value="custom_db_id"
260 label="Custom motif database ID"
261 help="Set ID / name for provided custom motif database (default: custom_db_id)"/>
262
263 <param type="data" format="txt,tabular" name="custom_db_info_file"
264 label="Custom motif database info table file"
265 help="Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments. The motif IDs must correspond to the provided custom MEME / DREME XML and/or covariance models file(s)."/>
266
267 <param type="data" format="memexml" name="custom_db_meme_xml_file" optional="True"
268 label="Custom motif database MEME / DREME XML file"
269 help="Provide custom motif database MEME / DREME XML file containing sequence motifs"/>
270
271 <param type="data" format="cm" name="custom_db_cm_model_file" optional="True"
272 label="Custom motif database covariance models file"
273 help="Provide custom motif database covariance models file containing structure motif(s) (i.e., covariance model(s))"/>
274
275 <param name="custom_db_rbp_ids_list" type="text" value="RBP1 RBP2 RBP3"
276 label="Specify RBP IDs from custom motif database"
277 help="Provide list of RBP IDs found in custom motif database to use for motif search. This can be a single ID, as well as several (separated by spaces, as in the example). To search using all RBPs in the custom database, simply type ALL."/>
278 </when>
279 </conditional>
280
281 <conditional name="user_rbp">
282 <param label="Add user-supplied motifs" name="user_rbp_selector" type="select">
283 <option selected="True" value="no">No</option>
284 <option value="sequence">Yes (MEME/DREME XML based sequence motif(s))</option>
285 <option value="structure">Yes (Covariance model based structure motif(s))</option>
286 </param>
287 <when value="sequence">
288 <param format="memexml" type="data" name="meme_motifs_xml"
289 label="DREME or MEME motifs XML file"
290 help="DREME or MEME output XML file containing sequence motif(s)"/>
291 <param type="text" name="seq_rbp_id" value="User_RBP"
292 label="RBP ID"
293 help="RBP ID (RBP name) for provided sequence motif(s). Make sure to provide user RBP IDs and motif IDs that are different from selected database RBP and motif IDs."/>
294 </when>
295 <when value="structure">
296 <param format="cm" type="data" name="cm_model_file"
297 label="Covariance models file"
298 help="Covariance models file containing structure motif(s)"/>
299 <param type="text" name="str_rbp_id" value="User_RBP"
300 label="RBP ID"
301 help="RBP ID (RBP name) for provided structure motif(s). Make sure to provide user RBP IDs and motif IDs that are different from selected database RBP and motif IDs."/>
302 </when>
303 <when value="no">
304 <!-- do nothing -->
305 </when>
306 </conditional>
307
308 <param type="text" name="search_method_id" value="method_id"
309 label="Method ID"
310 help="Method ID which can be used to describe the peak calling method (e.g. clipper_idr). This ID (together with data ID and set RBP ID(s)) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/>
311 <param type="text" name="search_data_id" value="data_id"
312 label="Data ID"
313 help="Data ID which can be used to describe from which cell type and/or CLIP-seq protocol the data originates (e.g. k562_eclip or pum2_k562_eclip). This ID (together with method ID and set RBP ID(s)) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/>
314
315 <section name="search_options" title="Motif search settings">
316 <param name="search_ext" type="text" value="0"
317 label="Up- and downstream extension of genomic regions"
318 help='Up- and downstream extension of genomic regions in nucleotides (nt). E.g. set to "30" to extend 30 nt on both sides, or "20,10" for different up- and downstream extension (default: 0)'/>
319 <param name="search_fimo_pval" type="float" value="0.001"
320 label="FIMO p-value threshold"
321 help='FIMO p-value threshold (FIMO option: --thresh) for reporting motif hits (default: 0.001)'/>
322 <param name="search_bed_score_col" type="integer" value="5"
323 label="BED score column used for p-value calculations"
324 help="Score column of genomic regions BED file used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)"/>
325 <param name="search_unstranded" label="Treat genomic regions in BED file as NOT strand-specific" type="boolean"
326 truevalue="--unstranded" falsevalue="" checked="False"
327 help="Set if genomic regions in BED file are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with option below) (default: False)"/>
328 <param name="search_unstranded_ct" label="Count each genomic region twice for RBP hit statistics" type="boolean"
329 truevalue="--unstranded-ct" falsevalue="" checked="False"
330 help="Count each genomic region twice for RBP hit statistics when non-strand-specific option above is enabled (default: False)"/>
331 <param format="txt" type="data" name="fimo_nt_freqs_file" optional="True"
332 label="Provide FIMO nucleotide frequencies file"
333 help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file. By default, an internal frequencies file optimized for human transcripts is used"/>
334 </section>
335
336 <section name="report_plotting_options" title="HTML report options">
337
338 <param format="gtf" type="data" name="gtf_file" optional="True"
339 label="GTF file to add genomic annotations to input regions"
340 help="Provide GTF file with genomic annotations to add to HTML report plots (e.g. from GENCODE or Ensembl). By default, the most prominent transcripts will be extracted and used for functional annotation. Alternatively, provide a list of expressed transcripts via --tr-list option (together with --gtf containing the transcripts). Note that currently only features on standard chromosomes (1,2,..,X,Y,MT) are used for annotation"/>
341 <param format="txt" type="data" name="tr_list_file" optional="True"
342 label="Transcript IDs file"
343 help="Supply file with transcript IDs (one ID per row) to define which transcripts to use from GTF file for adding functional annotations to HTML report plots"/>
344 <param name="list_tr_biotypes" type="text" optional="True"
345 label="List of transcript biotypes"
346 help="List of transcript biotypes to consider from GTF file. By default an internal selection of transcript biotypes is used (in addition to intron, CDS, UTR, intergenic). Provide a list of IDs separated by spaces. Note that provided biotype IDs need to be in GTF file!"/>
347
348 <param name="upset_plot_min_degree" type="integer" value="2"
349 label="Upset plot minimum degree parameter"
350 help="Upset plot minimum degree parameter for HTML report upset plot. This defines the minimum number of RBPs for a combination to be included (default: 2)"/>
351 <param name="upset_plot_max_degree" type="integer" value="" optional="True"
352 label="Upset plot maximum degree parameter"
353 help="Upset plot maximum degree parameter for HTML report upset plot. By default no maximum degree is set. Useful together with minimum degree to look at specific degrees (e.g. only 2, or between 2 and 3) (default: None)"/>
354 <param name="upset_plot_min_subset_size" type="integer" value="5"
355 label="Upset plot minimum subset size parameter"
356 help="Upset plot minimum subset size parameter for HTML report upset plot. This defines the minimum number of hits for a specific RBP combination to be included (default: 5)"/>
357
358 <param type="text" name="set_rbp_id" optional="True"
359 label="Set reference RBP ID for plotting motif distances"
360 help="Set reference RBP ID to plot motif distances relative to motifs from this RBP (--set-rbp-id). Motif plot will be centered on best scoring motif of the RBP for each region. Note that set RBP ID needs to be one of the above selected RBP IDs!"/>
361 <param name="motif_distance_plot_range" type="integer" value="60"
362 label="Motif distance plot range"
363 help="Set range of motif distance plot. I.e., centered on the set RBP (--set-rbp-id) motifs, motifs within minus and plus --motif-distance-plot-range will be plotted (default: 60)"/>
364 <param name="motif_min_pair_count" type="integer" value="10"
365 label="Motif co-occurrence minimum pair count"
366 help="Minimum count of co-occurrences of a motif with set RBP ID (--set-rbp-id) motif to be reported and plotted (default: 10)"/>
367 <param name="rbp_min_pair_count" type="integer" value="10"
368 label="RBP co-occurrence minimum pair count"
369 help="Minimum amount of co-occurrences of motifs for an RBP ID compared to set RBP ID (--set-rbp-id) motifs to be reported and plotted (default: 10)"/>
370
371 <param name="store_sort_js_in_html" label="Store JS code for table sorting inside HTML?"
372 type="boolean" checked="False"
373 help="Store JavaScript code for table sorting inside output HTML files. By default code is stored locally in extra file located in HTML output folder."/>
374 <param name="plotly_js_source" type="integer" value="1" min="1" max="3"
375 label="Specify plotly JS code source"
376 help="1: Store plotly JavaScript code locally inside HTML output folder. 2: Add hyperlink to report HTML file (internet connection required). 3: Store code inside report HTML file (default: 1)"/>
377 <param name="plotly_plot_code_in_html" label="Store plotly plotting code inside HTML?"
378 type="boolean" checked="False"
379 help="Store plotly plotting code inside HTML. By default code is stored in separate HTML files in HTML report output folder."/>
380
381 </section>
382
383 <section name="search_output_options" title="Output options">
384 <param name="search_report" label="Output HTML report?" type="boolean"
385 truevalue="--report" falsevalue="" checked="True"
386 help="Generate an HTML report containing RBP co-occurrence + combination + distance statistics and plots (default: True)"/>
387 <param name="search_plot_motifs" label="Plot RBP motifs?" type="boolean"
388 truevalue="--plot-motifs" falsevalue="" checked="False"
389 help="Visualize selected RBP motifs, by outputting sequence logos and motif hit statistics into a separate HTML file (default: False)"/>
390 <param name="sites_bed_fasta_out" label="Output filtered genomic regions BED + FASTA files" type="boolean"
391 checked="False"
392 help="Output filtered genomic regions BED/FASTA file used for motif search. Filtered means that the actual regions used for motif search can differ from the input genomic regions, e.g. through default filtering by chromosome ID (only regions with valid IDs), removal of duplicated regions, or through optional extension of the regions"/>
393 <param name="motif_hits_bed_out" label="Output motif hits BED file" type="boolean"
394 checked="False"
395 help="Output motif hits BED file containing motif hits in provided genomic regions for selected RBPs"/>
396 <param name="contingency_table_out" label="Output contingency table containing co-occurrence p-values" type="boolean"
397 checked="False"
398 help="Output contingency table containing co-occurrence p-values (Fisher's exact test) between each RBP pair (see manual for more information)"/>
399 <param name="region_annotations_out" label="Output genomic region annotations table file?"
400 type="boolean" checked="False"
401 help="Output genomic region annotations table file containing assigned annotations for each BED input region. Note that a GTF file has to be provided (default: False)"/>
402 </section>
403 </when>
404
405 <!-- rbpbench batch -->
406 <when value="batch_search_motifs">
407 <repeat name="dataset_inputs" min="1" title="Dataset">
408 <param name="dataset_bed_file" type="data" format="bed"
409 label="Genomic regions BED file"
410 help="Genomic regions (e.g. RBP binding sites) in BED format (>= 6-columns) for RBP binding motif search"/>
411 <param name="dataset_rbp_id" label="Select RBP for motif search" type="select">
412 <options from_data_table='rbp_ids_table' />
413 </param>
414 <param type="text" name="dataset_method_id" value="method_id"
415 label="Method ID"
416 help="Method ID which can be used to describe the peak calling method (e.g. clipper_idr). This ID (together with data ID and set RBP ID) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/>
417 <param type="text" name="dataset_data_id" value="data_id"
418 label="Data ID"
419 help="Data ID which can be used to describe from which cell type and/or CLIP-seq protocol the data originates (e.g. k562_eclip or pum2_k562_eclip). This ID (together with method ID and set RBP ID) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/>
420 </repeat>
421
422 <conditional name="reference_genome">
423 <param label="Select reference genome sequence (according to the provided BED files)" name="reference_genome_selector" type="select">
424 <option selected="True" value="builtin">Select built-in genome sequence</option>
425 <option value="history">Select genome sequence from history</option>
426 </param>
427 <when value="builtin">
428 <param label="Select built-in genome sequence" name="builtin_genome" type="select">
429 <options from_data_table="fasta_indexes">
430 <filter column="2" type="sort_by" />
431 <validator message="No genomes are available for the selected input dataset" type="no_options" />
432 </options>
433 </param>
434 </when>
435 <when value="history">
436 <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/>
437 </when>
438 </conditional>
439
440 <section name="search_options" title="Motif search settings">
441 <param name="search_ext" type="text" value="0"
442 label="Up- and downstream extension of genomic regions"
443 help='Up- and downstream extension of genomic regions in nucleotides (nt). E.g. set to "30" to extend 30 nt on both sides, or "20,10" for different up- and downstream extension (default: 0)'/>
444 <param name="search_fimo_pval" type="float" value="0.001"
445 label="FIMO p-value threshold"
446 help='FIMO p-value threshold (FIMO option: --thresh) for reporting motif hits (default: 0.001)'/>
447 <param name="search_bed_score_col" type="integer" value="5"
448 label="BED score column used for p-value calculations"
449 help="Score column of genomic regions BED files used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)"/>
450 <param name="search_unstranded" label="Treat genomic regions in BED file as NOT strand-specific" type="boolean"
451 truevalue="--unstranded" falsevalue="" checked="False"
452 help="Set if genomic regions in BED files are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with option below) (default: False)"/>
453 <param name="search_unstranded_ct" label="Count each genomic region twice for RBP hit statistics" type="boolean"
454 truevalue="--unstranded-ct" falsevalue="" checked="False"
455 help="Count each genomic region twice for RBP hit statistics when non-strand-specific option above is enabled (default: False)"/>
456 <param format="txt" type="data" name="fimo_nt_freqs_file" optional="True"
457 label="Provide FIMO nucleotide frequencies file"
458 help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file. By default, an internal frequencies file optimized for human transcripts is used"/>
459 </section>
460
461 <section name="search_output_options" title="Output options">
462 <param name="batch_motif_hits_bed_out" label="Output motif hits BED file" type="boolean"
463 checked="False"
464 help="Output motif hits BED file containing motif hits for all input datasets"/>
465 </section>
466
467 </when>
468
469 <!-- rbpbench batch data collection + table -->
470 <when value="batch_table_search_motifs">
471
472 <param name="batch_table_bed_collection" type="data_collection" collection_type="list" format="bed"
473 label="Data collection containing genomic regions BED files"
474 help="Data collection containing genomic regions BED files to be processed. Note that dataset names inside collection must correspond to names given in the batch processing table file below"/>
475
476 <param name="batch_table_file" type="data" format="txt,tabular"
477 label="Provide batch processing table file"
478 help="Provide batch processing table file with one row for each batch job. Each row contains the tab-delimited information: RBP ID (RBP name), method ID, data ID, dataset name. The dataset name must be present in the supplied data collection of BED files"/>
479
480 <conditional name="reference_genome">
481 <param label="Select reference genome sequence (according to the provided BED files)" name="reference_genome_selector" type="select">
482 <option selected="True" value="builtin">Select built-in genome sequence</option>
483 <option value="history">Select genome sequence from history</option>
484 </param>
485 <when value="builtin">
486 <param label="Select built-in genome sequence" name="builtin_genome" type="select">
487 <options from_data_table="fasta_indexes">
488 <filter column="2" type="sort_by" />
489 <validator message="No genomes are available for the selected input dataset" type="no_options" />
490 </options>
491 </param>
492 </when>
493 <when value="history">
494 <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/>
495 </when>
496 </conditional>
497
498 <section name="search_options" title="Motif search settings">
499 <param name="search_ext" type="text" value="0"
500 label="Up- and downstream extension of genomic regions"
501 help='Up- and downstream extension of genomic regions in nucleotides (nt). E.g. set to "30" to extend 30 nt on both sides, or "20,10" for different up- and downstream extension (default: 0)'/>
502 <param name="search_fimo_pval" type="float" value="0.001"
503 label="FIMO p-value threshold"
504 help='FIMO p-value threshold (FIMO option: --thresh) for reporting motif hits (default: 0.001)'/>
505 <param name="search_bed_score_col" type="integer" value="5"
506 label="BED score column used for p-value calculations"
507 help="Score column of genomic regions BED files used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)"/>
508 <param name="search_unstranded" label="Treat genomic regions in BED file as NOT strand-specific" type="boolean"
509 truevalue="--unstranded" falsevalue="" checked="False"
510 help="Set if genomic regions in BED files are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with option below) (default: False)"/>
511 <param name="search_unstranded_ct" label="Count each genomic region twice for RBP hit statistics" type="boolean"
512 truevalue="--unstranded-ct" falsevalue="" checked="False"
513 help="Count each genomic region twice for RBP hit statistics when non-strand-specific option above is enabled (default: False)"/>
514 <param format="txt" type="data" name="fimo_nt_freqs_file" optional="True"
515 label="Provide FIMO nucleotide frequencies file"
516 help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file. By default, an internal frequencies file optimized for human transcripts is used"/>
517 </section>
518
519 <section name="search_output_options" title="Output options">
520 <param name="batch_table_motif_hits_bed_out" label="Output motif hits BED file" type="boolean"
521 checked="False"
522 help="Output motif hits BED file containing motif hits for all input datasets"/>
523 </section>
524
525 </when>
526
527 <!-- rbpbench dist -->
528 <when value="plot_nt_dist">
529
530 <param name="dist_bed_file" type="data" format="bed"
531 label="Genomic regions BED file"
532 help="Genomic regions (e.g. RBP binding sites) in BED format. Zero position for plotting can be defined in options"/>
533
534 <conditional name="reference_genome">
535 <param label="Select reference genome sequence (according to BED file)" name="reference_genome_selector" type="select">
536 <option selected="True" value="history">Select genome sequence from history</option>
537 <option value="builtin">Select built-in genome sequence</option>
538 </param>
539 <when value="history">
540 <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/>
541 </when>
542 <when value="builtin">
543 <param label="Select built-in genome sequence" name="builtin_genome" type="select">
544 <options from_data_table="fasta_indexes">
545 <filter column="2" type="sort_by" />
546 <validator message="No genomes are available for the selected input dataset" type="no_options" />
547 </options>
548 </param>
549 </when>
550 </conditional>
551
552 <section name="dist_options" title="Nucleotide distribution plot settings">
553 <param name="dist_cp_mode" type="integer" value="1" min="1" max="3"
554 label="Define zero position for plotting"
555 help="Define which position of genomic sites to use as zero position for plotting. 1: upstream end position, 2: center position, 3: downstream end position (default: 1)"/>
556 <param name="dist_ext" type="integer" value="10"
557 label="Up- and downstream extension of defined genomic positions"
558 help="Up- and downstream extension of defined genomic positions in nucleotides (nt) to include in plotting (default: 10)"/>
559 <param name="dist_plot_pdf" label="Plot as PDF?" type="boolean"
560 truevalue="--plot-pdf" falsevalue="" checked="False"
561 help="Plot nucleotide distribution as PDF (default: PNG)"/>
562 <param name="sites_bed_fasta_out" label="Output genomic regions BED + FASTA files" type="boolean"
563 checked="False"
564 help="Output genomic regions BED/FASTA file used for plotting"/>
565 </section>
566
567 </when>
568
569 <!-- rbpbench compare: consumes hit statistics tables of earlier search runs; optional outputs are toggled in compare_output_options -->
570 <when value="compare_search_results">
571
572 <param name="input_tables" type="data" format="tabular" multiple="true"
573 label="Motif search results"
574 help="Supply motif search results table files for comparison. These are the hit statistics table files output by single or batch motif search jobs. Both RBP and motif hit statistics table files are needed, and can be from any single or batch search job."/>
575
576 <section name="compare_output_options" title="Output options">
577 <param name="compared_motif_hits_table" label="Output compared motif hits table file" type="boolean"
578 checked="False"
579 help="Output compared motif hits table file showing motif hits and which data or method IDs contain them"/>
580 <param name="compared_motif_hits_bed" label="Output compared motif hits BED file" type="boolean"
581 checked="False"
582 help="Output compared motif hits BED file showing motif hits and which data or method IDs contain them"/>
583 <param name="comparisons_stats_out" label="Output comparison statistics table file" type="boolean"
584 checked="False"
585 help="Output comparison statistics table file containing the statistics found in the comparison HTML report"/>
586 <param name="store_sort_js_in_html" label="Store JS code for table sorting inside HTML?"
587 type="boolean" checked="False"
588 help="Store JavaScript code for table sorting inside output HTML files. By default code is stored locally in extra file located in HTML output folder."/>
589 </section>
590 </when>
591
592 </conditional>
593
594 </inputs>
595
596 <outputs>
597
598 <!-- Outputs of the single-input motif search (rbpbench search); optional files depend on search_output_options -->
599 <data name="rbp_hit_stats_file" format="tabular" label="${tool.name} on ${on_string}: RBP hit statistics table file" from_work_dir="rbp_hit_stats.tsv">
600 <filter>action_type["action_type_selector"] == "search_motifs"</filter>
601 </data>
602 <data name="motif_hit_stats_file" format="tabular" label="${tool.name} on ${on_string}: Motif hit statistics table file" from_work_dir="motif_hit_stats.tsv">
603 <filter>action_type["action_type_selector"] == "search_motifs"</filter>
604 </data>
605 <data name="search_report_html_file" format="html" label="${tool.name} on ${on_string}: Search report HTML file" from_work_dir="report.rbpbench_search.html">
606 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["search_report"]</filter>
607 </data>
608 <data name="motif_plots_html_file" format="html" label="${tool.name} on ${on_string}: Motif plots HTML file" from_work_dir="motif_plots.rbpbench_search.html">
609 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["search_plot_motifs"]</filter>
610 </data>
611 <data name="in_sites_bed_file" format="bed" label="${tool.name} on ${on_string}: Genomic regions used for motif search BED file" from_work_dir="in_sites.filtered.bed">
612 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["sites_bed_fasta_out"]</filter>
613 </data>
614 <data name="in_sites_fa_file" format="fasta" label="${tool.name} on ${on_string}: Genomic regions used for motif search FASTA file" from_work_dir="in_sites.filtered.fa">
615 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["sites_bed_fasta_out"]</filter>
616 </data>
617 <data name="motif_hits_bed_file" format="bed" label="${tool.name} on ${on_string}: Motif hits on genomic regions BED file" from_work_dir="motif_hits.rbpbench_search.bed">
618 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["motif_hits_bed_out"]</filter>
619 </data>
620 <data name="contingency_table_file" format="tabular" label="${tool.name} on ${on_string}: RBP co-occurrence contingency table file" from_work_dir="contingency_table_results.tsv">
621 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["contingency_table_out"]</filter>
622 </data>
623 <data name="region_annotations_file" format="tabular" label="${tool.name} on ${on_string}: genomic region annotations table file" from_work_dir="region_annotations.tsv">
624 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["region_annotations_out"] and action_type["report_plotting_options"]["gtf_file"]</filter>
625 </data>
626
627 <!-- Outputs of the multiple-inputs batch search (rbpbench batch); the BED file is optional -->
628 <data name="batch_rbp_hit_stats_file" format="tabular" label="${tool.name} on ${on_string}: Batch RBP hit statistics table file" from_work_dir="rbp_hit_stats.tsv">
629 <filter>action_type["action_type_selector"] == "batch_search_motifs"</filter>
630 </data>
631 <data name="batch_motif_hit_stats_file" format="tabular" label="${tool.name} on ${on_string}: Batch motif hit statistics table file" from_work_dir="motif_hit_stats.tsv">
632 <filter>action_type["action_type_selector"] == "batch_search_motifs"</filter>
633 </data>
634 <data name="batch_motif_hits_bed_file" format="bed" label="${tool.name} on ${on_string}: Batch motif hits on genomic regions BED files" from_work_dir="motif_hits.rbpbench_batch.bed">
635 <filter>action_type["action_type_selector"] == "batch_search_motifs" and action_type["search_output_options"]["batch_motif_hits_bed_out"]</filter>
636 </data>
637
638 <!-- Outputs of the table-driven batch search over a dataset collection; the BED file is optional -->
639 <data name="batch_table_rbp_hit_stats_file" format="tabular" label="${tool.name} on ${on_string}: Batch data collection RBP hit statistics table file" from_work_dir="rbp_hit_stats.tsv">
640 <filter>action_type["action_type_selector"] == "batch_table_search_motifs"</filter>
641 </data>
642 <data name="batch_table_motif_hit_stats_file" format="tabular" label="${tool.name} on ${on_string}: Batch data collection motif hit statistics table file" from_work_dir="motif_hit_stats.tsv">
643 <filter>action_type["action_type_selector"] == "batch_table_search_motifs"</filter>
644 </data>
645 <data name="batch_table_motif_hits_bed_file" format="bed" label="${tool.name} on ${on_string}: Motif hits on genomic regions BED files from data collection" from_work_dir="motif_hits.rbpbench_batch.bed">
646 <filter>action_type["action_type_selector"] == "batch_table_search_motifs" and action_type["search_output_options"]["batch_table_motif_hits_bed_out"]</filter>
647 </data>
648
649 <!-- Outputs of the nucleotide distribution plot mode: PNG or PDF depending on dist_plot_pdf, plus optional BED/FASTA of the plotted regions -->
650 <data name="nt_dist_plot_png_file" format="png" label="${tool.name} on ${on_string}: Nucleotide distribution plot PNG file" from_work_dir="nt_dist_zero_pos.png">
651 <filter>action_type["action_type_selector"] == "plot_nt_dist" and not action_type["dist_options"]["dist_plot_pdf"]</filter>
652 </data>
653 <data name="nt_dist_plot_pdf_file" format="pdf" label="${tool.name} on ${on_string}: Nucleotide distribution plot PDF file" from_work_dir="nt_dist_zero_pos.pdf">
654 <filter>action_type["action_type_selector"] == "plot_nt_dist" and action_type["dist_options"]["dist_plot_pdf"]</filter>
655 </data>
656 <data name="plot_sites_bed_file" format="bed" label="${tool.name} on ${on_string}: Genomic regions used for plotting BED file" from_work_dir="in_sites.filtered.bed">
657 <filter>action_type["action_type_selector"] == "plot_nt_dist" and action_type["dist_options"]["sites_bed_fasta_out"]</filter>
658 </data>
659 <data name="plot_sites_fa_file" format="fasta" label="${tool.name} on ${on_string}: Genomic regions used for plotting FASTA file" from_work_dir="in_sites.filtered.fa">
660 <filter>action_type["action_type_selector"] == "plot_nt_dist" and action_type["dist_options"]["sites_bed_fasta_out"]</filter>
661 </data>
662
663 <!-- rbpbench compare outputs; the optional files are gated by the booleans of the compare_output_options section -->
664 <data name="compare_report_html_file" format="html" from_work_dir="report.rbpbench_compare.html" label="${tool.name} on ${on_string}: Comparison report HTML file">
665 <filter>action_type["action_type_selector"] == "compare_search_results"</filter>
666 </data>
667 <data name="compared_motif_hits_bed_file" format="bed" from_work_dir="motif_hits.rbpbench_compare.bed" label="${tool.name} on ${on_string}: Compared motif hits BED file">
668 <filter>action_type["action_type_selector"] == "compare_search_results" and action_type["compare_output_options"]["compared_motif_hits_bed"]</filter>
669 </data>
670 <data name="compared_motif_hits_table_file" format="tabular" from_work_dir="motif_hits.rbpbench_compare.tsv" label="${tool.name} on ${on_string}: Compared motif hits table file">
671 <filter>action_type["action_type_selector"] == "compare_search_results" and action_type["compare_output_options"]["compared_motif_hits_table"]</filter>
672 </data>
673 <data name="compared_stats_table_file" format="tabular" from_work_dir="comparison_stats.rbpbench_compare.tsv" label="${tool.name} on ${on_string}: Comparison statistics table file">
674 <filter>action_type["action_type_selector"] == "compare_search_results" and action_type["compare_output_options"]["comparisons_stats_out"]</filter>
675 </data>
676
677 </outputs>
678
679 <tests>
680
681 <!-- rbpbench search: PUM1 + PUM2 picked from the database list, genome taken from the history, motif plots disabled -->
682 <test expect_num_outputs="7">
683 <param name="action_type_selector" value="search_motifs"/>
684 <param name="search_bed_file" ftype="bed" value="test.bed"/>
685 <param name="reference_genome_selector" value="history"/>
686 <param name="history_genome" value="test.fa"/>
687 <param name="select_rbps_selector" value="list_db_rbps"/>
688 <param name="database" value="PUM1,PUM2"/>
689 <param name="search_report" value="True"/>
690 <param name="search_plot_motifs" value="False"/>
691 <param name="sites_bed_fasta_out" value="True"/>
692 <param name="motif_hits_bed_out" value="True"/>
693 <param name="contingency_table_out" value="True"/>
694 <output name="rbp_hit_stats_file" compare="sim_size" file="rbp_hit_stats.rbpbench_search.tsv"/>
695 <output name="motif_hit_stats_file" compare="sim_size" file="motif_hit_stats.rbpbench_search.tsv"/>
696 <output name="search_report_html_file" compare="sim_size" file="report.rbpbench_search.html"/>
697 <output name="in_sites_bed_file" file="in_sites.filtered.rbpbench_search.bed"/>
698 <output name="in_sites_fa_file" file="in_sites.filtered.rbpbench_search.fa"/>
699 <output name="motif_hits_bed_file" file="motif_hits.rbpbench_search.bed"/>
700 <output name="contingency_table_file" file="contingency_table_results.rbpbench_search.tsv"/>
701 </test>
702 <!-- rbpbench search: same run as above, but with the built-in genome entry "testid" -->
703 <test expect_num_outputs="7">
704 <param name="action_type_selector" value="search_motifs"/>
705 <param name="search_bed_file" ftype="bed" value="test.bed"/>
706 <param name="reference_genome_selector" value="builtin"/>
707 <param name="builtin_genome" value="testid"/>
708 <param name="select_rbps_selector" value="list_db_rbps"/>
709 <param name="database" value="PUM1,PUM2"/>
710 <param name="search_report" value="True"/>
711 <param name="search_plot_motifs" value="False"/>
712 <param name="sites_bed_fasta_out" value="True"/>
713 <param name="motif_hits_bed_out" value="True"/>
714 <param name="contingency_table_out" value="True"/>
715 <output name="rbp_hit_stats_file" compare="sim_size" file="rbp_hit_stats.rbpbench_search.tsv"/>
716 <output name="motif_hit_stats_file" compare="sim_size" file="motif_hit_stats.rbpbench_search.tsv"/>
717 <output name="search_report_html_file" compare="sim_size" file="report.rbpbench_search.html"/>
718 <output name="in_sites_bed_file" file="in_sites.filtered.rbpbench_search.bed"/>
719 <output name="in_sites_fa_file" file="in_sites.filtered.rbpbench_search.fa"/>
720 <output name="motif_hits_bed_file" file="motif_hits.rbpbench_search.bed"/>
721 <output name="contingency_table_file" file="contingency_table_results.rbpbench_search.tsv"/>
722 </test>
723 <!-- rbpbench search: user-supplied structure motif (covariance model), report disabled, so two statistics tables are expected -->
724 <test expect_num_outputs="2">
725 <param name="action_type_selector" value="search_motifs"/>
726 <param name="search_bed_file" ftype="bed" value="test.slbp_user.bed"/>
727 <param name="reference_genome_selector" value="history"/>
728 <param name="history_genome" value="test.slbp_user.fa"/>
729 <param name="user_rbp_selector" value="structure"/>
730 <param name="str_rbp_id" value="SLBP_USER"/>
731 <param name="cm_model_file" value="SLBP_USER.cm"/>
732 <param name="search_report" value="False"/>
733 <output name="rbp_hit_stats_file" compare="sim_size" file="rbp_hit_stats.rbpbench_search.slbp_user.tsv"/>
734 <output name="motif_hit_stats_file" compare="sim_size" file="motif_hit_stats.rbpbench_search.slbp_user.tsv"/>
735 </test>
736 <test expect_num_outputs="3"> <!-- all database RBPs; besides the two statistics tables only the motif hits BED is requested -->
737 <param name="action_type_selector" value="search_motifs"/>
738 <param name="search_bed_file" ftype="bed" value="test.bed"/>
739 <param name="reference_genome_selector" value="history"/>
740 <param name="history_genome" value="test.fa"/>
741 <param name="select_db_selector" value="default_db"/>
742 <param name="select_rbps_selector" value="all_db_rbps"/>
743 <param name="search_report" value="False"/>
744 <param name="search_plot_motifs" value="False"/>
745 <param name="sites_bed_fasta_out" value="False"/>
746 <param name="motif_hits_bed_out" value="True"/>
747 <param name="contingency_table_out" value="False"/>
748 <output name="motif_hits_bed_file" file="motif_hits.rbpbench_search.test_all.bed"/>
749 </test>
750 <test expect_num_outputs="4"> <!-- GTF supplied: report and region annotations table on top of the two statistics tables -->
751 <param name="action_type_selector" value="search_motifs"/>
752 <param name="search_bed_file" ftype="bed" value="test.bed"/>
753 <param name="gtf_file" ftype="gtf" value="test_search.gtf"/>
754 <param name="reference_genome_selector" value="history"/>
755 <param name="history_genome" value="test.fa"/>
756 <param name="select_db_selector" value="default_db"/>
757 <param name="select_rbps_selector" value="all_db_rbps"/>
758 <param name="search_report" value="True"/>
759 <param name="search_plot_motifs" value="False"/>
760 <param name="sites_bed_fasta_out" value="False"/>
761 <param name="motif_hits_bed_out" value="False"/>
762 <param name="contingency_table_out" value="False"/>
763 <param name="region_annotations_out" value="True"/>
764 <output name="region_annotations_file" file="test_search_gtf.region_annotations.tsv"/>
765 </test>
766 <test expect_num_outputs="3"> <!-- custom motif database: info table, MEME sequence motifs and covariance models supplied as files -->
767 <param name="action_type_selector" value="search_motifs"/>
768 <param name="search_bed_file" ftype="bed" value="test.bed"/>
769 <param name="reference_genome_selector" value="history"/>
770 <param name="history_genome" value="test.fa"/>
771 <param name="select_db_selector" value="custom_db"/>
772 <param name="custom_db_id" value="custom_yo"/>
773 <param name="custom_db_info_file" value="test_custom.info.txt"/>
774 <param name="custom_db_meme_xml_file" value="test_custom.seq_motifs.meme"/>
775 <param name="custom_db_cm_model_file" value="test_custom.str_motifs.cm"/>
776 <param name="custom_db_rbp_ids_list" value="PUM1 PUM2 SLBP"/>
777 <param name="search_report" value="False"/>
778 <param name="search_plot_motifs" value="False"/>
779 <param name="sites_bed_fasta_out" value="False"/>
780 <param name="motif_hits_bed_out" value="True"/>
781 <param name="contingency_table_out" value="False"/>
782 <param name="region_annotations_out" value="False"/>
783 <output name="motif_hits_bed_file" file="test_custom.motif_hits.rbpbench_search.bed"/>
784 </test>
785
786 <!-- rbpbench batch tests: two repeat entries on the same BED file; ftype is pinned to bed like in the search tests instead of relying on sniffing -->
787 <test expect_num_outputs="3">
788 <param name="action_type_selector" value="batch_search_motifs"/>
789 <param name="reference_genome_selector" value="history" />
790 <param name="history_genome" value="test.fa" />
791 <repeat name="dataset_inputs">
792 <param name="dataset_bed_file" value="test.bed" ftype="bed"/>
793 <param name="dataset_method_id" value="method-id1" />
794 <param name="dataset_data_id" value="data-id1" />
795 <param name="dataset_rbp_id" value="PUM1" />
796 </repeat>
797 <repeat name="dataset_inputs">
798 <param name="dataset_bed_file" value="test.bed" ftype="bed"/>
799 <param name="dataset_method_id" value="method-id2" />
800 <param name="dataset_data_id" value="data-id2" />
801 <param name="dataset_rbp_id" value="PUM2" />
802 </repeat>
803 <param name="batch_motif_hits_bed_out" value="True"/>
804 <output name="batch_rbp_hit_stats_file" file="rbp_hit_stats.test_batch.tsv" compare="sim_size"/>
805 <output name="batch_motif_hit_stats_file" file="motif_hit_stats.test_batch.tsv" compare="sim_size"/>
806 <output name="batch_motif_hits_bed_file" file="motif_hits.rbpbench_batch.test_batch.bed"/>
807 </test>
808
809 <!-- rbpbench batch table tests: BED files come from a list collection (ftype pinned to bed), run settings from test_table.txt -->
810 <test expect_num_outputs="3">
811 <param name="action_type_selector" value="batch_table_search_motifs"/>
812 <param name="reference_genome_selector" value="history" />
813 <param name="history_genome" value="test.fa" />
814 <param name="batch_table_bed_collection">
815 <collection type="list">
816 <element name="test1.bed" value="test1.bed" ftype="bed"/>
817 <element name="test2.bed" value="test2.bed" ftype="bed"/>
818 </collection>
819 </param>
820 <param name="batch_table_file" value="test_table.txt"/>
821 <param name="batch_table_motif_hits_bed_out" value="True"/>
822 <output name="batch_table_rbp_hit_stats_file" file="rbp_hit_stats.table_test.tsv" compare="sim_size"/>
823 <output name="batch_table_motif_hit_stats_file" file="motif_hit_stats.table_test.tsv" compare="sim_size"/>
824 <output name="batch_table_motif_hits_bed_file" file="motif_hits.rbpbench_batch.table_test.bed"/>
825 </test>
826
827 <!-- rbpbench compare: RBP + motif hit statistics tables of two methods as input, all optional outputs switched on -->
828 <test expect_num_outputs="4">
829 <param name="action_type_selector" value="compare_search_results"/>
830 <param name="input_tables" ftype="tabular" value="rbp_hit_stats.compare_test.dewseq.tsv,rbp_hit_stats.compare_test.clipper_idr.tsv,motif_hit_stats.compare_test.dewseq.tsv,motif_hit_stats.compare_test.clipper_idr.tsv"/>
831 <param name="compared_motif_hits_bed" value="True"/>
832 <param name="compared_motif_hits_table" value="True"/>
833 <param name="comparisons_stats_out" value="True"/>
834 <output name="compare_report_html_file" compare="sim_size" file="report.rbpbench_compare.test.html"/>
835 <output name="compared_motif_hits_bed_file" file="motif_hits.rbpbench_compare.test.bed"/>
836 <output name="compared_motif_hits_table_file" file="motif_hits.rbpbench_compare.test.tsv"/>
837 <output name="compared_stats_table_file" file="comparison_stats.rbpbench_compare.test.tsv"/>
838 </test>
839
840 </tests>
841 <help><![CDATA[
842
843
844 **What is RBPBench?**
845
846
847 RBPBench_ is a multi-function tool to evaluate CLIP-seq and other genomic region
848 data using a comprehensive collection of known RNA-binding protein (RBP) binding motifs.
849 RBPBench can be used for a variety of purposes, from RBP motif search (database or
850 user-supplied RBPs) in genomic regions, over motif co-occurrence analysis, to benchmarking
851 CLIP-seq peak caller methods as well as comparisons across cell types and
852 CLIP-seq protocols.
853
854 -----
855
856 **RBPBench program modes**
857
858 RBPBench on Galaxy provides the following main functions (Choose on top via "Select RBPBench program mode"):
859
860 1) Search RBP binding motifs in genomic regions
861 2) Search RBP binding motifs in genomic regions (multiple inputs)
862 3) Search RBP binding motifs in genomic regions (data collection input)
863 4) Plot nucleotide distribution at genomic positions
864 5) Compare different search results
865
866
867 **1. Search RBP binding motifs in genomic regions**
868
869 In this mode we can select any number of RBPs of interest and search for RBP binding motifs in a given
870 set of genomic regions (*Genomic regions BED file*). A built-in high-quality database of human RBP binding motifs
871 (currently containing 259 RBPs and 605 motifs) is used by default. Moreover, users can add their own motifs
872 (*Add user-supplied motifs*), as well as define their own database (*Provide a custom RBP motif database*).
873 Both sequence (MEME/DREME XML format) and structure motifs (covariance models) are supported.
874 Comprehensive hit statistics (both on RBP and single motif level) are output as table files,
875 together with an informative HTML report containing various plots and tables
876 (see Output options to control what files are output).
877 Hit statistics output table formats are described in the RBPBench documentation_.
878 The HTML report includes statistics for each RBP on enrichment of motifs in higher scoring regions,
879 as well as a heatmap of RBP co-occurrences in genomic regions, and an upset plot
880 on present RBP combinations (*HTML report options* for finetuning).
881 If a GTF file is provided (*HTML report options -> GTF file*), genomic region annotations are also added to the regions and plots.
882 Furthermore, motif distances (RBP and motif level) can be plotted relative to a set reference RBP
883 (*HTML report options -> Set reference RBP ID*).
884 Motif search settings can be adapted, e.g. to apply up- and/or downstream extension to the genomic regions
885 before search. Motifs for selected RBPs can also be plotted in a separate HTML file (*Output options -> Plot RBP motifs*).
886 To compare motif search results (mode: *Compare different search results*),
887 data ID and method ID can be set accordingly (more details in sections 2, 3, and 5).
888
889
890 **2. Search RBP binding motifs in genomic regions (multiple inputs)**
891
892 This mode allows the input of more than one set of genomic regions (via *+ Insert Dataset*).
893 For each input, an RBP for motif search needs to be selected. Optionally (for comparing
894 different search results), descriptive data + method IDs can be added (also see *Compare different search results*).
895 For example, if two different peak calling methods (method1, method2) have been used to
896 extract RBP binding regions from CLIP-seq data of RBP RBPX, and we want to compare these two methods later on, we would:
897 *+ Insert Dataset*: input the set (i.e., BED file) produced by method1, choose the CLIP-ped RBP (RBPX) + add method ID "method1".
898 *+ Insert Dataset*: input the set produced by method2, again choose RBPX, and add method ID "method2".
899 The data ID we keep constant, ideally choosing an ID that describes the data (e.g. cell type, CLIP-seq protocol, CLIP-ped RBP).
900 For example, if the cell type is K562, and the CLIP-seq protocol is eCLIP, we could specify
901 the data ID "K562_eCLIP" or "RBPX_K562_eCLIP". We can repeat this for other proteins by
902 adding the respective inputs. Finally, for comparing the two methods,
903 all we need to do is to use the two produced hit statistics output tables (RBP + motif hit statistics)
904 as inputs in *Compare different search results* mode.
905 The same also works the other way around, by keeping the method ID constant and changing the data ID.
906 For example, if we want to compare motif search results across different cell types, we can use
907 different data IDs while keeping the method ID constant.
908
909
910 **3. Search RBP binding motifs in genomic regions (data collection input)**
911
912 This mode is identical to the previous one (multiple inputs), except that instead of
913 manually defining each input (dataset, RBP, method ID, data ID), we simply
914 input a table containing all the information, as well as a dataset collection containing the datasets.
915 It is thus the preferable mode if we want to compare a large number of datasets
916 (concept of comparing sets via method ID and data ID described in the previous section).
917 The input table (batch processing table file) has the following format
918 (tab-separated columns: RBP ID, method ID, data ID, BED genomic regions file name):
919
920 ========== ============ =============== =============================
921 PUM1 method1 K562_eCLIP PUM1.K562_eclip.method1.bed
922 PUM1 method2 K562_eCLIP PUM1.K562_eclip.method2.bed
923 PUM1 method3 K562_eCLIP PUM1.K562_eclip.method3.bed
924 PUM2 method1 K562_eCLIP PUM2.K562_eclip.method1.bed
925 PUM2 method2 K562_eCLIP PUM2.K562_eclip.method2.bed
926 PUM2 method3 K562_eCLIP PUM2.K562_eclip.method3.bed
927 SLBP method1 K562_eCLIP SLBP.K562_eclip.method1.bed
928 SLBP method2 K562_eCLIP SLBP.K562_eclip.method2.bed
929 SLBP method3 K562_eCLIP SLBP.K562_eclip.method3.bed
930 ========== ============ =============== =============================
931
932 NOTE that the table file name needs to correspond to the name of the dataset inside the
933 dataset collection. Conveniently, if you upload files to Galaxy and make a dataset collection out of them,
934 the dataset names will correspond to the uploaded file names.
935 In the above table, we would produce search results for three different
936 methods, on three different RBPs.
937 Likewise, if we wanted to compare motif search results across cell types,
938 the table could look like this:
939
940 ========== ============ =============== =============================
941 PUM1 method1 K562_eCLIP PUM1.K562_eclip.method1.bed
942 PUM1 method1 HepG2_eCLIP PUM1.HepG2_eclip.method1.bed
943 PUM2 method1 K562_eCLIP PUM2.K562_eclip.method1.bed
944 PUM2 method1 HepG2_eCLIP PUM2.HepG2_eclip.method1.bed
945 SLBP method1 K562_eCLIP SLBP.K562_eclip.method1.bed
946 SLBP method1 HepG2_eCLIP SLBP.HepG2_eclip.method1.bed
947 ========== ============ =============== =============================
948
949 Here we would create motif search results across cell types K562 and HepG2, while keeping the peak calling
950 method ID constant ("method1").
951 As with the two already discussed search modes,
952 the resulting hit statistics output table files (RBP + motif hit statistics)
953 can subsequently serve as inputs to RBPBench's comparison mode (*Compare different search results*, section 5).
954
955
956 **4. Plot nucleotide distribution at genomic positions**
957
958 In this mode, a set of genomic regions is input and the nucleotide distribution is plotted
959 around a defined zero position (*Nucleotide distribution plot settings -> Define zero position for plotting*). By default,
960 the upstream end position of each region is used (other choices are center and downstream end).
961 This for example enables us to look at CLIP-seq crosslink positions and potential nucleotide biases at these sites.
962
963
964 **5. Compare different search results**
965
966 This mode is used to compare different motif search results (produced by any of the three motif search modes
967 described above). Inputs are the RBP and motif hit statistics table files output by the motif search modes.
968 As exemplified in the previous sections, the set method IDs and
969 data IDs (together with the selected RBP IDs) define what gets compared in comparison mode.
970 Based on the IDs in the input tables, RBPBench looks for combinations of RBP ID+method ID+data ID, and produces
971 method-ID-centered (with fixed RBP ID + data ID) and / or data-ID-centered (with fixed RBP ID + method ID) comparisons.
972 At least two different IDs are needed for a comparison (e.g. two different method IDs or two different data IDs, with same RBP ID).
973 The comparison results are presented in an HTML report file, containing a hit statistics table and a
974 Venn diagram plot for each found combination. Moreover, the report results can be output as table files,
975 and the compared motif hits in BED format (see *Output options*), e.g. to inspect a data-ID- or method-ID-centered comparison in a genome viewer.
976 Comparing numbers of unique and shared motif hits between methods also serves as a way of benchmarking different methods.
977 Since no ground truth (i.e., set of true / experimentally verified transcriptome-wide binding sites of an RBP) exists, one obvious way to
978 benchmark peak calling methods is to look at the enrichment of known RBP binding motifs in regions reported by the peak callers.
979 RBPBench makes such evaluations easy, especially by combining modes 2, 3, and 5.
980
981
982 -----
983
984 **Tool documentation & repository**
985
986 For more information (including a webserver tutorial) please visit the RBPBench website:
987
988 https://backofenlab.github.io/RBPBench
989
990
991 The RBPBench repository can be found at:
992
993 https://github.com/michauhl/RBPBench
994
995 The GitHub repository hosts the command line version of RBPBench and also includes a
996 comprehensive manual with installation instructions and various usage examples.
997
998
999 .. _RBPBench: https://github.com/michauhl/RBPBench
1000 .. _documentation: https://github.com/michauhl/RBPBench#hit-statistics-table-files
1001
1002 ]]></help>
1003 </tool>