Mercurial > repos > rnateam > rbpbench
comparison rbpbench.xml @ 0:7dd2835ce566 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rbpbench commit 0e21bd630200c1f199db8ba5d83b81d4214fc59f
author | rnateam |
---|---|
date | Sun, 03 Dec 2023 12:51:54 +0000 |
parents | |
children | 26c64157456b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7dd2835ce566 |
---|---|
1 <tool id="rbpbench" name="RBPBench" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> | |
2 | |
3 <description>- Evaluate CLIP-seq and other genomic region data using a comprehensive collection of RBP binding motifs</description> | |
4 <macros> | |
5 <import>macros.xml</import> | |
6 </macros> | |
7 <expand macro="bio_tools"/> | |
8 <expand macro="requirements"/> | |
9 | |
10 <command detect_errors="exit_code"><![CDATA[ | |
11 #if $action_type.action_type_selector == 'search_motifs': | |
12 @PREPARE_REF@ | |
13 rbpbench search | |
14 --in '$action_type.search_bed_file' | |
15 --out ./ | |
16 --genome reference.fa | |
17 --method-id '$action_type.search_method_id' | |
18 --data-id '$action_type.search_data_id' | |
19 --rbps | |
20 #if str($action_type.select_db.select_db_selector) == "default_db": | |
21 #if str($action_type.select_db.select_rbps.select_rbps_selector) == "list_db_rbps": | |
22 #if str($action_type.select_db.select_rbps.database) != "None": | |
23 #for $rbp_id in str($action_type.select_db.select_rbps.database).split(','): | |
24 $rbp_id | |
25 #end for | |
26 #end if | |
27 #else: | |
28 ALL | |
29 #end if | |
30 #else: | |
31 $action_type.select_db.custom_db_rbp_ids_list | |
32 #end if | |
33 ## Optional user-supplied motifs: USER joins the --rbps list; the free-text RBP ID is single-quoted so spaces cannot split it | |
34 #if str($action_type.user_rbp.user_rbp_selector) == "sequence": | |
35 USER | |
36 --user-rbp-id '$action_type.user_rbp.seq_rbp_id' | |
37 --user-meme-xml '$action_type.user_rbp.meme_motifs_xml' | |
38 #elif str($action_type.user_rbp.user_rbp_selector) == "structure": | |
39 USER | |
40 --user-rbp-id '$action_type.user_rbp.str_rbp_id' | |
41 --user-cm '$action_type.user_rbp.cm_model_file' | |
42 #end if | |
43 ## Custom motif database arguments: ID and info table path are single-quoted like the MEME XML and CM file arguments | |
44 #if str($action_type.select_db.select_db_selector) == "custom_db": | |
45 --custom-db-id '$action_type.select_db.custom_db_id' | |
46 --custom-db-info '$action_type.select_db.custom_db_info_file' | |
47 #if $action_type.select_db.custom_db_meme_xml_file: | |
48 --custom-db-meme-xml '$action_type.select_db.custom_db_meme_xml_file' | |
49 #end if | |
50 #if $action_type.select_db.custom_db_cm_model_file: | |
51 --custom-db-cm '$action_type.select_db.custom_db_cm_model_file' | |
52 #end if | |
53 #end if | |
54 @COMMON_PARAMS@ | |
55 #if $action_type.report_plotting_options.set_rbp_id: | |
56 --set-rbp-id '$action_type.report_plotting_options.set_rbp_id' | |
57 #end if | |
58 --motif-distance-plot-range $action_type.report_plotting_options.motif_distance_plot_range | |
59 --motif-min-pair-count $action_type.report_plotting_options.motif_min_pair_count | |
60 --rbp-min-pair-count $action_type.report_plotting_options.rbp_min_pair_count | |
61 #if $action_type.report_plotting_options.gtf_file: | |
62 --gtf '$action_type.report_plotting_options.gtf_file' | |
63 #end if | |
64 #if $action_type.report_plotting_options.tr_list_file: | |
65 --tr-list '$action_type.report_plotting_options.tr_list_file' | |
66 #end if | |
67 #if $action_type.report_plotting_options.list_tr_biotypes: | |
68 --tr-types '$action_type.report_plotting_options.list_tr_biotypes' | |
69 #end if | |
70 --upset-plot-min-degree $action_type.report_plotting_options.upset_plot_min_degree | |
71 #if $action_type.report_plotting_options.upset_plot_max_degree: | |
72 --upset-plot-max-degree '$action_type.report_plotting_options.upset_plot_max_degree' | |
73 #end if | |
74 --upset-plot-min-subset-size $action_type.report_plotting_options.upset_plot_min_subset_size | |
75 | |
76 $action_type.search_output_options.search_report | |
77 $action_type.search_output_options.search_plot_motifs | |
78 | |
79 #if $action_type.report_plotting_options.store_sort_js_in_html: | |
80 --sort-js-mode 3 | |
81 #else: | |
82 --sort-js-mode 2 | |
83 #end if | |
84 #if $action_type.report_plotting_options.plotly_js_source == 1: | |
85 #if $action_type.report_plotting_options.plotly_plot_code_in_html: | |
86 --plotly-js-mode 6 | |
87 #else: | |
88 --plotly-js-mode 3 | |
89 #end if | |
90 #elif $action_type.report_plotting_options.plotly_js_source == 2: | |
91 #if $action_type.report_plotting_options.plotly_plot_code_in_html: | |
92 --plotly-js-mode 5 | |
93 #else: | |
94 --plotly-js-mode 1 | |
95 #end if | |
96 #else: | |
97 #if $action_type.report_plotting_options.plotly_plot_code_in_html: | |
98 --plotly-js-mode 7 | |
99 #else: | |
100 --plotly-js-mode 4 | |
101 #end if | |
102 #end if | |
103 ## Strip the html_report_plots/ prefix from links in the report, then copy report and plot files to the HTML output and its extra files directory | |
104 #if $action_type.search_output_options.search_report: | |
105 && | |
106 sed -i 's/html_report_plots\///g' ./report.rbpbench_search.html | |
107 && | |
108 cp ./report.rbpbench_search.html '$search_report_html_file' | |
109 && | |
110 mkdir -p '$search_report_html_file.extra_files_path' | |
111 && | |
112 cp -r ./html_report_plots/* '$search_report_html_file.extra_files_path' | |
113 #end if | |
114 ## Strip the html_motif_plots/ prefix from links in the motif plots page, then copy page and plot files to the HTML output and its extra files directory | |
115 #if $action_type.search_output_options.search_plot_motifs: | |
116 && | |
117 sed -i 's/html_motif_plots\///g' ./motif_plots.rbpbench_search.html | |
118 && | |
119 cp ./motif_plots.rbpbench_search.html '$motif_plots_html_file' | |
120 && | |
121 mkdir -p '$motif_plots_html_file.extra_files_path' | |
122 && | |
123 cp -r ./html_motif_plots/* '$motif_plots_html_file.extra_files_path' | |
124 #end if | |
125 | |
126 | |
127 #elif $action_type.action_type_selector == 'batch_search_motifs': | |
128 @PREPARE_REF@ | |
129 rbpbench batch | |
130 --out ./ | |
131 --genome reference.fa | |
132 --bed | |
133 #for $i in $action_type.dataset_inputs: | |
134 '$i.dataset_bed_file' | |
135 #end for | |
136 --rbp-list | |
137 #for $i in $action_type.dataset_inputs: | |
138 '$i.dataset_rbp_id' | |
139 #end for | |
140 --data-list | |
141 #for $i in $action_type.dataset_inputs: | |
142 '$i.dataset_data_id' | |
143 #end for | |
144 --method-list | |
145 #for $i in $action_type.dataset_inputs: | |
146 '$i.dataset_method_id' | |
147 #end for | |
148 @COMMON_PARAMS@ | |
149 | |
150 #elif $action_type.action_type_selector == 'batch_table_search_motifs': | |
151 @PREPARE_REF@ | |
152 batch_table_wrapper_rbpbench.py | |
153 --out ./ | |
154 --genome reference.fa | |
155 --table '$action_type.batch_table_file' | |
156 --paths | |
157 #for $i in $action_type.batch_table_bed_collection: | |
158 '$i' | |
159 #end for | |
160 --ids | |
161 #for $i in $action_type.batch_table_bed_collection: | |
162 '$i.element_identifier' | |
163 #end for | |
164 @COMMON_PARAMS@ | |
165 | |
166 #elif $action_type.action_type_selector == 'plot_nt_dist': | |
167 @PREPARE_REF@ | |
168 rbpbench dist | |
169 --in '$action_type.dist_bed_file' | |
170 --out ./ | |
171 --genome reference.fa | |
172 --cp-mode $action_type.dist_options.dist_cp_mode | |
173 --ext $action_type.dist_options.dist_ext | |
174 $action_type.dist_options.dist_plot_pdf | |
175 | |
176 | |
177 #elif $action_type.action_type_selector == 'compare_search_results': | |
178 rbpbench compare | |
179 --in | |
180 #for $in_file in $action_type.input_tables: | |
181 '$in_file' | |
182 #end for | |
183 --out ./ | |
184 #if $action_type.compare_output_options.store_sort_js_in_html: | |
185 --sort-js-mode 3 | |
186 #else: | |
187 --sort-js-mode 2 | |
188 #end if | |
189 ## Strip the html_report_plots/ prefix from links in the report, then copy report and plot files to the HTML output and its extra files directory | |
190 && | |
191 sed -i 's/html_report_plots\///g' ./report.rbpbench_compare.html | |
192 && | |
193 cp ./report.rbpbench_compare.html '$compare_report_html_file' | |
194 && | |
195 mkdir -p '$compare_report_html_file.extra_files_path' | |
196 && | |
197 cp -r ./html_report_plots/* '$compare_report_html_file.extra_files_path' | |
198 | |
199 #end if | |
200 | |
201 ]]></command> | |
202 | |
203 <inputs> | |
204 | |
205 <conditional name="action_type"> | |
206 | |
207 <param name="action_type_selector" type="select" label="Select RBPBench program mode"> | |
208 <option value="search_motifs" selected="true">Search RBP binding motifs in genomic regions</option> | |
209 <option value="batch_search_motifs">Search RBP binding motifs in genomic regions (multiple inputs)</option> | |
210 <option value="batch_table_search_motifs">Search RBP binding motifs in genomic regions (data collection input)</option> | |
211 <option value="plot_nt_dist">Plot nucleotide distribution at genomic positions</option> | |
212 <option value="compare_search_results">Compare different search results</option> | |
213 </param> | |
214 | |
215 <!-- rbpbench search --> | |
216 <when value="search_motifs"> | |
217 <param name="search_bed_file" type="data" format="bed" label="Genomic regions BED file" | |
218 help="Genomic regions (e.g. RBP binding sites) in BED format (>= 6-columns) for RBP binding motif search"/> | |
219 <conditional name="reference_genome"> | |
220 <param label="Select reference genome sequence (according to provided BED file)" name="reference_genome_selector" type="select"> | |
221 <option selected="True" value="builtin">Select built-in genome sequence</option> | |
222 <option value="history">Select genome sequence from history</option> | |
223 </param> | |
224 <when value="builtin"> | |
225 <param label="Select built-in genome sequence" name="builtin_genome" type="select"> | |
226 <options from_data_table="fasta_indexes"> | |
227 <filter column="2" type="sort_by" /> | |
228 <validator message="No genomes are available for the selected input dataset" type="no_options" /> | |
229 </options> | |
230 </param> | |
231 </when> | |
232 <when value="history"> | |
233 <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/> | |
234 </when> | |
235 </conditional> | |
236 <conditional name="select_db"> | |
237 <param name="select_db_selector" type="select" label="Select RBP binding motif database"> | |
238 <option selected="True" value="default_db" >Human motif database with 259 RBPs and 605 RBP binding motifs (catrapid.omics.v2.1.human.6plus)</option> | |
239 <option value="custom_db" >Provide a custom motif database</option> | |
240 </param> | |
241 <when value="default_db"> | |
242 | |
243 <conditional name="select_rbps"> | |
244 <param name="select_rbps_selector" type="select" label="Select RBPs for motif search"> | |
245 <option selected="True" value="list_db_rbps" >Select individual RBPs </option> | |
246 <option value="all_db_rbps" >Select all 259 RBPs</option> | |
247 </param> | |
248 <when value="list_db_rbps"> | |
249 <param name="database" label="Select RBPs" type="select" multiple="true"> | |
250 <options from_data_table='rbp_ids_table' /> | |
251 </param> | |
252 </when> | |
253 <when value="all_db_rbps"> | |
254 <!-- do nothing --> | |
255 </when> | |
256 </conditional> | |
257 </when> | |
258 <when value="custom_db"> | |
259 <param type="text" name="custom_db_id" value="custom_db_id" | |
260 label="Custom motif database ID" | |
261 help="Set ID / name for provided custom motif database (default: custom_db_id)"/> | |
262 | |
263 <param type="data" format="txt,tabular" name="custom_db_info_file" | |
264 label="Custom motif database info table file" | |
265 help="Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments. The motif IDs must correspond to the provided custom MEME / DREME XML and/or covariance models file(s)."/> | |
266 | |
267 <param type="data" format="memexml" name="custom_db_meme_xml_file" optional="True" | |
268 label="Custom motif database MEME / DREME XML file" | |
269 help="Provide custom motif database MEME / DREME XML file containing sequence motifs"/> | |
270 | |
271 <param type="data" format="cm" name="custom_db_cm_model_file" optional="True" | |
272 label="Custom motif database covariance models file" | |
273 help="Provide custom motif database covariance models file containing structure motif(s) (i.e., covariance model(s))"/> | |
274 | |
275 <param name="custom_db_rbp_ids_list" type="text" value="RBP1 RBP2 RBP3" | |
276 label="Specify RBP IDs from custom motif database" | |
277 help="Provide list of RBP IDs found in custom motif database to use for motif search. This can be a single ID, as well as several (separated by spaces, as in the example). To search using all RBPs in the custom database, simply type ALL."/> | |
278 </when> | |
279 </conditional> | |
280 | |
281 <conditional name="user_rbp"> | |
282 <param label="Add user-supplied motifs" name="user_rbp_selector" type="select"> | |
283 <option selected="True" value="no">No</option> | |
284 <option value="sequence">Yes (MEME/DREME XML based sequence motif(s))</option> | |
285 <option value="structure">Yes (Covariance model based structure motif(s))</option> | |
286 </param> | |
287 <when value="sequence"> | |
288 <param format="memexml" type="data" name="meme_motifs_xml" | |
289 label="DREME or MEME motifs XML file" | |
290 help="DREME or MEME output XML file containing sequence motif(s)"/> | |
291 <param type="text" name="seq_rbp_id" value="User_RBP" | |
292 label="RBP ID" | |
293 help="RBP ID (RBP name) for provided sequence motif(s). Make sure to provide user RBP IDs and motif IDs that are different from selected database RBP and motif IDs."/> | |
294 </when> | |
295 <when value="structure"> | |
296 <param format="cm" type="data" name="cm_model_file" | |
297 label="Covariance models file" | |
298 help="Covariance models file containing structure motif(s)"/> | |
299 <param type="text" name="str_rbp_id" value="User_RBP" | |
300 label="RBP ID" | |
301 help="RBP ID (RBP name) for provided structure motif(s). Make sure to provide user RBP IDs and motif IDs that are different from selected database RBP and motif IDs."/> | |
302 </when> | |
303 <when value="no"> | |
304 <!-- do nothing --> | |
305 </when> | |
306 </conditional> | |
307 | |
308 <param type="text" name="search_method_id" value="method_id" | |
309 label="Method ID" | |
310 help="Method ID which can be used to describe the peak calling method (e.g. clipper_idr). This ID (together with data ID and set RBP ID(s)) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/> | |
311 <param type="text" name="search_data_id" value="data_id" | |
312 label="Data ID" | |
313 help="Data ID which can be used to describe from which cell type and/or CLIP-seq protocol the data originates (e.g. k562_eclip or pum2_k562_eclip). This ID (together with method ID and set RBP ID(s)) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/> | |
314 | |
315 <section name="search_options" title="Motif search settings"> | |
316 <param name="search_ext" type="text" value="0" | |
317 label="Up- and downstream extension of genomic regions" | |
318 help='Up- and downstream extension of genomic regions in nucleotides (nt). E.g. set to "30" to extend 30 nt on both sides, or "20,10" for different up- and downstream extension (default: 0)'/> | |
319 <param name="search_fimo_pval" type="float" value="0.001" | |
320 label="FIMO p-value threshold" | |
321 help='FIMO p-value threshold (FIMO option: --thresh) for reporting motif hits (default: 0.001)'/> | |
322 <param name="search_bed_score_col" type="integer" value="5" | |
323 label="BED score column used for p-value calculations" | |
324 help="Score column of genomic regions BED file used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)"/> | |
325 <param name="search_unstranded" label="Treat genomic regions in BED file as NOT strand-specific" type="boolean" | |
326 truevalue="--unstranded" falsevalue="" checked="False" | |
327 help="Set if genomic regions in BED file are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with option below) (default: False)"/> | |
328 <param name="search_unstranded_ct" label="Count each genomic region twice for RBP hit statistics" type="boolean" | |
329 truevalue="--unstranded-ct" falsevalue="" checked="False" | |
330 help="Count each genomic region twice for RBP hit statistics when non-strand-specific option above is enabled (default: False)"/> | |
331 <param format="txt" type="data" name="fimo_nt_freqs_file" optional="True" | |
332 label="Provide FIMO nucleotide frequencies file" | |
333 help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file. By default, an internal frequencies file optimized for human transcripts is used"/> | |
334 </section> | |
335 | |
336 <section name="report_plotting_options" title="HTML report options"> | |
337 | |
338 <param format="gtf" type="data" name="gtf_file" optional="True" | |
339 label="GTF file to add genomic annotations to input regions" | |
340 help="Provide GTF file with genomic annotations to add to HTML report plots (e.g. from GENCODE or Ensembl). By default, the most prominent transcripts will be extracted and used for functional annotation. Alternatively, provide a list of expressed transcripts via --tr-list option (together with --gtf containing the transcripts). Note that currently only features on standard chromosomes (1,2,..,X,Y,MT) are used for annotation"/> | |
341 <param format="txt" type="data" name="tr_list_file" optional="True" | |
342 label="Transcript IDs file" | |
343 help="Supply file with transcript IDs (one ID per row) to define which transcripts to use from GTF file for adding functional annotations to HTML report plots"/> | |
344 <param name="list_tr_biotypes" type="text" optional="True" | |
345 label="List of transcript biotypes" | |
346 help="List of transcript biotypes to consider from GTF file. By default an internal selection of transcript biotypes is used (in addition to intron, CDS, UTR, intergenic). Provide a list of IDs separated by spaces. Note that provided biotype IDs need to be in GTF file!"/> | |
347 | |
348 <param name="upset_plot_min_degree" type="integer" value="2" | |
349 label="Upset plot minimum degree parameter" | |
350 help="Upset plot minimum degree parameter for HTML report upset plot. This defines the minimum number of RBPs for a combination to be included (default: 2)"/> | |
351 <param name="upset_plot_max_degree" type="integer" value="" optional="True" | |
352 label="Upset plot maximum degree parameter" | |
353 help="Upset plot maximum degree parameter for HTML report upset plot. By default no maximum degree is set. Useful together with minimum degree to look at specific degrees (e.g. only 2, or between 2 and 3) (default: None)"/> | |
354 <param name="upset_plot_min_subset_size" type="integer" value="5" | |
355 label="Upset plot minimum subset size parameter" | |
356 help="Upset plot minimum subset size parameter for HTML report upset plot. This defines the minimum number of hits for a specific RBP combination to be included (default: 5)"/> | |
357 | |
358 <param type="text" name="set_rbp_id" optional="True" | |
359 label="Set reference RBP ID for plotting motif distances" | |
360 help="Set reference RBP ID to plot motif distances relative to motifs from this RBP (--set-rbp-id). Motif plot will be centered on best scoring motif of the RBP for each region. Note that set RBP ID needs to be one of the above selected RBP IDs!"/> | |
361 <param name="motif_distance_plot_range" type="integer" value="60" | |
362 label="Motif distance plot range" | |
363 help="Set range of motif distance plot. I.e., centered on the set RBP (--set-rbp-id) motifs, motifs within minus and plus --motif-distance-plot-range will be plotted (default: 60)"/> | |
364 <param name="motif_min_pair_count" type="integer" value="10" | |
365 label="Motif co-occurrence minimum pair count" | |
366 help="Minimum count of co-occurrences of a motif with set RBP ID (--set-rbp-id) motif to be reported and plotted (default: 10)"/> | |
367 <param name="rbp_min_pair_count" type="integer" value="10" | |
368 label="RBP co-occurrence minimum pair count" | |
369 help="Minimum amount of co-occurrences of motifs for an RBP ID compared to set RBP ID (--set-rbp-id) motifs to be reported and plotted (default: 10)"/> | |
370 | |
371 <param name="store_sort_js_in_html" label="Store JS code for table sorting inside HTML?" | |
372 type="boolean" checked="False" | |
373 help="Store JavaScript code for table sorting inside output HTML files. By default code is stored locally in extra file located in HTML output folder."/> | |
374 <param name="plotly_js_source" type="integer" value="1" min="1" max="3" | |
375 label="Specify plotly JS code source" | |
376 help="1: Store plotly JavaScript code locally inside HTML output folder. 2: Add hyperlink to report HTML file (internet connection required). 3: Store code inside report HTML file (default: 1)"/> | |
377 <param name="plotly_plot_code_in_html" label="Store plotly plotting code inside HTML?" | |
378 type="boolean" checked="False" | |
379 help="Store plotly plotting code inside HTML. By default code is stored in separate HTML files in HTML report output folder."/> | |
380 | |
381 </section> | |
382 | |
383 <section name="search_output_options" title="Output options"> | |
384 <param name="search_report" label="Output HTML report?" type="boolean" | |
385 truevalue="--report" falsevalue="" checked="True" | |
386 help="Generate an HTML report containing RBP co-occurrence + combination + distance statistics and plots (default: True)"/> | |
387 <param name="search_plot_motifs" label="Plot RBP motifs?" type="boolean" | |
388 truevalue="--plot-motifs" falsevalue="" checked="False" | |
389 help="Visualize selected RBP motifs, by outputting sequence logos and motif hit statistics into a separate HTML file (default: False)"/> | |
390 <param name="sites_bed_fasta_out" label="Output filtered genomic regions BED + FASTA files" type="boolean" | |
391 checked="False" | |
392 help="Output filtered genomic regions BED/FASTA file used for motif search. Filtered means that the actual regions used for motif search can differ from the input genomic regions, e.g. through default filtering by chromosome ID (only regions with valid IDs), removal of duplicated regions, or through optional extension of the regions"/> | |
393 <param name="motif_hits_bed_out" label="Output motif hits BED file" type="boolean" | |
394 checked="False" | |
395 help="Output motif hits BED file containing motif hits in provided genomic regions for selected RBPs"/> | |
396 <param name="contingency_table_out" label="Output contingency table containing co-occurrence p-values" type="boolean" | |
397 checked="False" | |
398 help="Output contingency table containing co-occurrence p-values (Fisher's exact test) between each RBP pair (see manual for more information)"/> | |
399 <param name="region_annotations_out" label="Output genomic region annotations table file?" | |
400 type="boolean" checked="False" | |
401 help="Output genomic region annotations table file containing assigned annotations for each BED input region. Note that a GTF file has to be provided (default: False)"/> | |
402 </section> | |
403 </when> | |
404 | |
405 <!-- rbpbench batch --> | |
406 <when value="batch_search_motifs"> | |
407 <repeat name="dataset_inputs" min="1" title="Dataset"> | |
408 <param name="dataset_bed_file" type="data" format="bed" | |
409 label="Genomic regions BED file" | |
410 help="Genomic regions (e.g. RBP binding sites) in BED format (>= 6-columns) for RBP binding motif search"/> | |
411 <param name="dataset_rbp_id" label="Select RBP for motif search" type="select"> | |
412 <options from_data_table='rbp_ids_table' /> | |
413 </param> | |
414 <param type="text" name="dataset_method_id" value="method_id" | |
415 label="Method ID" | |
416 help="Method ID which can be used to describe the peak calling method (e.g. clipper_idr). This ID (together with data ID and set RBP ID) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/> | |
417 <param type="text" name="dataset_data_id" value="data_id" | |
418 label="Data ID" | |
419 help="Data ID which can be used to describe from which cell type and/or CLIP-seq protocol the data originates (e.g. k562_eclip or pum2_k562_eclip). This ID (together with method ID and set RBP ID) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/> | |
420 </repeat> | |
421 | |
422 <conditional name="reference_genome"> | |
423 <param label="Select reference genome sequence (according to the provided BED files)" name="reference_genome_selector" type="select"> | |
424 <option selected="True" value="builtin">Select built-in genome sequence</option> | |
425 <option value="history">Select genome sequence from history</option> | |
426 </param> | |
427 <when value="builtin"> | |
428 <param label="Select built-in genome sequence" name="builtin_genome" type="select"> | |
429 <options from_data_table="fasta_indexes"> | |
430 <filter column="2" type="sort_by" /> | |
431 <validator message="No genomes are available for the selected input dataset" type="no_options" /> | |
432 </options> | |
433 </param> | |
434 </when> | |
435 <when value="history"> | |
436 <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/> | |
437 </when> | |
438 </conditional> | |
439 | |
440 <section name="search_options" title="Motif search settings"> | |
441 <param name="search_ext" type="text" value="0" | |
442 label="Up- and downstream extension of genomic regions" | |
443 help='Up- and downstream extension of genomic regions in nucleotides (nt). E.g. set to "30" to extend 30 nt on both sides, or "20,10" for different up- and downstream extension (default: 0)'/> | |
444 <param name="search_fimo_pval" type="float" value="0.001" | |
445 label="FIMO p-value threshold" | |
446 help='FIMO p-value threshold (FIMO option: --thresh) for reporting motif hits (default: 0.001)'/> | |
447 <param name="search_bed_score_col" type="integer" value="5" | |
448 label="BED score column used for p-value calculations" | |
449 help="Score column of genomic regions BED files used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)"/> | |
450 <param name="search_unstranded" label="Treat genomic regions in BED file as NOT strand-specific" type="boolean" | |
451 truevalue="--unstranded" falsevalue="" checked="False" | |
452 help="Set if genomic regions in BED files are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with option below) (default: False)"/> | |
453 <param name="search_unstranded_ct" label="Count each genomic region twice for RBP hit statistics" type="boolean" | |
454 truevalue="--unstranded-ct" falsevalue="" checked="False" | |
455 help="Count each genomic region twice for RBP hit statistics when non-strand-specific option above is enabled (default: False)"/> | |
456 <param format="txt" type="data" name="fimo_nt_freqs_file" optional="True" | |
457 label="Provide FIMO nucleotide frequencies file" | |
458 help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file. By default, an internal frequencies file optimized for human transcripts is used"/> | |
459 </section> | |
460 | |
461 <section name="search_output_options" title="Output options"> | |
462 <param name="batch_motif_hits_bed_out" label="Output motif hits BED file" type="boolean" | |
463 checked="False" | |
464 help="Output motif hits BED file containing motif hits for all input datasets"/> | |
465 </section> | |
466 | |
467 </when> | |
468 | |
469 <!-- rbpbench batch data collection + table --> | |
470 <when value="batch_table_search_motifs"> | |
471 | |
472 <param name="batch_table_bed_collection" type="data_collection" collection_type="list" format="bed" | |
473 label="Data collection containing genomic regions BED files" | |
474 help="Data collection containing genomic regions BED files to be processed. Note that dataset names inside collection must correspond to names given in the batch processing table file below"/> | |
475 | |
476 <param name="batch_table_file" type="data" format="txt,tabular" | |
477 label="Provide batch processing table file" | |
478 help="Provide batch processing table file with one row for each batch job. Each row contains the tab-delimited information: RBP ID (RBP name), method ID, data ID, dataset name. The dataset name must be present in the supplied data collection of BED files"/> | |
479 | |
480 <conditional name="reference_genome"> | |
481 <param label="Select reference genome sequence (according to the provided BED files)" name="reference_genome_selector" type="select"> | |
482 <option selected="True" value="builtin">Select built-in genome sequence</option> | |
483 <option value="history">Select genome sequence from history</option> | |
484 </param> | |
485 <when value="builtin"> | |
486 <param label="Select built-in genome sequence" name="builtin_genome" type="select"> | |
487 <options from_data_table="fasta_indexes"> | |
488 <filter column="2" type="sort_by" /> | |
489 <validator message="No genomes are available for the selected input dataset" type="no_options" /> | |
490 </options> | |
491 </param> | |
492 </when> | |
493 <when value="history"> | |
494 <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/> | |
495 </when> | |
496 </conditional> | |
497 | |
498 <section name="search_options" title="Motif search settings"> | |
499 <param name="search_ext" type="text" value="0" | |
500 label="Up- and downstream extension of genomic regions" | |
501 help='Up- and downstream extension of genomic regions in nucleotides (nt). E.g. set to "30" to extend 30 nt on both sides, or "20,10" for different up- and downstream extension (default: 0)'/> | |
502 <param name="search_fimo_pval" type="float" value="0.001" | |
503 label="FIMO p-value threshold" | |
504 help='FIMO p-value threshold (FIMO option: --thresh) for reporting motif hits (default: 0.001)'/> | |
505 <param name="search_bed_score_col" type="integer" value="5" | |
506 label="BED score column used for p-value calculations" | |
507 help="Score column of genomic regions BED files used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)"/> | |
508 <param name="search_unstranded" label="Treat genomic regions in BED file as NOT strand-specific" type="boolean" | |
509 truevalue="--unstranded" falsevalue="" checked="False" | |
510 help="Set if genomic regions in BED files are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with option below) (default: False)"/> | |
511 <param name="search_unstranded_ct" label="Count each genomic region twice for RBP hit statistics" type="boolean" | |
512 truevalue="--unstranded-ct" falsevalue="" checked="False" | |
513 help="Count each genomic region twice for RBP hit statistics when non-strand-specific option above is enabled (default: False)"/> | |
514 <param format="txt" type="data" name="fimo_nt_freqs_file" optional="True" | |
515 label="Provide FIMO nucleotide frequencies file" | |
516 help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file. By default, an internal frequencies file optimized for human transcripts is used"/> | |
517 </section> | |
518 | |
519 <section name="search_output_options" title="Output options"> | |
520 <param name="batch_table_motif_hits_bed_out" label="Output motif hits BED file" type="boolean" | |
521 checked="False" | |
522 help="Output motif hits BED file containing motif hits for all input datasets"/> | |
523 </section> | |
524 | |
525 </when> | |
526 | |
527 <!-- rbpbench dist: plot the nucleotide distribution around a zero position defined on each input region --> | |
528 <when value="plot_nt_dist"> | |
529 <!-- Input regions to plot --> | |
530 <param name="dist_bed_file" type="data" format="bed" | |
531 label="Genomic regions BED file" | |
532 help="Genomic regions (e.g. RBP binding sites) in BED format. Zero position for plotting can be defined in options"/> | |
533 <!-- Reference genome: a FASTA dataset from the history or an entry of the fasta_indexes data table --> | |
534 <conditional name="reference_genome"> | |
535 <param label="Select reference genome sequence (according to BED file)" name="reference_genome_selector" type="select"> | |
536 <option selected="True" value="history">Select genome sequence from history</option> | |
537 <option value="builtin">Select built-in genome sequence</option> | |
538 </param> | |
539 <when value="history"> | |
540 <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/> | |
541 </when> | |
542 <when value="builtin"> | |
543 <param label="Select built-in genome sequence" name="builtin_genome" type="select"> | |
544 <options from_data_table="fasta_indexes"> | |
545 <filter column="2" type="sort_by" /> | |
546 <validator message="No genomes are available for the selected input dataset" type="no_options" /> | |
547 </options> | |
548 </param> | |
549 </when> | |
550 </conditional> | |
551 <!-- Plot settings plus the switch for the optional BED/FASTA outputs; the extension is a length in nt, so negative values are rejected --> | |
552 <section name="dist_options" title="Nucleotide distribution plot settings"> | |
553 <param name="dist_cp_mode" type="integer" value="1" min="1" max="3" | |
554 label="Define zero position for plotting" | |
555 help="Define which position of genomic sites to use as zero position for plotting. 1: upstream end position, 2: center position, 3: downstream end position (default: 1)"/> | |
556 <param name="dist_ext" type="integer" value="10" min="0" | |
557 label="Up- and downstream extension of defined genomic positions" | |
558 help="Up- and downstream extension of defined genomic positions in nucleotides (nt) to include in plotting (default: 10)"/> | |
559 <param name="dist_plot_pdf" label="Plot as PDF?" type="boolean" | |
560 truevalue="--plot-pdf" falsevalue="" checked="False" | |
561 help="Plot nucleotide distribution as PDF (default: PNG)"/> | |
562 <param name="sites_bed_fasta_out" label="Output genomic regions BED + FASTA files" type="boolean" | |
563 checked="False" | |
564 help="Output genomic regions BED/FASTA file used for plotting"/> | |
565 </section> | |
566 | |
567 </when> | |
568 | |
569 <!-- rbpbench compare: compare hit statistics tables produced by earlier motif search jobs --> | |
570 <when value="compare_search_results"> | |
571 <!-- RBP and motif hit statistics tables to compare --> | |
572 <param name="input_tables" type="data" format="tabular" multiple="true" | |
573 label="Motif search results" | |
574 help="Supply motif search results table files for comparison. These are the hit statistics table files output by single or batch motif search jobs. Both RBP and motif hit statistics table files are needed, and can be from any single or batch search job."/> | |
575 <!-- Switches for the optional output files of the comparison --> | |
576 <section name="compare_output_options" title="Output options"> | |
577 <param name="compared_motif_hits_table" label="Output compared motif hits table file" type="boolean" | |
578 checked="False" | |
579 help="Output compared motif hits table file showing motif hits and which data or method IDs contain them"/> | |
580 <param name="compared_motif_hits_bed" label="Output compared motif hits BED file" type="boolean" | |
581 checked="False" | |
582 help="Output compared motif hits BED file showing motif hits and which data or method IDs contain them"/> | |
583 <param name="comparisons_stats_out" label="Output comparison statistics table file" type="boolean" | |
584 checked="False" | |
585 help="Output comparison statistics table file containing the statistics found in the comparison HTML report"/> | |
586 <param name="store_sort_js_in_html" label="Store JS code for table sorting inside HTML?" | |
587 type="boolean" checked="False" | |
588 help="Store JavaScript code for table sorting inside output HTML files. By default code is stored locally in extra file located in HTML output folder."/> | |
589 </section> | |
590 </when> | |
591 | |
592 </conditional> | |
593 | |
594 </inputs> | |
595 | |
596 <outputs> | |
597 | |
598 <!-- Outputs of the single-input motif search mode (search_motifs): the two hit statistics tables depend only on the mode, every further file also depends on its output option --> | |
599 <data name="rbp_hit_stats_file" from_work_dir="rbp_hit_stats.tsv" format="tabular" label="${tool.name} on ${on_string}: RBP hit statistics table file"> | |
600 <filter>action_type["action_type_selector"] == "search_motifs"</filter> | |
601 </data> | |
602 <data name="motif_hit_stats_file" from_work_dir="motif_hit_stats.tsv" format="tabular" label="${tool.name} on ${on_string}: Motif hit statistics table file"> | |
603 <filter>action_type["action_type_selector"] == "search_motifs"</filter> | |
604 </data> | |
605 <data name="search_report_html_file" from_work_dir="report.rbpbench_search.html" format="html" label="${tool.name} on ${on_string}: Search report HTML file"> | |
606 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["search_report"]</filter> | |
607 </data> | |
608 <data name="motif_plots_html_file" from_work_dir="motif_plots.rbpbench_search.html" format="html" label="${tool.name} on ${on_string}: Motif plots HTML file"> | |
609 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["search_plot_motifs"]</filter> | |
610 </data> | |
611 <data name="in_sites_bed_file" from_work_dir="in_sites.filtered.bed" format="bed" label="${tool.name} on ${on_string}: Genomic regions used for motif search BED file"> | |
612 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["sites_bed_fasta_out"]</filter> | |
613 </data> | |
614 <data name="in_sites_fa_file" from_work_dir="in_sites.filtered.fa" format="fasta" label="${tool.name} on ${on_string}: Genomic regions used for motif search FASTA file"> | |
615 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["sites_bed_fasta_out"]</filter> | |
616 </data> | |
617 <data name="motif_hits_bed_file" from_work_dir="motif_hits.rbpbench_search.bed" format="bed" label="${tool.name} on ${on_string}: Motif hits on genomic regions BED file"> | |
618 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["motif_hits_bed_out"]</filter> | |
619 </data> | |
620 <data name="contingency_table_file" from_work_dir="contingency_table_results.tsv" format="tabular" label="${tool.name} on ${on_string}: RBP co-occurrence contingency table file"> | |
621 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["contingency_table_out"]</filter> | |
622 </data> | |
623 <data name="region_annotations_file" from_work_dir="region_annotations.tsv" format="tabular" label="${tool.name} on ${on_string}: genomic region annotations table file"> | |
624 <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["region_annotations_out"] and action_type["report_plotting_options"]["gtf_file"]</filter> | |
625 </data> | |
626 | |
627 <!-- Outputs of the multiple-inputs search mode (batch_search_motifs): two hit statistics tables plus an optional motif hits BED file --> | |
628 <data name="batch_rbp_hit_stats_file" from_work_dir="rbp_hit_stats.tsv" format="tabular" label="${tool.name} on ${on_string}: Batch RBP hit statistics table file"> | |
629 <filter>action_type["action_type_selector"] == "batch_search_motifs"</filter> | |
630 </data> | |
631 <data name="batch_motif_hit_stats_file" from_work_dir="motif_hit_stats.tsv" format="tabular" label="${tool.name} on ${on_string}: Batch motif hit statistics table file"> | |
632 <filter>action_type["action_type_selector"] == "batch_search_motifs"</filter> | |
633 </data> | |
634 <data name="batch_motif_hits_bed_file" from_work_dir="motif_hits.rbpbench_batch.bed" format="bed" label="${tool.name} on ${on_string}: Batch motif hits on genomic regions BED files"> | |
635 <filter>action_type["action_type_selector"] == "batch_search_motifs" and action_type["search_output_options"]["batch_motif_hits_bed_out"]</filter> | |
636 </data> | |
637 | |
638 <!-- Outputs of the data-collection search mode (batch_table_search_motifs): two hit statistics tables plus an optional motif hits BED file --> | |
639 <data name="batch_table_rbp_hit_stats_file" from_work_dir="rbp_hit_stats.tsv" format="tabular" label="${tool.name} on ${on_string}: Batch data collection RBP hit statistics table file"> | |
640 <filter>action_type["action_type_selector"] == "batch_table_search_motifs"</filter> | |
641 </data> | |
642 <data name="batch_table_motif_hit_stats_file" from_work_dir="motif_hit_stats.tsv" format="tabular" label="${tool.name} on ${on_string}: Batch data collection motif hit statistics table file"> | |
643 <filter>action_type["action_type_selector"] == "batch_table_search_motifs"</filter> | |
644 </data> | |
645 <data name="batch_table_motif_hits_bed_file" from_work_dir="motif_hits.rbpbench_batch.bed" format="bed" label="${tool.name} on ${on_string}: Motif hits on genomic regions BED files from data collection"> | |
646 <filter>action_type["action_type_selector"] == "batch_table_search_motifs" and action_type["search_output_options"]["batch_table_motif_hits_bed_out"]</filter> | |
647 </data> | |
648 | |
649 <!-- Outputs of the nucleotide distribution mode (plot_nt_dist): the plot is collected as PNG or as PDF depending on dist_plot_pdf; the BED/FASTA files are optional --> | |
650 <data name="nt_dist_plot_png_file" from_work_dir="nt_dist_zero_pos.png" format="png" label="${tool.name} on ${on_string}: Nucleotide distribution plot PNG file"> | |
651 <filter>action_type["action_type_selector"] == "plot_nt_dist" and not action_type["dist_options"]["dist_plot_pdf"]</filter> | |
652 </data> | |
653 <data name="nt_dist_plot_pdf_file" from_work_dir="nt_dist_zero_pos.pdf" format="pdf" label="${tool.name} on ${on_string}: Nucleotide distribution plot PDF file"> | |
654 <filter>action_type["action_type_selector"] == "plot_nt_dist" and action_type["dist_options"]["dist_plot_pdf"]</filter> | |
655 </data> | |
656 <data name="plot_sites_bed_file" from_work_dir="in_sites.filtered.bed" format="bed" label="${tool.name} on ${on_string}: Genomic regions used for plotting BED file"> | |
657 <filter>action_type["action_type_selector"] == "plot_nt_dist" and action_type["dist_options"]["sites_bed_fasta_out"]</filter> | |
658 </data> | |
659 <data name="plot_sites_fa_file" from_work_dir="in_sites.filtered.fa" format="fasta" label="${tool.name} on ${on_string}: Genomic regions used for plotting FASTA file"> | |
660 <filter>action_type["action_type_selector"] == "plot_nt_dist" and action_type["dist_options"]["sites_bed_fasta_out"]</filter> | |
661 </data> | |
662 | |
663 <!-- rbpbench compare outputs: the HTML report depends only on the mode; the BED and table files are created only when the matching switch in the compare_output_options section is enabled --> | |
664 <data name="compare_report_html_file" format="html" from_work_dir="report.rbpbench_compare.html" label="${tool.name} on ${on_string}: Comparison report HTML file"> | |
665 <filter>action_type["action_type_selector"] == "compare_search_results"</filter> | |
666 </data> | |
667 <data name="compared_motif_hits_bed_file" format="bed" from_work_dir="motif_hits.rbpbench_compare.bed" label="${tool.name} on ${on_string}: Compared motif hits BED file"> | |
668 <filter>action_type["action_type_selector"] == "compare_search_results" and action_type["compare_output_options"]["compared_motif_hits_bed"]</filter> | |
669 </data> | |
670 <data name="compared_motif_hits_table_file" format="tabular" from_work_dir="motif_hits.rbpbench_compare.tsv" label="${tool.name} on ${on_string}: Compared motif hits table file"> | |
671 <filter>action_type["action_type_selector"] == "compare_search_results" and action_type["compare_output_options"]["compared_motif_hits_table"]</filter> | |
672 </data> | |
673 <data name="compared_stats_table_file" format="tabular" from_work_dir="comparison_stats.rbpbench_compare.tsv" label="${tool.name} on ${on_string}: Comparison statistics table file"> | |
674 <filter>action_type["action_type_selector"] == "compare_search_results" and action_type["compare_output_options"]["comparisons_stats_out"]</filter> | |
675 </data> | |
676 | |
677 </outputs> | |
678 | |
679 <tests> | |
680 | |
681 <!-- rbpbench search tests: first case searches PUM1 and PUM2 (list_db_rbps) with the genome FASTA taken from the history; report, sites BED/FASTA, motif hits BED and contingency table are switched on, 7 outputs expected --> | |
682 <test expect_num_outputs="7"> | |
683 <param name="action_type_selector" value="search_motifs"/> | |
684 <param name="reference_genome_selector" value="history" /> | |
685 <param name="history_genome" value="test.fa" /> | |
686 <param name="search_bed_file" value="test.bed" ftype="bed"/> | |
687 <param name="select_rbps_selector" value="list_db_rbps"/> | |
688 <param name="database" value="PUM1,PUM2" /> | |
689 <param name="search_report" value="True"/> | |
690 <param name="search_plot_motifs" value="False"/> | |
691 <param name="sites_bed_fasta_out" value="True"/> | |
692 <param name="motif_hits_bed_out" value="True"/> | |
693 <param name="contingency_table_out" value="True"/> | |
694 <output name="rbp_hit_stats_file" file="rbp_hit_stats.rbpbench_search.tsv" compare="sim_size"/> | |
695 <output name="motif_hit_stats_file" file="motif_hit_stats.rbpbench_search.tsv" compare="sim_size"/> | |
696 <output name="search_report_html_file" file="report.rbpbench_search.html" compare="sim_size"/> | |
697 <output name="in_sites_bed_file" file="in_sites.filtered.rbpbench_search.bed"/> | |
698 <output name="in_sites_fa_file" file="in_sites.filtered.rbpbench_search.fa"/> | |
699 <output name="motif_hits_bed_file" file="motif_hits.rbpbench_search.bed"/> | |
700 <output name="contingency_table_file" file="contingency_table_results.rbpbench_search.tsv"/> | |
701 </test> | |
702 <!-- test builtin fasta: PUM1/PUM2 search with the same options and expected files, but the reference genome is the built-in entry "testid" instead of a history dataset --> | |
703 <test expect_num_outputs="7"> | |
704 <param name="action_type_selector" value="search_motifs"/> | |
705 <param name="reference_genome_selector" value="builtin" /> | |
706 <param name="builtin_genome" value="testid" /> | |
707 <param name="search_bed_file" value="test.bed" ftype="bed"/> | |
708 <param name="select_rbps_selector" value="list_db_rbps"/> | |
709 <param name="database" value="PUM1,PUM2" /> | |
710 <param name="search_report" value="True"/> | |
711 <param name="search_plot_motifs" value="False"/> | |
712 <param name="sites_bed_fasta_out" value="True"/> | |
713 <param name="motif_hits_bed_out" value="True"/> | |
714 <param name="contingency_table_out" value="True"/> | |
715 <output name="rbp_hit_stats_file" file="rbp_hit_stats.rbpbench_search.tsv" compare="sim_size"/> | |
716 <output name="motif_hit_stats_file" file="motif_hit_stats.rbpbench_search.tsv" compare="sim_size"/> | |
717 <output name="search_report_html_file" file="report.rbpbench_search.html" compare="sim_size"/> | |
718 <output name="in_sites_bed_file" file="in_sites.filtered.rbpbench_search.bed"/> | |
719 <output name="in_sites_fa_file" file="in_sites.filtered.rbpbench_search.fa"/> | |
720 <output name="motif_hits_bed_file" file="motif_hits.rbpbench_search.bed"/> | |
721 <output name="contingency_table_file" file="contingency_table_results.rbpbench_search.tsv"/> | |
722 </test> | |
723 | |
724 <test expect_num_outputs="2"> <!-- user-supplied structure motif (covariance model SLBP_USER.cm, RBP ID SLBP_USER); report disabled, so only the two hit statistics tables are expected --> | |
725 <param name="action_type_selector" value="search_motifs"/> | |
726 <param name="reference_genome_selector" value="history" /> | |
727 <param name="history_genome" value="test.slbp_user.fa" /> | |
728 <param name="search_bed_file" value="test.slbp_user.bed" ftype="bed"/> | |
729 <param name="user_rbp_selector" value="structure"/> | |
730 <param name="cm_model_file" value="SLBP_USER.cm" /> | |
731 <param name="str_rbp_id" value="SLBP_USER" /> | |
732 <param name="search_report" value="False"/> | |
733 <output name="rbp_hit_stats_file" file="rbp_hit_stats.rbpbench_search.slbp_user.tsv" compare="sim_size"/> | |
734 <output name="motif_hit_stats_file" file="motif_hit_stats.rbpbench_search.slbp_user.tsv" compare="sim_size"/> | |
735 </test> | |
736 <test expect_num_outputs="3"> <!-- search with all RBPs of the default database (all_db_rbps); only the motif hits BED option is enabled, and only that file is compared --> | |
737 <param name="action_type_selector" value="search_motifs"/> | |
738 <param name="reference_genome_selector" value="history" /> | |
739 <param name="history_genome" value="test.fa" /> | |
740 <param name="search_bed_file" value="test.bed" ftype="bed"/> | |
741 <param name="select_db_selector" value="default_db"/> | |
742 <param name="select_rbps_selector" value="all_db_rbps"/> | |
743 <param name="search_report" value="False"/> | |
744 <param name="search_plot_motifs" value="False"/> | |
745 <param name="sites_bed_fasta_out" value="False"/> | |
746 <param name="motif_hits_bed_out" value="True"/> | |
747 <param name="contingency_table_out" value="False"/> | |
748 <output name="motif_hits_bed_file" file="motif_hits.rbpbench_search.test_all.bed"/> | |
749 </test> | |
750 <test expect_num_outputs="4"> <!-- search with a GTF file and region_annotations_out enabled; the region annotations table is the only file compared --> | |
751 <param name="action_type_selector" value="search_motifs"/> | |
752 <param name="reference_genome_selector" value="history" /> | |
753 <param name="history_genome" value="test.fa" /> | |
754 <param name="search_bed_file" value="test.bed" ftype="bed"/> | |
755 <param name="gtf_file" value="test_search.gtf" ftype="gtf"/> | |
756 <param name="select_db_selector" value="default_db"/> | |
757 <param name="select_rbps_selector" value="all_db_rbps"/> | |
758 <param name="search_report" value="True"/> | |
759 <param name="search_plot_motifs" value="False"/> | |
760 <param name="sites_bed_fasta_out" value="False"/> | |
761 <param name="motif_hits_bed_out" value="False"/> | |
762 <param name="contingency_table_out" value="False"/> | |
763 <param name="region_annotations_out" value="True"/> | |
764 <output name="region_annotations_file" file="test_search_gtf.region_annotations.tsv"/> | |
765 </test> | |
766 <test expect_num_outputs="3"> <!-- search against a custom motif database (info file, MEME sequence motifs, covariance models) for the RBP IDs PUM1, PUM2 and SLBP; only the motif hits BED file is compared --> | |
767 <param name="action_type_selector" value="search_motifs"/> | |
768 <param name="reference_genome_selector" value="history" /> | |
769 <param name="history_genome" value="test.fa" /> | |
770 <param name="search_bed_file" value="test.bed" ftype="bed"/> | |
771 <param name="select_db_selector" value="custom_db"/> | |
772 <param name="custom_db_id" value="custom_yo" /> | |
773 <param name="custom_db_info_file" value="test_custom.info.txt" /> | |
774 <param name="custom_db_meme_xml_file" value="test_custom.seq_motifs.meme" /> | |
775 <param name="custom_db_cm_model_file" value="test_custom.str_motifs.cm" /> | |
776 <param name="custom_db_rbp_ids_list" value="PUM1 PUM2 SLBP" /> | |
777 <param name="search_report" value="False"/> | |
778 <param name="search_plot_motifs" value="False"/> | |
779 <param name="sites_bed_fasta_out" value="False"/> | |
780 <param name="motif_hits_bed_out" value="True"/> | |
781 <param name="contingency_table_out" value="False"/> | |
782 <param name="region_annotations_out" value="False"/> | |
783 <output name="motif_hits_bed_file" file="test_custom.motif_hits.rbpbench_search.bed"/> | |
784 </test> | |
785 | |
786 <!-- rbpbench batch tests: two dataset entries reuse test.bed with different RBP, method and data IDs (PUM1/method-id1/data-id1 and PUM2/method-id2/data-id2); motif hits BED enabled, 3 outputs expected --> | |
787 <test expect_num_outputs="3"> | |
788 <param name="action_type_selector" value="batch_search_motifs"/> | |
789 <param name="reference_genome_selector" value="history" /> | |
790 <param name="history_genome" value="test.fa" /> | |
791 <repeat name="dataset_inputs"> | |
792 <param name="dataset_bed_file" value="test.bed"/> | |
793 <param name="dataset_method_id" value="method-id1" /> | |
794 <param name="dataset_data_id" value="data-id1" /> | |
795 <param name="dataset_rbp_id" value="PUM1" /> | |
796 </repeat> | |
797 <repeat name="dataset_inputs"> | |
798 <param name="dataset_bed_file" value="test.bed"/> | |
799 <param name="dataset_method_id" value="method-id2" /> | |
800 <param name="dataset_data_id" value="data-id2" /> | |
801 <param name="dataset_rbp_id" value="PUM2" /> | |
802 </repeat> | |
803 <param name="batch_motif_hits_bed_out" value="True"/> | |
804 <output name="batch_rbp_hit_stats_file" file="rbp_hit_stats.test_batch.tsv" compare="sim_size"/> | |
805 <output name="batch_motif_hit_stats_file" file="motif_hit_stats.test_batch.tsv" compare="sim_size"/> | |
806 <output name="batch_motif_hits_bed_file" file="motif_hits.rbpbench_batch.test_batch.bed"/> | |
807 </test> | |
808 | |
809 <!-- rbpbench batch table tests: a list collection holding test1.bed and test2.bed is searched according to the batch table test_table.txt; motif hits BED enabled, 3 outputs expected --> | |
810 <test expect_num_outputs="3"> | |
811 <param name="action_type_selector" value="batch_table_search_motifs"/> | |
812 <param name="reference_genome_selector" value="history" /> | |
813 <param name="history_genome" value="test.fa" /> | |
814 <param name="batch_table_bed_collection"> | |
815 <collection type="list"> | |
816 <element name="test1.bed" value="test1.bed"/> | |
817 <element name="test2.bed" value="test2.bed"/> | |
818 </collection> | |
819 </param> | |
820 <param name="batch_table_file" value="test_table.txt"/> | |
821 <param name="batch_table_motif_hits_bed_out" value="True"/> | |
822 <output name="batch_table_rbp_hit_stats_file" file="rbp_hit_stats.table_test.tsv" compare="sim_size"/> | |
823 <output name="batch_table_motif_hit_stats_file" file="motif_hit_stats.table_test.tsv" compare="sim_size"/> | |
824 <output name="batch_table_motif_hits_bed_file" file="motif_hits.rbpbench_batch.table_test.bed"/> | |
825 </test> | |
826 | |
827 <!-- rbpbench compare tests: four hit statistics tables (RBP and motif level, dewseq and clipper_idr) are compared with all three optional outputs switched on, so report, BED, hits table and stats table are expected --> | |
828 <test expect_num_outputs="4"> | |
829 <param name="action_type_selector" value="compare_search_results"/> | |
830 <param name="input_tables" value="rbp_hit_stats.compare_test.dewseq.tsv,rbp_hit_stats.compare_test.clipper_idr.tsv,motif_hit_stats.compare_test.dewseq.tsv,motif_hit_stats.compare_test.clipper_idr.tsv" ftype="tabular" /> | |
831 <param name="compared_motif_hits_table" value="True"/> | |
832 <param name="compared_motif_hits_bed" value="True"/> | |
833 <param name="comparisons_stats_out" value="True"/> | |
834 <output name="compare_report_html_file" file="report.rbpbench_compare.test.html" compare="sim_size"/> | |
835 <output name="compared_motif_hits_bed_file" file="motif_hits.rbpbench_compare.test.bed"/> | |
836 <output name="compared_motif_hits_table_file" file="motif_hits.rbpbench_compare.test.tsv"/> | |
837 <output name="compared_stats_table_file" file="comparison_stats.rbpbench_compare.test.tsv"/> | |
838 </test> | |
839 | |
840 </tests> | |
841 <help><![CDATA[ | |
842 | |
843 | |
844 **What is RBPBench?** | |
845 | |
846 | |
847 RBPBench_ is a multi-function tool to evaluate CLIP-seq and other genomic region | |
848 data using a comprehensive collection of known RNA-binding protein (RBP) binding motifs. | |
849 RBPBench can be used for a variety of purposes, from RBP motif search (database or | |
850 user-supplied RBPs) in genomic regions, over motif co-occurrence analysis, to benchmarking | |
851 CLIP-seq peak caller methods as well as comparisons across cell types and | |
852 CLIP-seq protocols. | |
853 | |
854 ----- | |
855 | |
856 **RBPBench program modes** | |
857 | |
858 RBPBench on Galaxy provides the following main functions (Choose on top via "Select RBPBench program mode"): | |
859 | |
860 1) Search RBP binding motifs in genomic regions | |
861 2) Search RBP binding motifs in genomic regions (multiple inputs) | |
862 3) Search RBP binding motifs in genomic regions (data collection input) | |
863 4) Plot nucleotide distribution at genomic positions | |
864 5) Compare different search results | |
865 | |
866 | |
867 **1. Search RBP binding motifs in genomic regions** | |
868 | |
869 In this mode we can select any number of RBPs of interest and search for RBP binding motifs in a given | |
870 set of genomic regions (*Genomic regions BED file*). A built-in high-quality database of human RBP binding motifs | |
871 (currently containing 259 RBPs and 605 motifs) is used by default. Moreover, users can add their own motifs | |
872 (*Add user-supplied motifs*), as well as define their own database (*Provide a custom RBP motif database*). | |
873 Both sequence (MEME/DREME XML format) and structure motifs (covariance models) are supported. | |
874 Comprehensive hit statistics (both on RBP and single motif level) are output as table files, | |
875 together with an informative HTML report containing various plots and tables | |
876 (see Output options to control what files are output). | |
877 Hit statistics output table formats are described in the RBPBench documentation_. | |
878 The HTML report includes statistics for each RBP on enrichment of motifs in higher scoring regions, | |
879 as well as a heatmap of RBP co-occurrences in genomic regions, and an upset plot | |
880 on present RBP combinations (*HTML report options* for finetuning). | |
881 If a GTF file is provided (*HTML report options -> GTF file*), genomic region annotations are also added to the regions and plots. | |
882 Furthermore, motif distances (RBP and motif level) can be plotted relative to a set reference RBP | |
883 (*HTML report options -> Set reference RBP ID*). | |
884 Motif search settings can be adapted, e.g. to apply up- and/or downstream extension to the genomic regions | |
885 before search. Motifs for selected RBPs can also be plotted in a separate HTML file (*Output options -> Plot RBP motifs*). | |
886 To compare motif search results (mode: *Compare different search results*), | |
887 data ID and method ID can be set accordingly (more details in sections 2, 3, and 5). | |
888 | |
889 | |
890 **2. Search RBP binding motifs in genomic regions (multiple inputs)** | |
891 | |
892 This mode allows the input of more than one set of genomic regions (via *+ Insert Dataset*). | |
893 For each input, an RBP for motif search needs to be selected. Optionally (for comparing | |
894 different search results), descriptive data + method IDs can be added (also see *Compare different search results*). | |
895 For example, if two different peak calling methods (method1, method2) have been used to | |
896 extract RBP binding regions from CLIP-seq data of RBP RBPX, and we want to compare these two methods later on, we would: | |
897 *+ Insert Dataset*: input the set (i.e., BED file) produced by method1, choose the CLIP-ped RBP (RBPX) + add method ID "method1". | |
898 *+ Insert Dataset*: input the set produced by method2, again choose RBPX, and add method ID "method2". | |
899 The data ID we keep constant, ideally choosing an ID that describes the data (e.g. cell type, CLIP-seq protocol, CLIP-ped RBP). | |
900 For example, if the cell type is K562, and the CLIP-seq protocol is eCLIP, we could specify | |
901 the data ID "K562_eCLIP" or "RBPX_K562_eCLIP". We can repeat this for other proteins by | |
902 adding the respective inputs. Finally, for comparing the two methods, | |
903 all we need to do is to use the two produced hit statistics output tables (RBP + motif hit statistics) | |
904 as inputs in *Compare different search results* mode. | |
905 The same also works the other way around, by keeping the method ID constant and changing the data ID. | |
906 For example, if we want to compare motif search results across different cell types, we can use | |
907 different data IDs while keeping the method ID. | |
908 | |
909 | |
910 **3. Search RBP binding motifs in genomic regions (data collection input)** | |
911 | |
912 This mode is identical to the previous one (multiple inputs), except that instead of | |
913 manually defining each input (dataset, RBP, method ID, data ID), we simply | |
914 input a table containing all the information, as well as a dataset collection containing the datasets. | |
915 It is thus the preferable mode if we want to compare a large number of datasets | |
916 (concept of comparing sets via method ID and data ID described in the previous section). | |
917 The input table (batch processing table file) has the following format | |
918 (tab-separated columns: RBP ID, method ID, data ID, BED genomic regions file name): | |
919 | |
920 ========== ============ =============== ============================= | |
921 PUM1 method1 K562_eCLIP PUM1.K562_eclip.method1.bed | |
922 PUM1 method2 K562_eCLIP PUM1.K562_eclip.method2.bed | |
923 PUM1 method3 K562_eCLIP PUM1.K562_eclip.method3.bed | |
924 PUM2 method1 K562_eCLIP PUM2.K562_eclip.method1.bed | |
925 PUM2 method2 K562_eCLIP PUM2.K562_eclip.method2.bed | |
926 PUM2 method3 K562_eCLIP PUM2.K562_eclip.method3.bed | |
927 SLBP method1 K562_eCLIP SLBP.K562_eclip.method1.bed | |
928 SLBP method2 K562_eCLIP SLBP.K562_eclip.method2.bed | |
929 SLBP method3 K562_eCLIP SLBP.K562_eclip.method3.bed | |
930 ========== ============ =============== ============================= | |
931 | |
932 NOTE that each BED file name listed in the table needs to correspond to the name of a dataset inside the | |
933 dataset collection. Conveniently, if you upload files to Galaxy and make a dataset collection out of them, | |
934 the dataset names will correspond to the uploaded file names. | |
935 In the above table, we would produce search results for three different | |
936 methods, on three different RBPs. | |
937 Likewise, if we wanted to compare motif search results across cell types, | |
938 the table could look like this: | |
939 | |
940 ========== ============ =============== ============================= | |
941 PUM1 method1 K562_eCLIP PUM1.K562_eclip.method1.bed | |
942 PUM1 method1 HepG2_eCLIP PUM1.HepG2_eclip.method1.bed | |
943 PUM2 method1 K562_eCLIP PUM2.K562_eclip.method1.bed | |
944 PUM2 method1 HepG2_eCLIP PUM2.HepG2_eclip.method1.bed | |
945 SLBP method1 K562_eCLIP SLBP.K562_eclip.method1.bed | |
946 SLBP method1 HepG2_eCLIP SLBP.HepG2_eclip.method1.bed | |
947 ========== ============ =============== ============================= | |
948 | |
949 Here we would create motif search results across cell types K562 and HepG2, while keeping the peak calling | |
950 method ID constant ("method1"). | |
951 As with the two already discussed search modes, | |
952 the resulting hit statistics output table files (RBP + motif hit statistics) | |
953 can subsequently serve as inputs to RBPBench's comparison mode (*Compare different search results*, section 5). | |
954 | |
955 | |
956 **4. Plot nucleotide distribution at genomic positions** | |
957 | |
958 In this mode, a set of genomic regions is input and the nucleotide distribution is plotted | |
959 around a defined zero position (*Nucleotide distribution plot settings -> Define zero position for plotting*). By default, | |
960 the upstream end position of each region is used (other choices are center and downstream end). | |
961 This for example enables us to look at CLIP-seq crosslink positions and potential nucleotide biases at these sites. | |
962 | |
963 | |
964 **5. Compare different search results** | |
965 | |
966 This mode is used to compare different motif search results (produced by any of the three motif search modes | |
967 described above). Inputs are the RBP and motif hit statistics table files output by the motif search modes. | |
968 As exemplified in the previous sections, the set method IDs and | |
969 data IDs (together with the selected RBP IDs) define what gets compared in comparison mode. | |
970 Based on the IDs in the input tables, RBPBench looks for combinations of RBP ID+method ID+data ID, and produces | |
971 method-ID-centered (with fixed RBP ID + data ID) and / or data-ID-centered (with fixed RBP ID + method ID) comparisons. | |
972 At least two different IDs are needed for a comparison (e.g. two different method IDs or two different data IDs, with same RBP ID). | |
973 The comparison results are presented in an HTML report file, containing a hit statistics table and a | |
974 Venn diagram plot for each found combination. Moreover, the report results are output as table files, | |
975 and the combined motif hits are output in BED format, e.g. for a data ID / method ID centered comparison inside a genome viewer. | |
976 Comparing numbers of unique and shared motif hits between methods also serves as a way of benchmarking different methods. | |
977 Since no ground truth (i.e., set of true / experimentally verified transcriptome-wide binding sites of an RBP) exists, one obvious way to | |
978 benchmark peak calling methods is to look at the enrichment of known RBP binding motifs in regions reported by the peak callers. | |
979 RBPBench makes such evaluations easy, especially by combining modes 2, 3, and 5. | |
980 | |
981 | |
982 ----- | |
983 | |
984 **Tool documentation & repository** | |
985 | |
986 For more information (including a webserver tutorial) please visit the RBPBench website: | |
987 | |
988 https://backofenlab.github.io/RBPBench | |
989 | |
990 | |
991 The RBPBench repository can be found at: | |
992 | |
993 https://github.com/michauhl/RBPBench | |
994 | |
995 The GitHub repository hosts the command line version of RBPBench and also includes a | |
996 comprehensive manual with installation instructions and various usage examples. | |
997 | |
998 | |
999 .. _RBPBench: https://github.com/michauhl/RBPBench | |
1000 .. _documentation: https://github.com/michauhl/RBPBench#hit-statistics-table-files | |
1001 | |
1002 ]]></help> | |
1003 </tool> |