Mercurial > repos > rnateam > rbpbench
diff rbpbench.xml @ 0:7dd2835ce566 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rbpbench commit 0e21bd630200c1f199db8ba5d83b81d4214fc59f
author | rnateam |
---|---|
date | Sun, 03 Dec 2023 12:51:54 +0000 |
parents | |
children | 26c64157456b |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rbpbench.xml Sun Dec 03 12:51:54 2023 +0000 @@ -0,0 +1,1003 @@ +<tool id="rbpbench" name="RBPBench" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> + + <description>- Evaluate CLIP-seq and other genomic region data using a comprehensive collection of RBP binding motifs</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="bio_tools"/> + <expand macro="requirements"/> + + <command detect_errors="exit_code"><![CDATA[ + #if $action_type.action_type_selector == 'search_motifs': + @PREPARE_REF@ + rbpbench search + --in '$action_type.search_bed_file' + --out ./ + --genome reference.fa + --method-id '$action_type.search_method_id' + --data-id '$action_type.search_data_id' + --rbps + #if str($action_type.select_db.select_db_selector) == "default_db": + #if str($action_type.select_db.select_rbps.select_rbps_selector) == "list_db_rbps": + #if str($action_type.select_db.select_rbps.database) != "None": + #for $rbp_id in str($action_type.select_db.select_rbps.database).split(','): + $rbp_id + #end for + #end if + #else: + ALL + #end if + #else: + $action_type.select_db.custom_db_rbp_ids_list + #end if + + #if str($action_type.user_rbp.user_rbp_selector) == "sequence": + USER + --user-rbp-id '$action_type.user_rbp.seq_rbp_id' + --user-meme-xml '$action_type.user_rbp.meme_motifs_xml' + #elif str($action_type.user_rbp.user_rbp_selector) == "structure": + USER + --user-rbp-id '$action_type.user_rbp.str_rbp_id' + --user-cm '$action_type.user_rbp.cm_model_file' + #end if + + #if str($action_type.select_db.select_db_selector) == "custom_db": + --custom-db-id '$action_type.select_db.custom_db_id' + --custom-db-info '$action_type.select_db.custom_db_info_file' + #if $action_type.select_db.custom_db_meme_xml_file: + --custom-db-meme-xml '$action_type.select_db.custom_db_meme_xml_file' + #end if + #if $action_type.select_db.custom_db_cm_model_file: + --custom-db-cm 
'$action_type.select_db.custom_db_cm_model_file' + #end if + #end if + @COMMON_PARAMS@ + #if $action_type.report_plotting_options.set_rbp_id: + --set-rbp-id '$action_type.report_plotting_options.set_rbp_id' + #end if + --motif-distance-plot-range $action_type.report_plotting_options.motif_distance_plot_range + --motif-min-pair-count $action_type.report_plotting_options.motif_min_pair_count + --rbp-min-pair-count $action_type.report_plotting_options.rbp_min_pair_count + #if $action_type.report_plotting_options.gtf_file: + --gtf '$action_type.report_plotting_options.gtf_file' + #end if + #if $action_type.report_plotting_options.tr_list_file: + --tr-list '$action_type.report_plotting_options.tr_list_file' + #end if + #if $action_type.report_plotting_options.list_tr_biotypes: + --tr-types '$action_type.report_plotting_options.list_tr_biotypes' + #end if + --upset-plot-min-degree $action_type.report_plotting_options.upset_plot_min_degree + #if $action_type.report_plotting_options.upset_plot_max_degree: + --upset-plot-max-degree '$action_type.report_plotting_options.upset_plot_max_degree' + #end if + --upset-plot-min-subset-size $action_type.report_plotting_options.upset_plot_min_subset_size + + $action_type.search_output_options.search_report + $action_type.search_output_options.search_plot_motifs + + #if $action_type.report_plotting_options.store_sort_js_in_html: + --sort-js-mode 3 + #else: + --sort-js-mode 2 + #end if + #if $action_type.report_plotting_options.plotly_js_source == 1: + #if $action_type.report_plotting_options.plotly_plot_code_in_html: + --plotly-js-mode 6 + #else: + --plotly-js-mode 3 + #end if + #elif $action_type.report_plotting_options.plotly_js_source == 2: + #if $action_type.report_plotting_options.plotly_plot_code_in_html: + --plotly-js-mode 5 + #else: + --plotly-js-mode 1 + #end if + #else: + #if $action_type.report_plotting_options.plotly_plot_code_in_html: + --plotly-js-mode 7 + #else: + --plotly-js-mode 4 + #end if + #end if + + #if 
$action_type.search_output_options.search_report: + && + sed -i 's/html_report_plots\///g' ./report.rbpbench_search.html + && + cp ./report.rbpbench_search.html '$search_report_html_file' + && + mkdir '$search_report_html_file.extra_files_path' + && + cp -r ./html_report_plots/* '$search_report_html_file.extra_files_path' + #end if + + #if $action_type.search_output_options.search_plot_motifs: + && + sed -i 's/html_motif_plots\///g' ./motif_plots.rbpbench_search.html + && + cp ./motif_plots.rbpbench_search.html '$motif_plots_html_file' + && + mkdir '$motif_plots_html_file.extra_files_path' + && + cp -r ./html_motif_plots/* '$motif_plots_html_file.extra_files_path' + #end if + + + #elif $action_type.action_type_selector == 'batch_search_motifs': + @PREPARE_REF@ + rbpbench batch + --out ./ + --genome reference.fa + --bed + #for $i in $action_type.dataset_inputs: + '$i.dataset_bed_file' + #end for + --rbp-list + #for $i in $action_type.dataset_inputs: + '$i.dataset_rbp_id' + #end for + --data-list + #for $i in $action_type.dataset_inputs: + '$i.dataset_data_id' + #end for + --method-list + #for $i in $action_type.dataset_inputs: + '$i.dataset_method_id' + #end for + @COMMON_PARAMS@ + + #elif $action_type.action_type_selector == 'batch_table_search_motifs': + @PREPARE_REF@ + batch_table_wrapper_rbpbench.py + --out ./ + --genome reference.fa + --table '$action_type.batch_table_file' + --paths + #for $i in $action_type.batch_table_bed_collection: + '$i' + #end for + --ids + #for $i in $action_type.batch_table_bed_collection: + '$i.element_identifier' + #end for + @COMMON_PARAMS@ + + #elif $action_type.action_type_selector == 'plot_nt_dist': + @PREPARE_REF@ + rbpbench dist + --in '$action_type.dist_bed_file' + --out ./ + --genome reference.fa + --cp-mode $action_type.dist_options.dist_cp_mode + --ext $action_type.dist_options.dist_ext + $action_type.dist_options.dist_plot_pdf + + + #elif $action_type.action_type_selector == 'compare_search_results': + rbpbench compare + --in + #for 
$in_file in $action_type.input_tables: + '$in_file' + #end for + --out ./ + #if $action_type.compare_output_options.store_sort_js_in_html: + --sort-js-mode 3 + #else: + --sort-js-mode 2 + #end if + + && + sed -i 's/html_report_plots\///g' ./report.rbpbench_compare.html + && + cp ./report.rbpbench_compare.html '$compare_report_html_file' + && + mkdir '$compare_report_html_file.extra_files_path' + && + cp -r ./html_report_plots/* '$compare_report_html_file.extra_files_path' + + #end if + + ]]></command> + + <inputs> + + <conditional name="action_type"> + + <param name="action_type_selector" type="select" label="Select RBPBench program mode"> + <option value="search_motifs" selected="true">Search RBP binding motifs in genomic regions</option> + <option value="batch_search_motifs">Search RBP binding motifs in genomic regions (multiple inputs)</option> + <option value="batch_table_search_motifs">Search RBP binding motifs in genomic regions (data collection input)</option> + <option value="plot_nt_dist">Plot nucleotide distribution at genomic positions</option> + <option value="compare_search_results">Compare different search results</option> + </param> + + <!-- rbpbench search --> + <when value="search_motifs"> + <param name="search_bed_file" type="data" format="bed" label="Genomic regions BED file" + help="Genomic regions (e.g. 
RBP binding sites) in BED format (>= 6-columns) for RBP binding motif search"/> + <conditional name="reference_genome"> + <param label="Select reference genome sequence (according to provided BED file)" name="reference_genome_selector" type="select"> + <option selected="True" value="builtin">Select built-in genome sequence</option> + <option value="history">Select genome sequence from history</option> + </param> + <when value="builtin"> + <param label="Select built-in genome sequence" name="builtin_genome" type="select"> + <options from_data_table="fasta_indexes"> + <filter column="2" type="sort_by" /> + <validator message="No genomes are available for the selected input dataset" type="no_options" /> + </options> + </param> + </when> + <when value="history"> + <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/> + </when> + </conditional> + <conditional name="select_db"> + <param name="select_db_selector" type="select" label="Select RBP binding motif database"> + <option selected="True" value="default_db" >Human motif database with 259 RBPs and 605 RBP binding motifs (catrapid.omics.v2.1.human.6plus)</option> + <option value="custom_db" >Provide a custom motif database</option> + </param> + <when value="default_db"> + + <conditional name="select_rbps"> + <param name="select_rbps_selector" type="select" label="Select RBPs for motif search"> + <option selected="True" value="list_db_rbps" >Select individual RBPs </option> + <option value="all_db_rbps" >Select all 259 RBPs</option> + </param> + <when value="list_db_rbps"> + <param name="database" label="Select RBPs" type="select" multiple="true"> + <options from_data_table='rbp_ids_table' /> + </param> + </when> + <when value="all_db_rbps"> + <!-- do nothing --> + </when> + </conditional> + </when> + <when value="custom_db"> + <param type="text" name="custom_db_id" value="custom_db_id" + label="Custom motif database ID" + help="Set ID / name for provided 
custom motif database (default: custom_db_id)"/> + + <param type="data" format="txt,tabular" name="custom_db_info_file" + label="Custom motif database info table file" + help="Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments. The motif IDs must correspond to the provided custom MEME / DREME XML and/or covariance models file(s)."/> + + <param type="data" format="memexml" name="custom_db_meme_xml_file" optional="True" + label="Custom motif database MEME / DREME XML file" + help="Provide custom motif database MEME / DREME XML file containing sequence motifs"/> + + <param type="data" format="cm" name="custom_db_cm_model_file" optional="True" + label="Custom motif database covariance models file" + help="Provide custom motif database covariance models file containing structure motif(s) (i.e., covariance model(s))"/> + + <param name="custom_db_rbp_ids_list" type="text" value="RBP1 RBP2 RBP3" + label="Specify RBP IDs from custom motif database" + help="Provide list of RBP IDs found in custom motif database to use for motif search. This can be a single ID, as well as several (separated by spaces, as in the example). To search using all RBPs in the custom database, simply type ALL."/> + </when> + </conditional> + + <conditional name="user_rbp"> + <param label="Add user-supplied motifs" name="user_rbp_selector" type="select"> + <option selected="True" value="no">No</option> + <option value="sequence">Yes (MEME/DREME XML based sequence motif(s))</option> + <option value="structure">Yes (Covariance model based structure motif(s))</option> + </param> + <when value="sequence"> + <param format="memexml" type="data" name="meme_motifs_xml" + label="DREME or MEME motifs XML file" + help="DREME or MEME output XML file containing sequence motif(s)"/> + <param type="text" name="seq_rbp_id" value="User_RBP" + label="RBP ID" + help="RBP ID (RBP name) for provided sequence motif(s). 
Make sure to provide user RBP IDs and motif IDs that are different from selected database RBP and motif IDs."/> + </when> + <when value="structure"> + <param format="cm" type="data" name="cm_model_file" + label="Covariance models file" + help="Covariance models file containing structure motif(s)"/> + <param type="text" name="str_rbp_id" value="User_RBP" + label="RBP ID" + help="RBP ID (RBP name) for provided structure motif(s). Make sure to provide user RBP IDs and motif IDs that are different from selected database RBP and motif IDs."/> + </when> + <when value="no"> + <!-- do nothing --> + </when> + </conditional> + + <param type="text" name="search_method_id" value="method_id" + label="Method ID" + help="Method ID which can be used to describe the peak calling method (e.g. clipper_idr). This ID (together with data ID and set RBP ID(s)) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/> + <param type="text" name="search_data_id" value="data_id" + label="Data ID" + help="Data ID which can be used to describe from which cell type and/or CLIP-seq protocol the data originates (e.g. k562_eclip or pum2_k562_eclip). This ID (together with method ID and set RBP ID(s)) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/> + + <section name="search_options" title="Motif search settings"> + <param name="search_ext" type="text" value="0" + label="Up- and downstream extension of genomic regions" + help='Up- and downstream extension of genomic regions in nucleotides (nt). E.g. 
set to "30" to extend 30 nt on both sides, or "20,10" for different up- and downstream extension (default: 0)'/> + <param name="search_fimo_pval" type="float" value="0.001" + label="FIMO p-value threshold" + help='FIMO p-value threshold (FIMO option: --thresh) for reporting motif hits (default: 0.001)'/> + <param name="search_bed_score_col" type="integer" value="5" + label="BED score column used for p-value calculations" + help="Score column of genomic regions BED file used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)"/> + <param name="search_unstranded" label="Treat genomic regions in BED file as NOT strand-specific" type="boolean" + truevalue="--unstranded" falsevalue="" checked="False" + help="Set if genomic regions in BED file are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with option below) (default: False)"/> + <param name="search_unstranded_ct" label="Count each genomic region twice for RBP hit statistics" type="boolean" + truevalue="--unstranded-ct" falsevalue="" checked="False" + help="Count each genomic region twice for RBP hit statistics when non-strand-specific option above is enabled (default: False)"/> + <param format="txt" type="data" name="fimo_nt_freqs_file" optional="True" + label="Provide FIMO nucleotide frequencies file" + help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file. By default, an internal frequencies file optimized for human transcripts is used"/> + </section> + + <section name="report_plotting_options" title="HTML report options"> + + <param format="gtf" type="data" name="gtf_file" optional="True" + label="GTF file to add genomic annotations to input regions" + help="Provide GTF file with genomic annotations to add to HTML report plots (e.g. from GENCODE or Ensembl). 
By default, the most prominent transcripts will be extracted and used for functional annotation. Alternatively, provide a list of expressed transcripts via --tr-list option (together with --gtf containing the transcripts). Note that currently only features on standard chromosomes (1,2,..,X,Y,MT) are used for annotation"/> + <param format="txt" type="data" name="tr_list_file" optional="True" + label="Transcript IDs file" + help="Supply file with transcript IDs (one ID per row) to define which transcripts to use from GTF file for adding functional annotations to HTML report plots"/> + <param name="list_tr_biotypes" type="text" optional="True" + label="List of transcript biotypes" + help="List of transcript biotypes to consider from GTF file. By default an internal selection of transcript biotypes is used (in addition to intron, CDS, UTR, intergenic). Provide a list of IDs separated by spaces. Note that provided biotype IDs need to be in GTF file!"/> + + <param name="upset_plot_min_degree" type="integer" value="2" + label="Upset plot minimum degree parameter" + help="Upset plot minimum degree parameter for HTML report upset plot. This defines the minimum number of RBPs for a combination to be included (default: 2)"/> + <param name="upset_plot_max_degree" type="integer" value="" optional="True" + label="Upset plot maximum degree parameter" + help="Upset plot maximum degree parameter for HTML report upset plot. By default no maximum degree is set. Useful together with minimum degree to look at specific degrees (e.g. only 2, or between 2 and 3) (default: None)"/> + <param name="upset_plot_min_subset_size" type="integer" value="5" + label="Upset plot minimum subset size parameter" + help="Upset plot minimum subset size parameter for HTML report upset plot. 
This defines the minimum number of hits for a specific RBP combination to be included (default: 5)"/> + + <param type="text" name="set_rbp_id" optional="True" + label="Set reference RBP ID for plotting motif distances" + help="Set reference RBP ID to plot motif distances relative to motifs from this RBP (--set-rbp-id). Motif plot will be centered on best scoring motif of the RBP for each region. Note that set RBP ID needs to be one of the above selected RBP IDs!"/> + <param name="motif_distance_plot_range" type="integer" value="60" + label="Motif distance plot range" + help="Set range of motif distance plot. I.e., centered on the set RBP (--set-rbp-id) motifs, motifs within minus and plus --motif-distance-plot-range will be plotted (default: 60)"/> + <param name="motif_min_pair_count" type="integer" value="10" + label="Motif co-occurrence minimum pair count" + help="Minimum count of co-occurrences of a motif with set RBP ID (--set-rbp-id) motif to be reported and plotted (default: 10)"/> + <param name="rbp_min_pair_count" type="integer" value="10" + label="RBP co-occurrence minimum pair count" + help="Minimum amount of co-occurrences of motifs for an RBP ID compared to set RBP ID (--set-rbp-id) motifs to be reported and plotted (default: 10)"/> + + <param name="store_sort_js_in_html" label="Store JS code for table sorting inside HTML?" + type="boolean" checked="False" + help="Store JavaScript code for table sorting inside output HTML files. By default code is stored locally in extra file located in HTML output folder."/> + <param name="plotly_js_source" type="integer" value="1" min="1" max="3" + label="Specify plotly JS code source" + help="1: Store plotly JavaScript code locally inside HTML output folder. 2: Add hyperlink to report HTML file (internet connection required). 3: Store code inside report HTML file (default: 1)"/> + <param name="plotly_plot_code_in_html" label="Store plotly plotting code inside HTML?" 
+ type="boolean" checked="False" + help="Store plotly plotting code inside HTML. By default code is stored in separate HTML files in HTML report output folder."/> + + </section> + + <section name="search_output_options" title="Output options"> + <param name="search_report" label="Output HTML report?" type="boolean" + truevalue="--report" falsevalue="" checked="True" + help="Generate an HTML report containing RBP co-occurrence + combination + distance statistics and plots (default: True)"/> + <param name="search_plot_motifs" label="Plot RBP motifs?" type="boolean" + truevalue="--plot-motifs" falsevalue="" checked="False" + help="Visualize selected RBP motifs, by outputting sequence logos and motif hit statistics into a separate HTML file (default: False)"/> + <param name="sites_bed_fasta_out" label="Output filtered genomic regions BED + FASTA files" type="boolean" + checked="False" + help="Output filtered genomic regions BED/FASTA file used for motif search. Filtered means that the actual regions used for motif search can differ from the input genomic regions, e.g. through default filtering by chromosome ID (only regions with valid IDs), removal of duplicated regions, or through optional extension of the regions"/> + <param name="motif_hits_bed_out" label="Output motif hits BED file" type="boolean" + checked="False" + help="Output motif hits BED file containing motif hits in provided genomic regions for selected RBPs"/> + <param name="contingency_table_out" label="Output contingency table containing co-occurrence p-values" type="boolean" + checked="False" + help="Output contingency table containing co-occurrence p-values (Fisher's exact test) between each RBP pair (see manual for more information)"/> + <param name="region_annotations_out" label="Output genomic region annotations table file?" + type="boolean" checked="False" + help="Output genomic region annotations table file containing assigned annotations for each BED input region. 
Note that a GTF file has to be provided (default: False)"/> + </section> + </when> + + <!-- rbpbench batch --> + <when value="batch_search_motifs"> + <repeat name="dataset_inputs" min="1" title="Dataset"> + <param name="dataset_bed_file" type="data" format="bed" + label="Genomic regions BED file" + help="Genomic regions (e.g. RBP binding sites) in BED format (>= 6-columns) for RBP binding motif search"/> + <param name="dataset_rbp_id" label="Select RBP for motif search" type="select"> + <options from_data_table='rbp_ids_table' /> + </param> + <param type="text" name="dataset_method_id" value="method_id" + label="Method ID" + help="Method ID which can be used to describe the peak calling method (e.g. clipper_idr). This ID (together with data ID and set RBP ID) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/> + <param type="text" name="dataset_data_id" value="data_id" + label="Data ID" + help="Data ID which can be used to describe from which cell type and/or CLIP-seq protocol the data originates (e.g. k562_eclip or pum2_k562_eclip). 
This ID (together with method ID and set RBP ID) defines which search results get compared in RBPBench's comparison mode (see Help below for more details)."/> + </repeat> + + <conditional name="reference_genome"> + <param label="Select reference genome sequence (according to the provided BED files)" name="reference_genome_selector" type="select"> + <option selected="True" value="builtin">Select built-in genome sequence</option> + <option value="history">Select genome sequence from history</option> + </param> + <when value="builtin"> + <param label="Select built-in genome sequence" name="builtin_genome" type="select"> + <options from_data_table="fasta_indexes"> + <filter column="2" type="sort_by" /> + <validator message="No genomes are available for the selected input dataset" type="no_options" /> + </options> + </param> + </when> + <when value="history"> + <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/> + </when> + </conditional> + + <section name="search_options" title="Motif search settings"> + <param name="search_ext" type="text" value="0" + label="Up- and downstream extension of genomic regions" + help='Up- and downstream extension of genomic regions in nucleotides (nt). E.g. set to "30" to extend 30 nt on both sides, or "20,10" for different up- and downstream extension (default: 0)'/> + <param name="search_fimo_pval" type="float" value="0.001" + label="FIMO p-value threshold" + help='FIMO p-value threshold (FIMO option: --thresh) for reporting motif hits (default: 0.001)'/> + <param name="search_bed_score_col" type="integer" value="5" + label="BED score column used for p-value calculations" + help="Score column of genomic regions BED files used for p-value calculations. BED score can be e.g. 
log2 fold change or -log10 p-value of the region (default: 5)"/> + <param name="search_unstranded" label="Treat genomic regions in BED file as NOT strand-specific" type="boolean" + truevalue="--unstranded" falsevalue="" checked="False" + help="Set if genomic regions in BED files are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with option below) (default: False)"/> + <param name="search_unstranded_ct" label="Count each genomic region twice for RBP hit statistics" type="boolean" + truevalue="--unstranded-ct" falsevalue="" checked="False" + help="Count each genomic region twice for RBP hit statistics when non-strand-specific option above is enabled (default: False)"/> + <param format="txt" type="data" name="fimo_nt_freqs_file" optional="True" + label="Provide FIMO nucleotide frequencies file" + help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file. By default, an internal frequencies file optimized for human transcripts is used"/> + </section> + + <section name="search_output_options" title="Output options"> + <param name="batch_motif_hits_bed_out" label="Output motif hits BED file" type="boolean" + checked="False" + help="Output motif hits BED file containing motif hits for all input datasets"/> + </section> + + </when> + + <!-- rbpbench batch data collection + table --> + <when value="batch_table_search_motifs"> + + <param name="batch_table_bed_collection" type="data_collection" collection_type="list" format="bed" + label="Data collection containing genomic regions BED files" + help="Data collection containing genomic regions BED files to be processed. 
Note that dataset names inside collection must correspond to names given in the batch processing table file below"/> + + <param name="batch_table_file" type="data" format="txt,tabular" + label="Provide batch processing table file" + help="Provide batch processing table file with one row for each batch job. Each row contains the tab-delimited information: RBP ID (RBP name), method ID, data ID, dataset name. The dataset name must be present in the supplied data collection of BED files"/> + + <conditional name="reference_genome"> + <param label="Select reference genome sequence (according to the provided BED files)" name="reference_genome_selector" type="select"> + <option selected="True" value="builtin">Select built-in genome sequence</option> + <option value="history">Select genome sequence from history</option> + </param> + <when value="builtin"> + <param label="Select built-in genome sequence" name="builtin_genome" type="select"> + <options from_data_table="fasta_indexes"> + <filter column="2" type="sort_by" /> + <validator message="No genomes are available for the selected input dataset" type="no_options" /> + </options> + </param> + </when> + <when value="history"> + <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/> + </when> + </conditional> + + <section name="search_options" title="Motif search settings"> + <param name="search_ext" type="text" value="0" + label="Up- and downstream extension of genomic regions" + help='Up- and downstream extension of genomic regions in nucleotides (nt). E.g. 
set to "30" to extend 30 nt on both sides, or "20,10" for different up- and downstream extension (default: 0)'/> + <param name="search_fimo_pval" type="float" value="0.001" + label="FIMO p-value threshold" + help='FIMO p-value threshold (FIMO option: --thresh) for reporting motif hits (default: 0.001)'/> + <param name="search_bed_score_col" type="integer" value="5" + label="BED score column used for p-value calculations" + help="Score column of genomic regions BED files used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)"/> + <param name="search_unstranded" label="Treat genomic regions in BED file as NOT strand-specific" type="boolean" + truevalue="--unstranded" falsevalue="" checked="False" + help="Set if genomic regions in BED files are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with option below) (default: False)"/> + <param name="search_unstranded_ct" label="Count each genomic region twice for RBP hit statistics" type="boolean" + truevalue="--unstranded-ct" falsevalue="" checked="False" + help="Count each genomic region twice for RBP hit statistics when non-strand-specific option above is enabled (default: False)"/> + <param format="txt" type="data" name="fimo_nt_freqs_file" optional="True" + label="Provide FIMO nucleotide frequencies file" + help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file. 
By default, an internal frequencies file optimized for human transcripts is used"/> + </section> + + <section name="search_output_options" title="Output options"> + <param name="batch_table_motif_hits_bed_out" label="Output motif hits BED file" type="boolean" + checked="False" + help="Output motif hits BED file containing motif hits for all input datasets"/> + </section> + + </when> + + <!-- rbpbench dist --> + <when value="plot_nt_dist"> + + <param name="dist_bed_file" type="data" format="bed" + label="Genomic regions BED file" + help="Genomic regions (e.g. RBP binding sites) in BED format. Zero position for plotting can be defined in options"/> + + <conditional name="reference_genome"> + <param label="Select reference genome sequence (according to BED file)" name="reference_genome_selector" type="select"> + <option selected="True" value="history">Select genome sequence from history</option> + <option value="builtin">Select built-in genome sequence</option> + </param> + <when value="history"> + <param format="fasta" type="data" name="history_genome" label="Select genome sequence in FASTA format from history"/> + </when> + <when value="builtin"> + <param label="Select built-in genome sequence" name="builtin_genome" type="select"> + <options from_data_table="fasta_indexes"> + <filter column="2" type="sort_by" /> + <validator message="No genomes are available for the selected input dataset" type="no_options" /> + </options> + </param> + </when> + </conditional> + + <section name="dist_options" title="Nucleotide distribution plot settings"> + <param name="dist_cp_mode" type="integer" value="1" min="1" max="3" + label="Define zero position for plotting" + help="Define which position of genomic sites to use as zero position for plotting. 
1: upstream end position, 2: center position, 3: downstream end position (default: 1)"/> + <param name="dist_ext" type="integer" value="10" + label="Up- and downstream extension of defined genomic positions" + help="Up- and downstream extension of defined genomic positions in nucleotides (nt) to include in plotting (default: 10)"/> + <param name="dist_plot_pdf" label="Plot as PDF?" type="boolean" + truevalue="--plot-pdf" falsevalue="" checked="False" + help="Plot nucleotide distribution as PDF (default: PNG)"/> + <param name="sites_bed_fasta_out" label="Output genomic regions BED + FASTA files" type="boolean" + checked="False" + help="Output genomic regions BED/FASTA file used for plotting"/> + </section> + + </when> + + <!-- rbpbench compare --> + <when value="compare_search_results"> + + <param name="input_tables" type="data" format="tabular" multiple="true" + label="Motif search results" + help="Supply motif search results table files for comparison. These are the hit statistics table files output by single or batch motif search jobs. 
Both RBP and motif hit statistics table files are needed, and can be from any single or batch search job."/> + + <section name="compare_output_options" title="Output options"> + <param name="compared_motif_hits_table" label="Output compared motif hits table file" type="boolean" + checked="False" + help="Output compared motif hits table file showing motif hits and which data or method IDs contain them"/> + <param name="compared_motif_hits_bed" label="Output compared motif hits BED file" type="boolean" + checked="False" + help="Output compared motif hits table file showing motif hits and which data or method IDs contain them in BED format"/> + <param name="comparisons_stats_out" label="Output comparison statistics table file" type="boolean" + checked="False" + help="Output comparison statistics table file containing the statistics found in the comparison HTML report"/> + <param name="store_sort_js_in_html" label="Store JS code for table sorting inside HTML?" + type="boolean" checked="False" + help="Store JavaScript code for table sorting inside output HTML files. 
By default code is stored locally in extra file located in HTML output folder."/> + </section> + </when> + + </conditional> + + </inputs> + + <outputs> + + <!-- rbpbench search outputs --> + <data name="rbp_hit_stats_file" format="tabular" from_work_dir="rbp_hit_stats.tsv" label="${tool.name} on ${on_string}: RBP hit statistics table file"> + <filter>action_type["action_type_selector"] == "search_motifs"</filter> + </data> + <data name="motif_hit_stats_file" format="tabular" from_work_dir="motif_hit_stats.tsv" label="${tool.name} on ${on_string}: Motif hit statistics table file"> + <filter>action_type["action_type_selector"] == "search_motifs"</filter> + </data> + <data name="search_report_html_file" format="html" from_work_dir="report.rbpbench_search.html" label="${tool.name} on ${on_string}: Search report HTML file"> + <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["search_report"]</filter> + </data> + <data name="motif_plots_html_file" format="html" from_work_dir="motif_plots.rbpbench_search.html" label="${tool.name} on ${on_string}: Motif plots HTML file"> + <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["search_plot_motifs"]</filter> + </data> + <data name="in_sites_bed_file" format="bed" from_work_dir="in_sites.filtered.bed" label="${tool.name} on ${on_string}: Genomic regions used for motif search BED file"> + <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["sites_bed_fasta_out"]</filter> + </data> + <data name="in_sites_fa_file" format="fasta" from_work_dir="in_sites.filtered.fa" label="${tool.name} on ${on_string}: Genomic regions used for motif search FASTA file"> + <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["sites_bed_fasta_out"]</filter> + </data> + <data name="motif_hits_bed_file" format="bed" 
from_work_dir="motif_hits.rbpbench_search.bed" label="${tool.name} on ${on_string}: Motif hits on genomic regions BED file"> + <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["motif_hits_bed_out"]</filter> + </data> + <data name="contingency_table_file" format="tabular" from_work_dir="contingency_table_results.tsv" label="${tool.name} on ${on_string}: RBP co-occurrence contingency table file"> + <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["contingency_table_out"]</filter> + </data> + <data name="region_annotations_file" format="tabular" from_work_dir="region_annotations.tsv" label="${tool.name} on ${on_string}: genomic region annotations table file"> + <filter>action_type["action_type_selector"] == "search_motifs" and action_type["search_output_options"]["region_annotations_out"] and action_type["report_plotting_options"]["gtf_file"]</filter> + </data> + + <!-- rbpbench batch outputs --> + <data name="batch_rbp_hit_stats_file" format="tabular" from_work_dir="rbp_hit_stats.tsv" label="${tool.name} on ${on_string}: Batch RBP hit statistics table file"> + <filter>action_type["action_type_selector"] == "batch_search_motifs"</filter> + </data> + <data name="batch_motif_hit_stats_file" format="tabular" from_work_dir="motif_hit_stats.tsv" label="${tool.name} on ${on_string}: Batch motif hit statistics table file"> + <filter>action_type["action_type_selector"] == "batch_search_motifs"</filter> + </data> + <data name="batch_motif_hits_bed_file" format="bed" from_work_dir="motif_hits.rbpbench_batch.bed" label="${tool.name} on ${on_string}: Batch motif hits on genomic regions BED files"> + <filter>action_type["action_type_selector"] == "batch_search_motifs" and action_type["search_output_options"]["batch_motif_hits_bed_out"]</filter> + </data> + + <!-- rbpbench batch table outputs --> + <data name="batch_table_rbp_hit_stats_file" format="tabular" 
from_work_dir="rbp_hit_stats.tsv" label="${tool.name} on ${on_string}: Batch data collection RBP hit statistics table file"> + <filter>action_type["action_type_selector"] == "batch_table_search_motifs"</filter> + </data> + <data name="batch_table_motif_hit_stats_file" format="tabular" from_work_dir="motif_hit_stats.tsv" label="${tool.name} on ${on_string}: Batch data collection motif hit statistics table file"> + <filter>action_type["action_type_selector"] == "batch_table_search_motifs"</filter> + </data> + <data name="batch_table_motif_hits_bed_file" format="bed" from_work_dir="motif_hits.rbpbench_batch.bed" label="${tool.name} on ${on_string}: Motif hits on genomic regions BED files from data collection"> + <filter>action_type["action_type_selector"] == "batch_table_search_motifs" and action_type["search_output_options"]["batch_table_motif_hits_bed_out"]</filter> + </data> + + <!-- rbpbench dist outputs --> + <data name="nt_dist_plot_png_file" format="png" from_work_dir="nt_dist_zero_pos.png" label="${tool.name} on ${on_string}: Nucleotide distribution plot PNG file"> + <filter>action_type["action_type_selector"] == "plot_nt_dist" and not action_type["dist_options"]["dist_plot_pdf"]</filter> + </data> + <data name="nt_dist_plot_pdf_file" format="pdf" from_work_dir="nt_dist_zero_pos.pdf" label="${tool.name} on ${on_string}: Nucleotide distribution plot PDF file"> + <filter>action_type["action_type_selector"] == "plot_nt_dist" and action_type["dist_options"]["dist_plot_pdf"]</filter> + </data> + <data name="plot_sites_bed_file" format="bed" from_work_dir="in_sites.filtered.bed" label="${tool.name} on ${on_string}: Genomic regions used for plotting BED file"> + <filter>action_type["action_type_selector"] == "plot_nt_dist" and action_type["dist_options"]["sites_bed_fasta_out"]</filter> + </data> + <data name="plot_sites_fa_file" format="fasta" from_work_dir="in_sites.filtered.fa" label="${tool.name} on ${on_string}: Genomic regions used for plotting FASTA file"> + 
<filter>action_type["action_type_selector"] == "plot_nt_dist" and action_type["dist_options"]["sites_bed_fasta_out"]</filter> + </data> + + <!-- rbpbench compare outputs: the HTML report is collected for every compare job; the three optional outputs below are gated by the boolean params of the "compare_output_options" section --> + <data name="compare_report_html_file" format="html" from_work_dir="report.rbpbench_compare.html" label="${tool.name} on ${on_string}: Comparison report HTML file"> + <filter>action_type["action_type_selector"] == "compare_search_results"</filter> + </data> + <data name="compared_motif_hits_bed_file" format="bed" from_work_dir="motif_hits.rbpbench_compare.bed" label="${tool.name} on ${on_string}: Compared motif hits BED file"> + <filter>action_type["action_type_selector"] == "compare_search_results" and action_type["compare_output_options"]["compared_motif_hits_bed"]</filter> + </data> + <data name="compared_motif_hits_table_file" format="tabular" from_work_dir="motif_hits.rbpbench_compare.tsv" label="${tool.name} on ${on_string}: Compared motif hits table file"> + <filter>action_type["action_type_selector"] == "compare_search_results" and action_type["compare_output_options"]["compared_motif_hits_table"]</filter> + </data> + <data name="compared_stats_table_file" format="tabular" from_work_dir="comparison_stats.rbpbench_compare.tsv" label="${tool.name} on ${on_string}: Comparison statistics table file"> + <filter>action_type["action_type_selector"] == "compare_search_results" and action_type["compare_output_options"]["comparisons_stats_out"]</filter> + </data> + + </outputs> + + <tests> + + <!-- rbpbench search tests --> + <test expect_num_outputs="7"> + <param name="action_type_selector" value="search_motifs"/> + <param name="reference_genome_selector" value="history" /> + <param name="history_genome" value="test.fa" /> + <param name="search_bed_file" value="test.bed" ftype="bed"/> + <param name="select_rbps_selector" value="list_db_rbps"/> + <param name="database" value="PUM1,PUM2" /> + <param name="search_report" value="True"/> + <param name="search_plot_motifs" value="False"/> + <param
name="sites_bed_fasta_out" value="True"/> + <param name="motif_hits_bed_out" value="True"/> + <param name="contingency_table_out" value="True"/> + <output name="rbp_hit_stats_file" file="rbp_hit_stats.rbpbench_search.tsv" compare="sim_size"/> + <output name="motif_hit_stats_file" file="motif_hit_stats.rbpbench_search.tsv" compare="sim_size"/> + <output name="search_report_html_file" file="report.rbpbench_search.html" compare="sim_size"/> + <output name="in_sites_bed_file" file="in_sites.filtered.rbpbench_search.bed"/> + <output name="in_sites_fa_file" file="in_sites.filtered.rbpbench_search.fa"/> + <output name="motif_hits_bed_file" file="motif_hits.rbpbench_search.bed"/> + <output name="contingency_table_file" file="contingency_table_results.rbpbench_search.tsv"/> + </test> + <!-- test builtin fasta --> + <test expect_num_outputs="7"> + <param name="action_type_selector" value="search_motifs"/> + <param name="reference_genome_selector" value="builtin" /> + <param name="builtin_genome" value="testid" /> + <param name="search_bed_file" value="test.bed" ftype="bed"/> + <param name="select_rbps_selector" value="list_db_rbps"/> + <param name="database" value="PUM1,PUM2" /> + <param name="search_report" value="True"/> + <param name="search_plot_motifs" value="False"/> + <param name="sites_bed_fasta_out" value="True"/> + <param name="motif_hits_bed_out" value="True"/> + <param name="contingency_table_out" value="True"/> + <output name="rbp_hit_stats_file" file="rbp_hit_stats.rbpbench_search.tsv" compare="sim_size"/> + <output name="motif_hit_stats_file" file="motif_hit_stats.rbpbench_search.tsv" compare="sim_size"/> + <output name="search_report_html_file" file="report.rbpbench_search.html" compare="sim_size"/> + <output name="in_sites_bed_file" file="in_sites.filtered.rbpbench_search.bed"/> + <output name="in_sites_fa_file" file="in_sites.filtered.rbpbench_search.fa"/> + <output name="motif_hits_bed_file" file="motif_hits.rbpbench_search.bed"/> + <output 
name="contingency_table_file" file="contingency_table_results.rbpbench_search.tsv"/> + </test> + <!-- test user-supplied structure motif (user_rbp_selector=structure with a covariance model file and user RBP ID) --> + <test expect_num_outputs="2"> + <param name="action_type_selector" value="search_motifs"/> + <param name="reference_genome_selector" value="history" /> + <param name="history_genome" value="test.slbp_user.fa" /> + <param name="search_bed_file" value="test.slbp_user.bed" ftype="bed"/> + <param name="user_rbp_selector" value="structure"/> + <param name="cm_model_file" value="SLBP_USER.cm" /> + <param name="str_rbp_id" value="SLBP_USER" /> + <param name="search_report" value="False"/> + <output name="rbp_hit_stats_file" file="rbp_hit_stats.rbpbench_search.slbp_user.tsv" compare="sim_size"/> + <output name="motif_hit_stats_file" file="motif_hit_stats.rbpbench_search.slbp_user.tsv" compare="sim_size"/> + </test> + <!-- test search with all database RBPs (select_rbps_selector=all_db_rbps); only the motif hits BED output is compared --> <test expect_num_outputs="3"> + <param name="action_type_selector" value="search_motifs"/> + <param name="reference_genome_selector" value="history" /> + <param name="history_genome" value="test.fa" /> + <param name="search_bed_file" value="test.bed" ftype="bed"/> + <param name="select_db_selector" value="default_db"/> + <param name="select_rbps_selector" value="all_db_rbps"/> + <param name="search_report" value="False"/> + <param name="search_plot_motifs" value="False"/> + <param name="sites_bed_fasta_out" value="False"/> + <param name="motif_hits_bed_out" value="True"/> + <param name="contingency_table_out" value="False"/> + <output name="motif_hits_bed_file" file="motif_hits.rbpbench_search.test_all.bed"/> + </test> + <!-- test search with a GTF file supplied; only the region annotations table output is compared --> <test expect_num_outputs="4"> + <param name="action_type_selector" value="search_motifs"/> + <param name="reference_genome_selector" value="history" /> + <param name="history_genome" value="test.fa" /> + <param name="search_bed_file" value="test.bed" ftype="bed"/> + <param name="gtf_file" value="test_search.gtf" ftype="gtf"/> + <param name="select_db_selector" value="default_db"/> + <param name="select_rbps_selector" value="all_db_rbps"/> +
<param name="search_report" value="True"/> + <param name="search_plot_motifs" value="False"/> + <param name="sites_bed_fasta_out" value="False"/> + <param name="motif_hits_bed_out" value="False"/> + <param name="contingency_table_out" value="False"/> + <param name="region_annotations_out" value="True"/> + <output name="region_annotations_file" file="test_search_gtf.region_annotations.tsv"/> + </test> + <!-- test custom motif database (select_db_selector=custom_db) with info, sequence motif and covariance model files plus an RBP ID list --> <test expect_num_outputs="3"> + <param name="action_type_selector" value="search_motifs"/> + <param name="reference_genome_selector" value="history" /> + <param name="history_genome" value="test.fa" /> + <param name="search_bed_file" value="test.bed" ftype="bed"/> + <param name="select_db_selector" value="custom_db"/> + <param name="custom_db_id" value="custom_yo" /> + <param name="custom_db_info_file" value="test_custom.info.txt" /> + <param name="custom_db_meme_xml_file" value="test_custom.seq_motifs.meme" /> + <param name="custom_db_cm_model_file" value="test_custom.str_motifs.cm" /> + <param name="custom_db_rbp_ids_list" value="PUM1 PUM2 SLBP" /> + <param name="search_report" value="False"/> + <param name="search_plot_motifs" value="False"/> + <param name="sites_bed_fasta_out" value="False"/> + <param name="motif_hits_bed_out" value="True"/> + <param name="contingency_table_out" value="False"/> + <param name="region_annotations_out" value="False"/> + <output name="motif_hits_bed_file" file="test_custom.motif_hits.rbpbench_search.bed"/> + </test> + + <!-- rbpbench batch tests --> + <test expect_num_outputs="3"> + <param name="action_type_selector" value="batch_search_motifs"/> + <param name="reference_genome_selector" value="history" /> + <param name="history_genome" value="test.fa" /> + <repeat name="dataset_inputs"> + <param name="dataset_bed_file" value="test.bed"/> + <param name="dataset_method_id" value="method-id1" /> + <param name="dataset_data_id" value="data-id1" /> + <param name="dataset_rbp_id" value="PUM1" /> + </repeat> + <repeat
name="dataset_inputs"> + <param name="dataset_bed_file" value="test.bed"/> + <param name="dataset_method_id" value="method-id2" /> + <param name="dataset_data_id" value="data-id2" /> + <param name="dataset_rbp_id" value="PUM2" /> + </repeat> + <param name="batch_motif_hits_bed_out" value="True"/> + <output name="batch_rbp_hit_stats_file" file="rbp_hit_stats.test_batch.tsv" compare="sim_size"/> + <output name="batch_motif_hit_stats_file" file="motif_hit_stats.test_batch.tsv" compare="sim_size"/> + <output name="batch_motif_hits_bed_file" file="motif_hits.rbpbench_batch.test_batch.bed"/> + </test> + + <!-- rbpbench batch table tests --> + <test expect_num_outputs="3"> + <param name="action_type_selector" value="batch_table_search_motifs"/> + <param name="reference_genome_selector" value="history" /> + <param name="history_genome" value="test.fa" /> + <param name="batch_table_bed_collection"> + <collection type="list"> + <element name="test1.bed" value="test1.bed"/> + <element name="test2.bed" value="test2.bed"/> + </collection> + </param> + <param name="batch_table_file" value="test_table.txt"/> + <param name="batch_table_motif_hits_bed_out" value="True"/> + <output name="batch_table_rbp_hit_stats_file" file="rbp_hit_stats.table_test.tsv" compare="sim_size"/> + <output name="batch_table_motif_hit_stats_file" file="motif_hit_stats.table_test.tsv" compare="sim_size"/> + <output name="batch_table_motif_hits_bed_file" file="motif_hits.rbpbench_batch.table_test.bed"/> + </test> + + <!-- rbpbench compare tests --> + <test expect_num_outputs="4"> + <param name="action_type_selector" value="compare_search_results"/> + <param name="input_tables" value="rbp_hit_stats.compare_test.dewseq.tsv,rbp_hit_stats.compare_test.clipper_idr.tsv,motif_hit_stats.compare_test.dewseq.tsv,motif_hit_stats.compare_test.clipper_idr.tsv" ftype="tabular" /> + <param name="compared_motif_hits_table" value="True"/> + <param name="compared_motif_hits_bed" value="True"/> + <param 
name="comparisons_stats_out" value="True"/> + <output name="compare_report_html_file" file="report.rbpbench_compare.test.html" compare="sim_size"/> + <output name="compared_motif_hits_bed_file" file="motif_hits.rbpbench_compare.test.bed"/> + <output name="compared_motif_hits_table_file" file="motif_hits.rbpbench_compare.test.tsv"/> + <output name="compared_stats_table_file" file="comparison_stats.rbpbench_compare.test.tsv"/> + </test> + + </tests> + <help><![CDATA[ + + +**What is RBPBench?** + + +RBPBench_ is a multi-function tool to evaluate CLIP-seq and other genomic region +data using a comprehensive collection of known RNA-binding protein (RBP) binding motifs. +RBPBench can be used for a variety of purposes, from RBP motif search (database or +user-supplied RBPs) in genomic regions, over motif co-occurrence analysis, to benchmarking +CLIP-seq peak caller methods as well as comparisons across cell types and +CLIP-seq protocols. + +----- + +**RBPBench program modes** + +RBPBench on Galaxy provides the following main functions (Choose on top via "Select RBPBench program mode"): + +1) Search RBP binding motifs in genomic regions +2) Search RBP binding motifs in genomic regions (multiple inputs) +3) Search RBP binding motifs in genomic regions (data collection input) +4) Plot nucleotide distribution at genomic positions +5) Compare different search results + + +**1. Search RBP binding motifs in genomic regions** + +In this mode we can select any number of RBPs of interest and search for RBP binding motifs in a given +set of genomic regions (*Genomic regions BED file*). A built-in high-quality database of human RBP binding motifs +(currently containing 259 RBPs and 605 motifs) is used by default. Moreover, users can add their own motifs +(*Add user-supplied motifs*), as well as define their own database (*Provide a custom RBP motif database*). +Both sequence (MEME/DREME XML format) and structure motifs (covariance models) are supported.
+Comprehensive hit statistics (both on RBP and single motif level) are output as table files, +together with an informative HTML report containing various plots and tables +(see Output options to control what files are output). +Hit statistics output table formats are described in the RBPBench documentation_. +The HTML report includes statistics for each RBP on enrichment of motifs in higher scoring regions, +as well as a heatmap of RBP co-occurrences in genomic regions, and an upset plot +on present RBP combinations (*HTML report options* for finetuning). +If a GTF file is provided (*HTML report options -> GTF file*), genomic region annotations are also added to the regions and plots. +Furthermore, motif distances (RBP and motif level) can be plotted relative to a set reference RBP +(*HTML report options -> Set reference RBP ID*). +Motif search settings can be adapted, e.g. to apply up- and/or downstream extension to the genomic regions +before search. Motifs for selected RBPs can also be plotted in a separate HTML file (*Output options -> Plot RBP motifs*). +To compare motif search results (mode: *Compare different search results*), +data ID and method ID can be set accordingly (more details in sections 2, 3, and 5). + + +**2. Search RBP binding motifs in genomic regions (multiple inputs)** + +This mode allows the input of more than one set of genomic regions (via *+ Insert Dataset*). +For each input, an RBP for motif search needs to be selected. Optionally (for comparing +different search results), descriptive data + method IDs can be added (also see *Compare different search results*). +For example, if two different peak calling methods (method1, method2) have been used to +extract RBP binding regions from CLIP-seq data of RBP RBPX, and we want to compare these two methods later on, we would: +*+ Insert Dataset*: input the set (i.e., BED file) produced by method1, choose the CLIP-ped RBP (RBPX) + add method ID "method1". 
+*+ Insert Dataset*: input the set produced by method2, again choose RBPX, and add method ID "method2". +The data ID we keep constant, ideally choosing an ID that describes the data (e.g. cell type, CLIP-seq protocol, CLIP-ped RBP). +For example, if the cell type is K562, and the CLIP-seq protocol is eCLIP, we could specify +the data ID "K562_eCLIP" or "RBPX_K562_eCLIP". We can repeat this for other proteins by +adding the respective inputs. Finally, for comparing the two methods, +all we need to do is to use the two produced hit statistics output tables (RBP + motif hit statistics) +as inputs in *Compare different search results* mode. +The same also works the other way around, by keeping the method ID constant and changing the data ID. +For example, if we want to compare motif search results across different cell types, we can use +different data IDs while keeping the method ID. + + +**3. Search RBP binding motifs in genomic regions (data collection input)** + +This mode is identical to the previous one (multiple inputs), except that instead of +manually defining each input (dataset, RBP, method ID, data ID), we simply +input a table containing all the information, as well as a dataset collection containing the datasets. +It is thus the preferable mode if we want to compare a large number of datasets +(concept of comparing sets via method ID and data ID described in the previous section). 
+The input table (batch processing table file) has the following format +(tab-separated columns: RBP ID, method ID, data ID, BED genomic regions file name): + +========== ============ =============== ============================= +PUM1 method1 K562_eCLIP PUM1.K562_eclip.method1.bed +PUM1 method2 K562_eCLIP PUM1.K562_eclip.method2.bed +PUM1 method3 K562_eCLIP PUM1.K562_eclip.method3.bed +PUM2 method1 K562_eCLIP PUM2.K562_eclip.method1.bed +PUM2 method2 K562_eCLIP PUM2.K562_eclip.method2.bed +PUM2 method3 K562_eCLIP PUM2.K562_eclip.method3.bed +SLBP method1 K562_eCLIP SLBP.K562_eclip.method1.bed +SLBP method2 K562_eCLIP SLBP.K562_eclip.method2.bed +SLBP method3 K562_eCLIP SLBP.K562_eclip.method3.bed +========== ============ =============== ============================= + +NOTE that each BED file name listed in the table (fourth column) needs to correspond to the name of a dataset inside the +dataset collection. Conveniently, if you upload files to Galaxy and make a dataset collection out of them, +the dataset names will correspond to the uploaded file names. +In the above table, we would produce search results for three different +methods, on three different RBPs. +Likewise, if we wanted to compare motif search results across cell types, +the table could look like this: + +========== ============ =============== ============================= +PUM1 method1 K562_eCLIP PUM1.K562_eclip.method1.bed +PUM1 method1 HepG2_eCLIP PUM1.HepG2_eclip.method1.bed +PUM2 method1 K562_eCLIP PUM2.K562_eclip.method1.bed +PUM2 method1 HepG2_eCLIP PUM2.HepG2_eclip.method1.bed +SLBP method1 K562_eCLIP SLBP.K562_eclip.method1.bed +SLBP method1 HepG2_eCLIP SLBP.HepG2_eclip.method1.bed +========== ============ =============== ============================= + +Here we would create motif search results across cell types K562 and HepG2, while keeping the peak calling +method ID constant ("method1").
+As with the two already discussed search modes, +the resulting hit statistics output table files (RBP + motif hit statistics) +can subsequently serve as inputs to RBPBench's comparison mode (*Compare different search results*, section 5). + + +**4. Plot nucleotide distribution at genomic positions** + +In this mode, a set of genomic regions is input and the nucleotide distribution is plotted +around a defined zero position (*Nucleotide distribution plot settings -> Define zero position for plotting*). By default, +the upstream end position of each region is used (other choices are center and downstream end). +This for example enables us to look at CLIP-seq crosslink positions and potential nucleotide biases at these sites. + + +**5. Compare different search results** + +This mode is used to compare different motif search results (produced by any of the three motif search modes +described above). Inputs are the RBP and motif hit statistics table files output by the motif search modes. +As exemplified in the previous sections, the set method IDs and +data IDs (together with the selected RBP IDs) define what gets compared in comparison mode. +Based on the IDs in the input tables, RBPBench looks for combinations of RBP ID+method ID+data ID, and produces +method-ID-centered (with fixed RBP ID + data ID) and / or data-ID-centered (with fixed RBP ID + method ID) comparisons. +At least two different IDs are needed for a comparison (e.g. two different method IDs or two different data IDs, with the same RBP ID). +The comparison results are presented in an HTML report file, containing a hit statistics table and a +Venn diagram plot for each found combination. Moreover, the report results are output as table files, +and the combined motifs are output in BED format, for a data ID / method ID centered comparison e.g. inside a Genome Viewer. +Comparing numbers of unique and shared motif hits between methods also serves as a way of benchmarking different methods.
+Since no ground truth (i.e., a set of true / experimentally verified transcriptome-wide binding sites of an RBP) exists, one obvious way to +benchmark peak calling methods is to look at the enrichment of known RBP binding motifs in regions reported by the peak callers. +RBPBench makes such evaluations easy, especially by combining modes 2, 3, and 5. + + +----- + +**Tool documentation & repository** + +For more information (including a webserver tutorial) please visit the RBPBench website: + +https://backofenlab.github.io/RBPBench + + +The RBPBench repository can be found at: + +https://github.com/michauhl/RBPBench + +The GitHub repository hosts the command line version of RBPBench and also includes a +comprehensive manual with installation instructions and various usage examples. + + +.. _RBPBench: https://github.com/michauhl/RBPBench +.. _documentation: https://github.com/michauhl/RBPBench#hit-statistics-table-files + + ]]></help> +</tool>