Repository 'rbpbench'
hg clone https://toolshed.g2.bx.psu.edu/repos/rnateam/rbpbench

Changeset 0:7dd2835ce566 (2023-12-03)
Next changeset 1:b022c6591515 (2023-12-03)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rbpbench commit 0e21bd630200c1f199db8ba5d83b81d4214fc59f
added:
batch_table_wrapper.py
macros.xml
rbpbench.xml
test-data/SLBP_USER.cm
test-data/comparison_stats.rbpbench_compare.test.tsv
test-data/contingency_table_results.rbpbench_search.tsv
test-data/fasta_indexes.loc
test-data/in_sites.filtered.rbpbench_search.bed
test-data/in_sites.filtered.rbpbench_search.fa
test-data/motif_hit_stats.compare_test.clipper_idr.tsv
test-data/motif_hit_stats.compare_test.dewseq.tsv
test-data/motif_hit_stats.rbpbench_search.slbp_user.tsv
test-data/motif_hit_stats.rbpbench_search.tsv
test-data/motif_hit_stats.table_test.tsv
test-data/motif_hit_stats.test_batch.tsv
test-data/motif_hits.rbpbench_batch.table_test.bed
test-data/motif_hits.rbpbench_batch.test_batch.bed
test-data/motif_hits.rbpbench_compare.test.bed
test-data/motif_hits.rbpbench_compare.test.tsv
test-data/motif_hits.rbpbench_search.bed
test-data/motif_hits.rbpbench_search.test_all.bed
test-data/rbp_hit_stats.compare_test.clipper_idr.tsv
test-data/rbp_hit_stats.compare_test.dewseq.tsv
test-data/rbp_hit_stats.rbpbench_search.slbp_user.tsv
test-data/rbp_hit_stats.rbpbench_search.tsv
test-data/rbp_hit_stats.table_test.tsv
test-data/rbp_hit_stats.test_batch.tsv
test-data/report.rbpbench_compare.test.html
test-data/report.rbpbench_search.html
test-data/test.bed
test-data/test.fa
test-data/test.slbp_user.bed
test-data/test.slbp_user.fa
test-data/test1.bed
test-data/test2.bed
test-data/test_custom.info.txt
test-data/test_custom.motif_hits.rbpbench_search.bed
test-data/test_custom.seq_motifs.meme
test-data/test_custom.str_motifs.cm
test-data/test_search.gtf
test-data/test_search_gtf.region_annotations.tsv
test-data/test_table.txt
tool-data/fasta_indexes.loc.sample
tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 7dd2835ce566 batch_table_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/batch_table_wrapper.py Sun Dec 03 12:51:54 2023 +0000
[
b'@@ -0,0 +1,242 @@\n+#!/usr/bin/env python3\n+\n+import argparse\n+import os\n+import re\n+import subprocess\n+\n+\n+###############################################################################\n+\n+def setup_argument_parser():\n+    """Setup argparse parser."""\n+    help_description = """\n+    Python wrapper for RBPBench Galaxy wrapper to work with collections of\n+    input BED files (i.e. to process them with rbpbench batch).\n+    """\n+    # Define argument parser.\n+    p = argparse.ArgumentParser(add_help=False,\n+                                prog="batch_table_wrapper.py",\n+                                description=help_description,\n+                                formatter_class=argparse.MetavarTypeHelpFormatter)\n+\n+    # Required arguments.\n+    p.add_argument("-h", "--help",\n+                   action="help",\n+                   help="Print help message")\n+    p.add_argument("--table",\n+                   dest="in_table",\n+                   type=str,\n+                   metavar=\'str\',\n+                   required=True,\n+                   help="Input table file with data ID, method ID, RBP ID and file name (Galaxy element identifier in dataset collection) for each to be processed dataset by rbpbench batch")\n+    p.add_argument("--paths",\n+                   dest="in_paths",\n+                   type=str,\n+                   metavar=\'str\',\n+                   nargs=\'+\',\n+                   required=True,\n+                   help="List of Galaxy BED file paths (--files path1 path2 .. )")\n+    p.add_argument("--ids",\n+                   dest="in_ids",\n+                   type=str,\n+                   metavar=\'str\',\n+                   nargs=\'+\',\n+                   required=True,\n+                   help="List of Galaxy element identifiers, equal to the BED dataset names in the dataset collection (--ids id1 id2 .. 
)")\n+    p.add_argument("--genome",\n+                   dest="in_genome",\n+                   type=str,\n+                   metavar=\'str\',\n+                   required=True,\n+                   help="Genomic sequences file (currently supported formats: FASTA)")\n+    p.add_argument("--out",\n+                   dest="out_folder",\n+                   type=str,\n+                   metavar=\'str\',\n+                   required=True,\n+                   help="Batch results output folder")\n+    # Optional batch arguments.\n+    p.add_argument("--ext",\n+                   dest="ext_up_down",\n+                   type=str,\n+                   metavar=\'str\',\n+                   default="0",\n+                   help="Up- and downstream extension of --in sites in nucleotides (nt). Set e.g. --ext 30 for 30 nt on both sides, or --ext 20,10 for different up- and downstream extension (default: 0)")\n+    p.add_argument("--motif-db",\n+                   dest="motif_db",\n+                   type=int,\n+                   default=1,\n+                   choices=[1, 2, 3],\n+                   help="Motif database to use. 
1: human RBP motifs full (259 RBPs, 605 motifs, human_v0.1), 2: human RBP motifs full (low frequencies not rounded, human_v0.1_no_round), 3: human RBP motifs eCLIP (107 RBPs, 316 motifs, human_eclip_v0.1) (default: 1)")\n+    p.add_argument("--fimo-nt-freqs",\n+                   dest="fimo_nt_freqs",\n+                   type=str,\n+                   metavar=\'str\',\n+                   default=False,\n+                   help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file (default: use internal frequencies file optimized for human transcripts)")\n+    p.add_argument("--fimo-pval",\n+                   dest="fimo_pval",\n+                   type=float,\n+                   metavar=\'float\',\n+                   default=0.001,\n+                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.001)")\n+    p.add_argument("--bed-score-col",\n+                   dest="bed_score_col",\n+                   type=int,\n+                   metavar=\'int\',\n+                   default=5,\n+                   help="--in'..b'rt os.path.exists(path), "--paths %s file not found" % (path)\n+        if path not in paths_dic:\n+            paths_dic[path] = 1\n+        else:\n+            assert False, "--paths %s given > 1. Please provide unique paths" % (path)\n+        paths_list.append(path)\n+\n+    # IDs\n+    ids_dic = {}\n+    ids_list = []\n+    for id in args.in_ids:\n+        if id not in ids_dic:\n+            ids_dic[id] = 1\n+        else:\n+            assert False, "--ids \\"%s\\" given > 1. 
Please provide unique element identifiers (dataset names) inside the dataset collection, in order to unambiguously assign element ID to file path" % (id)\n+        ids_list.append(id)\n+\n+    id2path_dic = {}\n+    for idx, id in enumerate(ids_list):\n+        path = paths_list[idx]\n+        id2path_dic[id] = path\n+\n+    """\n+    Read in table.\n+\n+    Column format:\n+    rbp_id method_id data_id dataset_name\n+\n+    """\n+\n+    comb_ids_dic = {}\n+    id_collect_dic = {}\n+    id_collect_dic["rbp_id"] = []\n+    id_collect_dic["method_id"] = []\n+    id_collect_dic["data_id"] = []\n+    id_collect_dic["set_name"] = []\n+    id_collect_dic["path"] = []  # Galaxy file path.\n+\n+    print("Read in --table ... ")\n+\n+    with open(args.in_table) as f:\n+        for line in f:\n+\n+            if re.search("^#", line):\n+                continue\n+\n+            cols = line.strip().split("\\t")\n+\n+            assert len(cols) == 4, "line in --table with # cols != 4 (%i) encountered:%s" % (len(cols), line)\n+\n+            rbp_id = cols[0]\n+            method_id = cols[1]\n+            data_id = cols[2]\n+            set_name = cols[3]\n+\n+            if rbp_id == "rbp_id":\n+                continue\n+\n+            comb_id = "%s,%s,%s,%s" % (rbp_id, method_id, data_id, set_name)\n+\n+            if comb_id not in comb_ids_dic:\n+                comb_ids_dic[comb_id] = 1\n+            else:\n+                assert False, "data combination (\\"%s\\") appears > 1 in --table file. Please provide unique combinations for rbpbench batch calculation" % (comb_id)\n+\n+            assert set_name in ids_dic, "given dataset name \\"%s\\" from --table not part of given --ids. 
Please provide dataset names present in dataset collection" % (set_name)\n+\n+            id_collect_dic["rbp_id"].append(rbp_id)\n+            id_collect_dic["method_id"].append(method_id)\n+            id_collect_dic["data_id"].append(data_id)\n+            id_collect_dic["set_name"].append(set_name)\n+            id_collect_dic["path"].append(id2path_dic[set_name])\n+\n+    f.closed\n+\n+    assert id_collect_dic["rbp_id"], "nothing read in from --table. Please provide non-empty table in correct format (columns: rbp_id method_id data_id dataset_name)"\n+\n+    """\n+    Construct RBPBench batch call.\n+\n+    """\n+\n+    batch_call = "rbpbench batch"\n+    batch_call += " --out %s" % (args.out_folder)\n+    batch_call += " --genome %s" % (args.in_genome)\n+    batch_call += " --ext %s" % (args.ext_up_down)\n+    batch_call += " --motif-db %i" % (args.motif_db)\n+    if args.fimo_nt_freqs:\n+        batch_call += " --fimo-nt-freqs %s" % (args.fimo_nt_freqs)\n+    batch_call += " --fimo-pval %s" % (str(args.fimo_pval))\n+    batch_call += " --bed-score-col %i" % (args.bed_score_col)\n+    if args.unstranded:\n+        batch_call += " --unstranded"\n+    if args.unstranded_ct:\n+        batch_call += " --unstranded-ct"\n+\n+    rbp_ids = (" ").join(id_collect_dic["rbp_id"])\n+    method_ids = (" ").join(id_collect_dic["method_id"])\n+    data_ids = (" ").join(id_collect_dic["data_id"])\n+    paths = (" ").join(id_collect_dic["path"])\n+\n+    batch_call += " --rbp-list %s" % (rbp_ids)\n+    batch_call += " --method-list %s" % (method_ids)\n+    batch_call += " --data-list %s" % (data_ids)\n+    batch_call += " --bed %s" % (paths)\n+\n+    """\n+    Execute RBPBench batch call.\n+    """\n+\n+    print("")\n+    print("EXECUTING CALL:\\n%s" % (batch_call))\n+    output = subprocess.getoutput(batch_call)\n+    print("")\n+    print("RUN OUTPUT:\\n%s" % (output))\n+    print("")\n+    print("DONE.")\n'
b
diff -r 000000000000 -r 7dd2835ce566 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sun Dec 03 12:51:54 2023 +0000
[
@@ -0,0 +1,40 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.7</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@profile@">22.05</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">rbpbench</requirement>
+            <requirement type="package" version="5.5.4">meme</requirement>
+        </requirements>
+    </xml>
+    <xml name="bio_tools">
+        <xrefs>
+            <xref type="bio.tools">rbpbench</xref>
+        </xrefs>
+    </xml>
+    <!--xml name="citations">
+        <citations>
+            <citation type="doi"></citation>
+        </citations>
+    </xml-->
+    <token name="@PREPARE_REF@"><![CDATA[
+        ln -s -f
+        #if str($action_type.reference_genome.reference_genome_selector) == "history"
+            '$action_type.reference_genome.history_genome'
+        #else
+            '$action_type.reference_genome.builtin_genome.fields.path'
+        #end if
+        reference.fa &&
+    ]]></token>
+    <token name="@COMMON_PARAMS@"><![CDATA[
+        --ext $action_type.search_options.search_ext
+        --fimo-pval $action_type.search_options.search_fimo_pval
+        --bed-score-col $action_type.search_options.search_bed_score_col
+        $action_type.search_options.search_unstranded
+        $action_type.search_options.search_unstranded_ct
+        #if $action_type.search_options.fimo_nt_freqs_file:
+            --fimo-nt-freqs '$action_type.search_options.fimo_nt_freqs_file'
+        #end if
+    ]]></token>
+</macros>
b
diff -r 000000000000 -r 7dd2835ce566 rbpbench.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rbpbench.xml Sun Dec 03 12:51:54 2023 +0000
[
b'@@ -0,0 +1,1003 @@\n+<tool id="rbpbench" name="RBPBench" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">\n+\n+    <description>- Evaluate CLIP-seq and other genomic region data using a comprehensive collection of RBP binding motifs</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="bio_tools"/>\n+    <expand macro="requirements"/>\n+\n+    <command detect_errors="exit_code"><![CDATA[\n+        #if $action_type.action_type_selector == \'search_motifs\':\n+            @PREPARE_REF@\n+            rbpbench search\n+                --in \'$action_type.search_bed_file\'\n+                --out ./\n+                --genome reference.fa\n+                --method-id \'$action_type.search_method_id\'\n+                --data-id \'$action_type.search_data_id\'\n+                --rbps\n+                #if str($action_type.select_db.select_db_selector) == "default_db":\n+                    #if str($action_type.select_db.select_rbps.select_rbps_selector) == "list_db_rbps":\n+                        #if str($action_type.select_db.select_rbps.database) != "None":\n+                            #for $rbp_id in str($action_type.select_db.select_rbps.database).split(\',\'):\n+                                $rbp_id\n+                            #end for\n+                        #end if\n+                    #else:\n+                        ALL\n+                    #end if\n+                #else:\n+                    $action_type.select_db.custom_db_rbp_ids_list\n+                #end if\n+\n+                #if str($action_type.user_rbp.user_rbp_selector) == "sequence":\n+                    USER\n+                    --user-rbp-id $action_type.user_rbp.seq_rbp_id\n+                    --user-meme-xml \'$action_type.user_rbp.meme_motifs_xml\'\n+                #elif str($action_type.user_rbp.user_rbp_selector) == "structure":\n+                    USER\n+                    --user-rbp-id 
$action_type.user_rbp.str_rbp_id\n+                    --user-cm \'$action_type.user_rbp.cm_model_file\'\n+                #end if\n+\n+                #if str($action_type.select_db.select_db_selector) == "custom_db":\n+                    --custom-db-id $action_type.select_db.custom_db_id\n+                    --custom-db-info $action_type.select_db.custom_db_info_file\n+                    #if $action_type.select_db.custom_db_meme_xml_file:\n+                        --custom-db-meme-xml \'$action_type.select_db.custom_db_meme_xml_file\'\n+                    #end if\n+                    #if $action_type.select_db.custom_db_cm_model_file:\n+                        --custom-db-cm \'$action_type.select_db.custom_db_cm_model_file\'\n+                    #end if\n+                #end if\n+                @COMMON_PARAMS@\n+                #if $action_type.report_plotting_options.set_rbp_id:\n+                    --set-rbp-id \'$action_type.report_plotting_options.set_rbp_id\'\n+                #end if\n+                --motif-distance-plot-range $action_type.report_plotting_options.motif_distance_plot_range\n+                --motif-min-pair-count $action_type.report_plotting_options.motif_min_pair_count\n+                --rbp-min-pair-count $action_type.report_plotting_options.rbp_min_pair_count\n+                #if $action_type.report_plotting_options.gtf_file:\n+                    --gtf \'$action_type.report_plotting_options.gtf_file\'\n+                #end if\n+                #if $action_type.report_plotting_options.tr_list_file:\n+                    --tr-list \'$action_type.report_plotting_options.tr_list_file\'\n+                #end if\n+                #if $action_type.report_plotting_options.list_tr_biotypes:\n+                    --tr-types \'$action_type.report_plotting_options.list_tr_biotypes\'\n+                #end if\n+                --upset-plot-min-degree $action_type.report_plotting_options.upset_plot_min_degree\n+                #if 
$action_type.report_plotting_options.upset_plot_max_degree:\n+                    --upset-plot-max-degree \'$action_type'..b'iles to Galaxy and make a dataset collection out of them, \n+the dataset names will correspond to the uploaded file names.\n+In the above table, we would produce search results for three different \n+methods, on three different RBPs. \n+Likewise, if we would want to compare motif search results across cell types, \n+the table could look like this:\n+\n+========== ============ =============== =============================\n+PUM1       method1      K562_eCLIP      PUM1.K562_eclip.method1.bed\n+PUM1       method1      HepG2_eCLIP     PUM1.HepG2_eclip.method1.bed\n+PUM2       method1      K562_eCLIP      PUM2.K562_eclip.method1.bed\n+PUM2       method1      HepG2_eCLIP     PUM2.HepG2_eclip.method1.bed\n+SLBP       method1      K562_eCLIP      SLBP.K562_eclip.method1.bed\n+SLBP       method1      HepG2_eCLIP     SLBP.HepG2_eclip.method1.bed\n+========== ============ =============== =============================\n+\n+Here we would create motif search results across cell types K562 and HepG2, while keeping the peak calling \n+method ID constant ("method1").\n+As with the two already discussed search modes, \n+the resulting hit statistics output table files (RBP + motif hit statistics) \n+can subsequently serve as inputs to RBPBench\'s comparison mode (*Compare different search results*, section 5).\n+\n+\n+**4. Plot nucleotide distribution at genomic positions**\n+\n+In this mode, a set of genomic regions is input and the nucleotide distribution is plotted \n+around a defined center position (*Nucleotide distribution plot settings -> Define zero position for plotting*). By default, \n+the upstream end position of each region is used (other choices are center and downstream end).\n+This enables us, for example, to look at CLIP-seq crosslink positions and potential nucleotide biases at these sites.\n+\n+\n+**5. 
Compare different search results**\n+\n+This mode is used to compare different motif search results (produced by any of the three motif search modes \n+described above). Inputs are the RBP and motif hit statistics table files output by the motif search modes.\n+As exemplified in the previous sections, the set method IDs and \n+data IDs (together with the selected RBP IDs) define what gets compared in comparison mode.\n+Based on the IDs in the input tables, RBPBench looks for combinations of RBP ID+method ID+data ID, and produces \n+method-ID-centered (with fixed RBP ID + data ID) and / or data-ID-centered (with fixed RBP ID + method ID) comparisons.\n+At least two different IDs are needed for a comparison (e.g. two different method IDs or two different data IDs, with same RBP ID).\n+The comparison results are presented in an HTML report file, containing a hit statistics table and a \n+Venn diagram plot for each found combination. Moreover, the report results are output as table files,\n+and the combined motifs are output in BED format, for a data ID / method ID centered comparison e.g. 
inside a Genome Viewer.\n+Comparing numbers of unique and shared motif hits between methods also serves as a way of benchmarking different methods.\n+Since no ground truth (i.e., set of true / experimentally verified transcriptome-wide binding sites of an RBP) exists, one obvious way to \n+benchmark peak calling methods is to look at the enrichment of known RBP binding motifs in regions reported by the peak callers.\n+RBPBench makes such evaluations easy, especially by combining modes 2, 3, and 5.\n+\n+\n+-----\n+\n+**Tool documentation & repository**\n+\n+For more information (including a webserver tutorial) please visit the RBPBench website:\n+\n+https://backofenlab.github.io/RBPBench\n+\n+\n+The RBPBench repository can be found at:\n+\n+https://github.com/michauhl/RBPBench\n+\n+The GitHub repository hosts the command line version of RBPBench and also includes a \n+comprehensive manual with installation instructions and various usage examples. \n+\n+\n+.. _RBPBench: https://github.com/michauhl/RBPBench\n+.. _documentation: https://github.com/michauhl/RBPBench#hit-statistics-table-files\n+\n+    ]]></help>\n+</tool>\n'
b
diff -r 000000000000 -r 7dd2835ce566 test-data/SLBP_USER.cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SLBP_USER.cm Sun Dec 03 12:51:54 2023 +0000
[
b"@@ -0,0 +1,385 @@\n+INFERNAL1/a [1.1.3 | Nov 2019]\n+NAME     Histone3\n+ACC      RF00032\n+DESC     Histone 3' UTR stem-loop\n+STATES   142\n+NODES    42\n+CLEN     46\n+W        57\n+ALPH     RNA\n+RF       no\n+CONS     yes\n+MAP      yes\n+DATE     Fri Apr  4 13:03:46 2014\n+COM      [1] /nfs/production/xfam/rfam/software/bin/cmbuild -F CM SEED\n+COM      [2] /nfs/production/xfam/rfam/software/bin/cmcalibrate --mpi CM\n+PBEGIN   0.05\n+PEND     0.05\n+WBETA    1e-07\n+QDBBETA1 1e-07\n+QDBBETA2 1e-15\n+N2OMEGA  1.52588e-05\n+N3OMEGA  1.52588e-05\n+ELSELF   -0.08926734\n+NSEQ     46\n+EFFN     46.000000\n+CKSUM    471917655\n+NULL     0.000  0.000  0.000  0.000 \n+GA       25.00\n+TC       25.00\n+NC       24.90\n+EFP7GF   -8.9961 0.74543\n+ECMLC    0.73248    -4.63583     3.49505     1600000      463131  0.002591\n+ECMGC    0.50982    -9.05666     1.92502     1600000      108029  0.003703\n+ECMLI    0.86412    -1.32523     4.80439     1600000      239617  0.005008\n+ECMGI    0.55516    -6.80684     2.91667     1600000       88393  0.004525\n+CM\n+                                             [ ROOT    0 ]      -      - - - - -\n+     S     0    -1 0     1     4     1     1    57    76 -10.705 -10.912  -0.004  -9.326                 \n+    IL     1     1 2     1     4     1    19    64    83  -1.686  -2.369  -1.117  -4.855                  0.000  0.000  0.000  0.000 \n+    IR     2     2 3     2     3     1    19    63    81  -1.442  -0.798  -4.142                          0.000  0.000  0.000  0.000 \n+                                             [ MATL    1 ]      1      - c - - -\n+    ML     3     2 3     5     3     1    19    57    76 -11.622  -0.002 -10.276                          0.274  0.676 -1.683 -0.183 \n+     D     4     2 3     5     3     0    15    58    76  -6.174  -1.687  -0.566                         \n+    IL     5     5 3     5     3     1    18    62    80  -1.442  -0.798  -4.142                          0.000  0.000  0.000  0.000 \n+      
                                       [ MATL    2 ]      2      - c - - -\n+    ML     6     5 3     8     3     1    18    56    75 -11.622  -0.002 -10.276                          0.562  0.685 -1.974 -0.595 \n+     D     7     5 3     8     3     0    15    57    75  -6.174  -1.687  -0.566                         \n+    IL     8     8 3     8     3     1    17    61    79  -1.442  -0.798  -4.142                          0.000  0.000  0.000  0.000 \n+                                             [ MATL    3 ]      3      - A - - -\n+    ML     9     8 3    11     3     1    17    55    74 -11.622  -0.002 -10.276                          1.588 -1.105 -4.684 -1.031 \n+     D    10     8 3    11     3     0    14    56    74  -6.174  -1.687  -0.566                         \n+    IL    11    11 3    11     3     1    17    60    78  -1.442  -0.798  -4.142                          0.000  0.000  0.000  0.000 \n+                                             [ MATL    4 ]      4      - A - - -\n+    ML    12    11 3    14     3     1    16    54    73 -11.622  -0.002 -10.276                          1.035 -0.025 -1.895 -0.517 \n+     D    13    11 3    14     3     0    14    55    73  -6.174  -1.687  -0.566                         \n+    IL    14    14 3    14     3     1    16    59    77  -1.442  -0.798  -4.142                          0.000  0.000  0.000  0.000 \n+                                             [ MATL    5 ]      5      - a - - -\n+    ML    15    14 3    17     3     1    15    53    72 -11.923  -0.687  -1.402                          0.970  0.662 -4.530 -1.266 \n+     D    16    14 3    17     3     0    14    52    71  -5.620  -0.734  -1.403                         \n+    IL    17    17 3    17     3     1    15    55    73  -1.925  -0.554  -4.164                          0.000  0.000  0.000  0.000 \n+                                             [ MATR    6 ]      -     47 - U - -\n+    MR    18    17 3    20     3     1    14    52    71 -11.239  
-0.003  -9.556                         -1.280 -3.255 -0.399  1.446 \n"..b'0.01098  4.51736  1.09861  0.40547\n+     25   0.90589  0.99871  2.78941  1.79574     26 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     26   0.37936  2.45545  2.76975  1.78857     27 A - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.03553  6.17794  3.41622  1.46634  0.26236  1.09861  0.40547\n+     27   2.00288  0.98595  2.07379  1.00440     28 c - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00429  6.14670  6.14670  1.46634  0.26236  0.11900  2.18757\n+     28   0.58277  2.57844  2.26067  1.34141     29 A - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     29   2.04103  1.64155  1.24300  0.94698     30 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.04224  6.17794  3.23682  1.46634  0.26236  1.09861  0.40547\n+     30   0.90025  2.40010  2.48509  0.86868     31 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00432  6.14002  6.14002  1.46634  0.26236  0.10040  2.34838\n+     31   1.14658  1.52988  1.97258  1.11897     32 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     32   0.99477  1.56394  2.34075  1.12508     33 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     33   1.30260  0.91111  1.74486  1.88763     34 c - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     34   0.81644  1.95418  2.55798  1.08218     35 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     35   0.88707  2.68494  1.54697  1.18084     36 a - - 
:\n+          1.38629  1.38629  1.38629  1.38629\n+          0.02155  6.17794  3.95035  1.46634  0.26236  1.09861  0.40547\n+     36   0.88692  1.97884  1.71659  1.30871     37 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00423  6.16062  6.16062  1.46634  0.26236  0.19522  1.72966\n+     37   0.78649  1.68131  1.71782  1.72051     38 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     38   0.59588  2.08432  1.60986  2.08251     39 A - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     39   0.76886  1.58017  1.51684  2.19720     40 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.02859  6.17794  3.64554  1.46634  0.26236  1.09861  0.40547\n+     40   0.91147  2.90974  1.18115  1.44117     41 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00426  6.15362  6.15362  1.46634  0.26236  0.14750  1.98676\n+     41   1.19023  1.82584  1.84655  0.97555     42 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     42   1.35396  2.21010  1.23260  1.07716     43 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     43   1.25472  1.71330  1.50591  1.16232     44 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     44   1.37043  1.66018  2.98397  0.68259     45 U - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     45   1.15612  1.70216  1.15511  1.67140     46 g - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.47997  6.17794  0.96989  1.46634  0.26236  1.09861  0.40547\n+     46   2.25321  3.55314  1.66807  0.38906 
    47 U - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00335  5.70132        *  1.46634  0.26236  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 7dd2835ce566 test-data/comparison_stats.rbpbench_compare.test.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/comparison_stats.rbpbench_compare.test.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,5 @@
+combined_id method_id data_id motif_db rbp_id c_regions c_uniq_motif_hits perc_reg_with_hits perc_uniq_motif_nts_eff_reg uniq_motif_hits_cal_1000nt
+k562_eclip,human_v0.1,PUM1 dewseq_w100_s5 k562_eclip human_v0.1 PUM1 23 24 43.47826086956522 3.2 4.859283255719781
+k562_eclip,human_v0.1,PUM1 clipper_idr k562_eclip human_v0.1 PUM1 32 8 18.75 2.691013935607881 3.8443056222969725
+k562_eclip,human_v0.1,PUM2 dewseq_w100_s5 k562_eclip human_v0.1 PUM2 70 448 92.85714285714286 14.084164436876673 31.46288362946836
+k562_eclip,human_v0.1,PUM2 clipper_idr k562_eclip human_v0.1 PUM2 77 219 76.62337662337663 17.64076214943267 46.88503532434168
b
diff -r 000000000000 -r 7dd2835ce566 test-data/contingency_table_results.rbpbench_search.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/contingency_table_results.rbpbench_search.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,3 @@
+#rbp_id PUM1 PUM2
+PUM1 1.0 1.0
+PUM2 1.0 1.0
b
diff -r 000000000000 -r 7dd2835ce566 test-data/fasta_indexes.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_indexes.loc Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,30 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>     <dbkey> <display_name>  <file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon      hg19    Human (Homo sapiens): hg19 Canonical    /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon      hg18    Human (Homo sapiens): hg18 Canonical    /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full       hg18    Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon      hg19    Human (Homo sapiens): hg19 Canonical    /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full       hg19    Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
+testid testdbkey testdisplay ${__HERE__}/test.fa
\ No newline at end of file
b
diff -r 000000000000 -r 7dd2835ce566 test-data/in_sites.filtered.rbpbench_search.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in_sites.filtered.rbpbench_search.bed Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,1 @@
+chr1 10 80 chr1:10-80(+) 0.0 +
b
diff -r 000000000000 -r 7dd2835ce566 test-data/in_sites.filtered.rbpbench_search.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in_sites.filtered.rbpbench_search.fa Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,2 @@
+>chr1:10-80(+)
+ACTGGTTGTGATTTGTAGATACTGGCTCTTCTCAGATGAAGTTCCAGGATTATTCATTGAAAAAGGCTGG
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hit_stats.compare_test.clipper_idr.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hit_stats.compare_test.clipper_idr.tsv Sun Dec 03 12:51:54 2023 +0000
b
b'@@ -0,0 +1,228 @@\n+data_id\tmethod_id\trun_id\tmotif_db\tregion_id\trbp_id\tmotif_id\tchr_id\tgen_s\tgen_e\tstrand\tregion_s\tregion_e\tregion_len\tuniq_count\tfimo_score\tfimo_pval\tcms_score\tcms_eval\tinternal_id\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:43687539-43687632(+)\tPUM1\tPUM1_1\tchr21\t43687623\t43687629\t+\t84\t90\t93\t1\t9.0101\t0.000415\t-\t-\tpZFAGsHH\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:8397346-8397397(+)\tPUM1\tPUM1_1\tchr21\t8397362\t8397368\t+\t16\t22\t51\t1\t12.1616\t5.85e-05\t-\t-\tpZFAGsHH\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:8215025-8215091(+)\tPUM1\tPUM1_1\tchr21\t8215042\t8215048\t+\t17\t23\t66\t1\t6.9798\t0.000768\t-\t-\tpZFAGsHH\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:8215025-8215091(+)\tPUM1\tPUM1_1\tchr21\t8215045\t8215051\t+\t20\t26\t66\t1\t11.1515\t0.000238\t-\t-\tpZFAGsHH\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:8215025-8215091(+)\tPUM1\tPUM1_1\tchr21\t8215075\t8215081\t+\t50\t56\t66\t1\t11.1515\t0.000238\t-\t-\tpZFAGsHH\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:8214310-8214363(+)\tPUM1\tPUM1_1\tchr21\t8214324\t8214330\t+\t14\t20\t53\t1\t12.1616\t5.85e-05\t-\t-\tpZFAGsHH\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:36375568-36375657(+)\tPUM1\tPUM1_2\tchr21\t36375632\t36375640\t+\t64\t72\t89\t1\t-2.35354\t0.000906\t-\t-\tpZFAGsHH\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:32764345-32764372(-)\tPUM1\tPUM1_2\tchr21\t32764358\t32764366\t-\t7\t15\t27\t1\t6.31313\t0.000191\t-\t-\tpZFAGsHH\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:14371121-14371193(-)\tPUM2\tPUM2_1\tchr21\t14371170\t14371177\t-\t17\t24\t72\t1\t4.78788\t0.000283\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:14371121-14371193(-)\tPUM2\tPUM2_1\tchr21\t14371168\t14371175\t-\t19\t26\t72\t1\t4.69697\t0.000481\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:14371121-14371193(-)\tPUM2\tPUM2_1\tchr21\t14371143\t14371150\t-\t44\t
51\t72\t1\t14.2121\t3.34e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:32734876-32734935(-)\tPUM2\tPUM2_1\tchr21\t32734896\t32734903\t-\t33\t40\t59\t1\t11.8788\t8.1e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:32734876-32734935(-)\tPUM2\tPUM2_1\tchr21\t32734887\t32734894\t-\t42\t49\t59\t1\t14.3636\t1.62e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:32733994-32734078(-)\tPUM2\tPUM2_1\tchr21\t32734055\t32734062\t-\t17\t24\t84\t1\t4.78788\t0.000283\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:32733994-32734078(-)\tPUM2\tPUM2_1\tchr21\t32734041\t32734048\t-\t31\t38\t84\t1\t14.2121\t3.34e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:14961306-14961341(-)\tPUM2\tPUM2_1\tchr21\t14961325\t14961332\t-\t10\t17\t35\t1\t12.9798\t4.95e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:28945887-28945953(-)\tPUM2\tPUM2_1\tchr21\t28945911\t28945918\t-\t36\t43\t66\t1\t14.2121\t3.34e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:33576734-33576815(+)\tPUM2\tPUM2_1\tchr21\t33576754\t33576761\t+\t20\t27\t81\t1\t4.78788\t0.000283\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:33576651-33576714(+)\tPUM2\tPUM2_1\tchr21\t33576671\t33576678\t+\t20\t27\t63\t1\t4.54545\t0.000789\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:33576651-33576714(+)\tPUM2\tPUM2_1\tchr21\t33576685\t33576692\t+\t34\t41\t63\t1\t4.54545\t0.000789\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:33576604-33576651(+)\tPUM2\tPUM2_1\tchr21\t33576643\t33576650\t+\t39\t46\t47\t1\t14.2121\t3.34e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:36376271-36376367(+)\tPUM2\tPUM2_1\tchr21\t36376310\t36376317\t+\t39\t46\t96\t1\t14.3636\t1.62e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:36376271-36376367(+)\tPUM2\tPUM2_1\tchr21\t36376312\t36
376319\t+\t41\t48\t96\t1\t4.71717\t0.000332\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:37223448-37223498(-)\tPUM2\tPUM2_1\tchr21\t37223459\t37223466\t-\t33\t40\t50\t1\t12.0303\t6.48e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:37202978-37203044(+)\tPUM2\tPUM2_1\tchr21\t37203027\t37203034\t+\t49\t56\t66\t1\t12.9798\t4.95e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:46569364-46569436(+)\tPUM2\tPUM2_1\tchr21\t46569382\t46569389\t+\t18\t25\t72\t1\t4.63636'..b'0lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:33437436-33437487(+)\tPUM2\tPUM2_5\tchr21\t33437438\t33437447\t+\t2\t11\t51\t1\t11.9394\t4.69e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:33437328-33437436(+)\tPUM2\tPUM2_5\tchr21\t33437398\t33437407\t+\t70\t79\t108\t1\t8.80808\t0.000397\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:33437328-33437436(+)\tPUM2\tPUM2_5\tchr21\t33437426\t33437435\t+\t98\t107\t108\t1\t8.26263\t0.000513\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:14961795-14961851(-)\tPUM2\tPUM2_5\tchr21\t14961842\t14961851\t-\t1\t10\t56\t1\t11.1212\t8.74e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:14961851-14961895(-)\tPUM2\tPUM2_5\tchr21\t14961861\t14961870\t-\t26\t35\t44\t1\t10.5455\t0.000137\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:45512384-45512556(+)\tPUM2\tPUM2_5\tchr21\t45512511\t45512520\t+\t127\t136\t172\t1\t8.51515\t0.000442\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:33558801-33558874(+)\tPUM2\tPUM2_5\tchr21\t33558832\t33558841\t+\t31\t40\t73\t1\t7.71717\t0.000655\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:44861160-44861215(-)\tPUM2\tPUM2_5\tchr21\t44861192\t44861201\t-\t15\t24\t55\t1\t13.0505\t1.29e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:14485280-14485326(-)\tPUM2\tPUM2_5\tchr21\t14485285
\t14485294\t-\t33\t42\t46\t1\t11.1212\t8.74e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:34105575-34105653(+)\tPUM2\tPUM2_5\tchr21\t34105600\t34105609\t+\t25\t34\t78\t1\t10.2525\t0.000164\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:34105575-34105653(+)\tPUM2\tPUM2_5\tchr21\t34105609\t34105618\t+\t34\t43\t78\t1\t10.7475\t0.000121\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:32734633-32734683(-)\tPUM2\tPUM2_5\tchr21\t32734647\t32734656\t-\t28\t37\t50\t1\t10.404\t0.000149\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:32734633-32734683(-)\tPUM2\tPUM2_5\tchr21\t32734638\t32734647\t-\t37\t46\t50\t1\t8.50505\t0.000445\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:44805626-44805677(-)\tPUM2\tPUM2_5\tchr21\t44805668\t44805677\t-\t1\t10\t51\t1\t13.6061\t6.92e-06\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:37513867-37513939(+)\tPUM2\tPUM2_5\tchr21\t37513905\t37513914\t+\t38\t47\t72\t1\t7.0404\t0.000882\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:14961937-14961964(-)\tPUM2\tPUM2_5\tchr21\t14961945\t14961954\t-\t11\t20\t27\t1\t10.8384\t0.000118\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:32734837-32734876(-)\tPUM2\tPUM2_5\tchr21\t32734862\t32734871\t-\t6\t15\t39\t1\t7.77778\t0.00064\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:34887678-34887756(-)\tPUM2\tPUM2_5\tchr21\t34887737\t34887746\t-\t11\t20\t78\t1\t7.24242\t0.000807\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:34887678-34887756(-)\tPUM2\tPUM2_5\tchr21\t34887732\t34887741\t-\t16\t25\t78\t1\t12.7172\t2.11e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:34887678-34887756(-)\tPUM2\tPUM2_5\tchr21\t34887705\t34887714\t-\t43\t52\t78\t1\t13.9192\t3.92e-06\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:34887678-34887756(-)\tPUM2\tPUM2_5\tchr2
1\t34887703\t34887712\t-\t45\t54\t78\t1\t8.71717\t0.000411\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:34887678-34887756(-)\tPUM2\tPUM2_5\tchr21\t34887697\t34887706\t-\t51\t60\t78\t1\t9.17172\t0.00032\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:34887678-34887756(-)\tPUM2\tPUM2_5\tchr21\t34887695\t34887704\t-\t53\t62\t78\t1\t11.0303\t9.55e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:36375329-36375363(+)\tPUM2\tPUM2_5\tchr21\t36375339\t36375348\t+\t10\t19\t34\t1\t7.25253\t0.000803\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:33578030-33578124(-)\tPUM2\tPUM2_5\tchr21\t33578071\t33578080\t-\t45\t54\t94\t1\t12.9394\t1.5e-05\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:31671330-31671381(-)\tPUM2\tPUM2_5\tchr21\t31671335\t31671344\t-\t38\t47\t51\t1\t8.55556\t0.000433\t-\t-\tAJ40lTA6\n+k562_eclip\tclipper_idr\trun_id\thuman_v0.1\tchr21:43983033-43983127(+)\tPUM2\tPUM2_5\tchr21\t43983057\t43983066\t+\t24\t33\t94\t1\t12.1717\t3.39e-05\t-\t-\tAJ40lTA6\n'
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hit_stats.compare_test.dewseq.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hit_stats.compare_test.dewseq.tsv Sun Dec 03 12:51:54 2023 +0000
b
b'@@ -0,0 +1,584 @@\n+data_id\tmethod_id\trun_id\tmotif_db\tregion_id\trbp_id\tmotif_id\tchr_id\tgen_s\tgen_e\tstrand\tregion_s\tregion_e\tregion_len\tuniq_count\tfimo_score\tfimo_pval\tcms_score\tcms_eval\tinternal_id\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8214212-8214407(+)\tPUM1\tPUM1_1\tchr21\t8214298\t8214304\t+\t86\t92\t195\t1\t8.83838\t0.000473\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8214212-8214407(+)\tPUM1\tPUM1_1\tchr21\t8214324\t8214330\t+\t112\t118\t195\t1\t12.1616\t5.85e-05\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8218652-8218957(+)\tPUM1\tPUM1_1\tchr21\t8218698\t8218704\t+\t46\t52\t305\t1\t11.3232\t0.00018\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8213907-8214127(+)\tPUM1\tPUM1_1\tchr21\t8214075\t8214081\t+\t168\t174\t220\t1\t6.9798\t0.000768\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8217782-8218632(+)\tPUM1\tPUM1_1\tchr21\t8217841\t8217847\t+\t59\t65\t850\t1\t6.9798\t0.000768\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8217782-8218632(+)\tPUM1\tPUM1_1\tchr21\t8217884\t8217890\t+\t102\t108\t850\t1\t11.1515\t0.000238\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8217782-8218632(+)\tPUM1\tPUM1_1\tchr21\t8217961\t8217967\t+\t179\t185\t850\t1\t11.3232\t0.00018\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8217782-8218632(+)\tPUM1\tPUM1_1\tchr21\t8217962\t8217968\t+\t180\t186\t850\t1\t6.9798\t0.000768\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8401688-8401913(+)\tPUM1\tPUM1_1\tchr21\t8401737\t8401743\t+\t49\t55\t225\t1\t11.3232\t0.00018\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8397503-8397723(+)\tPUM1\tPUM1_1\tchr21\t8397514\t8397520\t+\t11\t17\t220\t1\t11.1515\t0.000238\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8397503-8397723(+)\tPUM1\tPUM1_1\tchr21\t
8397534\t8397540\t+\t31\t37\t220\t1\t9.0101\t0.000415\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8397503-8397723(+)\tPUM1\tPUM1_1\tchr21\t8397536\t8397542\t+\t33\t39\t220\t1\t12.1616\t5.85e-05\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8397503-8397723(+)\tPUM1\tPUM1_1\tchr21\t8397540\t8397546\t+\t37\t43\t220\t1\t11.1515\t0.000238\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8397503-8397723(+)\tPUM1\tPUM1_1\tchr21\t8397544\t8397550\t+\t41\t47\t220\t1\t11.1515\t0.000238\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8397503-8397723(+)\tPUM1\tPUM1_1\tchr21\t8397554\t8397560\t+\t51\t57\t220\t1\t9.0101\t0.000415\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8397503-8397723(+)\tPUM1\tPUM1_1\tchr21\t8397556\t8397562\t+\t53\t59\t220\t1\t12.1616\t5.85e-05\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8397503-8397723(+)\tPUM1\tPUM1_1\tchr21\t8397680\t8397686\t+\t177\t183\t220\t1\t6.9798\t0.000768\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8397243-8397453(+)\tPUM1\tPUM1_1\tchr21\t8397336\t8397342\t+\t93\t99\t210\t1\t8.83838\t0.000473\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:8397243-8397453(+)\tPUM1\tPUM1_1\tchr21\t8397362\t8397368\t+\t119\t125\t210\t1\t12.1616\t5.85e-05\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:43687515-43687745(+)\tPUM1\tPUM1_1\tchr21\t43687623\t43687629\t+\t108\t114\t230\t1\t9.0101\t0.000415\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:32764357-32764462(-)\tPUM1\tPUM1_2\tchr21\t32764358\t32764366\t-\t97\t105\t105\t1\t6.31313\t0.000191\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:36375527-36375647(+)\tPUM1\tPUM1_2\tchr21\t36375632\t36375640\t+\t105\t113\t120\t1\t-2.35354\t0.000906\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:3637552
7-36375647(+)\tPUM1\tPUM1_3\tchr21\t36375534\t36375541\t+\t7\t14\t120\t1\t12.5354\t6.6e-05\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:36375527-36375647(+)\tPUM1\tPUM1_3\tchr21\t36375556\t36375563\t+\t29\t36\t120\t1\t4.61616\t0.000739\t-\t-\tMVuT5ext\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:5240799-5240909(-)\tPUM2\tPUM2_1\tchr21\t5240861\t5240868\t-\t42\t49\t110\t1\t14.2121\t3.34e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:14371114-14371439(-)\tPU'..b'0\t1\t12.9394\t1.5e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:38823273-38823463(+)\tPUM2\tPUM2_5\tchr21\t38823411\t38823420\t+\t138\t147\t190\t1\t8.33333\t0.00049\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:39175448-39175648(-)\tPUM2\tPUM2_5\tchr21\t39175591\t39175600\t-\t49\t58\t200\t1\t8.66667\t0.000419\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:39175448-39175648(-)\tPUM2\tPUM2_5\tchr21\t39175562\t39175571\t-\t78\t87\t200\t1\t11.3434\t7.4e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:39175448-39175648(-)\tPUM2\tPUM2_5\tchr21\t39175515\t39175524\t-\t125\t134\t200\t1\t10.1616\t0.000175\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:39175448-39175648(-)\tPUM2\tPUM2_5\tchr21\t39175463\t39175472\t-\t177\t186\t200\t1\t10.1111\t0.000186\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:41988240-41988435(-)\tPUM2\tPUM2_5\tchr21\t41988361\t41988370\t-\t66\t75\t195\t1\t11.1515\t8.53e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:41988240-41988435(-)\tPUM2\tPUM2_5\tchr21\t41988342\t41988351\t-\t85\t94\t195\t1\t9.05051\t0.000345\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:41988240-41988435(-)\tPUM2\tPUM2_5\tchr21\t41988257\t41988266\t-\t170\t179\t195\t1\t8.60606\t0.000427\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.
1\tchr21:43693279-43693514(+)\tPUM2\tPUM2_5\tchr21\t43693399\t43693408\t+\t120\t129\t235\t1\t7.35354\t0.00076\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:43693279-43693514(+)\tPUM2\tPUM2_5\tchr21\t43693415\t43693424\t+\t136\t145\t235\t1\t12.2929\t3.08e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:43760950-43761125(+)\tPUM2\tPUM2_5\tchr21\t43761042\t43761051\t+\t92\t101\t175\t1\t12.1515\t3.69e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:43982948-43983178(+)\tPUM2\tPUM2_5\tchr21\t43983057\t43983066\t+\t109\t118\t230\t1\t12.1717\t3.39e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:44805616-44805856(-)\tPUM2\tPUM2_5\tchr21\t44805799\t44805808\t-\t49\t58\t240\t1\t6.82828\t0.000958\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:44805616-44805856(-)\tPUM2\tPUM2_5\tchr21\t44805668\t44805677\t-\t180\t189\t240\t1\t13.6061\t6.92e-06\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:44861162-44861271(-)\tPUM2\tPUM2_5\tchr21\t44861192\t44861201\t-\t71\t80\t109\t1\t13.0505\t1.29e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:45512427-45512547(+)\tPUM2\tPUM2_5\tchr21\t45512511\t45512520\t+\t84\t93\t120\t1\t8.51515\t0.000442\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:45602615-45602770(+)\tPUM2\tPUM2_5\tchr21\t45602619\t45602628\t+\t4\t13\t155\t1\t7.25253\t0.000803\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:45602615-45602770(+)\tPUM2\tPUM2_5\tchr21\t45602697\t45602706\t+\t82\t91\t155\t1\t12.1515\t3.69e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:45602615-45602770(+)\tPUM2\tPUM2_5\tchr21\t45602706\t45602715\t+\t91\t100\t155\t1\t7.77778\t0.00064\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:45602615-45602770(+)\tPUM2\tPUM2_5\tchr21\t45602731\t45602740\t+\t116\t125\t155\t1\t10.1717\
t0.000174\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:46324121-46324226(+)\tPUM2\tPUM2_5\tchr21\t46324156\t46324165\t+\t35\t44\t105\t1\t11.1212\t8.74e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:46569284-46569479(+)\tPUM2\tPUM2_5\tchr21\t46569299\t46569308\t+\t15\t24\t195\t1\t6.81818\t0.00096\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:46569284-46569479(+)\tPUM2\tPUM2_5\tchr21\t46569388\t46569397\t+\t104\t113\t195\t1\t11.5051\t6.62e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:46569554-46569659(+)\tPUM2\tPUM2_5\tchr21\t46569555\t46569564\t+\t1\t10\t105\t1\t9.0101\t0.000356\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:46569554-46569659(+)\tPUM2\tPUM2_5\tchr21\t46569592\t46569601\t+\t38\t47\t105\t1\t11.2222\t8.1e-05\t-\t-\t-sbAn7Po\n+k562_eclip\tdewseq_w100_s5\trun_id\thuman_v0.1\tchr21:46569554-46569659(+)\tPUM2\tPUM2_5\tchr21\t46569649\t46569658\t+\t95\t104\t105\t1\t10.1919\t0.000172\t-\t-\t-sbAn7Po\n'
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hit_stats.rbpbench_search.slbp_user.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hit_stats.rbpbench_search.slbp_user.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,2 @@
+data_id method_id run_id motif_db region_id rbp_id motif_id chr_id gen_s gen_e strand region_s region_e region_len uniq_count fimo_score fimo_pval cms_score cms_eval internal_id
+data_id method_id run_id user chr1:50-113(+) SLBP_USER RF00032 chr1 89 113 + 39 63 63 1 - - 27.8 1.5e-08 RuDIjV1k
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hit_stats.rbpbench_search.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hit_stats.rbpbench_search.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,6 @@
+data_id method_id run_id motif_db region_id rbp_id motif_id chr_id gen_s gen_e strand region_s region_e region_len uniq_count fimo_score fimo_pval cms_score cms_eval internal_id
+data_id method_id run_id human_v0.1 chr1:10-80(+) PUM1 PUM1_3 chr1 24 31 + 14 21 70 1 12.8182 4.87e-05 - - h2FelLqa
+data_id method_id run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_1 chr1 24 31 + 14 21 70 1 10.404 0.000113 - - HcD7_nJz
+data_id method_id run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_3 chr1 24 31 + 14 21 70 1 6.20202 0.000212 - - HcD7_nJz
+data_id method_id run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_4 chr1 24 31 + 14 21 70 1 15.8485 1.64e-05 - - HcD7_nJz
+data_id method_id run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_5 chr1 22 31 + 12 21 70 1 10.8788 0.000113 - - HcD7_nJz
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hit_stats.table_test.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hit_stats.table_test.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,6 @@
+data_id method_id run_id motif_db region_id rbp_id motif_id chr_id gen_s gen_e strand region_s region_e region_len uniq_count fimo_score fimo_pval cms_score cms_eval internal_id
+did1 mid1 run_id human_v0.1 chr1:10-80(+) PUM1 PUM1_3 chr1 24 31 + 14 21 70 1 12.8182 4.87e-05 - - NInGOqIN
+did2 mid2 run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_1 chr1 24 31 + 14 21 70 1 10.404 0.000113 - - u7eY1SoG
+did2 mid2 run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_3 chr1 24 31 + 14 21 70 1 6.20202 0.000212 - - u7eY1SoG
+did2 mid2 run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_4 chr1 24 31 + 14 21 70 1 15.8485 1.64e-05 - - u7eY1SoG
+did2 mid2 run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_5 chr1 22 31 + 12 21 70 1 10.8788 0.000113 - - u7eY1SoG
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hit_stats.test_batch.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hit_stats.test_batch.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,6 @@
+data_id method_id run_id motif_db region_id rbp_id motif_id chr_id gen_s gen_e strand region_s region_e region_len uniq_count fimo_score fimo_pval cms_score cms_eval internal_id
+data-id1 method-id1 run_id human_v0.1 chr1:10-80(+) PUM1 PUM1_3 chr1 24 31 + 14 21 70 1 12.8182 4.87e-05 - - TT41sPJM
+data-id2 method-id2 run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_1 chr1 24 31 + 14 21 70 1 10.404 0.000113 - - -dJ6C0OI
+data-id2 method-id2 run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_3 chr1 24 31 + 14 21 70 1 6.20202 0.000212 - - -dJ6C0OI
+data-id2 method-id2 run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_4 chr1 24 31 + 14 21 70 1 15.8485 1.64e-05 - - -dJ6C0OI
+data-id2 method-id2 run_id human_v0.1 chr1:10-80(+) PUM2 PUM2_5 chr1 22 31 + 12 21 70 1 10.8788 0.000113 - - -dJ6C0OI
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hits.rbpbench_batch.table_test.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hits.rbpbench_batch.table_test.bed Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,5 @@
+chr1 23 31 PUM1,PUM1_3;1;mid1,did1 0 + 12.8182 4.87e-05 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_1;1;mid2,did2 0 + 10.404 0.000113 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_3;1;mid2,did2 0 + 6.20202 0.000212 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_4;1;mid2,did2 0 + 15.8485 1.64e-05 -1.0 -1.0
+chr1 21 31 PUM2,PUM2_5;1;mid2,did2 0 + 10.8788 0.000113 -1.0 -1.0
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hits.rbpbench_batch.test_batch.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hits.rbpbench_batch.test_batch.bed Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,5 @@
+chr1 23 31 PUM1,PUM1_3;1;method-id1,data-id1 0 + 12.8182 4.87e-05 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_1;1;method-id2,data-id2 0 + 10.404 0.000113 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_3;1;method-id2,data-id2 0 + 6.20202 0.000212 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_4;1;method-id2,data-id2 0 + 15.8485 1.64e-05 -1.0 -1.0
+chr1 21 31 PUM2,PUM2_5;1;method-id2,data-id2 0 + 10.8788 0.000113 -1.0 -1.0
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hits.rbpbench_compare.test.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hits.rbpbench_compare.test.bed Sun Dec 03 12:51:54 2023 +0000
b
b'@@ -0,0 +1,479 @@\n+chr21\t43687623\t43687629\tPUM1_1;k562_eclip,human_v0.1,PUM1;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t8397362\t8397368\tPUM1_1;k562_eclip,human_v0.1,PUM1;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t8215042\t8215048\tPUM1_1;k562_eclip,human_v0.1,PUM1;clipper_idr\t0\t+\n+chr21\t8215045\t8215051\tPUM1_1;k562_eclip,human_v0.1,PUM1;clipper_idr\t0\t+\n+chr21\t8215075\t8215081\tPUM1_1;k562_eclip,human_v0.1,PUM1;clipper_idr\t0\t+\n+chr21\t8214324\t8214330\tPUM1_1;k562_eclip,human_v0.1,PUM1;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t36375632\t36375640\tPUM1_2;k562_eclip,human_v0.1,PUM1;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t32764358\t32764366\tPUM1_2;k562_eclip,human_v0.1,PUM1;clipper_idr,dewseq_w100_s5\t0\t-\n+chr21\t8214298\t8214304\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8218698\t8218704\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8214075\t8214081\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8217841\t8217847\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8217884\t8217890\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8217961\t8217967\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8217962\t8217968\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8401737\t8401743\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8397514\t8397520\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8397534\t8397540\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8397536\t8397542\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8397540\t8397546\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8397544\t8397550\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8397554\t8397560\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8397556\t8397562\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8397680\t8397686\tPUM1_1;k56
2_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t8397336\t8397342\tPUM1_1;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t36375534\t36375541\tPUM1_3;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t36375556\t36375563\tPUM1_3;k562_eclip,human_v0.1,PUM1;dewseq_w100_s5\t0\t+\n+chr21\t14371170\t14371177\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t-\n+chr21\t14371168\t14371175\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t-\n+chr21\t14371143\t14371150\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t-\n+chr21\t32734896\t32734903\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t-\n+chr21\t32734887\t32734894\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t-\n+chr21\t32734055\t32734062\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t-\n+chr21\t32734041\t32734048\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t-\n+chr21\t14961325\t14961332\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t-\n+chr21\t28945911\t28945918\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr\t0\t-\n+chr21\t33576754\t33576761\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t33576671\t33576678\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t33576685\t33576692\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t33576643\t33576650\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t36376310\t36376317\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t36376312\t36376319\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t37223459\t37223466\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t-\n+chr21\t37203027\t37203034\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t46569382\t46569389\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s
5\t0\t+\n+chr21\t36375544\t36375551\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t33560605\t33560612\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_s5\t0\t+\n+chr21\t14485730\t14485737\tPUM2_1;k562_eclip,human_v0.1,PUM2;clipper_idr,dewseq_w100_'..b'2;dewseq_w100_s5\t0\t-\n+chr21\t33524156\t33524165\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr21\t33524135\t33524144\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr21\t33576725\t33576734\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t33558869\t33558878\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t33558892\t33558901\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t33560565\t33560574\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t33560567\t33560576\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t33560569\t33560578\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t33560575\t33560584\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t33560581\t33560590\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t33560583\t33560592\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t33560585\t33560594\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t33560587\t33560596\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t34105653\t34105662\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t34098703\t34098712\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t34100072\t34100081\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t34100074\t34100083\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t34100082\t34100091\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t34788319\t34788328\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr21\t34887755\t34887764\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr
21\t36375372\t36375381\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t36375532\t36375541\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t36375742\t36375751\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t36375810\t36375819\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t36375812\t36375821\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t36375929\t36375938\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t36376469\t36376478\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t37223447\t37223456\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr21\t37223420\t37223429\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr21\t37515006\t37515015\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t37514620\t37514629\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t37514672\t37514681\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t38823550\t38823559\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t38823557\t38823566\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t38823760\t38823769\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t38823339\t38823348\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t38823341\t38823350\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t38823388\t38823397\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t38823404\t38823413\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t38823411\t38823420\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t39175591\t39175600\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr21\t39175463\t39175472\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr21\t41988361\t41988370\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr21\t41988342\t41988351\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w10
0_s5\t0\t-\n+chr21\t41988257\t41988266\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr21\t44805799\t44805808\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t-\n+chr21\t45602619\t45602628\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t46569299\t46569308\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t46569555\t46569564\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t46569592\t46569601\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n+chr21\t46569649\t46569658\tPUM2_5;k562_eclip,human_v0.1,PUM2;dewseq_w100_s5\t0\t+\n'
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hits.rbpbench_compare.test.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hits.rbpbench_compare.test.tsv Sun Dec 03 12:51:54 2023 +0000
b
b'@@ -0,0 +1,480 @@\n+combined_id\tmotif_hit_id\tmethod_data_ids_with_hit\n+k562_eclip,human_v0.1,PUM1\tchr21:43687623-43687629(+),PUM1_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8397362-8397368(+),PUM1_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8215042-8215048(+),PUM1_1\tclipper_idr\n+k562_eclip,human_v0.1,PUM1\tchr21:8215045-8215051(+),PUM1_1\tclipper_idr\n+k562_eclip,human_v0.1,PUM1\tchr21:8215075-8215081(+),PUM1_1\tclipper_idr\n+k562_eclip,human_v0.1,PUM1\tchr21:8214324-8214330(+),PUM1_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:36375632-36375640(+),PUM1_2\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:32764358-32764366(-),PUM1_2\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8214298-8214304(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8218698-8218704(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8214075-8214081(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8217841-8217847(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8217884-8217890(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8217961-8217967(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8217962-8217968(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8401737-8401743(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8397514-8397520(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8397534-8397540(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8397536-8397542(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8397540-8397546(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8397544-8397550(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8397554-8397560(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8397556-8397562(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8397680-8397686(+),PUM1_1\tdewseq_w100_
s5\n+k562_eclip,human_v0.1,PUM1\tchr21:8397336-8397342(+),PUM1_1\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:36375534-36375541(+),PUM1_3\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM1\tchr21:36375556-36375563(+),PUM1_3\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:14371170-14371177(-),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:14371168-14371175(-),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:14371143-14371150(-),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:32734896-32734903(-),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:32734887-32734894(-),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:32734055-32734062(-),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:32734041-32734048(-),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:14961325-14961332(-),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:28945911-28945918(-),PUM2_1\tclipper_idr\n+k562_eclip,human_v0.1,PUM2\tchr21:33576754-33576761(+),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33576671-33576678(+),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33576685-33576692(+),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33576643-33576650(+),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:36376310-36376317(+),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:36376312-36376319(+),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:37223459-37223466(-),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:37203027-37203034(+),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:46569382-46569389(+),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:36375544-36375551(+),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PU
M2\tchr21:33560605-33560612(+),PUM2_1\tclipper_idr,dewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:14485730-14485737(-),PUM2_1\tclipper_idr,dewse'..b'_eclip,human_v0.1,PUM2\tchr21:33450622-33450631(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33524156-33524165(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33524135-33524144(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33576725-33576734(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33558869-33558878(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33558892-33558901(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33560565-33560574(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33560567-33560576(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33560569-33560578(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33560575-33560584(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33560581-33560590(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33560583-33560592(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33560585-33560594(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:33560587-33560596(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:34105653-34105662(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:34098703-34098712(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:34100072-34100081(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:34100074-34100083(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:34100082-34100091(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:34788319-34788328(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:34887755-34887764(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:36375372-36375381(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:36375532-36375541(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,
human_v0.1,PUM2\tchr21:36375742-36375751(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:36375810-36375819(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:36375812-36375821(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:36375929-36375938(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:36376469-36376478(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:37223447-37223456(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:37223420-37223429(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:37515006-37515015(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:37514620-37514629(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:37514672-37514681(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:38823550-38823559(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:38823557-38823566(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:38823760-38823769(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:38823339-38823348(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:38823341-38823350(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:38823388-38823397(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:38823404-38823413(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:38823411-38823420(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:39175591-39175600(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:39175463-39175472(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:41988361-41988370(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:41988342-41988351(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:41988257-41988266(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:44805799-44805808(-),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:45602619-45602628(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,
human_v0.1,PUM2\tchr21:46569299-46569308(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:46569555-46569564(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:46569592-46569601(+),PUM2_5\tdewseq_w100_s5\n+k562_eclip,human_v0.1,PUM2\tchr21:46569649-46569658(+),PUM2_5\tdewseq_w100_s5\n'
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hits.rbpbench_search.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hits.rbpbench_search.bed Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,5 @@
+chr1 23 31 PUM1,PUM1_3;1;method_id,data_id 0 + 12.8182 4.87e-05 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_1;1;method_id,data_id 0 + 10.404 0.000113 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_3;1;method_id,data_id 0 + 6.20202 0.000212 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_4;1;method_id,data_id 0 + 15.8485 1.64e-05 -1.0 -1.0
+chr1 21 31 PUM2,PUM2_5;1;method_id,data_id 0 + 10.8788 0.000113 -1.0 -1.0
b
diff -r 000000000000 -r 7dd2835ce566 test-data/motif_hits.rbpbench_search.test_all.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_hits.rbpbench_search.test_all.bed Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,56 @@
+chr1 66 76 ACIN1,ACIN1_2;1;method_id,data_id 0 + 7.83168 0.000976 -1.0 -1.0
+chr1 35 42 ADAR,ADAR_2;1;method_id,data_id 0 + 7.76761 0.000162 -1.0 -1.0
+chr1 21 28 BUD13,BUD13_4;1;method_id,data_id 0 + 8.50505 0.000528 -1.0 -1.0
+chr1 16 23 CELF6,CELF6_1;1;method_id,data_id 0 + 7.54545 0.000859 -1.0 -1.0
+chr1 20 32 DDX6,DDX6_1;1;method_id,data_id 0 + 11.1616 5.8e-05 -1.0 -1.0
+chr1 47 53 DDX54,DDX54_2;1;method_id,data_id 0 + 10.0101 0.000506 -1.0 -1.0
+chr1 20 31 DGCR8,DGCR8_1;1;method_id,data_id 0 + 10.4646 0.000102 -1.0 -1.0
+chr1 57 64 ELAVL4,ELAVL4_1;1;method_id,data_id 0 + 8.90909 0.000307 -1.0 -1.0
+chr1 31 39 EWSR1,EWSR1_2;1;method_id,data_id 0 + 7.39394 0.000281 -1.0 -1.0
+chr1 25 35 FMR1,FMR1_1;1;method_id,data_id 0 + -1.28283 0.000939 -1.0 -1.0
+chr1 55 62 G3BP2,G3BP2_1;1;method_id,data_id 0 + 10.2323 0.00027 -1.0 -1.0
+chr1 20 29 GPKOW,GPKOW_1;1;method_id,data_id 0 + 7.56566 0.000629 -1.0 -1.0
+chr1 42 49 GPKOW,GPKOW_2;1;method_id,data_id 0 + 7.66337 0.000616 -1.0 -1.0
+chr1 13 20 HNRNPM,HNRNPM_2;1;method_id,data_id 0 + 9.36 0.000225 -1.0 -1.0
+chr1 65 74 LSM11,LSM11_4;1;method_id,data_id 0 + 7.32323 0.000681 -1.0 -1.0
+chr1 43 53 MTPAP,MTPAP_1;1;method_id,data_id 0 + 7.51485 0.000853 -1.0 -1.0
+chr1 65 75 NUP42,NUP42_1;1;method_id,data_id 0 + 7.56566 0.000678 -1.0 -1.0
+chr1 67 74 PABPC1,PABPC1_2;1;method_id,data_id 0 + 8.22 0.000763 -1.0 -1.0
+chr1 69 76 PABPC4,PABPC4_1;1;method_id,data_id 0 + 7.46 0.000936 -1.0 -1.0
+chr1 48 56 PCBP1,PCBP1_3;1;method_id,data_id 0 + 4.08081 0.00076 -1.0 -1.0
+chr1 35 41 PTBP1,PTBP1_5;1;method_id,data_id 0 + 9.37374 0.000653 -1.0 -1.0
+chr1 23 31 PUM1,PUM1_3;1;method_id,data_id 0 + 12.8182 4.87e-05 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_1;1;method_id,data_id 0 + 10.404 0.000113 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_3;1;method_id,data_id 0 + 6.20202 0.000212 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_4;1;method_id,data_id 0 + 15.8485 1.64e-05 -1.0 -1.0
+chr1 21 31 PUM2,PUM2_5;1;method_id,data_id 0 + 10.8788 0.000113 -1.0 -1.0
+chr1 35 47 QKI,QKI_4;1;method_id,data_id 0 + -5.90909 0.000181 -1.0 -1.0
+chr1 35 44 RBM5,RBM5_1;1;method_id,data_id 0 + 7.52525 5.21e-05 -1.0 -1.0
+chr1 60 67 RC3H1,RC3H1_2;1;method_id,data_id 0 + 7.76768 0.000997 -1.0 -1.0
+chr1 13 27 TROVE2,TROVE2_2;1;method_id,data_id 0 + -20.1313 0.00042 -1.0 -1.0
+chr1 27 41 TROVE2,TROVE2_2;1;method_id,data_id 0 + -20.1919 0.000704 -1.0 -1.0
+chr1 32 40 TROVE2,TROVE2_5;1;method_id,data_id 0 + 8.83 0.000368 -1.0 -1.0
+chr1 59 72 TROVE2,TROVE2_7;1;method_id,data_id 0 + -12.6061 0.000794 -1.0 -1.0
+chr1 65 76 SAFB2,SAFB2_1;1;method_id,data_id 0 + 6.32 0.00098 -1.0 -1.0
+chr1 67 74 SART3,SART3_1;1;method_id,data_id 0 + 8.22 0.000763 -1.0 -1.0
+chr1 18 29 SF3A3,SF3A3_1;1;method_id,data_id 0 + 7.58416 0.000793 -1.0 -1.0
+chr1 35 46 SF3A3,SF3A3_1;1;method_id,data_id 0 + 10.3168 0.000128 -1.0 -1.0
+chr1 32 40 SF3A3,SF3A3_4;1;method_id,data_id 0 + 9.57 0.000254 -1.0 -1.0
+chr1 22 30 SF3B4,SF3B4_1;1;method_id,data_id 0 + 7.34 0.000985 -1.0 -1.0
+chr1 43 51 SRSF1,SRSF1_4;1;method_id,data_id 0 + 7.93939 0.000678 -1.0 -1.0
+chr1 50 59 SRSF2,SRSF2_8;1;method_id,data_id 0 + 11.4646 5.89e-05 -1.0 -1.0
+chr1 44 53 SRSF4,SRSF4_1;1;method_id,data_id 0 + 8.28283 0.000231 -1.0 -1.0
+chr1 44 52 SRSF5,SRSF5_1;1;method_id,data_id 0 + 3.93939 0.000409 -1.0 -1.0
+chr1 68 76 SRSF5,SRSF5_1;1;method_id,data_id 0 + 10.8081 0.0002 -1.0 -1.0
+chr1 67 78 SRSF7,SRSF7_2;1;method_id,data_id 0 + 7.27723 0.000652 -1.0 -1.0
+chr1 12 21 TAF15,TAF15_3;1;method_id,data_id 0 + 7.19 0.000808 -1.0 -1.0
+chr1 42 51 TRA2A,TRA2A_5;1;method_id,data_id 0 + 7.68317 0.000727 -1.0 -1.0
+chr1 26 33 TUT1,TUT1_1;1;method_id,data_id 0 + 9.56566 0.000191 -1.0 -1.0
+chr1 26 33 TUT1,TUT1_2;1;method_id,data_id 0 + 9.56566 0.000191 -1.0 -1.0
+chr1 36 45 U2AF1,U2AF1_1;1;method_id,data_id 0 + 10.03 0.000165 -1.0 -1.0
+chr1 36 45 U2AF2,U2AF2_1;1;method_id,data_id 0 + 9.9697 0.000192 -1.0 -1.0
+chr1 25 32 UNK,UNK_2;1;method_id,data_id 0 + 7.78 0.000861 -1.0 -1.0
+chr1 60 68 ZFP36,ZFP36_1;1;method_id,data_id 0 + 9.28 0.000317 -1.0 -1.0
+chr1 59 68 ZFP36L2,ZFP36L2_1;1;method_id,data_id 0 + 8.33333 0.000104 -1.0 -1.0
+chr1 59 68 ZFP36L2,ZFP36L2_2;1;method_id,data_id 0 + 8.33333 0.000104 -1.0 -1.0
+chr1 28 46 SLBP,RF00032;1;method_id,data_id 0 + 13.8 4.2e-05 -1.0 -1.0
b
diff -r 000000000000 -r 7dd2835ce566 test-data/rbp_hit_stats.compare_test.clipper_idr.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbp_hit_stats.compare_test.clipper_idr.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,3 @@
+data_id method_id run_id motif_db rbp_id c_regions mean_reg_len median_reg_len min_reg_len max_reg_len called_reg_size effective_reg_size c_reg_with_hits perc_reg_with_hits c_motif_hits c_uniq_motif_hits c_uniq_motif_nts perc_uniq_motif_nts_cal_reg perc_uniq_motif_nts_eff_reg uniq_motif_hits_cal_1000nt uniq_motif_hits_eff_1000nt wc_pval seq_motif_ids seq_motif_hits str_motif_ids str_motif_hits internal_id
+k562_eclip clipper_idr run_id human_v0.1 PUM1 32 65.03 61 10 202 2081 2081 6 18.75 8 8 56 2.691013935607881 2.691013935607881 3.8443056222969725 3.8443056222969725 0.12193332097392165 PUM1_1,PUM1_2,PUM1_3 6,2,0 - - pZFAGsHH
+k562_eclip clipper_idr run_id human_v0.1 PUM2 77 60.66 55 3 172 4671 4671 59 76.62337662337663 219 219 824 17.64076214943267 17.64076214943267 46.88503532434168 46.88503532434168 0.4736101622265477 PUM2_1,PUM2_2,PUM2_3,PUM2_4,PUM2_5 68,16,39,24,72 - - AJ40lTA6
b
diff -r 000000000000 -r 7dd2835ce566 test-data/rbp_hit_stats.compare_test.dewseq.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbp_hit_stats.compare_test.dewseq.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,3 @@
+data_id method_id run_id motif_db rbp_id c_regions mean_reg_len median_reg_len min_reg_len max_reg_len called_reg_size effective_reg_size c_reg_with_hits perc_reg_with_hits c_motif_hits c_uniq_motif_hits c_uniq_motif_nts perc_uniq_motif_nts_cal_reg perc_uniq_motif_nts_eff_reg uniq_motif_hits_cal_1000nt uniq_motif_hits_eff_1000nt wc_pval seq_motif_ids seq_motif_hits str_motif_ids str_motif_hits internal_id
+k562_eclip dewseq_w100_s5 run_id human_v0.1 PUM1 23 214.74 195 68 850 4939 4750 10 43.47826086956522 24 24 152 3.0775460619558612 3.2 4.859283255719781 5.052631578947368 0.0071490418994820636 PUM1_1,PUM1_2,PUM1_3 20,2,2 - - MVuT5ext
+k562_eclip dewseq_w100_s5 run_id human_v0.1 PUM2 70 203.41 187 100 745 14239 12333 65 92.85714285714286 559 448 1737 12.198890371514853 14.084164436876673 31.46288362946836 36.32530608935377 0.9472904311972452 PUM2_1,PUM2_2,PUM2_3,PUM2_4,PUM2_5 138,27,69,48,166 - - -sbAn7Po
b
diff -r 000000000000 -r 7dd2835ce566 test-data/rbp_hit_stats.rbpbench_search.slbp_user.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbp_hit_stats.rbpbench_search.slbp_user.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,2 @@
+data_id method_id run_id motif_db rbp_id c_regions mean_reg_len median_reg_len min_reg_len max_reg_len called_reg_size effective_reg_size c_reg_with_hits perc_reg_with_hits c_motif_hits c_uniq_motif_hits c_uniq_motif_nts perc_uniq_motif_nts_cal_reg perc_uniq_motif_nts_eff_reg uniq_motif_hits_cal_1000nt uniq_motif_hits_eff_1000nt wc_pval seq_motif_ids seq_motif_hits str_motif_ids str_motif_hits internal_id
+data_id method_id run_id user SLBP_USER 1 63 63 63 63 63 63 1 100.0 1 1 25 39.682539682539684 39.682539682539684 15.873015873015873 15.873015873015873 1.0 - - RF00032 1 RuDIjV1k
b
diff -r 000000000000 -r 7dd2835ce566 test-data/rbp_hit_stats.rbpbench_search.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbp_hit_stats.rbpbench_search.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,3 @@
+data_id method_id run_id motif_db rbp_id c_regions mean_reg_len median_reg_len min_reg_len max_reg_len called_reg_size effective_reg_size c_reg_with_hits perc_reg_with_hits c_motif_hits c_uniq_motif_hits c_uniq_motif_nts perc_uniq_motif_nts_cal_reg perc_uniq_motif_nts_eff_reg uniq_motif_hits_cal_1000nt uniq_motif_hits_eff_1000nt wc_pval seq_motif_ids seq_motif_hits str_motif_ids str_motif_hits internal_id
+data_id method_id run_id human_v0.1 PUM1 1 70 70 70 70 70 70 1 100.0 1 1 8 11.428571428571429 11.428571428571429 14.285714285714285 14.285714285714285 1.0 PUM1_1,PUM1_2,PUM1_3 0,0,1 - - h2FelLqa
+data_id method_id run_id human_v0.1 PUM2 1 70 70 70 70 70 70 1 100.0 4 4 10 14.285714285714285 14.285714285714285 57.14285714285714 57.14285714285714 1.0 PUM2_1,PUM2_2,PUM2_3,PUM2_4,PUM2_5 1,0,1,1,1 - - HcD7_nJz
b
diff -r 000000000000 -r 7dd2835ce566 test-data/rbp_hit_stats.table_test.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbp_hit_stats.table_test.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,3 @@
+data_id method_id run_id motif_db rbp_id c_regions mean_reg_len median_reg_len min_reg_len max_reg_len called_reg_size effective_reg_size c_reg_with_hits perc_reg_with_hits c_motif_hits c_uniq_motif_hits c_uniq_motif_nts perc_uniq_motif_nts_cal_reg perc_uniq_motif_nts_eff_reg uniq_motif_hits_cal_1000nt uniq_motif_hits_eff_1000nt wc_pval seq_motif_ids seq_motif_hits str_motif_ids str_motif_hits internal_id
+did1 mid1 run_id human_v0.1 PUM1 1 70 70 70 70 70 70 1 100.0 1 1 8 11.428571428571429 11.428571428571429 14.285714285714285 14.285714285714285 1.0 PUM1_1,PUM1_2,PUM1_3 0,0,1 - - NInGOqIN
+did2 mid2 run_id human_v0.1 PUM2 1 70 70 70 70 70 70 1 100.0 4 4 10 14.285714285714285 14.285714285714285 57.14285714285714 57.14285714285714 1.0 PUM2_1,PUM2_2,PUM2_3,PUM2_4,PUM2_5 1,0,1,1,1 - - u7eY1SoG
b
diff -r 000000000000 -r 7dd2835ce566 test-data/rbp_hit_stats.test_batch.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbp_hit_stats.test_batch.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,3 @@
+data_id method_id run_id motif_db rbp_id c_regions mean_reg_len median_reg_len min_reg_len max_reg_len called_reg_size effective_reg_size c_reg_with_hits perc_reg_with_hits c_motif_hits c_uniq_motif_hits c_uniq_motif_nts perc_uniq_motif_nts_cal_reg perc_uniq_motif_nts_eff_reg uniq_motif_hits_cal_1000nt uniq_motif_hits_eff_1000nt wc_pval seq_motif_ids seq_motif_hits str_motif_ids str_motif_hits internal_id
+data-id1 method-id1 run_id human_v0.1 PUM1 1 70 70 70 70 70 70 1 100.0 1 1 8 11.428571428571429 11.428571428571429 14.285714285714285 14.285714285714285 1.0 PUM1_1,PUM1_2,PUM1_3 0,0,1 - - TT41sPJM
+data-id2 method-id2 run_id human_v0.1 PUM2 1 70 70 70 70 70 70 1 100.0 4 4 10 14.285714285714285 14.285714285714285 57.14285714285714 57.14285714285714 1.0 PUM2_1,PUM2_2,PUM2_3,PUM2_4,PUM2_5 1,0,1,1,1 - - -dJ6C0OI
b
diff -r 000000000000 -r 7dd2835ce566 test-data/report.rbpbench_compare.test.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/report.rbpbench_compare.test.html Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,98 @@
+<p><head>
+<title>RBPBench - Motif Search Comparison Report</title></p>
+<script src="/home/malong/miniconda3/envs/rbpbench/lib/python3.9/site-packages/rbpbench/content/sorttable.js" type="text/javascript"></script>
+<p></head></p>
+<h1>Comparison Report</h1>
+<p>List of available comparison statistics generated
+by RBPBench (rbpbench compare):</p>
+<ul>
+<li><a href="#method-tab-1">k562_eclip,human_v0.1,PUM1 method comparison table</a></li>
+<li><a href="#method-tab-2">k562_eclip,human_v0.1,PUM2 method comparison table</a></li>
+<li><a href="#method-venn-1">k562_eclip,human_v0.1,PUM1 method comparison plot</a></li>
+<li><a href="#method-venn-2">k562_eclip,human_v0.1,PUM2 method comparison plot</a></li>
+</ul>
+<p>&nbsp;</p>
+<h2 id="method-tab-1">k562_eclip,human_v0.1,PUM1 method comparison statistics</h2>
+<p><strong>Table:</strong> RBP motif hit statistics for combined ID "k562_eclip,human_v0.1,PUM1" (data ID, motif database ID, RBP ID) over different methods (method ID column).</p>
+<table class="sortable">
+<thead>
+<tr>
+<th style="text-align: center;">Method ID</th>
+<th style="text-align: center;"># regions</th>
+<th style="text-align: center;"># motif hits</th>
+<th style="text-align: center;">% regions with motifs</th>
+<th style="text-align: center;">% motif nucleotides</th>
+<th style="text-align: center;"># motif hits per 1000 nt</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align: center;">dewseq_w100_s5</td>
+<td style="text-align: center;">23</td>
+<td style="text-align: center;">24</td>
+<td style="text-align: center;">43.48</td>
+<td style="text-align: center;">3.20</td>
+<td style="text-align: center;">4.86</td>
+</tr>
+<tr>
+<td style="text-align: center;">clipper_idr</td>
+<td style="text-align: center;">32</td>
+<td style="text-align: center;">8</td>
+<td style="text-align: center;">18.75</td>
+<td style="text-align: center;">2.69</td>
+<td style="text-align: center;">3.84</td>
+</tr>
+</tbody>
+</table>
+<p>&nbsp;
+&nbsp;</p>
+<p>Column IDs have the following meanings: <strong>Method ID</strong> -&gt; method ID set for dataset (typically peak calling method ID), <strong># regions</strong> -&gt; number of peak regions used for motif search, <strong># motif hits</strong> -&gt; number of unique motif hits in peak regions (removed double counts), <strong>% regions with motifs</strong> -&gt; percentage of peak regions with motif hits, <strong>% motif nucleotides</strong> -&gt; percentage of unique motif nucleotides over effective peak region size (overlapping regions merged), <strong># motif hits per 1000 nt</strong> -&gt; number of motif hits over 1000 nt of called peak region size (overlapping regions NOT merged).</p>
+<h2 id="method-tab-2">k562_eclip,human_v0.1,PUM2 method comparison statistics</h2>
+<p><strong>Table:</strong> RBP motif hit statistics for combined ID "k562_eclip,human_v0.1,PUM2" (data ID, motif database ID, RBP ID) over different methods (method ID column).</p>
+<table class="sortable">
+<thead>
+<tr>
+<th style="text-align: center;">Method ID</th>
+<th style="text-align: center;"># regions</th>
+<th style="text-align: center;"># motif hits</th>
+<th style="text-align: center;">% regions with motifs</th>
+<th style="text-align: center;">% motif nucleotides</th>
+<th style="text-align: center;"># motif hits per 1000 nt</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align: center;">dewseq_w100_s5</td>
+<td style="text-align: center;">70</td>
+<td style="text-align: center;">448</td>
+<td style="text-align: center;">92.86</td>
+<td style="text-align: center;">14.08</td>
+<td style="text-align: center;">31.46</td>
+</tr>
+<tr>
+<td style="text-align: center;">clipper_idr</td>
+<td style="text-align: center;">77</td>
+<td style="text-align: center;">219</td>
+<td style="text-align: center;">76.62</td>
+<td style="text-align: center;">17.64</td>
+<td style="text-align: center;">46.89</td>
+</tr>
+</tbody>
+</table>
+<p>&nbsp;
+&nbsp;</p>
+<p>Column IDs have the following meanings: <strong>Method ID</strong> -&gt; method ID set for dataset (typically peak calling method ID), <strong># regions</strong> -&gt; number of peak regions used for motif search, <strong># motif hits</strong> -&gt; number of unique motif hits in peak regions (removed double counts), <strong>% regions with motifs</strong> -&gt; percentage of peak regions with motif hits, <strong>% motif nucleotides</strong> -&gt; percentage of unique motif nucleotides over effective peak region size (overlapping regions merged), <strong># motif hits per 1000 nt</strong> -&gt; number of motif hits over 1000 nt of called peak region size (overlapping regions NOT merged).</p>
+<h2 id="method-venn-1">k562_eclip,human_v0.1,PUM1 method comparison plot</h2>
+<p>Based on the same combined ID "k562_eclip,human_v0.1,PUM1" (data ID, motif database ID, RBP ID), motif hit occurrences for 2 different methods (clipper_idr,dewseq_w100_s5) are compared via Venn diagram.
+Any given motif hit can either be found only by one method, or be identified by any set (&gt;=2) of methods (intersection areas).</p>
+<p><img src="html_report_plots/venn_diagram.method_comp.k562_eclip,human_v0.1,PUM1.png" alt="dataset comparison plot k562_eclip,human_v0.1,PUM1"
+title="dataset comparison plot k562_eclip,human_v0.1,PUM1" width="700" /></p>
+<p><strong>Figure:</strong> Venn diagram of motif hit occurrences for 2 different methods (clipper_idr,dewseq_w100_s5) with identical combined ID (k562_eclip,human_v0.1,PUM1) + corresponding percentages of total motif hits for each region (method exclusive and intersection(s)).</p>
+<p>&nbsp;</p>
+<h2 id="method-venn-2">k562_eclip,human_v0.1,PUM2 method comparison plot</h2>
+<p>Based on the same combined ID "k562_eclip,human_v0.1,PUM2" (data ID, motif database ID, RBP ID), motif hit occurrences for 2 different methods (clipper_idr,dewseq_w100_s5) are compared via Venn diagram.
+Any given motif hit can either be found only by one method, or be identified by any set (&gt;=2) of methods (intersection areas).</p>
+<p><img src="html_report_plots/venn_diagram.method_comp.k562_eclip,human_v0.1,PUM2.png" alt="dataset comparison plot k562_eclip,human_v0.1,PUM2"
+title="dataset comparison plot k562_eclip,human_v0.1,PUM2" width="700" /></p>
+<p><strong>Figure:</strong> Venn diagram of motif hit occurrences for 2 different methods (clipper_idr,dewseq_w100_s5) with identical combined ID (k562_eclip,human_v0.1,PUM2) + corresponding percentages of total motif hits for each region (method exclusive and intersection(s)).</p>
+<p>&nbsp;</p>
b
diff -r 000000000000 -r 7dd2835ce566 test-data/report.rbpbench_search.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/report.rbpbench_search.html Sun Dec 03 12:51:54 2023 +0000
[
@@ -0,0 +1,83 @@
+<p><head>
+<title>RBPBench - Search Report</title></p>
+<script src="/home/uhlm/Programme/miniconda3/envs/rbpbench/lib/python3.11/site-packages/rbpbench/content/sorttable.js" type="text/javascript"></script>
+<p></head></p>
+<h1>Search report</h1>
+<p>List of available statistics and plots generated
+by RBPBench (rbpbench search --report):</p>
+<ul>
+<li><a href="#rbp-enrich-stats">RBP motif enrichment statistics</a></li>
+<li><a href="#cooc-heat-map">RBP co-occurrences heat map</a></li>
+<li><a href="#corr-heat-map">RBP correlations heat map</a>
+&nbsp;</li>
+</ul>
+<h2 id="rbp-enrich-stats">RBP motif enrichment statistics</h2>
+<p><strong>Table:</strong> RBP motif enrichment statistics. Given a score for each genomic region (# input regions = 1), 
+RBPBench checks whether motifs are enriched 
+in higher-scoring regions (using Wilcoxon rank-sum test). A low Wilcoxon rank-sum test p-value for a given RBP thus indicates 
+that higher-scoring regions are more likely to contain motif hits of the respective RBP. NOTE that if scores associated with 
+input genomic regions are all the same, p-values become meaningless (i.e., they result in p-values of 1.0).</p>
+<table class="sortable">
+<thead>
+<tr>
+<th style="text-align: center;">RBP ID</th>
+<th style="text-align: center;"># hit regions</th>
+<th style="text-align: center;">% hit regions</th>
+<th style="text-align: center;"># motif hits</th>
+<th style="text-align: center;">p-value</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align: center;">PUM1</td>
+<td style="text-align: center;">1</td>
+<td style="text-align: center;">100.00</td>
+<td style="text-align: center;">1</td>
+<td style="text-align: center;">1.0</td>
+</tr>
+<tr>
+<td style="text-align: center;">PUM2</td>
+<td style="text-align: center;">1</td>
+<td style="text-align: center;">100.00</td>
+<td style="text-align: center;">4</td>
+<td style="text-align: center;">1.0</td>
+</tr>
+</tbody>
+</table>
+<p>&nbsp;
+&nbsp;</p>
+<p>Column IDs have the following meanings: <strong>RBP ID</strong> -&gt; RBP ID from database or user-defined (typically RBP name), <strong># hit regions</strong> -&gt; number of input genomic regions with motif hits (after filtering and optional extension), <strong>% hit regions</strong> -&gt; percentage of hit regions over all regions (i.e. how many input regions contain &gt;= 1 RBP binding motif), <strong># motif hits</strong> -&gt; number of unique motif hits in input regions (removed double counts), <strong>p-value</strong> -&gt; Wilcoxon rank-sum test p-value.</p>
+<h2 id="cooc-heat-map">RBP co-occurrences heat map</h2>
+<p>RBP co-occurrences heat map.</p>
+<div class="container-fluid" style="margin-top:40px">
+<iframe src="html_report_plots/co-occurrence_plot.plotly.html" width="1200" height="1200"></iframe>
+</div>
+
+<p><strong>Figure:</strong> Heat map of co-occurrences (Fisher's exact test p-values) between RBPs. 
+Legend color: negative logarithm (base 10) of Fisher's exact test p-value.
+Hover box: 1) RBP1. 2) RBP2. 3) p-value: Fisher's exact test p-value (calculated based on contingency table between RBP1 and RBP2). 
+4) RBPs compared. 5) Counts[]: Contingency table of co-occurrence counts (i.e., number of genomic regions with/without shared motif hits) between compared RBPs, 
+with format [[A, B], [C, D]], where 
+A: RBP1 AND RBP2, 
+B: NOT RBP1 AND RBP2, 
+C: RBP1 AND NOT RBP2, 
+D: NOT RBP1 AND NOT RBP2. </p>
+<p>&nbsp;</p>
+<h2 id="corr-heat-map">RBP correlations heat map</h2>
+<p>RBP correlations heat map.</p>
+<div class="container-fluid" style="margin-top:40px">
+<iframe src="html_report_plots/correlation_plot.plotly.html" width="1200" height="1200"></iframe>
+</div>
+
+<p><strong>Figure:</strong> Heat map of correlations (Pearson correlation coefficients) between RBPs. 
+Genomic regions are labelled 1 or 0 (RBP motif present or not), resulting in a vector of 1s and 0s for each RBP.
+Correlations are then calculated by comparing vectors for every pair of RBPs.
+Legend color: Pearson correlation coefficient. 
+Hover box: 1) RBP1. 2) RBP2.
+3) RBPs compared. 4) Counts[]: Contingency table of co-occurrence counts (i.e., number of genomic regions with/without shared motif hits) between compared RBPs, 
+with format [[A, B], [C, D]], where 
+A: RBP1 AND RBP2, 
+B: NOT RBP1 AND RBP2, 
+C: RBP1 AND NOT RBP2, 
+D: NOT RBP1 AND NOT RBP2. </p>
+<p>&nbsp;</p>
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.bed Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,1 @@
+chr1 10 80 PUM1_K562_IDR 0 +
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fa Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,2 @@
+>chr1
+TCTTATTAATACTGGTTGTGATTTGTAGATACTGGCTCTTCTCAGATGAAGTTCCAGGATTATTCATTGAAAAAGGCTGGGTACATGAC
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test.slbp_user.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.slbp_user.bed Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,1 @@
+chr1 50 113 SLBP_K562_IDR 0 +
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test.slbp_user.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.slbp_user.fa Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,2 @@
+>chr1
+AACATCCAGGCTGTGCTACTGCCCAAGAAGACCGAGAGTCACCACAAGGCCAAAGGCAAATAATGTCTCCATAGAATCACTTTCCAATACAACGGCTCTTTTCAGAGCCACCT
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.bed Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,1 @@
+chr1 10 80 PUM1_K562_IDR 0 +
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test2.bed Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,1 @@
+chr1 10 80 PUM1_K562_IDR 0 +
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test_custom.info.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_custom.info.txt Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,10 @@
+RBP_motif_id RBP_name Motif_type Organism
+PUM1_1 PUM1 meme_xml human
+PUM1_2 PUM1 meme_xml human
+PUM1_3 PUM1 meme_xml human
+PUM2_1 PUM2 meme_xml human
+PUM2_2 PUM2 meme_xml human
+PUM2_3 PUM2 meme_xml human
+PUM2_4 PUM2 meme_xml human
+PUM2_5 PUM2 meme_xml human
+RF00032 SLBP cm human
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test_custom.motif_hits.rbpbench_search.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_custom.motif_hits.rbpbench_search.bed Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,6 @@
+chr1 23 31 PUM1,PUM1_3;1;method_id,data_id 0 + 12.8182 4.87e-05 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_1;1;method_id,data_id 0 + 10.404 0.000113 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_3;1;method_id,data_id 0 + 6.20202 0.000212 -1.0 -1.0
+chr1 23 31 PUM2,PUM2_4;1;method_id,data_id 0 + 15.8485 1.64e-05 -1.0 -1.0
+chr1 21 31 PUM2,PUM2_5;1;method_id,data_id 0 + 10.8788 0.000113 -1.0 -1.0
+chr1 28 46 SLBP,RF00032;1;method_id,data_id 0 + 13.8 4.2e-05 -1.0 -1.0
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test_custom.seq_motifs.meme
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_custom.seq_motifs.meme Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,99 @@
+MEME version 5
+
+ALPHABET= ACGT
+
+strands: +
+
+Background letter frequencies
+A 0.250000 C 0.250000 G 0.250000 T 0.250000
+
+MOTIF PUM1_1 
+letter-probability matrix: alength= 4 w= 7 nsites= 20 E= 0
+ 0.000000  0.000000  1.000000  0.000000 
+ 0.000000  0.000000  1.000000  0.000000 
+ 0.000000  0.000000  1.000000  0.000000 
+ 0.223900  0.184800  0.212700  0.378600 
+ 0.000000  0.000000  1.000000  0.000000 
+ 0.000000  0.000000  0.835718  0.164282 
+ 0.041300  0.041200  0.876300  0.041200 
+
+MOTIF PUM1_2 
+letter-probability matrix: alength= 4 w= 9 nsites= 20 E= 0
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.000000  0.000000  0.500000  0.500000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.500000  0.000000  0.500000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+
+MOTIF PUM1_3 
+letter-probability matrix: alength= 4 w= 8 nsites= 20 E= 0
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.000000  0.000000  1.000000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.919505  0.080495  0.000000  0.000000 
+ 0.145800  0.458500  0.168500  0.227200 
+ 1.000000  0.000000  0.000000  0.000000 
+ 0.000000  0.000000  0.100031  0.899969 
+ 0.867950  0.132050  0.000000  0.000000 
+
+MOTIF PUM2_1 
+letter-probability matrix: alength= 4 w= 8 nsites= 20 E= 0
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.000000  0.000000  1.000000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+ 0.397900  0.158200  0.026000  0.417900 
+ 1.000000  0.000000  0.000000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.843493  0.000000  0.000000  0.156507 
+
+MOTIF PUM2_2 
+letter-probability matrix: alength= 4 w= 8 nsites= 20 E= 0
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.000000  0.000000  1.000000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+ 0.000000  1.000000  0.000000  0.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.000000  1.000000  0.000000  0.000000 
+
+MOTIF PUM2_3 
+letter-probability matrix: alength= 4 w= 8 nsites= 20 E= 0
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.000000  0.000000  1.000000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+
+MOTIF PUM2_4 
+letter-probability matrix: alength= 4 w= 8 nsites= 20 E= 0
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.000000  0.000000  1.000000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+ 0.000000  0.000000  1.000000  0.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+ 0.000000  0.000000  0.000000  1.000000 
+ 1.000000  0.000000  0.000000  0.000000 
+
+MOTIF PUM2_5 
+letter-probability matrix: alength= 4 w= 10 nsites= 20 E= 0
+ 0.253775  0.137486  0.112989  0.495750 
+ 0.270800  0.141400  0.146600  0.441200 
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.072132  0.000000  0.896223  0.031646 
+ 0.000000  0.000000  0.000000  1.000000 
+ 0.968974  0.000000  0.031026  0.000000 
+ 0.232400  0.319800  0.028700  0.419100 
+ 0.932355  0.000000  0.000000  0.067645 
+ 0.121986  0.000000  0.173847  0.704167 
+ 0.517200  0.063500  0.123600  0.295700 
+
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test_custom.str_motifs.cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_custom.str_motifs.cm Sun Dec 03 12:51:54 2023 +0000
[
b"@@ -0,0 +1,385 @@\n+INFERNAL1/a [1.1.3 | Nov 2019]\n+NAME     Histone3\n+ACC      RF00032\n+DESC     Histone 3' UTR stem-loop\n+STATES   142\n+NODES    42\n+CLEN     46\n+W        57\n+ALPH     RNA\n+RF       no\n+CONS     yes\n+MAP      yes\n+DATE     Fri Apr  4 13:03:46 2014\n+COM      [1] /nfs/production/xfam/rfam/software/bin/cmbuild -F CM SEED\n+COM      [2] /nfs/production/xfam/rfam/software/bin/cmcalibrate --mpi CM\n+PBEGIN   0.05\n+PEND     0.05\n+WBETA    1e-07\n+QDBBETA1 1e-07\n+QDBBETA2 1e-15\n+N2OMEGA  1.52588e-05\n+N3OMEGA  1.52588e-05\n+ELSELF   -0.08926734\n+NSEQ     46\n+EFFN     46.000000\n+CKSUM    471917655\n+NULL     0.000  0.000  0.000  0.000 \n+GA       25.00\n+TC       25.00\n+NC       24.90\n+EFP7GF   -8.9961 0.74543\n+ECMLC    0.73248    -4.63583     3.49505     1600000      463131  0.002591\n+ECMGC    0.50982    -9.05666     1.92502     1600000      108029  0.003703\n+ECMLI    0.86412    -1.32523     4.80439     1600000      239617  0.005008\n+ECMGI    0.55516    -6.80684     2.91667     1600000       88393  0.004525\n+CM\n+                                             [ ROOT    0 ]      -      - - - - -\n+     S     0    -1 0     1     4     1     1    57    76 -10.705 -10.912  -0.004  -9.326                 \n+    IL     1     1 2     1     4     1    19    64    83  -1.686  -2.369  -1.117  -4.855                  0.000  0.000  0.000  0.000 \n+    IR     2     2 3     2     3     1    19    63    81  -1.442  -0.798  -4.142                          0.000  0.000  0.000  0.000 \n+                                             [ MATL    1 ]      1      - c - - -\n+    ML     3     2 3     5     3     1    19    57    76 -11.622  -0.002 -10.276                          0.274  0.676 -1.683 -0.183 \n+     D     4     2 3     5     3     0    15    58    76  -6.174  -1.687  -0.566                         \n+    IL     5     5 3     5     3     1    18    62    80  -1.442  -0.798  -4.142                          0.000  0.000  0.000  0.000 \n+      
                                       [ MATL    2 ]      2      - c - - -\n+    ML     6     5 3     8     3     1    18    56    75 -11.622  -0.002 -10.276                          0.562  0.685 -1.974 -0.595 \n+     D     7     5 3     8     3     0    15    57    75  -6.174  -1.687  -0.566                         \n+    IL     8     8 3     8     3     1    17    61    79  -1.442  -0.798  -4.142                          0.000  0.000  0.000  0.000 \n+                                             [ MATL    3 ]      3      - A - - -\n+    ML     9     8 3    11     3     1    17    55    74 -11.622  -0.002 -10.276                          1.588 -1.105 -4.684 -1.031 \n+     D    10     8 3    11     3     0    14    56    74  -6.174  -1.687  -0.566                         \n+    IL    11    11 3    11     3     1    17    60    78  -1.442  -0.798  -4.142                          0.000  0.000  0.000  0.000 \n+                                             [ MATL    4 ]      4      - A - - -\n+    ML    12    11 3    14     3     1    16    54    73 -11.622  -0.002 -10.276                          1.035 -0.025 -1.895 -0.517 \n+     D    13    11 3    14     3     0    14    55    73  -6.174  -1.687  -0.566                         \n+    IL    14    14 3    14     3     1    16    59    77  -1.442  -0.798  -4.142                          0.000  0.000  0.000  0.000 \n+                                             [ MATL    5 ]      5      - a - - -\n+    ML    15    14 3    17     3     1    15    53    72 -11.923  -0.687  -1.402                          0.970  0.662 -4.530 -1.266 \n+     D    16    14 3    17     3     0    14    52    71  -5.620  -0.734  -1.403                         \n+    IL    17    17 3    17     3     1    15    55    73  -1.925  -0.554  -4.164                          0.000  0.000  0.000  0.000 \n+                                             [ MATR    6 ]      -     47 - U - -\n+    MR    18    17 3    20     3     1    14    52    71 -11.239  
-0.003  -9.556                         -1.280 -3.255 -0.399  1.446 \n"..b'0.01098  4.51736  1.09861  0.40547\n+     25   0.90589  0.99871  2.78941  1.79574     26 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     26   0.37936  2.45545  2.76975  1.78857     27 A - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.03553  6.17794  3.41622  1.46634  0.26236  1.09861  0.40547\n+     27   2.00288  0.98595  2.07379  1.00440     28 c - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00429  6.14670  6.14670  1.46634  0.26236  0.11900  2.18757\n+     28   0.58277  2.57844  2.26067  1.34141     29 A - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     29   2.04103  1.64155  1.24300  0.94698     30 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.04224  6.17794  3.23682  1.46634  0.26236  1.09861  0.40547\n+     30   0.90025  2.40010  2.48509  0.86868     31 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00432  6.14002  6.14002  1.46634  0.26236  0.10040  2.34838\n+     31   1.14658  1.52988  1.97258  1.11897     32 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     32   0.99477  1.56394  2.34075  1.12508     33 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     33   1.30260  0.91111  1.74486  1.88763     34 c - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     34   0.81644  1.95418  2.55798  1.08218     35 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     35   0.88707  2.68494  1.54697  1.18084     36 a - - 
:\n+          1.38629  1.38629  1.38629  1.38629\n+          0.02155  6.17794  3.95035  1.46634  0.26236  1.09861  0.40547\n+     36   0.88692  1.97884  1.71659  1.30871     37 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00423  6.16062  6.16062  1.46634  0.26236  0.19522  1.72966\n+     37   0.78649  1.68131  1.71782  1.72051     38 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     38   0.59588  2.08432  1.60986  2.08251     39 A - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     39   0.76886  1.58017  1.51684  2.19720     40 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.02859  6.17794  3.64554  1.46634  0.26236  1.09861  0.40547\n+     40   0.91147  2.90974  1.18115  1.44117     41 a - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00426  6.15362  6.15362  1.46634  0.26236  0.14750  1.98676\n+     41   1.19023  1.82584  1.84655  0.97555     42 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     42   1.35396  2.21010  1.23260  1.07716     43 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     43   1.25472  1.71330  1.50591  1.16232     44 u - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     44   1.37043  1.66018  2.98397  0.68259     45 U - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00416  6.17794  6.17794  1.46634  0.26236  1.09861  0.40547\n+     45   1.15612  1.70216  1.15511  1.67140     46 g - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.47997  6.17794  0.96989  1.46634  0.26236  1.09861  0.40547\n+     46   2.25321  3.55314  1.66807  0.38906 
    47 U - - :\n+          1.38629  1.38629  1.38629  1.38629\n+          0.00335  5.70132        *  1.46634  0.26236  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test_search.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_search.gtf Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,11 @@
+chr1 HAVANA gene 21 60 . + . gene_id "g1"; gene_type "protein_coding"; gene_name "GENE1";
+chr1 HAVANA transcript 21 60 . + . gene_id "g1"; transcript_id "g1_t1"; transcript_type "protein_coding"; gene_type "protein_coding"; transcript_name "g1_t1"; level 1; transcript_support_level "2"; tag "basic"; tag "Ensembl_canonical";
+chr1 HAVANA exon 21 60 . + . gene_id "g1"; transcript_id "g1_t1"; gene_type "protein_coding"; gene_name "GENE1"; transcript_type "protein_coding"; transcript_name "g1_t1-202"; exon_number 1; exon_id "g1_t1_e1"; level 1; transcript_support_level "1"; tag "basic"; tag "Ensembl_canonical";
+chr1 HAVANA CDS 31 44 . + . gene_id "g1"; transcript_id "g1_t1"; gene_type "protein_coding"; gene_name "GENE1"; transcript_type "protein_coding"; transcript_name "g1_t1-202"; exon_number 1; exon_id "g1_t1_e1"; level 1; transcript_support_level "1"; tag "basic"; tag "Ensembl_canonical";
+chr1 HAVANA transcript 21 60 . + . gene_id "g1"; transcript_id "g1_t2"; transcript_type "protein_coding"; gene_type "protein_coding"; transcript_name "g1_t2"; level 1; transcript_support_level "1"; tag "basic"; tag "Ensembl_canonical";
+chr1 HAVANA exon 21 60 . + . gene_id "g1"; transcript_id "g1_t2"; gene_type "protein_coding"; gene_name "GENE1"; transcript_type "protein_coding"; transcript_name "g1_t2-202"; exon_number 1; exon_id "g1_t2_e1"; level 1; transcript_support_level "1"; tag "basic"; tag "Ensembl_canonical";
+chr1 HAVANA CDS 31 44 . + . gene_id "g1"; transcript_id "g1_t2"; gene_type "protein_coding"; gene_name "GENE1"; transcript_type "protein_coding"; transcript_name "g1_t2-202"; exon_number 1; exon_id "g1_t2_e1"; level 1; transcript_support_level "1"; tag "basic"; tag "Ensembl_canonical";
+chr1 HAVANA gene 21 60 . - . gene_id "g2"; gene_type "lncRNA"; gene_name "GENE2";
+chr1 HAVANA transcript 20 60 . - . gene_id "g2"; transcript_id "g2_t1"; transcript_type "lncRNA"; gene_type "lncRNA"; transcript_name "g2_t1"; level 1; transcript_support_level "1"; tag "basic"; tag "Ensembl_canonical";
+chr1 HAVANA exon 51 60 . - . gene_id "g2"; transcript_id "g2_t1"; gene_type "lncRNA"; gene_name "GENE1"; transcript_type "lncRNA"; transcript_name "g2_t1-202"; exon_number 1; exon_id "g2_t1_e1"; level 1; transcript_support_level "1"; tag "basic"; tag "Ensembl_canonical";
+chr1 HAVANA exon 21 40 . - . gene_id "g2"; transcript_id "g2_t1"; gene_type "lncRNA"; gene_name "GENE1"; transcript_type "lncRNA"; transcript_name "g2_t1-202"; exon_number 2; exon_id "g2_t1_e1"; level 1; transcript_support_level "1"; tag "basic"; tag "Ensembl_canonical";
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test_search_gtf.region_annotations.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_search_gtf.region_annotations.tsv Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,2 @@
+region_id gene_id gene_name transcript_id region_annotation transcript_biotype
+chr1:10-80(+) g1 GENE1 g1_t2 3'UTR protein_coding
b
diff -r 000000000000 -r 7dd2835ce566 test-data/test_table.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_table.txt Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,2 @@
+PUM1 mid1 did1 test1.bed
+PUM2 mid2 did2 test2.bed
b
diff -r 000000000000 -r 7dd2835ce566 tool-data/fasta_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>     <dbkey> <display_name>  <file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon      hg19    Human (Homo sapiens): hg19 Canonical    /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon      hg18    Human (Homo sapiens): hg18 Canonical    /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full       hg18    Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon      hg19    Human (Homo sapiens): hg19 Canonical    /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full       hg19    Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
b
diff -r 000000000000 -r 7dd2835ce566 tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,259 @@
+A1CF
+ACIN1
+ACO1
+ADAR
+AGGF1
+AGO1
+AGO2
+AKAP1
+AKAP8
+ANKHD1
+AUH
+BCCIP
+BOLL
+BUD13
+CAPRIN1
+CBX7
+CDC40
+CELF1
+CELF2
+CELF4
+CELF5
+CELF6
+CNBP
+CNOT4
+CPEB1
+CPEB2
+CPEB4
+CPSF6
+CPSF7
+CSTF2
+CSTF2T
+DAZ3
+DAZAP1
+DDX3X
+DDX6
+DDX19B
+DDX24
+DDX54
+DDX55
+DDX58
+DDX59
+DGCR8
+DKC1
+EFTUD2
+EIF3D
+EIF4A3
+EIF4B
+EIF4G2
+ELAVL1
+ELAVL2
+ELAVL3
+ELAVL4
+ENOX1
+ERI1
+ESRP1
+ESRP2
+EWSR1
+FAM120A
+FASTKD2
+FBL
+FIP1L1
+FMR1
+FTO
+FUBP1
+FUBP3
+FUS
+FXR1
+FXR2
+G3BP1
+G3BP2
+GNL3
+GPKOW
+GRSF1
+GRWD1
+GTF2F1
+HLTF
+HNRNPA0
+HNRNPA1
+HNRNPA1L2
+HNRNPA2B1
+HNRNPA3
+HNRNPAB
+HNRNPC
+HNRNPCL1
+HNRNPD
+HNRNPDL
+HNRNPF
+HNRNPH1
+HNRNPH2
+HNRNPK
+HNRNPL
+HNRNPLL
+HNRNPM
+HNRNPU
+HNRNPUL1
+IFIH1
+IGF2BP1
+IGF2BP2
+IGF2BP3
+IGHMBP2
+ILF2
+ILF3
+KHDRBS1
+KHDRBS2
+KHDRBS3
+KHSRP
+LARP4
+LARP4B
+LIN28A
+LIN28B
+LSM11
+MATR3
+MBNL1
+METAP2
+MSI1
+MSI2
+MTPAP
+NCBP2
+NELFE
+NKRF
+NOL12
+NONO
+NOP56
+NOP58
+NOVA1
+NOVA2
+NPM1
+NSUN2
+NUMA1
+NUP42
+NXF1
+OAS1
+OBI1
+PABPC1
+PABPC3
+PABPC4
+PABPC5
+PABPN1
+PABPN1L
+PARP1
+PCBP1
+PCBP2
+PCBP4
+PPIE
+PPIG
+PPIL4
+PPRC1
+PRPF8
+PRR3
+PTBP1
+PTBP2
+PTBP3
+PUF60
+PUM1
+PUM2
+QKI
+RALY
+RALYL
+RANGAP1
+RBFOX1
+RBFOX2
+RBFOX3
+RBM3
+RBM4
+RBM4B
+RBM5
+RBM6
+RBM8A
+RBM10
+RBM14
+RBM15
+RBM15B
+RBM22
+RBM23
+RBM24
+RBM25
+RBM28
+RBM39
+RBM41
+RBM42
+RBM45
+RBM46
+RBM47
+RBMS1
+RBMS2
+RBMS3
+RBMX
+RBMY1A1
+RC3H1
+TROVE2
+RPS5
+SAFB2
+SAMD4A
+SART3
+SF1
+SF3A3
+SF3B4
+SFPQ
+SLTM
+SMNDC1
+SND1
+SNRNP70
+SNRPA
+SNRPB2
+SOX2
+SRP14
+SRP68
+SRRM4
+SRSF1
+SRSF2
+SRSF3
+SRSF4
+SRSF5
+SRSF6
+SRSF7
+SRSF8
+SRSF9
+SRSF10
+SRSF11
+SSB
+SUB1
+SUGP2
+SUPV3L1
+SYNCRIP
+TAF15
+TARBP2
+TARDBP
+TBRG4
+TIA1
+TIAL1
+TNRC6A
+TRA2A
+TRA2B
+TRNAU1AP
+TUT1
+U2AF1
+U2AF2
+UCHL5
+UNK
+UPF1
+WDR33
+XPO5
+XRCC6
+XRN2
+YBX1
+YBX2
+YBX3
+YTHDC1
+YWHAG
+ZC3H10
+ZCRB1
+ZFP36
+ZFP36L2
+ZNF184
+ZNF326
+ZNF622
+ZNF638
+ZRANB2
+SLBP
b
diff -r 000000000000 -r 7dd2835ce566 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Locations of built-in genome -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+    <!-- IDs table file -->
+    <table name="rbp_ids_table" comment_char="#">
+        <columns>value</columns>
+        <file path="${__HERE__}/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt" />
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r 7dd2835ce566 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Sun Dec 03 12:51:54 2023 +0000
b
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Locations of built-in genome -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/fasta_indexes.loc" />
+    </table>
+    <!-- IDs table file -->
+    <table name="rbp_ids_table" comment_char="#">
+        <columns>value</columns>
+        <file path="${__HERE__}/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt" />
+    </table>
+</tables>
\ No newline at end of file