Repository 'rbpbench'
hg clone https://toolshed.g2.bx.psu.edu/repos/rnateam/rbpbench

Changeset 1:b022c6591515 (2023-12-03)
Previous changeset 0:7dd2835ce566 (2023-12-03) Next changeset 2:26c64157456b (2023-12-11)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rbpbench commit f4a6b7942386dd6506275bc0ec6ec842bc58d5b0
modified:
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
added:
tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.loc.sample
removed:
batch_table_wrapper.py
tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt
b
diff -r 7dd2835ce566 -r b022c6591515 batch_table_wrapper.py
--- a/batch_table_wrapper.py Sun Dec 03 12:51:54 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,242 +0,0 @@\n-#!/usr/bin/env python3\n-\n-import argparse\n-import os\n-import re\n-import subprocess\n-\n-\n-###############################################################################\n-\n-def setup_argument_parser():\n-    """Setup argparse parser."""\n-    help_description = """\n-    Python wrapper for RBPBench Galaxy wrapper to work with collections of\n-    input BED files (i.e. to process them with rbpbench batch).\n-    """\n-    # Define argument parser.\n-    p = argparse.ArgumentParser(add_help=False,\n-                                prog="batch_table_wrapper.py",\n-                                description=help_description,\n-                                formatter_class=argparse.MetavarTypeHelpFormatter)\n-\n-    # Required arguments.\n-    p.add_argument("-h", "--help",\n-                   action="help",\n-                   help="Print help message")\n-    p.add_argument("--table",\n-                   dest="in_table",\n-                   type=str,\n-                   metavar=\'str\',\n-                   required=True,\n-                   help="Input table file with data ID, method ID, RBP ID and file name (Galaxy element identifier in dataset collection) for each to be processed dataset by rbpbench batch")\n-    p.add_argument("--paths",\n-                   dest="in_paths",\n-                   type=str,\n-                   metavar=\'str\',\n-                   nargs=\'+\',\n-                   required=True,\n-                   help="List of Galaxy BED file paths (--files path1 path2 .. )")\n-    p.add_argument("--ids",\n-                   dest="in_ids",\n-                   type=str,\n-                   metavar=\'str\',\n-                   nargs=\'+\',\n-                   required=True,\n-                   help="List of Galaxy element identifiers, equal to the BED dataset names in the dataset collection (--ids id1 id2 .. )")\n-    p.add_argument("--genome",\n-                   dest="in_genome",\n-                   type=str,\n-                   metavar=\'str\',\n-                   required=True,\n-                   help="Genomic sequences file (currently supported formats: FASTA)")\n-    p.add_argument("--out",\n-                   dest="out_folder",\n-                   type=str,\n-                   metavar=\'str\',\n-                   required=True,\n-                   help="Batch results output folder")\n-    # Optional batch arguments.\n-    p.add_argument("--ext",\n-                   dest="ext_up_down",\n-                   type=str,\n-                   metavar=\'str\',\n-                   default="0",\n-                   help="Up- and downstream extension of --in sites in nucleotides (nt). Set e.g. --ext 30 for 30 nt on both sides, or --ext 20,10 for different up- and downstream extension (default: 0)")\n-    p.add_argument("--motif-db",\n-                   dest="motif_db",\n-                   type=int,\n-                   default=1,\n-                   choices=[1, 2, 3],\n-                   help="Motif database to use. 1: human RBP motifs full (259 RBPs, 605 motifs, human_v0.1), 2: human RBP motifs full (low frequencies not rounded, human_v0.1_no_round), 3: human RBP motifs eCLIP (107 RBPs, 316 motifs, human_eclip_v0.1) (default: 1)")\n-    p.add_argument("--fimo-nt-freqs",\n-                   dest="fimo_nt_freqs",\n-                   type=str,\n-                   metavar=\'str\',\n-                   default=False,\n-                   help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file (default: use internal frequencies file optimized for human transcripts)")\n-    p.add_argument("--fimo-pval",\n-                   dest="fimo_pval",\n-                   type=float,\n-                   metavar=\'float\',\n-                   default=0.001,\n-                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.001)")\n-    p.add_argument("--bed-score-col",\n-                   dest="bed_score_col",\n-                   type=int,\n-                   metavar=\'int\',\n-                   default=5,\n-                   help="--in'..b'rt os.path.exists(path), "--paths %s file not found" % (path)\n-        if path not in paths_dic:\n-            paths_dic[path] = 1\n-        else:\n-            assert False, "--paths %s given > 1. Please provide unique paths" % (path)\n-        paths_list.append(path)\n-\n-    # IDs\n-    ids_dic = {}\n-    ids_list = []\n-    for id in args.in_ids:\n-        if id not in ids_dic:\n-            ids_dic[id] = 1\n-        else:\n-            assert False, "--ids \\"%s\\" given > 1. Please provide unique element identifiers (dataset names) inside the dataset collection, in order to unambiguously assign element ID to file path" % (id)\n-        ids_list.append(id)\n-\n-    id2path_dic = {}\n-    for idx, id in enumerate(ids_list):\n-        path = paths_list[idx]\n-        id2path_dic[id] = path\n-\n-    """\n-    Read in table.\n-\n-    Column format:\n-    rbp_id method_id data_id dataset_name\n-\n-    """\n-\n-    comb_ids_dic = {}\n-    id_collect_dic = {}\n-    id_collect_dic["rbp_id"] = []\n-    id_collect_dic["method_id"] = []\n-    id_collect_dic["data_id"] = []\n-    id_collect_dic["set_name"] = []\n-    id_collect_dic["path"] = []  # Galaxy file path.\n-\n-    print("Read in --table ... ")\n-\n-    with open(args.in_table) as f:\n-        for line in f:\n-\n-            if re.search("^#", line):\n-                continue\n-\n-            cols = line.strip().split("\\t")\n-\n-            assert len(cols) == 4, "line in --table with # cols != 4 (%i) encountered:%s" % (len(cols), line)\n-\n-            rbp_id = cols[0]\n-            method_id = cols[1]\n-            data_id = cols[2]\n-            set_name = cols[3]\n-\n-            if rbp_id == "rbp_id":\n-                continue\n-\n-            comb_id = "%s,%s,%s,%s" % (rbp_id, method_id, data_id, set_name)\n-\n-            if comb_id not in comb_ids_dic:\n-                comb_ids_dic[comb_id] = 1\n-            else:\n-                assert False, "data combination (\\"%s\\") appears > 1 in --table file. Please provide unique combinations for rbpbench batch calculation" % (comb_id)\n-\n-            assert set_name in ids_dic, "given dataset name \\"%s\\" from --table not part of given --ids. Please provide dataset names present in dataset collection" % (set_name)\n-\n-            id_collect_dic["rbp_id"].append(rbp_id)\n-            id_collect_dic["method_id"].append(method_id)\n-            id_collect_dic["data_id"].append(data_id)\n-            id_collect_dic["set_name"].append(set_name)\n-            id_collect_dic["path"].append(id2path_dic[set_name])\n-\n-    f.closed\n-\n-    assert id_collect_dic["rbp_id"], "nothing read in from --table. Please provide non-empty table in correct format (columns: rbp_id method_id data_id dataset_name)"\n-\n-    """\n-    Construct RBPBench batch call.\n-\n-    """\n-\n-    batch_call = "rbpbench batch"\n-    batch_call += " --out %s" % (args.out_folder)\n-    batch_call += " --genome %s" % (args.in_genome)\n-    batch_call += " --ext %s" % (args.ext_up_down)\n-    batch_call += " --motif-db %i" % (args.motif_db)\n-    if args.fimo_nt_freqs:\n-        batch_call += " --fimo-nt-freqs %s" % (args.fimo_nt_freqs)\n-    batch_call += " --fimo-pval %s" % (str(args.fimo_pval))\n-    batch_call += " --bed-score-col %i" % (args.bed_score_col)\n-    if args.unstranded:\n-        batch_call += " --unstranded"\n-    if args.unstranded_ct:\n-        batch_call += " --unstranded-ct"\n-\n-    rbp_ids = (" ").join(id_collect_dic["rbp_id"])\n-    method_ids = (" ").join(id_collect_dic["method_id"])\n-    data_ids = (" ").join(id_collect_dic["data_id"])\n-    paths = (" ").join(id_collect_dic["path"])\n-\n-    batch_call += " --rbp-list %s" % (rbp_ids)\n-    batch_call += " --method-list %s" % (method_ids)\n-    batch_call += " --data-list %s" % (data_ids)\n-    batch_call += " --bed %s" % (paths)\n-\n-    """\n-    Execute RBPBench batch call.\n-    """\n-\n-    print("")\n-    print("EXECUTING CALL:\\n%s" % (batch_call))\n-    output = subprocess.getoutput(batch_call)\n-    print("")\n-    print("RUN OUTPUT:\\n%s" % (output))\n-    print("")\n-    print("DONE.")\n'
b
diff -r 7dd2835ce566 -r b022c6591515 tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.loc.sample Sun Dec 03 21:31:37 2023 +0000
b
@@ -0,0 +1,259 @@
+A1CF
+ACIN1
+ACO1
+ADAR
+AGGF1
+AGO1
+AGO2
+AKAP1
+AKAP8
+ANKHD1
+AUH
+BCCIP
+BOLL
+BUD13
+CAPRIN1
+CBX7
+CDC40
+CELF1
+CELF2
+CELF4
+CELF5
+CELF6
+CNBP
+CNOT4
+CPEB1
+CPEB2
+CPEB4
+CPSF6
+CPSF7
+CSTF2
+CSTF2T
+DAZ3
+DAZAP1
+DDX3X
+DDX6
+DDX19B
+DDX24
+DDX54
+DDX55
+DDX58
+DDX59
+DGCR8
+DKC1
+EFTUD2
+EIF3D
+EIF4A3
+EIF4B
+EIF4G2
+ELAVL1
+ELAVL2
+ELAVL3
+ELAVL4
+ENOX1
+ERI1
+ESRP1
+ESRP2
+EWSR1
+FAM120A
+FASTKD2
+FBL
+FIP1L1
+FMR1
+FTO
+FUBP1
+FUBP3
+FUS
+FXR1
+FXR2
+G3BP1
+G3BP2
+GNL3
+GPKOW
+GRSF1
+GRWD1
+GTF2F1
+HLTF
+HNRNPA0
+HNRNPA1
+HNRNPA1L2
+HNRNPA2B1
+HNRNPA3
+HNRNPAB
+HNRNPC
+HNRNPCL1
+HNRNPD
+HNRNPDL
+HNRNPF
+HNRNPH1
+HNRNPH2
+HNRNPK
+HNRNPL
+HNRNPLL
+HNRNPM
+HNRNPU
+HNRNPUL1
+IFIH1
+IGF2BP1
+IGF2BP2
+IGF2BP3
+IGHMBP2
+ILF2
+ILF3
+KHDRBS1
+KHDRBS2
+KHDRBS3
+KHSRP
+LARP4
+LARP4B
+LIN28A
+LIN28B
+LSM11
+MATR3
+MBNL1
+METAP2
+MSI1
+MSI2
+MTPAP
+NCBP2
+NELFE
+NKRF
+NOL12
+NONO
+NOP56
+NOP58
+NOVA1
+NOVA2
+NPM1
+NSUN2
+NUMA1
+NUP42
+NXF1
+OAS1
+OBI1
+PABPC1
+PABPC3
+PABPC4
+PABPC5
+PABPN1
+PABPN1L
+PARP1
+PCBP1
+PCBP2
+PCBP4
+PPIE
+PPIG
+PPIL4
+PPRC1
+PRPF8
+PRR3
+PTBP1
+PTBP2
+PTBP3
+PUF60
+PUM1
+PUM2
+QKI
+RALY
+RALYL
+RANGAP1
+RBFOX1
+RBFOX2
+RBFOX3
+RBM3
+RBM4
+RBM4B
+RBM5
+RBM6
+RBM8A
+RBM10
+RBM14
+RBM15
+RBM15B
+RBM22
+RBM23
+RBM24
+RBM25
+RBM28
+RBM39
+RBM41
+RBM42
+RBM45
+RBM46
+RBM47
+RBMS1
+RBMS2
+RBMS3
+RBMX
+RBMY1A1
+RC3H1
+TROVE2
+RPS5
+SAFB2
+SAMD4A
+SART3
+SF1
+SF3A3
+SF3B4
+SFPQ
+SLTM
+SMNDC1
+SND1
+SNRNP70
+SNRPA
+SNRPB2
+SOX2
+SRP14
+SRP68
+SRRM4
+SRSF1
+SRSF2
+SRSF3
+SRSF4
+SRSF5
+SRSF6
+SRSF7
+SRSF8
+SRSF9
+SRSF10
+SRSF11
+SSB
+SUB1
+SUGP2
+SUPV3L1
+SYNCRIP
+TAF15
+TARBP2
+TARDBP
+TBRG4
+TIA1
+TIAL1
+TNRC6A
+TRA2A
+TRA2B
+TRNAU1AP
+TUT1
+U2AF1
+U2AF2
+UCHL5
+UNK
+UPF1
+WDR33
+XPO5
+XRCC6
+XRN2
+YBX1
+YBX2
+YBX3
+YTHDC1
+YWHAG
+ZC3H10
+ZCRB1
+ZFP36
+ZFP36L2
+ZNF184
+ZNF326
+ZNF622
+ZNF638
+ZRANB2
+SLBP
\ No newline at end of file
b
diff -r 7dd2835ce566 -r b022c6591515 tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt
--- a/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt Sun Dec 03 12:51:54 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,259 +0,0 @@
-A1CF
-ACIN1
-ACO1
-ADAR
-AGGF1
-AGO1
-AGO2
-AKAP1
-AKAP8
-ANKHD1
-AUH
-BCCIP
-BOLL
-BUD13
-CAPRIN1
-CBX7
-CDC40
-CELF1
-CELF2
-CELF4
-CELF5
-CELF6
-CNBP
-CNOT4
-CPEB1
-CPEB2
-CPEB4
-CPSF6
-CPSF7
-CSTF2
-CSTF2T
-DAZ3
-DAZAP1
-DDX3X
-DDX6
-DDX19B
-DDX24
-DDX54
-DDX55
-DDX58
-DDX59
-DGCR8
-DKC1
-EFTUD2
-EIF3D
-EIF4A3
-EIF4B
-EIF4G2
-ELAVL1
-ELAVL2
-ELAVL3
-ELAVL4
-ENOX1
-ERI1
-ESRP1
-ESRP2
-EWSR1
-FAM120A
-FASTKD2
-FBL
-FIP1L1
-FMR1
-FTO
-FUBP1
-FUBP3
-FUS
-FXR1
-FXR2
-G3BP1
-G3BP2
-GNL3
-GPKOW
-GRSF1
-GRWD1
-GTF2F1
-HLTF
-HNRNPA0
-HNRNPA1
-HNRNPA1L2
-HNRNPA2B1
-HNRNPA3
-HNRNPAB
-HNRNPC
-HNRNPCL1
-HNRNPD
-HNRNPDL
-HNRNPF
-HNRNPH1
-HNRNPH2
-HNRNPK
-HNRNPL
-HNRNPLL
-HNRNPM
-HNRNPU
-HNRNPUL1
-IFIH1
-IGF2BP1
-IGF2BP2
-IGF2BP3
-IGHMBP2
-ILF2
-ILF3
-KHDRBS1
-KHDRBS2
-KHDRBS3
-KHSRP
-LARP4
-LARP4B
-LIN28A
-LIN28B
-LSM11
-MATR3
-MBNL1
-METAP2
-MSI1
-MSI2
-MTPAP
-NCBP2
-NELFE
-NKRF
-NOL12
-NONO
-NOP56
-NOP58
-NOVA1
-NOVA2
-NPM1
-NSUN2
-NUMA1
-NUP42
-NXF1
-OAS1
-OBI1
-PABPC1
-PABPC3
-PABPC4
-PABPC5
-PABPN1
-PABPN1L
-PARP1
-PCBP1
-PCBP2
-PCBP4
-PPIE
-PPIG
-PPIL4
-PPRC1
-PRPF8
-PRR3
-PTBP1
-PTBP2
-PTBP3
-PUF60
-PUM1
-PUM2
-QKI
-RALY
-RALYL
-RANGAP1
-RBFOX1
-RBFOX2
-RBFOX3
-RBM3
-RBM4
-RBM4B
-RBM5
-RBM6
-RBM8A
-RBM10
-RBM14
-RBM15
-RBM15B
-RBM22
-RBM23
-RBM24
-RBM25
-RBM28
-RBM39
-RBM41
-RBM42
-RBM45
-RBM46
-RBM47
-RBMS1
-RBMS2
-RBMS3
-RBMX
-RBMY1A1
-RC3H1
-TROVE2
-RPS5
-SAFB2
-SAMD4A
-SART3
-SF1
-SF3A3
-SF3B4
-SFPQ
-SLTM
-SMNDC1
-SND1
-SNRNP70
-SNRPA
-SNRPB2
-SOX2
-SRP14
-SRP68
-SRRM4
-SRSF1
-SRSF2
-SRSF3
-SRSF4
-SRSF5
-SRSF6
-SRSF7
-SRSF8
-SRSF9
-SRSF10
-SRSF11
-SSB
-SUB1
-SUGP2
-SUPV3L1
-SYNCRIP
-TAF15
-TARBP2
-TARDBP
-TBRG4
-TIA1
-TIAL1
-TNRC6A
-TRA2A
-TRA2B
-TRNAU1AP
-TUT1
-U2AF1
-U2AF2
-UCHL5
-UNK
-UPF1
-WDR33
-XPO5
-XRCC6
-XRN2
-YBX1
-YBX2
-YBX3
-YTHDC1
-YWHAG
-ZC3H10
-ZCRB1
-ZFP36
-ZFP36L2
-ZNF184
-ZNF326
-ZNF622
-ZNF638
-ZRANB2
-SLBP
b
diff -r 7dd2835ce566 -r b022c6591515 tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Sun Dec 03 12:51:54 2023 +0000
+++ b/tool_data_table_conf.xml.sample Sun Dec 03 21:31:37 2023 +0000
b
@@ -7,6 +7,6 @@
     <!-- IDs table file -->
     <table name="rbp_ids_table" comment_char="#">
         <columns>value</columns>
-        <file path="${__HERE__}/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt" />
+        <file path="tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.loc" />
     </table>
 </tables>
\ No newline at end of file
b
diff -r 7dd2835ce566 -r b022c6591515 tool_data_table_conf.xml.test
--- a/tool_data_table_conf.xml.test Sun Dec 03 12:51:54 2023 +0000
+++ b/tool_data_table_conf.xml.test Sun Dec 03 21:31:37 2023 +0000
b
@@ -7,6 +7,6 @@
     <!-- IDs table file -->
     <table name="rbp_ids_table" comment_char="#">
         <columns>value</columns>
-        <file path="${__HERE__}/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt" />
+        <file path="${__HERE__}/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.loc.sample" />
     </table>
 </tables>
\ No newline at end of file