changeset 1:b022c6591515 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rbpbench commit f4a6b7942386dd6506275bc0ec6ec842bc58d5b0
author rnateam
date Sun, 03 Dec 2023 21:31:37 +0000
parents 7dd2835ce566
children 26c64157456b
files batch_table_wrapper.py tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.loc.sample tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 5 files changed, 261 insertions(+), 503 deletions(-) [+]
line wrap: on
line diff
--- a/batch_table_wrapper.py	Sun Dec 03 12:51:54 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,242 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import os
-import re
-import subprocess
-
-
-###############################################################################
-
-def setup_argument_parser():
-    """Setup argparse parser."""
-    help_description = """
-    Python wrapper for RBPBench Galaxy wrapper to work with collections of
-    input BED files (i.e. to process them with rbpbench batch).
-    """
-    # Define argument parser.
-    p = argparse.ArgumentParser(add_help=False,
-                                prog="batch_table_wrapper.py",
-                                description=help_description,
-                                formatter_class=argparse.MetavarTypeHelpFormatter)
-
-    # Required arguments.
-    p.add_argument("-h", "--help",
-                   action="help",
-                   help="Print help message")
-    p.add_argument("--table",
-                   dest="in_table",
-                   type=str,
-                   metavar='str',
-                   required=True,
-                   help="Input table file with data ID, method ID, RBP ID and file name (Galaxy element identifier in dataset collection) for each to be processed dataset by rbpbench batch")
-    p.add_argument("--paths",
-                   dest="in_paths",
-                   type=str,
-                   metavar='str',
-                   nargs='+',
-                   required=True,
-                   help="List of Galaxy BED file paths (--files path1 path2 .. )")
-    p.add_argument("--ids",
-                   dest="in_ids",
-                   type=str,
-                   metavar='str',
-                   nargs='+',
-                   required=True,
-                   help="List of Galaxy element identifiers, equal to the BED dataset names in the dataset collection (--ids id1 id2 .. )")
-    p.add_argument("--genome",
-                   dest="in_genome",
-                   type=str,
-                   metavar='str',
-                   required=True,
-                   help="Genomic sequences file (currently supported formats: FASTA)")
-    p.add_argument("--out",
-                   dest="out_folder",
-                   type=str,
-                   metavar='str',
-                   required=True,
-                   help="Batch results output folder")
-    # Optional batch arguments.
-    p.add_argument("--ext",
-                   dest="ext_up_down",
-                   type=str,
-                   metavar='str',
-                   default="0",
-                   help="Up- and downstream extension of --in sites in nucleotides (nt). Set e.g. --ext 30 for 30 nt on both sides, or --ext 20,10 for different up- and downstream extension (default: 0)")
-    p.add_argument("--motif-db",
-                   dest="motif_db",
-                   type=int,
-                   default=1,
-                   choices=[1, 2, 3],
-                   help="Motif database to use. 1: human RBP motifs full (259 RBPs, 605 motifs, human_v0.1), 2: human RBP motifs full (low frequencies not rounded, human_v0.1_no_round), 3: human RBP motifs eCLIP (107 RBPs, 316 motifs, human_eclip_v0.1) (default: 1)")
-    p.add_argument("--fimo-nt-freqs",
-                   dest="fimo_nt_freqs",
-                   type=str,
-                   metavar='str',
-                   default=False,
-                   help="Provide FIMO nucleotide frequencies (FIMO option: --bifile) file (default: use internal frequencies file optimized for human transcripts)")
-    p.add_argument("--fimo-pval",
-                   dest="fimo_pval",
-                   type=float,
-                   metavar='float',
-                   default=0.001,
-                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.001)")
-    p.add_argument("--bed-score-col",
-                   dest="bed_score_col",
-                   type=int,
-                   metavar='int',
-                   default=5,
-                   help="--in BED score column used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)")
-    p.add_argument("--unstranded",
-                   dest="unstranded",
-                   default=False,
-                   action="store_true",
-                   help="Set if --in BED regions are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with --unstranded-ct) (default: False)")
-    p.add_argument("--unstranded-ct",
-                   dest="unstranded_ct",
-                   default=False,
-                   action="store_true",
-                   help="Count each --in region twice for RBP hit statistics when --unstranded is enabled. By default, two strands of one region are counted as one region for RBP hit statistics")
-    return p
-
-
-###############################################################################
-
-if __name__ == '__main__':
-
-    parser = setup_argument_parser()
-    args = parser.parse_args()
-
-    assert os.path.exists(args.in_table), "--table file \"%s\" not found" % (args.in_file)
-    assert os.path.exists(args.in_genome), "--genome file \"%s\" not found" % (args.in_genome)
-
-    c_paths = len(args.in_paths)
-    c_ids = len(args.in_ids)
-    assert c_paths == c_ids, "given # paths (--paths) != # ids (--ids) (%i != %i). Please provide one ID for each path" % (c_paths, c_ids)
-
-    """
-    Check given paths and IDs.
-
-    """
-
-    # Paths.
-    paths_dic = {}
-    paths_list = []
-    for path in args.in_paths:
-        assert os.path.exists(path), "--paths %s file not found" % (path)
-        if path not in paths_dic:
-            paths_dic[path] = 1
-        else:
-            assert False, "--paths %s given > 1. Please provide unique paths" % (path)
-        paths_list.append(path)
-
-    # IDs
-    ids_dic = {}
-    ids_list = []
-    for id in args.in_ids:
-        if id not in ids_dic:
-            ids_dic[id] = 1
-        else:
-            assert False, "--ids \"%s\" given > 1. Please provide unique element identifiers (dataset names) inside the dataset collection, in order to unambiguously assign element ID to file path" % (id)
-        ids_list.append(id)
-
-    id2path_dic = {}
-    for idx, id in enumerate(ids_list):
-        path = paths_list[idx]
-        id2path_dic[id] = path
-
-    """
-    Read in table.
-
-    Column format:
-    rbp_id method_id data_id dataset_name
-
-    """
-
-    comb_ids_dic = {}
-    id_collect_dic = {}
-    id_collect_dic["rbp_id"] = []
-    id_collect_dic["method_id"] = []
-    id_collect_dic["data_id"] = []
-    id_collect_dic["set_name"] = []
-    id_collect_dic["path"] = []  # Galaxy file path.
-
-    print("Read in --table ... ")
-
-    with open(args.in_table) as f:
-        for line in f:
-
-            if re.search("^#", line):
-                continue
-
-            cols = line.strip().split("\t")
-
-            assert len(cols) == 4, "line in --table with # cols != 4 (%i) encountered:%s" % (len(cols), line)
-
-            rbp_id = cols[0]
-            method_id = cols[1]
-            data_id = cols[2]
-            set_name = cols[3]
-
-            if rbp_id == "rbp_id":
-                continue
-
-            comb_id = "%s,%s,%s,%s" % (rbp_id, method_id, data_id, set_name)
-
-            if comb_id not in comb_ids_dic:
-                comb_ids_dic[comb_id] = 1
-            else:
-                assert False, "data combination (\"%s\") appears > 1 in --table file. Please provide unique combinations for rbpbench batch calculation" % (comb_id)
-
-            assert set_name in ids_dic, "given dataset name \"%s\" from --table not part of given --ids. Please provide dataset names present in dataset collection" % (set_name)
-
-            id_collect_dic["rbp_id"].append(rbp_id)
-            id_collect_dic["method_id"].append(method_id)
-            id_collect_dic["data_id"].append(data_id)
-            id_collect_dic["set_name"].append(set_name)
-            id_collect_dic["path"].append(id2path_dic[set_name])
-
-    f.closed
-
-    assert id_collect_dic["rbp_id"], "nothing read in from --table. Please provide non-empty table in correct format (columns: rbp_id method_id data_id dataset_name)"
-
-    """
-    Construct RBPBench batch call.
-
-    """
-
-    batch_call = "rbpbench batch"
-    batch_call += " --out %s" % (args.out_folder)
-    batch_call += " --genome %s" % (args.in_genome)
-    batch_call += " --ext %s" % (args.ext_up_down)
-    batch_call += " --motif-db %i" % (args.motif_db)
-    if args.fimo_nt_freqs:
-        batch_call += " --fimo-nt-freqs %s" % (args.fimo_nt_freqs)
-    batch_call += " --fimo-pval %s" % (str(args.fimo_pval))
-    batch_call += " --bed-score-col %i" % (args.bed_score_col)
-    if args.unstranded:
-        batch_call += " --unstranded"
-    if args.unstranded_ct:
-        batch_call += " --unstranded-ct"
-
-    rbp_ids = (" ").join(id_collect_dic["rbp_id"])
-    method_ids = (" ").join(id_collect_dic["method_id"])
-    data_ids = (" ").join(id_collect_dic["data_id"])
-    paths = (" ").join(id_collect_dic["path"])
-
-    batch_call += " --rbp-list %s" % (rbp_ids)
-    batch_call += " --method-list %s" % (method_ids)
-    batch_call += " --data-list %s" % (data_ids)
-    batch_call += " --bed %s" % (paths)
-
-    """
-    Execute RBPBench batch call.
-    """
-
-    print("")
-    print("EXECUTING CALL:\n%s" % (batch_call))
-    output = subprocess.getoutput(batch_call)
-    print("")
-    print("RUN OUTPUT:\n%s" % (output))
-    print("")
-    print("DONE.")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.loc.sample	Sun Dec 03 21:31:37 2023 +0000
@@ -0,0 +1,259 @@
+A1CF
+ACIN1
+ACO1
+ADAR
+AGGF1
+AGO1
+AGO2
+AKAP1
+AKAP8
+ANKHD1
+AUH
+BCCIP
+BOLL
+BUD13
+CAPRIN1
+CBX7
+CDC40
+CELF1
+CELF2
+CELF4
+CELF5
+CELF6
+CNBP
+CNOT4
+CPEB1
+CPEB2
+CPEB4
+CPSF6
+CPSF7
+CSTF2
+CSTF2T
+DAZ3
+DAZAP1
+DDX3X
+DDX6
+DDX19B
+DDX24
+DDX54
+DDX55
+DDX58
+DDX59
+DGCR8
+DKC1
+EFTUD2
+EIF3D
+EIF4A3
+EIF4B
+EIF4G2
+ELAVL1
+ELAVL2
+ELAVL3
+ELAVL4
+ENOX1
+ERI1
+ESRP1
+ESRP2
+EWSR1
+FAM120A
+FASTKD2
+FBL
+FIP1L1
+FMR1
+FTO
+FUBP1
+FUBP3
+FUS
+FXR1
+FXR2
+G3BP1
+G3BP2
+GNL3
+GPKOW
+GRSF1
+GRWD1
+GTF2F1
+HLTF
+HNRNPA0
+HNRNPA1
+HNRNPA1L2
+HNRNPA2B1
+HNRNPA3
+HNRNPAB
+HNRNPC
+HNRNPCL1
+HNRNPD
+HNRNPDL
+HNRNPF
+HNRNPH1
+HNRNPH2
+HNRNPK
+HNRNPL
+HNRNPLL
+HNRNPM
+HNRNPU
+HNRNPUL1
+IFIH1
+IGF2BP1
+IGF2BP2
+IGF2BP3
+IGHMBP2
+ILF2
+ILF3
+KHDRBS1
+KHDRBS2
+KHDRBS3
+KHSRP
+LARP4
+LARP4B
+LIN28A
+LIN28B
+LSM11
+MATR3
+MBNL1
+METAP2
+MSI1
+MSI2
+MTPAP
+NCBP2
+NELFE
+NKRF
+NOL12
+NONO
+NOP56
+NOP58
+NOVA1
+NOVA2
+NPM1
+NSUN2
+NUMA1
+NUP42
+NXF1
+OAS1
+OBI1
+PABPC1
+PABPC3
+PABPC4
+PABPC5
+PABPN1
+PABPN1L
+PARP1
+PCBP1
+PCBP2
+PCBP4
+PPIE
+PPIG
+PPIL4
+PPRC1
+PRPF8
+PRR3
+PTBP1
+PTBP2
+PTBP3
+PUF60
+PUM1
+PUM2
+QKI
+RALY
+RALYL
+RANGAP1
+RBFOX1
+RBFOX2
+RBFOX3
+RBM3
+RBM4
+RBM4B
+RBM5
+RBM6
+RBM8A
+RBM10
+RBM14
+RBM15
+RBM15B
+RBM22
+RBM23
+RBM24
+RBM25
+RBM28
+RBM39
+RBM41
+RBM42
+RBM45
+RBM46
+RBM47
+RBMS1
+RBMS2
+RBMS3
+RBMX
+RBMY1A1
+RC3H1
+TROVE2
+RPS5
+SAFB2
+SAMD4A
+SART3
+SF1
+SF3A3
+SF3B4
+SFPQ
+SLTM
+SMNDC1
+SND1
+SNRNP70
+SNRPA
+SNRPB2
+SOX2
+SRP14
+SRP68
+SRRM4
+SRSF1
+SRSF2
+SRSF3
+SRSF4
+SRSF5
+SRSF6
+SRSF7
+SRSF8
+SRSF9
+SRSF10
+SRSF11
+SSB
+SUB1
+SUGP2
+SUPV3L1
+SYNCRIP
+TAF15
+TARBP2
+TARDBP
+TBRG4
+TIA1
+TIAL1
+TNRC6A
+TRA2A
+TRA2B
+TRNAU1AP
+TUT1
+U2AF1
+U2AF2
+UCHL5
+UNK
+UPF1
+WDR33
+XPO5
+XRCC6
+XRN2
+YBX1
+YBX2
+YBX3
+YTHDC1
+YWHAG
+ZC3H10
+ZCRB1
+ZFP36
+ZFP36L2
+ZNF184
+ZNF326
+ZNF622
+ZNF638
+ZRANB2
+SLBP
\ No newline at end of file
--- a/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt	Sun Dec 03 12:51:54 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,259 +0,0 @@
-A1CF
-ACIN1
-ACO1
-ADAR
-AGGF1
-AGO1
-AGO2
-AKAP1
-AKAP8
-ANKHD1
-AUH
-BCCIP
-BOLL
-BUD13
-CAPRIN1
-CBX7
-CDC40
-CELF1
-CELF2
-CELF4
-CELF5
-CELF6
-CNBP
-CNOT4
-CPEB1
-CPEB2
-CPEB4
-CPSF6
-CPSF7
-CSTF2
-CSTF2T
-DAZ3
-DAZAP1
-DDX3X
-DDX6
-DDX19B
-DDX24
-DDX54
-DDX55
-DDX58
-DDX59
-DGCR8
-DKC1
-EFTUD2
-EIF3D
-EIF4A3
-EIF4B
-EIF4G2
-ELAVL1
-ELAVL2
-ELAVL3
-ELAVL4
-ENOX1
-ERI1
-ESRP1
-ESRP2
-EWSR1
-FAM120A
-FASTKD2
-FBL
-FIP1L1
-FMR1
-FTO
-FUBP1
-FUBP3
-FUS
-FXR1
-FXR2
-G3BP1
-G3BP2
-GNL3
-GPKOW
-GRSF1
-GRWD1
-GTF2F1
-HLTF
-HNRNPA0
-HNRNPA1
-HNRNPA1L2
-HNRNPA2B1
-HNRNPA3
-HNRNPAB
-HNRNPC
-HNRNPCL1
-HNRNPD
-HNRNPDL
-HNRNPF
-HNRNPH1
-HNRNPH2
-HNRNPK
-HNRNPL
-HNRNPLL
-HNRNPM
-HNRNPU
-HNRNPUL1
-IFIH1
-IGF2BP1
-IGF2BP2
-IGF2BP3
-IGHMBP2
-ILF2
-ILF3
-KHDRBS1
-KHDRBS2
-KHDRBS3
-KHSRP
-LARP4
-LARP4B
-LIN28A
-LIN28B
-LSM11
-MATR3
-MBNL1
-METAP2
-MSI1
-MSI2
-MTPAP
-NCBP2
-NELFE
-NKRF
-NOL12
-NONO
-NOP56
-NOP58
-NOVA1
-NOVA2
-NPM1
-NSUN2
-NUMA1
-NUP42
-NXF1
-OAS1
-OBI1
-PABPC1
-PABPC3
-PABPC4
-PABPC5
-PABPN1
-PABPN1L
-PARP1
-PCBP1
-PCBP2
-PCBP4
-PPIE
-PPIG
-PPIL4
-PPRC1
-PRPF8
-PRR3
-PTBP1
-PTBP2
-PTBP3
-PUF60
-PUM1
-PUM2
-QKI
-RALY
-RALYL
-RANGAP1
-RBFOX1
-RBFOX2
-RBFOX3
-RBM3
-RBM4
-RBM4B
-RBM5
-RBM6
-RBM8A
-RBM10
-RBM14
-RBM15
-RBM15B
-RBM22
-RBM23
-RBM24
-RBM25
-RBM28
-RBM39
-RBM41
-RBM42
-RBM45
-RBM46
-RBM47
-RBMS1
-RBMS2
-RBMS3
-RBMX
-RBMY1A1
-RC3H1
-TROVE2
-RPS5
-SAFB2
-SAMD4A
-SART3
-SF1
-SF3A3
-SF3B4
-SFPQ
-SLTM
-SMNDC1
-SND1
-SNRNP70
-SNRPA
-SNRPB2
-SOX2
-SRP14
-SRP68
-SRRM4
-SRSF1
-SRSF2
-SRSF3
-SRSF4
-SRSF5
-SRSF6
-SRSF7
-SRSF8
-SRSF9
-SRSF10
-SRSF11
-SSB
-SUB1
-SUGP2
-SUPV3L1
-SYNCRIP
-TAF15
-TARBP2
-TARDBP
-TBRG4
-TIA1
-TIAL1
-TNRC6A
-TRA2A
-TRA2B
-TRNAU1AP
-TUT1
-U2AF1
-U2AF2
-UCHL5
-UNK
-UPF1
-WDR33
-XPO5
-XRCC6
-XRN2
-YBX1
-YBX2
-YBX3
-YTHDC1
-YWHAG
-ZC3H10
-ZCRB1
-ZFP36
-ZFP36L2
-ZNF184
-ZNF326
-ZNF622
-ZNF638
-ZRANB2
-SLBP
--- a/tool_data_table_conf.xml.sample	Sun Dec 03 12:51:54 2023 +0000
+++ b/tool_data_table_conf.xml.sample	Sun Dec 03 21:31:37 2023 +0000
@@ -7,6 +7,6 @@
     <!-- IDs table file -->
     <table name="rbp_ids_table" comment_char="#">
         <columns>value</columns>
-        <file path="${__HERE__}/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt" />
+        <file path="tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.loc" />
     </table>
 </tables>
\ No newline at end of file
--- a/tool_data_table_conf.xml.test	Sun Dec 03 12:51:54 2023 +0000
+++ b/tool_data_table_conf.xml.test	Sun Dec 03 21:31:37 2023 +0000
@@ -7,6 +7,6 @@
     <!-- IDs table file -->
     <table name="rbp_ids_table" comment_char="#">
         <columns>value</columns>
-        <file path="${__HERE__}/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.txt" />
+        <file path="${__HERE__}/tool-data/rbp_ids.catrapid.omics.v2.1.human.6plus.loc.sample" />
     </table>
 </tables>
\ No newline at end of file