diff autogenJB2.py @ 0:53c2be00bb6f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 0a86c88a95b0d1cc49d84544136de6556b95320f
author bgruening
date Wed, 05 Jun 2024 08:15:49 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/autogenJB2.py	Wed Jun 05 08:15:49 2024 +0000
@@ -0,0 +1,222 @@
+import argparse
+import logging
+import os
+import sys
+
+from jbrowse2 import JbrowseConnector as jbC
+
+
+logging.basicConfig(level=logging.DEBUG)
+log = logging.getLogger("jbrowse")
+
+# Formats whose tracks are listed under "default_on" in the default session;
+# tracks of every other format are listed under "default_off".
+_DEFAULT_ON_FORMATS = ("bigwig", "gff", "gff3", "vcf", "maf", "bed", "hic")
+# Formats whose tracks get a display style entry in the default session.
+_STYLED_FORMATS = ("gff", "gff3", "bed", "vcf", "maf", "blastxml")
+
+
+def _use_uri(path):
+    """Return "yes" if path is an http(s) URL, otherwise "no"."""
+    return "yes" if path.startswith(("http://", "https://")) else "no"
+
+
+def _split_meta(values):
+    """Split each non-blank comma separated metadata argument into fields."""
+    return [v.strip().split(",") for v in values if v.strip()]
+
+
+if __name__ == "__main__":
+    # Command line driver: hands the FASTA reference(s) and every track given
+    # on the command line to a JbrowseConnector, then writes the configuration
+    # and adds a default session.
+    parser = argparse.ArgumentParser(description="", epilog="")
+    parser.add_argument("--sessName", help="Session name", default="AutoJBrowse")
+    parser.add_argument(
+        "--trackmeta",
+        help="Repeatable 'filename,filext,filepath,[bai/crai path for filesystem bam/cram]' for JBrowse2 tracks",
+        default=[],
+        action="append",
+    )
+    parser.add_argument(
+        "--referencemeta",
+        help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks - usually only one needed",
+        default=[],
+        action="append",
+    )
+    parser.add_argument(
+        "--pafmeta",
+        help="Repeatable. Each is a 'pafname, filext, filepath, ... ,' for a JBrowse2 paf track",
+        default=[],
+        action="append",
+    )
+    parser.add_argument(
+        "--pafreferencemeta",
+        help="Repeatable. Each is a 'pafname,refpath,refname' Every pafname must have one or more",
+        default=[],
+        action="append",
+    )
+    parser.add_argument(
+        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
+    )
+    parser.add_argument("--outdir", help="Output directory", required=True)
+    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2")
+    args = parser.parse_args()
+    sessName = args.sessName
+    default_session_data = {}
+    # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key'
+    # Fields are read by position below: [0] path or URL, [1] extension,
+    # [2] name and, for tracks only, an optional [3] bam/cram index path.
+    # NOTE(review): --pafmeta and --pafreferencemeta are parsed but never read.
+    trackList = _split_meta(args.trackmeta)
+    refList = _split_meta(args.referencemeta)
+    if refList:
+        listgenomes = [f for f in refList if f[1] in ["fasta", "fasta.gz"]]
+        # assume no pafs here
+        # NOTE(review): when no reference is fasta/fasta.gz nothing below runs
+        # and no message is emitted.
+        if listgenomes:
+            jc = jbC(
+                outdir=args.outdir,
+                jbrowse2path=args.jbrowse2path,
+            )
+            genomes = [
+                {
+                    "path": ref[0],
+                    "label": ref[2],
+                    "useuri": _use_uri(ref[0]),
+                    "meta": {
+                        "name": ref[2],
+                        "dataset_dname": ref[2],
+                    },
+                }
+                for ref in listgenomes
+            ]
+            assref_name = jc.process_genomes(genomes)
+            session = default_session_data.setdefault(
+                assref_name,
+                {
+                    "tracks": [],
+                    "style": {},
+                    "style_labels": {},
+                    "visibility": {
+                        "default_on": [],
+                        "default_off": [],
+                    },
+                },
+            )
+            # A paf track named foo needs another track named foo_paf.fasta;
+            # without it the run stops with exit status 3.
+            for track in trackList:
+                tpath, trext, trackname = track[:3]
+                track_conf = {
+                    "trackfiles": [],
+                    "category": "autogenerated",
+                    "assemblyNames": assref_name,
+                    "dataset_id": trackname,
+                }
+                useuri = _use_uri(tpath)
+                # The index path is optional; an empty path never exists, so a
+                # missing field makes the bam/cram branches build the index.
+                ipath = track[3] if len(track) > 3 else ""
+                if trext == "paf":
+                    refname = trackname + "_paf.fasta"
+                    refdat = [x[2] for x in trackList if x[2] == refname]
+                    if not refdat:
+                        log.warning(
+                            "No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf",
+                            refname,
+                            trackname,
+                        )
+                        sys.exit(3)
+                    track_conf["conf"] = {
+                        "options": {
+                            "paf": {
+                                "genome": refdat,
+                                "genome_label": trackname,
+                            }
+                        }
+                    }
+                elif trext == "bam":
+                    if not os.path.exists(ipath):
+                        ipath = os.path.realpath(
+                            os.path.join(jc.outdir, trackname + ".bai")
+                        )
+                        cmd = [
+                            "samtools",
+                            "index",
+                            "-b",
+                            "-o",
+                            ipath,
+                            os.path.realpath(tpath),
+                        ]
+                        sys.stdout.write("#### calling %s" % " ".join(cmd))
+                        jc.subprocess_check_call(cmd)
+                    track_conf["conf"] = {
+                        "options": {"bam": {"bam_index": " %s ~ %s," % (tpath, ipath)}}
+                    }
+                elif trext == "cram":
+                    if not os.path.exists(ipath):
+                        ipath = os.path.realpath(
+                            os.path.join("./", trackname + ".crai")
+                        )
+                        cmd = [
+                            "samtools",
+                            "index",
+                            "-c",
+                            "-o",
+                            ipath,
+                            os.path.realpath(tpath),
+                        ]
+                        jc.subprocess_check_call(cmd)
+                    track_conf["conf"] = {
+                        "options": {"cram": {"cram_index": "%s ~ %s," % (tpath, ipath)}}
+                    }
+                track_conf["path"] = tpath
+                track_conf["format"] = trext
+                track_conf["name"] = trackname
+                track_conf["label"] = trackname
+                track_conf["trackfiles"].append((tpath, trext, useuri, trackname, {}))
+                keys = jc.process_annotations(track_conf)
+
+                for key in keys or []:
+                    if trext in _DEFAULT_ON_FORMATS:
+                        session["visibility"]["default_on"].append(key)
+                    else:
+                        session["visibility"]["default_off"].append(key)
+                    if trext in _STYLED_FORMATS:
+                        style_json = {
+                            "type": (
+                                "LinearVariantDisplay"
+                                if trext == "vcf"
+                                else "LinearBasicDisplay"
+                            ),
+                            "trackShowLabels": False,
+                            "trackShowDescriptions": False,
+                        }
+                        session["style"][key] = style_json
+                        # NOTE(review): only styled formats are appended to
+                        # "tracks" - confirm that is intended.
+                        session["tracks"].append(key)
+            # No general metadata is collected here, so an empty dict is passed.
+            jc.add_general_configuration({})
+            trackconf = jc.config_json.get("tracks", [])
+            for gnome in jc.genome_names:
+                trackconf += jc.tracksToAdd[gnome]
+                logging.debug(
+                    "++++ adding trackconf=%s for gnome %s", trackconf, gnome
+                )
+            jc.config_json["tracks"] = trackconf
+            assconf = jc.config_json.get("assemblies", [])
+            assconf += jc.assemblies
+            jc.config_json["assemblies"] = assconf
+            logging.debug("+++assemblies=%s, gnames=%s" % (assconf, jc.genome_names))
+            jc.write_config()
+            default_session_data.update({"session_name": sessName})
+            jc.add_default_session(default_session_data)
+            # jc.add_defsess_to_index(default_session_data)
+            # jc.text_index() not sure what broke here.
+    else:
+        sys.stderr.write(
+            "Collection has no suitable trackfiles for autogenJB2 - nothing to process"
+        )