diff autogenJB2.py @ 35:15da358c3108 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 80b849766a962bac4bd0bb8cb69c118cc42699cd-dirty
author fubar
date Wed, 28 Feb 2024 10:08:57 +0000
parents 8f02a84ee278
children 5f39f745682f
line wrap: on
line diff
--- a/autogenJB2.py	Sun Feb 25 04:18:53 2024 +0000
+++ b/autogenJB2.py	Wed Feb 28 10:08:57 2024 +0000
@@ -1,65 +1,66 @@
 import argparse
-import re
+import logging
 import sys
 
-from jbrowse2 import jbrowseConnector as jbC
+from jbrowse2 import JbrowseConnector as jbC
+
+logging.basicConfig(level=logging.DEBUG)  # must be the DEBUG level constant; logging.debug is a function and is rejected as a level
+log = logging.getLogger("jbrowse")
 
 
-def makeDefaultLocation(jc, defLoc=None):
-
-    refName = None
-    drdict = {
-        "reversed": False,
-        "assemblyName": jc.genome_name,
-        "start": 0,
-        "end": 100000,
-    }
+def makeDefaultLocation():
 
-    if defLoc:
-        loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", defLoc)
-        # allow commas like 100,000 but ignore as integer
-        if loc_match:
-            refName = loc_match.group(1)
-            drdict["refName"] = refName
-            if loc_match.group(2) > "":
-                drdict["start"] = int(loc_match.group(2).replace(",", ""))
-            if loc_match.group(3) > "":
-                drdict["end"] = int(loc_match.group(3).replace(",", ""))
-        else:
-            jc.logging.info(
-                "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
-                % defLoc
-            )
-    else:
-        drdict["refName"] = jc.genome_firstcontig
-    if drdict.get("refName", None):
-        jc.logging.info("@@@ defaultlocation %s for default session" % drdict)
-        return drdict
-    else:
-        jc.logging.info("@@@ no contig name found for default session - please add one!")
-        return None
+    # jc is the module-level connector created in the __main__ section below
+    defloc = "%s:100..10000" % jc.genome_firstcontig
+    print('defloc', defloc)
+    return defloc
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="", epilog="")
-    parser.add_argument("--sessname", help="Session name", default="AutoJBrowse")
+    parser.add_argument("--sessName", help="Session name", default="AutoJBrowse")
+    parser.add_argument(
+        "--trackmeta",
+        help="Repeatable of 'filename, filext,filepath, ... ,' for JBrowse2 tracks",
+        default=[],
+        action="append",
+    )
     parser.add_argument(
-        "--collection",
-        help="Collection of 'filepath, filename, filext' for JBrowse2",
+        "--referencemeta",
+        help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks",
         default=[],
-        action="extend",
+        action="append",
+    )
+    parser.add_argument(
+        "--pafmeta",
+        help="Repeatable. Each is a 'filename, filext, filepath, ... ,' for a JBrowse2 paf track",
+        default=[],
+        action="append",
     )
-    parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.0.1")
+    parser.add_argument(
+        "--pafreferencemeta",
+        help="Repeatable. Each is a 'pafname,filepath,refname,filepath,refname....'. Every paf must have a corresponding one ",
+        default=[],
+        action="append",
+    )
+
+    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2")
+    parser.add_argument("--outdir", help="Output directory", required=True)
     args = parser.parse_args()
-    sessName = args.sessname
-    flistList = [x.split(",") for x in args.collection]
-    if flistList:
-        listgenomes = [f for f in flistList if f[0].startswith("REFERENCE_")]
+    sessName = args.sessName
+    # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key'  (each record splits into [path, ext, name, ...])
+    trackList = [rec.strip().split(",") for rec in args.trackmeta if rec > ""]
+    refList = [rec.strip().split(",") for rec in args.referencemeta if rec > ""]
+    print("tracklist = %s\nreflist = %s" % (trackList, refList))
+    if refList:
+        listgenomes = [ref for ref in refList if ref[1] in ("fasta", "fasta.gz")]
+        # assume no pafs here - only fasta / fasta.gz records are kept as genomes
+        print("genomes=%s" % listgenomes)
         if len(listgenomes) > 0:
             genome_paths = [
-                x[1] for x in listgenomes
+                x[0] for x in listgenomes
             ]  # expect genome_1_genomename.fasta etc
-            genome_names = [x[0].split("REFERENCE_")[1] for x in listgenomes]
+            genome_names = [x[2] for x in listgenomes]
             jc = jbC(
                 outdir=args.outdir,
                 genomes=[
@@ -67,11 +68,13 @@
                         "path": x,
                         "meta": {
                             "name": genome_names[i],
+                            "dataset_dname": genome_names[i],
                         },
                     }
                     for i, x in enumerate(genome_paths)
                 ],
             )
+            sys.stdout.write('$$$ genome_paths:%s genome_names: %s' % (genome_paths,genome_names))
             jc.process_genomes()
             default_session_data = {
                 "visibility": {
@@ -81,42 +84,49 @@
                 "style": {},
                 "style_labels": {},
             }
-            defLoc = makeDefaultLocation(jc)
-            listtracks = [f for f in flistList if not f[0].startswith("REFERENCE_")]
+
+            listtracks = trackList
             # foo.paf must have a foo_paf.fasta or fasta.gz to match
-            tnames = [x[0] for x in listtracks]
-            texts = [x[2] for x in listtracks]
+            tnames = [x[2] for x in listtracks]
+            texts = [x[1] for x in listtracks]
             for i, track in enumerate(listtracks):
-                if track[2] == "paf":
-                    refname = track[0] + "_paf.fasta"
-                    refdat = [x[1] for x in listtracks if x[0] == refname]
+                tpath, trext, trackname = track[:3]  # record layout is [path, ext, name, ...]
+                if trext == "paf":
+                    refname = trackname + "_paf.fasta"  # name the matching reference track must carry
+                    refdat = [x[0] for x in listtracks if x[2] == refname]  # match on name, collect the file path
                     if not refdat:
                         jc.logging.warn(
                             "!! No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf"
-                            % (refname, tnames[i])
+                            % (refname, trackname)
                         )
                         sys.exit(3)
                     else:
                         track_conf = {
                             "conf": {
                                 "options": {
-                                    "paf": {"genome": refdat, "genome_label": track[0]}
+                                    "paf": {"genome": refdat, "genome_label": trackname}
                                 }
                             }
                         }
+                elif trext == 'bam':  # fourth field of the record is used as the index
+                    track_conf = {"conf": {"options": {"bam": {"bam_indices": {"bam_index": track[3]}}}}}  # fresh dict: track_conf is not bound yet in this iteration
+                elif trext == 'cram':  # fourth field of the record is used as the index
+                    track_conf = {"conf": {"options": {"cram": {"cram_indices": {"cram_index": track[3]}}}}}  # fresh dict, never reuse the previous track's
                 else:
                     track_conf = {}
-                track_conf["format"] = track[2]
-                track_conf["name"] = track[0]
-                track_conf["label"] = track[0]
-                track_conf["trackfiles"] = []
+                track_conf["format"] = trext
+                track_conf["name"] = trackname
+                track_conf["label"] = trackname
+                track_conf["trackfiles"] = [(tpath, trext, trackname,{}),]
+                track_conf["category"] = "Autogenerated"
                 keys = jc.process_annotations(track_conf)
 
                 if keys:
                     for key in keys:
-                        default_session_data["visibility"][
-                            track.attrib.get("visibility", "default_off")
-                        ].append(key)
+                        # these formats go into the default_on visibility list, everything else into default_off
+                        shown = trext in ("bigwig", "gff3", "gff", "vcf", "maf")
+                        vis = "default_on" if shown else "default_off"
+                        default_session_data["visibility"][vis].append(key)
                         # if track_conf.get("style", None):
                         # default_session_data["style"][key] = track_conf[
                         # "style"
@@ -140,8 +150,10 @@
             else:
                 jc.config_json["tracks"] = jc.tracksToAdd
             jc.write_config()
-            defaultData = {"defaultLocation": defLoc, "session_name": sessName}
-            jc.add_default_session(defaultData)
+            defLoc = makeDefaultLocation()
+            # only the session dict is extended; track_conf is unbound here when no tracks were supplied
+            default_session_data.update({"defaultLocation": defLoc, "session_name": sessName})
+            jc.add_default_session(default_session_data)
             # jc.text_index() not sure what broke here.
     else:
-        sys.stderr.write("!! empty collection supplied - nothing to process")
+        print("!! empty collection supplied - nothing to process")