diff autogenJB2.py @ 46:4181e97c70a7 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 3a43e9e0ffce0966101203102e769d1ced28618a
author fubar
date Mon, 04 Mar 2024 09:47:19 +0000
parents bc57164eb270
children 460d5b6c5d98
line wrap: on
line diff
--- a/autogenJB2.py	Fri Mar 01 05:15:41 2024 +0000
+++ b/autogenJB2.py	Mon Mar 04 09:47:19 2024 +0000
@@ -1,5 +1,6 @@
 import argparse
 import logging
+import os
 import sys
 
 from jbrowse2 import JbrowseConnector as jbC
@@ -11,8 +12,7 @@
 def makeDefaultLocation():
 
     refName = jc.genome_firstcontig
-    defloc  = "%s:100..10000" % refName
-    print ('defloc',defloc)
+    defloc = "%s:100..10000" % refName  # "<refName>:100..10000", refName read from jc.genome_firstcontig
     return defloc
 
 
@@ -21,61 +21,66 @@
     parser.add_argument("--sessName", help="Session name", default="AutoJBrowse")
     parser.add_argument(
         "--trackmeta",
-        help="Repeatable of 'filename, filext,filepath, ... ,' for JBrowse2 tracks",
+        help="Repeatable 'filename,filext,filepath,[bai/crai path for filesystem bam/cram]' for JBrowse2 tracks",
         default=[],
         action="append",
     )
     parser.add_argument(
         "--referencemeta",
-        help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks",
+        help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks - usually only one needed",
         default=[],
         action="append",
     )
     parser.add_argument(
         "--pafmeta",
-        help="Repeatable. Each is a 'filename, filext, filepath, ... ,' for a JBrowse2 paf track",
+        help="Repeatable. Each is a 'pafname, filext, filepath, ... ,' for a JBrowse2 paf track",
         default=[],
         action="append",
     )
     parser.add_argument(
         "--pafreferencemeta",
-        help="Repeatable. Each is a 'pafname,filepath,refname,filepath,refname....'. Every paf must have a corresponding one ",
+        help="Repeatable. Each is a 'pafname,refpath,refname' Every pafname must have one or more",
         default=[],
         action="append",
     )
-    parser.add_argument("--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda")
+    parser.add_argument(
+        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
+    )
     parser.add_argument("--outdir", help="Output directory", required=True)
     parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2")
     args = parser.parse_args()
     sessName = args.sessName
     # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key'
-    trackList = [x.strip().split(",") for x in args.trackmeta if x > '']
-    refList = [x.strip().split(",") for x in args.referencemeta if x > '']
-    print("tracklist = %s\nreflist = %s" % (trackList,refList))
+    trackList = [x.strip().split(",") for x in args.trackmeta if x > ""]
+    refList = [x.strip().split(",") for x in args.referencemeta if x > ""]
     if len(refList) > 0:
-        listgenomes = [f for f in refList if f[1] in ['fasta', 'fasta.gz']]
+        listgenomes = [f for f in refList if f[1] in ["fasta", "fasta.gz"]]
         # assume no pafs here
-        print('genomes=%s' % listgenomes)
         if len(listgenomes) > 0:
-            genome_paths = [
-                x[0] for x in listgenomes
-            ]  # expect genome_1_genomename.fasta etc
+            genome_paths = [x[0] for x in listgenomes]
             genome_names = [x[2] for x in listgenomes]
+            guseuri = []
+            for x in genome_paths:
+                if x.startswith('http://') or x.startswith('https://'):
+                    guseuri.append('yes')
+                else:
+                    guseuri.append('no')
             jc = jbC(
                 outdir=args.outdir,
                 jbrowse2path=args.jbrowse2path,
                 genomes=[
                     {
                         "path": x,
-                        "meta": {
-                            "name": genome_names[i],
-                            "dataset_dname": genome_names[i],
-                        },
+                        "label": genome_names[i],
+                        "useuri": guseuri[i],
+                        "meta":  {"name": genome_names[i],
+                                            "dataset_dname": genome_names[i]
+                                        }
                     }
                     for i, x in enumerate(genome_paths)
                 ],
             )
-            sys.stdout.write('$$$ genome_paths:%s genome_names: %s' % (genome_paths,genome_names))
+
             jc.process_genomes()
             default_session_data = {
                 "visibility": {
@@ -109,33 +114,52 @@
                                 }
                             }
                         }
-                elif trext == 'bam':
-                     track_conf = {"conf": {"options": {"bam": {"bam_index": track[3]}}}}
-                elif trext == 'cram':
-                     track_conf = {"conf": {"options": {"cram": {"cram_index": track[3]}}}}
+                elif trext == "bam":
+                    ipath = track[3] if len(track) > 3 else ""  # index path is optional per the --trackmeta help
+                    if not os.path.exists(ipath):  # no usable .bai supplied: build one with samtools
+                        ipath = os.path.realpath(os.path.join(jc.outdir, trackname + '.bai'))
+                        cmd = ["samtools", "index", "-b", "-o", ipath, os.path.realpath(track[0])]
+                        sys.stdout.write('#### calling %s' % ' '.join(cmd))
+                        jc.subprocess_check_call(cmd)
+                    track_conf = {"conf": {"options": {"bam": {"bam_index": ipath}}}}
+                elif trext == "cram":
+                    ipath = track[3] if len(track) > 3 else ""  # index path is optional per the --trackmeta help
+                    if not os.path.exists(ipath):
+                        # No usable .crai supplied: build one with samtools; cmd is only reported after it is assigned.
+                        ipath = os.path.realpath(os.path.join('./', trackname + '.crai'))  # NOTE(review): bam branch writes under jc.outdir - confirm './' is intended
+                        cmd = ["samtools", "index", "-c", "-o", ipath, os.path.realpath(track[0])]
+                        sys.stdout.write('#### calling %s' % ' '.join(cmd))
+                        jc.subprocess_check_call(cmd)
+                    track_conf = {"conf": {"options": {"cram": {"cram_index": ipath}}}}
                 else:
                     track_conf = {}
                 track_conf["format"] = trext
                 track_conf["name"] = trackname
                 track_conf["label"] = trackname
-                track_conf["trackfiles"] = [(tpath, trext, trackname,{}),]
+                useu = tpath.startswith('http://') or tpath.startswith('https://')
+                useuri = 'no'
+                if useu:
+                    useuri = 'yes'
+                track_conf["trackfiles"] = [
+                    (tpath, trext, useuri, trackname, {}),
+                ]
                 track_conf["category"] = "Autogenerated"
                 keys = jc.process_annotations(track_conf)
 
                 if keys:
                     for key in keys:
-                        if trext in ["bigwig", "gff3", "gff", "vcf", "maf",]:
+                        if trext in [
+                            "bigwig",
+                            "gff3",
+                            "gff",
+                            "vcf",
+                            "maf",
+                        ]:
                             default_session_data["visibility"]["default_on"].append(key)
                         else:
-                            default_session_data["visibility"]["default_off"].append(key)
-                        # if track_conf.get("style", None):
-                        # default_session_data["style"][key] = track_conf[
-                        # "style"
-                        # ]  # TODO do we need this anymore?
-                        # if track_conf.get("style_lables", None):
-                        # default_session_data["style_labels"][key] = track_conf.get(
-                        # "style_labels", None
-                        # )
+                            default_session_data["visibility"]["default_off"].append(
+                                key
+                            )
             # general_data = {
             # "analytics": root.find("metadata/general/analytics").text,
             # "primary_color": root.find("metadata/general/primary_color").text,
@@ -152,9 +176,13 @@
                 jc.config_json["tracks"] = jc.tracksToAdd
             jc.write_config()
             defLoc = makeDefaultLocation()
-            default_session_data.update({"defaultLocation": defLoc, "session_name": sessName})
+            default_session_data.update(
+                {"defaultLocation": defLoc, "session_name": sessName}
+            )
             track_conf.update(default_session_data)
             jc.add_default_session(default_session_data)
             # jc.text_index() not sure what broke here.
     else:
-        print("!! empty collection supplied - nothing to process")
+        sys.stderr.write(
+            "!!!! Collection has no suitable trackfiles for autogenJB2 - nothing to process"
+        )