Mercurial > repos > bgruening > jbrowse2
diff autogenJB2.py @ 0:53c2be00bb6f draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 0a86c88a95b0d1cc49d84544136de6556b95320f
author | bgruening |
---|---|
date | Wed, 05 Jun 2024 08:15:49 +0000 |
parents | |
children |
line wrap: on
line diff
"""Build a JBrowse2 configuration from command-line track and reference metadata.

Every ``--referencemeta`` / ``--trackmeta`` value is a comma separated string.
The code below reads the first three fields as ``path, extension, name``; bam
and cram tracks may carry an index path as an optional fourth field.  All of
the actual JBrowse2 work is delegated to ``JbrowseConnector``.
"""
import argparse
import logging
import os
import sys

from jbrowse2 import JbrowseConnector as jbC


logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("jbrowse")

# Reference extensions accepted as genomes.
_GENOME_EXTS = ("fasta", "fasta.gz")
# Track extensions whose keys are listed under "default_on" in the session.
_DEFAULT_ON_EXTS = ("bigwig", "gff", "gff3", "vcf", "maf", "bed", "hic")
# Track extensions that receive an explicit display style entry.
_STYLED_EXTS = ("gff", "gff3", "bed", "vcf", "maf", "blastxml")


def _build_parser():
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("--sessName", help="Session name", default="AutoJBrowse")
    # NOTE(review): the help text names the fields 'filename,filext,filepath'
    # but main() unpacks them as path, extension, name - confirm which is meant.
    parser.add_argument(
        "--trackmeta",
        help="Repeatable 'filename,filext,filepath,[bai/crai path for filesystem bam/cram]' for JBrowse2 tracks",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--referencemeta",
        help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks - usually only one needed",
        default=[],
        action="append",
    )
    # --pafmeta and --pafreferencemeta are accepted but not read by this script.
    parser.add_argument(
        "--pafmeta",
        help="Repeatable. Each is a 'pafname, filext, filepath, ... ,' for a JBrowse2 paf track",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--pafreferencemeta",
        help="Repeatable. Each is a 'pafname,refpath,refname' Every pafname must have one or more",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
    )
    parser.add_argument("--outdir", help="Output directory", required=True)
    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2")
    return parser


def _split_meta(values):
    """Split every non-empty comma separated metadata string into its fields."""
    return [value.strip().split(",") for value in values if value]


def _use_uri(path):
    """Return "yes" when *path* is an http(s) URL, otherwise "no"."""
    return "yes" if path.startswith(("http://", "https://")) else "no"


def _alignment_index(jc, track, index_flag, fallback, announce=False):
    """Return an index path for a bam/cram track, building one when needed.

    The optional fourth field of *track* is used when it names an existing
    file.  Otherwise ``samtools index`` is run with *index_flag* to write the
    index to *fallback*.  When *announce* is true the command is echoed to
    stdout before it runs.
    """
    # The index field is optional, so do not assume track[3] exists.
    supplied = track[3] if len(track) > 3 else ""
    if supplied and os.path.exists(supplied):
        return supplied
    ipath = os.path.realpath(fallback)
    cmd = [
        "samtools",
        "index",
        index_flag,
        "-o",
        ipath,
        os.path.realpath(track[0]),
    ]
    if announce:
        sys.stdout.write("#### calling %s" % " ".join(cmd))
    jc.subprocess_check_call(cmd)
    return ipath


def main():
    """Parse the arguments, register genomes and tracks, then write the config."""
    args = _build_parser().parse_args()
    sessName = args.sessName
    default_session_data = {}
    # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key'
    listtracks = _split_meta(args.trackmeta)
    refList = _split_meta(args.referencemeta)
    # assume no pafs here
    listgenomes = [ref for ref in refList if ref[1] in _GENOME_EXTS]
    if not listgenomes:
        # Reported whether no references were given or none of them is a fasta,
        # so the script never finishes without saying why nothing was built.
        sys.stderr.write(
            "Collection has no suitable trackfiles for autogenJB2 - nothing to process"
        )
        return

    jc = jbC(
        outdir=args.outdir,
        jbrowse2path=args.jbrowse2path,
    )
    genomes = [
        {
            "path": ref[0],
            "label": ref[2],
            "useuri": _use_uri(ref[0]),
            "meta": {
                "name": ref[2],
                "dataset_dname": ref[2],
            },
        }
        for ref in listgenomes
    ]
    assref_name = jc.process_genomes(genomes)
    if not default_session_data.get(assref_name, None):
        default_session_data[assref_name] = {
            "tracks": [],
            "style": {},
            "style_labels": {},
            "visibility": {
                "default_on": [],
                "default_off": [],
            },
        }
    session = default_session_data[assref_name]

    # Bound up front so the update after the loop is safe with zero tracks.
    track_conf = {}
    # foo.paf must have a foo_paf.fasta or fasta.gz to match
    for track in listtracks:
        track_conf = {
            "trackfiles": [],
            "category": "autogenerated",
            "assemblyNames": assref_name,
        }
        tpath, trext, trackname = track[:3]
        track_conf["dataset_id"] = trackname
        useuri = _use_uri(tpath)
        if trext == "paf":
            refname = trackname + "_paf.fasta"
            refdat = [x[2] for x in listtracks if x[2] == refname]
            if not refdat:
                log.warning(
                    "No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf",
                    refname,
                    trackname,
                )
                sys.exit(3)
            track_conf.update(
                {
                    "conf": {
                        "options": {
                            "paf": {
                                "genome": refdat,
                                "genome_label": trackname,
                            }
                        }
                    }
                }
            )
        elif trext == "bam":
            ipath = _alignment_index(
                jc,
                track,
                "-b",
                os.path.join(jc.outdir, trackname + ".bai"),
                announce=True,
            )
            # NOTE(review): the leading space is in the original bam format
            # string but not in the cram one - confirm what the connector expects.
            track_conf.update(
                {"conf": {"options": {"bam": {"bam_index": " %s ~ %s," % (tpath, ipath)}}}}
            )
        elif trext == "cram":
            # NOTE(review): the cram index is written under "./" while the bam
            # index goes to jc.outdir - confirm this difference is intended.
            ipath = _alignment_index(
                jc,
                track,
                "-c",
                os.path.join("./", trackname + ".crai"),
            )
            track_conf.update(
                {"conf": {"options": {"cram": {"cram_index": "%s ~ %s," % (tpath, ipath)}}}}
            )
        track_conf["path"] = tpath
        track_conf["format"] = trext
        track_conf["name"] = trackname
        track_conf["label"] = trackname
        track_conf["trackfiles"].append((tpath, trext, useuri, trackname, {}))
        keys = jc.process_annotations(track_conf)

        for key in keys or []:
            if trext in _DEFAULT_ON_EXTS:
                session["visibility"]["default_on"].append(key)
            else:
                session["visibility"]["default_off"].append(key)
            if trext in _STYLED_EXTS:
                session["style"][key] = {
                    "type": (
                        "LinearVariantDisplay"
                        if trext == "vcf"
                        else "LinearBasicDisplay"
                    ),
                    "trackShowLabels": False,
                    "trackShowDescriptions": False,
                }
            session["tracks"].append(key)

    # No general settings (analytics, colours, font size) are available to an
    # autogenerated instance, so an empty mapping is passed.
    jc.add_general_configuration({})
    trackconf = jc.config_json.get("tracks", [])
    for gnome in jc.genome_names:
        trackconf += jc.tracksToAdd[gnome]
        logging.debug("++++ adding trackconf=%s for gnome %s", trackconf, gnome)
    jc.config_json["tracks"] = trackconf
    assconf = jc.config_json.get("assemblies", [])
    assconf += jc.assemblies
    jc.config_json["assemblies"] = assconf
    logging.debug("+++assemblies=%s, gnames=%s", assconf, jc.genome_names)
    jc.write_config()
    default_session_data.update({"session_name": sessName})
    # NOTE(review): this update has no effect visible within this script; it is
    # kept in case the connector still holds a reference to the last track_conf.
    track_conf.update(default_session_data)
    jc.add_default_session(default_session_data)
    # jc.add_defsess_to_index(default_session_data)
    # jc.text_index() not sure what broke here.


if __name__ == "__main__":
    main()