Mercurial > repos > fubar > jbrowse2
view autogenJB2.py @ 121:478ee3e780de draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit a6f013087aa66546a843298e2584604668f9e140
author | fubar |
---|---|
date | Thu, 03 Oct 2024 06:07:18 +0000 |
parents | 878c27dfea9d |
children |
line wrap: on
line source
import argparse
import logging
import os
import sys

from jbrowse2 import JbrowseConnector as jbC

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("jbrowse")

# File extensions accepted as a genome reference.
_REFERENCE_EXTS = ("fasta", "fasta.gz")
# Track types that are switched on in the default session; all others are off.
_DEFAULT_ON_EXTS = ("bigwig", "gff", "gff3", "vcf", "maf", "bed", "hic")
# Track types that get an explicit display style in the default session.
_STYLED_EXTS = ("gff", "gff3", "bed", "vcf", "maf", "blastxml")
_URI_PREFIXES = ("http://", "https://")


def _split_meta(values):
    """Split every non-empty comma separated metadata string into its fields."""
    return [x.strip().split(",") for x in values if x]


def _uri_flag(path):
    """Return "yes" when *path* is an http(s) URI, otherwise "no"."""
    return "yes" if path.startswith(_URI_PREFIXES) else "no"


def _ensure_index(jc, track, fallback_path, fmt_flag, announce=False):
    """Return a usable index path for a bam/cram track, building one if needed.

    The fourth metadata field is the optional index path. When it is absent
    or does not exist on disk, ``samtools index`` is run (via the connector)
    to write a new index at *fallback_path*.

    Args:
        jc: connector object providing ``subprocess_check_call``.
        track: metadata fields of the track; ``track[0]`` is the data path.
        fallback_path: where to write the index when none was supplied.
        fmt_flag: samtools index format flag ("-b" for bai, "-c" for crai).
        announce: when true, echo the samtools command to stdout first.
    """
    # The index field is optional, so do not assume the list has 4 entries.
    ipath = track[3] if len(track) > 3 else ""
    if not os.path.exists(ipath):
        ipath = os.path.realpath(fallback_path)
        cmd = [
            "samtools",
            "index",
            fmt_flag,
            "-o",
            ipath,
            os.path.realpath(track[0]),
        ]
        if announce:
            sys.stdout.write("#### calling %s" % " ".join(cmd))
        jc.subprocess_check_call(cmd)
    return ipath


def main():
    """Build a JBrowse2 configuration from command line track metadata.

    Reference genomes come from ``--referencemeta`` and tracks from
    ``--trackmeta``; each value is a comma separated
    ``path,extension,name[,index path]`` record. Exits with status 5 when no
    tracks are supplied and status 3 when a paf track has no matching
    ``<name>_paf.fasta`` track. When no fasta reference is supplied a message
    is written to stderr and nothing is processed.
    """
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("--sessName", help="Session name", default="AutoJBrowse")
    parser.add_argument(
        "--trackmeta",
        help="Repeatable 'filename,filext,filepath,[bai/crai path for filesystem bam/cram]' for JBrowse2 tracks",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--referencemeta",
        help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks - usually only one needed",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--pafmeta",
        help="Repeatable. Each is a 'pafname, filext, filepath, ... ,' for a JBrowse2 paf track",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--pafreferencemeta",
        help="Repeatable. Each is a 'pafname,refpath,refname' Every pafname must have one or more",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
    )
    parser.add_argument("--outdir", help="Output directory", required=True)
    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2")
    args = parser.parse_args()
    sessName = args.sessName
    default_session_data = {}
    # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key'
    trackList = _split_meta(args.trackmeta)
    refList = _split_meta(args.referencemeta)
    # assume no pafs here
    listgenomes = [f for f in refList if f[1] in _REFERENCE_EXTS]
    if not listgenomes:
        # Covers both "no --referencemeta at all" and "no fasta among them".
        sys.stderr.write(
            "Please add a fasta genome reference to the collection. No suitable reference fasta for autogenJB2 - nothing to process"
        )
        return

    genome_paths = [x[0] for x in listgenomes]
    genome_names = [x[2] for x in listgenomes]
    guseuri = [_uri_flag(x) for x in genome_paths]
    jc = jbC(
        outdir=args.outdir,
        jbrowse2path=args.jbrowse2path,
    )
    genomes = [
        {
            "path": gpath,
            "label": gname,
            "useuri": guri,
            "meta": {
                "name": gname,
                "dataset_dname": gname,
            },
        }
        for gpath, gname, guri in zip(genome_paths, genome_names, guseuri)
    ]
    assref_name = jc.process_genomes(genomes)
    session = {
        "tracks": [],
        "style": {},
        "style_labels": {},
        "visibility": {
            "default_on": [],
            "default_off": [],
        },
    }
    default_session_data[assref_name] = session

    # foo.paf must have a foo_paf.fasta or fasta.gz to match
    if not trackList:
        sys.stderr.write(
            "Please add at least one track (bam,bed,bigwig,blastxml,cram,gff,hic,maf,paf or vcf) to the collection. No suitable track files for autogenJB2 - nothing to process"
        )
        sys.exit(5)

    for track in trackList:
        track_conf = {
            "trackfiles": [],
            "category": "autogenerated",
            "assemblyNames": assref_name,
        }
        tpath, trext, trackname = track[:3]
        track_conf["dataset_id"] = trackname
        useuri = _uri_flag(tpath)
        if trext == "paf":
            refname = trackname + "_paf.fasta"
            refdat = [x[2] for x in trackList if x[2] == refname]
            if not refdat:
                log.warning(
                    "No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf",
                    refname,
                    trackname,
                )
                sys.exit(3)
            track_conf.update(
                {
                    "conf": {
                        "options": {
                            "paf": {
                                "genome": refdat,
                                "genome_label": trackname,
                            }
                        }
                    }
                }
            )
        elif trext == "bam":
            ipath = _ensure_index(
                jc,
                track,
                os.path.join(jc.outdir, trackname + ".bai"),
                "-b",
                announce=True,
            )
            track_conf.update(
                {"conf": {"options": {"bam": {"bam_index": " %s ~ %s," % (tpath, ipath)}}}}
            )
        elif trext == "cram":
            # NOTE(review): the cram index is written to the working directory
            # while the bam index goes to jc.outdir - confirm this is intended.
            ipath = _ensure_index(
                jc,
                track,
                os.path.join("./", trackname + ".crai"),
                "-c",
            )
            track_conf.update(
                {"conf": {"options": {"cram": {"cram_index": "%s ~ %s," % (tpath, ipath)}}}}
            )
        track_conf["path"] = tpath
        track_conf["format"] = trext
        track_conf["name"] = trackname
        track_conf["label"] = trackname
        track_conf["trackfiles"].append((tpath, trext, useuri, trackname, {}))
        keys = jc.process_annotations(track_conf)
        for key in keys or []:
            if trext in _DEFAULT_ON_EXTS:
                session["visibility"]["default_on"].append(key)
            else:
                session["visibility"]["default_off"].append(key)
            if trext in _STYLED_EXTS:
                ttype = "LinearVariantDisplay" if trext == "vcf" else "LinearBasicDisplay"
                session["style"][key] = {
                    "type": ttype,
                    "trackShowLabels": False,
                    "trackShowDescriptions": False,
                }
            session["tracks"].append(key)

    # No general (analytics/colour/font) settings are supplied by this script.
    jc.add_general_configuration({})
    trackconf = jc.config_json.get("tracks", [])
    for gnome in jc.genome_names:
        trackconf += jc.tracksToAdd[gnome]
        logging.debug("++++ adding trackconf=%s for gnome %s", trackconf, gnome)
    jc.config_json["tracks"] = trackconf
    assconf = jc.config_json.get("assemblies", [])
    assconf += jc.assemblies
    jc.config_json["assemblies"] = assconf
    logging.debug("+++assemblies=%s, gnames=%s", assconf, jc.genome_names)
    jc.write_config()
    default_session_data["session_name"] = sessName
    # NOTE(review): this mutates the last track's conf after it has already
    # been processed; kept as-is in case the connector retains a reference.
    track_conf.update(default_session_data)
    jc.add_default_session(default_session_data)
    # jc.add_defsess_to_index(default_session_data)
    # jc.text_index() not sure what broke here.


if __name__ == "__main__":
    main()