Mercurial > repos > bgruening > jbrowse2
comparison autogenJB2.py @ 0:53c2be00bb6f draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 0a86c88a95b0d1cc49d84544136de6556b95320f
| author | bgruening |
|---|---|
| date | Wed, 05 Jun 2024 08:15:49 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:53c2be00bb6f |
|---|---|
import argparse
import logging
import os
import sys

from jbrowse2 import JbrowseConnector as jbC


logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("jbrowse")

# Extensions accepted as reference genome sequences.
GENOME_EXTS = ("fasta", "fasta.gz")
# Track formats that are switched on in the generated default session.
DEFAULT_ON_EXTS = ("bigwig", "gff", "gff3", "vcf", "maf", "bed", "hic")
# Track formats that receive an explicit display style in the default session.
STYLED_EXTS = ("gff", "gff3", "bed", "vcf", "maf", "blastxml")
# Path prefixes that mark a track or genome as remote.
URI_PREFIXES = ("http://", "https://")


def split_meta(values):
    """Split comma separated metadata strings into lists of fields.

    Blank and whitespace-only entries are dropped so they cannot produce a
    one-element ``[""]`` record that fails later on field access.
    """
    stripped = (value.strip() for value in values)
    return [value.split(",") for value in stripped if value]


def uri_flag(path):
    """Return "yes" when *path* is an http(s) URI, otherwise "no"."""
    return "yes" if path.startswith(URI_PREFIXES) else "no"


def genome_records(listgenomes):
    """Build the genome dicts handed to ``process_genomes``.

    Each entry of *listgenomes* is read as ``[path, ext, name, ...]``.
    """
    return [
        {
            "path": entry[0],
            "label": entry[2],
            "useuri": uri_flag(entry[0]),
            "meta": {
                "name": entry[2],
                "dataset_dname": entry[2],
            },
        }
        for entry in listgenomes
    ]


def existing_index(track):
    """Return the index path supplied as the 4th track field if it exists.

    Returns ``None`` when the field is missing, empty, or does not point at an
    existing path, meaning an index has to be built.
    """
    if len(track) > 3 and track[3] and os.path.exists(track[3]):
        return track[3]
    return None


def visibility_for(trext):
    """Return the default-session visibility bucket for a track format."""
    return "default_on" if trext in DEFAULT_ON_EXTS else "default_off"


def style_for(trext):
    """Return the default-session style dict for *trext*, or ``None`` if unstyled."""
    if trext not in STYLED_EXTS:
        return None
    return {
        "type": "LinearVariantDisplay" if trext == "vcf" else "LinearBasicDisplay",
        "trackShowLabels": False,
        "trackShowDescriptions": False,
    }


def build_index(jc, track, trackname, format_flag, suffix, index_dir):
    """Return a usable index path for a bam/cram track, running samtools if needed.

    *format_flag* is the ``samtools index`` format switch (``-b`` or ``-c``),
    *suffix* the index extension and *index_dir* the directory a newly built
    index is written to.
    """
    ipath = existing_index(track)
    if ipath is None:
        ipath = os.path.realpath(os.path.join(index_dir, trackname + suffix))
        cmd = [
            "samtools",
            "index",
            format_flag,
            "-o",
            ipath,
            os.path.realpath(track[0]),
        ]
        sys.stdout.write("#### calling %s\n" % " ".join(cmd))
        jc.subprocess_check_call(cmd)
    return ipath


def build_parser():
    """Create the command line parser for the script."""
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("--sessName", help="Session name", default="AutoJBrowse")
    parser.add_argument(
        "--trackmeta",
        help="Repeatable 'filename,filext,filepath,[bai/crai path for filesystem bam/cram]' for JBrowse2 tracks",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--referencemeta",
        help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks - usually only one needed",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--pafmeta",
        help="Repeatable. Each is a 'pafname, filext, filepath, ... ,' for a JBrowse2 paf track",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--pafreferencemeta",
        help="Repeatable. Each is a 'pafname,refpath,refname' Every pafname must have one or more",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
    )
    parser.add_argument("--outdir", help="Output directory", required=True)
    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2")
    return parser


def track_config(jc, track, trackList, assref_name):
    """Build the track configuration dict passed to ``process_annotations``.

    Exits with status 3 when a paf track has no matching ``<name>_paf.fasta``
    entry among the supplied tracks.
    """
    # Fields are read positionally as path, extension, name (the template
    # comment in the original reads: $jbrowseme[$key],$jbrowseme[$key].ext,'$key').
    tpath, trext, trackname = track[:3]
    track_conf = {
        "trackfiles": [],
        "category": "autogenerated",
        "assemblyNames": assref_name,
        "dataset_id": trackname,
    }
    if trext == "paf":
        # foo.paf must have a foo_paf.fasta track name to match.
        refname = trackname + "_paf.fasta"
        refdat = [t[2] for t in trackList if len(t) > 2 and t[2] == refname]
        if not refdat:
            log.error(
                "No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf",
                refname,
                trackname,
            )
            sys.exit(3)
        track_conf["conf"] = {
            "options": {"paf": {"genome": refdat, "genome_label": trackname}}
        }
    elif trext == "bam":
        ipath = build_index(jc, track, trackname, "-b", ".bai", jc.outdir)
        track_conf["conf"] = {
            "options": {"bam": {"bam_index": " %s ~ %s," % (tpath, ipath)}}
        }
    elif trext == "cram":
        ipath = build_index(jc, track, trackname, "-c", ".crai", "./")
        track_conf["conf"] = {
            "options": {"cram": {"cram_index": "%s ~ %s," % (tpath, ipath)}}
        }
    track_conf["path"] = tpath
    track_conf["format"] = trext
    track_conf["name"] = trackname
    track_conf["label"] = trackname
    track_conf["trackfiles"].append((tpath, trext, uri_flag(tpath), trackname, {}))
    return track_conf


def main():
    """Build a JBrowse2 configuration and default session from the command line."""
    args = build_parser().parse_args()
    trackList = split_meta(args.trackmeta)
    refList = split_meta(args.referencemeta)
    # Only fasta references with at least path, ext and name are usable.
    listgenomes = [f for f in refList if len(f) >= 3 and f[1] in GENOME_EXTS]
    if not listgenomes:
        sys.stderr.write(
            "Collection has no suitable trackfiles for autogenJB2 - nothing to process\n"
        )
        return

    jc = jbC(
        outdir=args.outdir,
        jbrowse2path=args.jbrowse2path,
    )
    assref_name = jc.process_genomes(genome_records(listgenomes))
    session = {
        "tracks": [],
        "style": {},
        "style_labels": {},
        "visibility": {
            "default_on": [],
            "default_off": [],
        },
    }
    default_session_data = {assref_name: session}

    for track in trackList:
        trext = track[1] if len(track) > 1 else None
        keys = jc.process_annotations(track_config(jc, track, trackList, assref_name))
        for key in keys or []:
            session["visibility"][visibility_for(trext)].append(key)
            style_json = style_for(trext)
            if style_json is not None:
                session["style"][key] = style_json
            session["tracks"].append(key)

    jc.add_general_configuration({})
    trackconf = jc.config_json.get("tracks", [])
    for gnome in jc.genome_names:
        trackconf += jc.tracksToAdd[gnome]
        logging.debug("++++ adding trackconf=%s for gnome %s", trackconf, gnome)
    jc.config_json["tracks"] = trackconf
    assconf = jc.config_json.get("assemblies", [])
    assconf += jc.assemblies
    jc.config_json["assemblies"] = assconf
    logging.debug("+++assemblies=%s, gnames=%s", assconf, jc.genome_names)
    jc.write_config()
    default_session_data.update({"session_name": args.sessName})
    jc.add_default_session(default_session_data)
    # NOTE: jc.add_defsess_to_index and jc.text_index were already disabled
    # in the original script ("not sure what broke here").


if __name__ == "__main__":
    main()
