comparison autogenJB2.py @ 0:53c2be00bb6f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 0a86c88a95b0d1cc49d84544136de6556b95320f
author bgruening
date Wed, 05 Jun 2024 08:15:49 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:53c2be00bb6f
1 import argparse
2 import logging
3 import os
4 import sys
5
6 from jbrowse2 import JbrowseConnector as jbC
7
8
9 logging.basicConfig(level=logging.DEBUG)
10 log = logging.getLogger("jbrowse")
11
# Formats whose tracks are switched on in the generated default session.
DEFAULT_ON_FORMATS = ("bigwig", "gff", "gff3", "vcf", "maf", "bed", "hic")
# Formats that receive an explicit display style in the default session.
STYLED_FORMATS = ("gff", "gff3", "bed", "vcf", "maf", "blastxml")
# Reference rows are only used when their extension field is one of these.
GENOME_FORMATS = ("fasta", "fasta.gz")


def split_meta(values):
    """Split each comma separated metadata argument into its list of fields.

    Blank and whitespace-only arguments are dropped so they cannot turn into
    a one-element ``[""]`` row that fails later when fields are indexed.
    """
    return [value.strip().split(",") for value in values if value.strip()]


def uri_flag(path):
    """Return ``"yes"`` when *path* is an http(s) URL, otherwise ``"no"``."""
    return "yes" if path.startswith(("http://", "https://")) else "no"


def optional_index_path(track):
    """Return the fourth field of a track row, or ``""`` when it is omitted.

    The ``--trackmeta`` help documents the bai/crai path as optional, so a
    three-field bam/cram row must not raise ``IndexError``.
    """
    return track[3] if len(track) > 3 else ""


def ensure_index(jc, track, index_dir, suffix, samtools_flag, announce=False):
    """Return an index path for a bam/cram track, building one if needed.

    If the index path supplied in the track row does not exist on disk,
    ``samtools index`` is run through ``jc.subprocess_check_call`` to write
    ``<index_dir>/<trackname><suffix>`` and that path is returned instead.
    When *announce* is true the command line is echoed to stdout first.
    """
    tpath, _, trackname = track[:3]
    ipath = optional_index_path(track)
    if not os.path.exists(ipath):
        ipath = os.path.realpath(os.path.join(index_dir, trackname + suffix))
        cmd = [
            "samtools",
            "index",
            samtools_flag,
            "-o",
            ipath,
            os.path.realpath(tpath),
        ]
        if announce:
            sys.stdout.write("#### calling %s" % " ".join(cmd))
        jc.subprocess_check_call(cmd)
    return ipath


def build_parser():
    """Create the command line parser for the autogenerated JBrowse2 tool."""
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("--sessName", help="Session name", default="AutoJBrowse")
    parser.add_argument(
        "--trackmeta",
        help="Repeatable 'filename,filext,filepath,[bai/crai path for filesystem bam/cram]' for JBrowse2 tracks",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--referencemeta",
        help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks - usually only one needed",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--pafmeta",
        help="Repeatable. Each is a 'pafname, filext, filepath, ... ,' for a JBrowse2 paf track",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--pafreferencemeta",
        help="Repeatable. Each is a 'pafname,refpath,refname' Every pafname must have one or more",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
    )
    parser.add_argument("--outdir", help="Output directory", required=True)
    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2")
    return parser


def main():
    """Build a JBrowse2 configuration from the reference and track arguments.

    Each metadata row is ``path,extension,name[,index path]``. The fasta
    references are registered first, then every track is processed and added
    to the default session of the assembly returned by ``process_genomes``.
    Exits with status 3 when a paf track has no matching ``<name>_paf.fasta``
    row among the tracks.
    """
    args = build_parser().parse_args()
    track_list = split_meta(args.trackmeta)
    ref_list = split_meta(args.referencemeta)
    # Rows with fewer than three fields cannot name a genome; drop them
    # instead of failing with IndexError.
    genome_rows = [
        row for row in ref_list if len(row) > 2 and row[1] in GENOME_FORMATS
    ]
    if not genome_rows:
        # Reported both when no reference rows were given and when none of
        # them is a fasta, so neither case finishes silently.
        sys.stderr.write(
            "Collection has no suitable trackfiles for autogenJB2 - nothing to process"
        )
        return

    jc = jbC(
        outdir=args.outdir,
        jbrowse2path=args.jbrowse2path,
    )
    genomes = [
        {
            "path": row[0],
            "label": row[2],
            "useuri": uri_flag(row[0]),
            "meta": {
                "name": row[2],
                "dataset_dname": row[2],
            },
        }
        for row in genome_rows
    ]
    assref_name = jc.process_genomes(genomes)
    session = {
        "tracks": [],
        "style": {},
        "style_labels": {},
        "visibility": {
            "default_on": [],
            "default_off": [],
        },
    }
    default_session_data = {assref_name: session}

    for track in track_list:
        tpath, trext, trackname = track[:3]
        track_conf = {
            "trackfiles": [],
            "category": "autogenerated",
            "assemblyNames": assref_name,
            "dataset_id": trackname,
        }
        if trext == "paf":
            # foo.paf must have a foo_paf.fasta (or fasta.gz) row to match.
            refname = trackname + "_paf.fasta"
            refdat = [x[2] for x in track_list if x[2] == refname]
            if not refdat:
                log.error(
                    "No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf",
                    refname,
                    trackname,
                )
                sys.exit(3)
            track_conf["conf"] = {
                "options": {
                    "paf": {
                        "genome": refdat,
                        "genome_label": trackname,
                    }
                }
            }
        elif trext == "bam":
            # A generated bam index is written into the connector's outdir.
            ipath = ensure_index(jc, track, jc.outdir, ".bai", "-b", announce=True)
            track_conf["conf"] = {
                "options": {"bam": {"bam_index": " %s ~ %s," % (tpath, ipath)}}
            }
        elif trext == "cram":
            # A generated cram index is written into the working directory.
            ipath = ensure_index(jc, track, "./", ".crai", "-c")
            track_conf["conf"] = {
                "options": {"cram": {"cram_index": "%s ~ %s," % (tpath, ipath)}}
            }
        track_conf["path"] = tpath
        track_conf["format"] = trext
        track_conf["name"] = trackname
        track_conf["label"] = trackname
        track_conf["trackfiles"].append(
            (tpath, trext, uri_flag(tpath), trackname, {})
        )
        keys = jc.process_annotations(track_conf)

        for key in keys or []:
            if trext in DEFAULT_ON_FORMATS:
                session["visibility"]["default_on"].append(key)
            else:
                session["visibility"]["default_off"].append(key)
            if trext in STYLED_FORMATS:
                session["style"][key] = {
                    "type": (
                        "LinearVariantDisplay"
                        if trext == "vcf"
                        else "LinearBasicDisplay"
                    ),
                    "trackShowLabels": False,
                    "trackShowDescriptions": False,
                }
            session["tracks"].append(key)

    jc.add_general_configuration({})
    trackconf = jc.config_json.get("tracks", [])
    for gnome in jc.genome_names:
        trackconf += jc.tracksToAdd[gnome]
        log.debug("++++ adding trackconf=%s for gnome %s", trackconf, gnome)
    jc.config_json["tracks"] = trackconf
    assconf = jc.config_json.get("assemblies", [])
    assconf += jc.assemblies
    jc.config_json["assemblies"] = assconf
    log.debug("+++assemblies=%s, gnames=%s", assconf, jc.genome_names)
    jc.write_config()
    default_session_data["session_name"] = args.sessName
    jc.add_default_session(default_session_data)
    # NOTE: adding the default session to the index and text indexing
    # (jc.add_defsess_to_index / jc.text_index) were disabled in the original
    # script; the reason was not recorded.


if __name__ == "__main__":
    main()