comparison jbrowse2/jbrowse2.py @ 7:234cf4490901 draft

Uploaded
author fubar
date Fri, 05 Jan 2024 04:31:35 +0000
parents 88b9b105c09b
children 1e6128ccc82b
comparison
equal deleted inserted replaced
6:88b9b105c09b 7:234cf4490901
106 ) 106 )
107 return metadata 107 return metadata
108 108
109 109
110 class JbrowseConnector(object): 110 class JbrowseConnector(object):
111 def __init__(self, jbrowse, outdir, genomes, standalone=None): 111 def __init__(self, jbrowse, outdir, genomes):
112 self.debug = False 112 self.debug = False
113 self.usejson = True 113 self.usejson = True
114 self.giURL = GALAXY_INFRASTRUCTURE_URL 114 self.giURL = GALAXY_INFRASTRUCTURE_URL
115 self.jbrowse = jbrowse 115 self.jbrowse = jbrowse
116 self.outdir = outdir 116 self.outdir = outdir
117 os.makedirs(self.outdir, exist_ok=True) 117 os.makedirs(self.outdir, exist_ok=True)
118 self.genome_paths = genomes 118 self.genome_paths = genomes
119 self.standalone = standalone
120 self.trackIdlist = [] 119 self.trackIdlist = []
121 self.tracksToAdd = [] 120 self.tracksToAdd = []
122 self.config_json = {} 121 self.config_json = {}
123 self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json")) 122 self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json"))
124 self.clone_jbrowse(self.jbrowse, self.outdir) 123 self.clone_jbrowse(self.jbrowse, self.outdir)
172 assemblies = [] 171 assemblies = []
173 for i, genome_node in enumerate(self.genome_paths): 172 for i, genome_node in enumerate(self.genome_paths):
174 if self.debug: 173 if self.debug:
175 log.info("genome_node=%s" % str(genome_node)) 174 log.info("genome_node=%s" % str(genome_node))
176 genome_name = genome_node["meta"]["dataset_dname"] 175 genome_name = genome_node["meta"]["dataset_dname"]
177 dsId = genome_node["meta"]["dataset_id"]
178 fapath = genome_node["path"] 176 fapath = genome_node["path"]
179 if self.standalone == "complete": 177 faname = genome_name + ".fa.gz"
180 faname = genome_name + ".fa.gz" 178 fadest = os.path.realpath(os.path.join(self.outdir, faname))
181 fadest = os.path.realpath(os.path.join(self.outdir, faname)) 179 cmd = "bgzip -i -c %s > %s && samtools faidx %s" % (
182 cmd = "bgzip -i -c %s > %s && samtools faidx %s" % ( 180 fapath,
183 fapath, 181 fadest,
184 fadest, 182 fadest,
185 fadest, 183 )
186 ) 184 self.subprocess_popen(cmd)
187 self.subprocess_popen(cmd) 185 adapter = {
188 adapter = { 186 "type": "BgzipFastaAdapter",
189 "type": "BgzipFastaAdapter", 187 "fastaLocation": {
190 "fastaLocation": {
191 "uri": faname,
192 },
193 "faiLocation": {
194 "uri": faname + ".fai",
195 },
196 "gziLocation": {
197 "uri": faname + ".gzi",
198 },
199 }
200 else:
201 faurl = "%s/api/datasets/%s/display" % (self.giURL, dsId)
202 faname = genome_name + ".fa.fai"
203 fastalocation = {
204 "uri": faurl,
205 }
206 failocation = {
207 "uri": faname, 188 "uri": faname,
208 } 189 },
209 adapter = { 190 "faiLocation": {
210 "type": "IndexedFastaAdapter", 191 "uri": faname + ".fai",
211 "fastaLocation": fastalocation, 192 },
212 "faiLocation": failocation, 193 "gziLocation": {
213 } 194 "uri": faname + ".gzi",
214 195 },
215 cmd = ["samtools", "faidx", fapath, "--fai-idx", faname] 196 }
216 self.subprocess_check_call(cmd)
217 trackDict = { 197 trackDict = {
218 "name": genome_name, 198 "name": genome_name,
219 "sequence": { 199 "sequence": {
220 "type": "ReferenceSequenceTrack", 200 "type": "ReferenceSequenceTrack",
221 "trackId": genome_name, 201 "trackId": genome_name,
226 assemblies.append(trackDict) 206 assemblies.append(trackDict)
227 self.genome_name = genome_name 207 self.genome_name = genome_name
228 if self.usejson: 208 if self.usejson:
229 self.config_json["assemblies"] = assemblies 209 self.config_json["assemblies"] = assemblies
230 else: 210 else:
231 if self.standalone == "complete": 211 cmd = [
232 cmd = [ 212 "jbrowse",
233 "jbrowse", 213 "add-assembly",
234 "add-assembly", 214 faname,
235 faname, 215 "-t",
236 "-t", 216 "bgzipFasta",
237 "bgzipFasta", 217 "-n",
238 "-n", 218 genome_name,
239 genome_name, 219 "--load",
240 "--load", 220 "inPlace",
241 "inPlace", 221 "--faiLocation",
242 "--faiLocation", 222 faname + ".fai",
243 faname + ".fai", 223 "--gziLocation",
244 "--gziLocation", 224 faname + ".gzi",
245 faname + ".gzi", 225 "--target",
246 "--target", 226 self.outdir,
247 self.outdir, 227 ]
248 ]
249 else:
250 cmd = [
251 "jbrowse",
252 "add-assembly",
253 faname,
254 "-t",
255 "indexedFasta",
256 "-n",
257 genome_name,
258 "--load",
259 "inPlace",
260 "--faiLocation",
261 faname + ".fai",
262 "--target",
263 self.outdir,
264 ]
265 self.subprocess_check_call(cmd) 228 self.subprocess_check_call(cmd)
266 229
267 def add_default_view(self): 230 def add_default_view(self):
268 cmd = [ 231 cmd = [
269 "jbrowse", 232 "jbrowse",
277 "--target", 240 "--target",
278 self.config_json_file, 241 self.config_json_file,
279 "-v", 242 "-v",
280 " LinearGenomeView", 243 " LinearGenomeView",
281 ] 244 ]
282 if True or self.debug: 245 if self.debug:
283 log.info("### calling set-default-session with cmd=%s" % " ".join(cmd)) 246 log.info("### calling set-default-session with cmd=%s" % " ".join(cmd))
284 self.subprocess_check_call(cmd) 247 self.subprocess_check_call(cmd)
285 248
286 def write_config(self): 249 def write_config(self):
287 with open(self.config_json_file, "w") as fp: 250 with open(self.config_json_file, "w") as fp:
309 url = "%s/api/datasets/%s/display?to_ext=hic " % ( 272 url = "%s/api/datasets/%s/display?to_ext=hic " % (
310 self.giURL, 273 self.giURL,
311 dsId, 274 dsId,
312 ) 275 )
313 hname = trackData["name"] 276 hname = trackData["name"]
314 if self.standalone == "complete": 277 dest = os.path.realpath(os.path.join(self.outdir, hname))
315 dest = os.path.realpath(os.path.join(self.outdir, hname)) 278 url = hname
316 url = hname 279 cmd = ["cp", data, dest]
317 cmd = ["cp", data, dest] 280 self.subprocess_check_call(cmd)
318 self.subprocess_check_call(cmd) 281 floc = {
319 floc = { 282 "uri": hname,
320 "uri": hname, 283 }
321 }
322 else:
323 url = "%s/api/datasets/%s/display?to_ext=hic" % (self.giURL, dsId)
324 floc = {
325 "uri": url,
326 }
327 trackDict = { 284 trackDict = {
328 "type": "HicTrack", 285 "type": "HicTrack",
329 "trackId": tId, 286 "trackId": tId,
330 "name": hname, 287 "name": hname,
331 "assemblyNames": [self.genome_name], 288 "assemblyNames": [self.genome_name],
507 self.subprocess_check_call(cmd) 464 self.subprocess_check_call(cmd)
508 os.unlink(gff3) 465 os.unlink(gff3)
509 466
510 def add_bigwig(self, data, trackData): 467 def add_bigwig(self, data, trackData):
511 url = "%s.bw" % trackData["name"] 468 url = "%s.bw" % trackData["name"]
512 if self.standalone == "complete": 469 dest = os.path.realpath(os.path.join(self.outdir, url))
513 dest = os.path.realpath(os.path.join(self.outdir, url)) 470 cmd = ["cp", data, dest]
514 cmd = ["cp", data, dest] 471 self.subprocess_check_call(cmd)
515 self.subprocess_check_call(cmd) 472 bwloc = {"uri": url}
516 bwloc = {"uri": url}
517 else:
518 dsId = trackData["metadata"]["dataset_id"]
519 url = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
520 bwloc = {"uri": url}
521 tId = trackData["label"] 473 tId = trackData["label"]
522 trackDict = { 474 trackDict = {
523 "type": "QuantitativeTrack", 475 "type": "QuantitativeTrack",
524 "trackId": tId, 476 "trackId": tId,
525 "name": url, 477 "name": url,
560 512
561 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): 513 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
562 tId = trackData["label"] 514 tId = trackData["label"]
563 fname = "%s.bam" % trackData["label"] 515 fname = "%s.bam" % trackData["label"]
564 dest = os.path.realpath("%s/%s" % (self.outdir, fname)) 516 dest = os.path.realpath("%s/%s" % (self.outdir, fname))
565 if self.standalone == "complete": 517 url = fname
566 url = fname 518 self.subprocess_check_call(["cp", data, dest])
567 self.subprocess_check_call(["cp", data, dest]) 519 log.info("### copied %s to %s" % (data, dest))
568 log.info("### copied %s to %s" % (data, dest)) 520 bloc = {"uri": url}
569 bloc = {"uri": url}
570 else:
571 dsId = trackData["metadata"]["dataset_id"]
572 url = "%s/api/datasets/%s/display?to_ext=bam" % (self.giURL, dsId)
573 bloc = {"uri": url}
574 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): 521 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)):
575 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest 522 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
576 self.subprocess_check_call( 523 self.subprocess_check_call(
577 ["cp", os.path.realpath(bam_index), dest + ".bai"] 524 ["cp", os.path.realpath(bam_index), dest + ".bai"]
578 ) 525 )
924 "version.txt", 871 "version.txt",
925 "test_data", 872 "test_data",
926 ]: 873 ]:
927 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] 874 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)]
928 self.subprocess_check_call(cmd) 875 self.subprocess_check_call(cmd)
876 cmd = ['cp', os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir]
877 self.subprocess_check_call(cmd)
929 878
930 879
931 if __name__ == "__main__": 880 if __name__ == "__main__":
932 parser = argparse.ArgumentParser(description="", epilog="") 881 parser = argparse.ArgumentParser(description="", epilog="")
933 parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration") 882 parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration")
934 883
935 parser.add_argument("--jbrowse", help="Folder containing a jbrowse release") 884 parser.add_argument("--jbrowse", help="Folder containing a jbrowse release")
936 parser.add_argument("--outdir", help="Output directory", default="out") 885 parser.add_argument("--outdir", help="Output directory", default="out")
937 parser.add_argument(
938 "--standalone",
939 choices=["complete", "minimal", "data"],
940 help="Standalone mode includes a copy of JBrowse",
941 )
942 parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0") 886 parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0")
943 args = parser.parse_args() 887 args = parser.parse_args()
944 888
945 tree = ET.parse(args.xml.name) 889 tree = ET.parse(args.xml.name)
946 root = tree.getroot() 890 root = tree.getroot()
961 "path": os.path.realpath(x.attrib["path"]), 905 "path": os.path.realpath(x.attrib["path"]),
962 "meta": metadata_from_node(x.find("metadata")), 906 "meta": metadata_from_node(x.find("metadata")),
963 } 907 }
964 for x in root.findall("metadata/genomes/genome") 908 for x in root.findall("metadata/genomes/genome")
965 ], 909 ],
966 standalone=args.standalone,
967 ) 910 )
968 jc.process_genomes() 911 jc.process_genomes()
969 912
970 for track in root.findall("tracks/track"): 913 for track in root.findall("tracks/track"):
971 track_conf = {} 914 track_conf = {}