Mercurial > repos > bgruening > jbrowse2
comparison autogenJB2.py @ 0:53c2be00bb6f draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 0a86c88a95b0d1cc49d84544136de6556b95320f
author | bgruening |
---|---|
date | Wed, 05 Jun 2024 08:15:49 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:53c2be00bb6f |
---|---|
1 import argparse | |
2 import logging | |
3 import os | |
4 import sys | |
5 | |
6 from jbrowse2 import JbrowseConnector as jbC | |
7 | |
8 | |
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("jbrowse")

# Reference extensions that are treated as genome assemblies.
_GENOME_EXTS = ("fasta", "fasta.gz")
# Track extensions whose keys are listed as visible by default in the session.
_DEFAULT_ON_EXTS = ("bigwig", "gff", "gff3", "vcf", "maf", "bed", "hic")
# Track extensions that receive an explicit display style in the session.
_STYLED_EXTS = ("gff", "gff3", "bed", "vcf", "maf", "blastxml")
_NOTHING_TO_DO = (
    "Collection has no suitable trackfiles for autogenJB2 - nothing to process"
)


def _build_parser():
    """Return the command line parser for this script."""
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("--sessName", help="Session name", default="AutoJBrowse")
    parser.add_argument(
        "--trackmeta",
        help="Repeatable 'filename,filext,filepath,[bai/crai path for filesystem bam/cram]' for JBrowse2 tracks",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--referencemeta",
        help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks - usually only one needed",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--pafmeta",
        help="Repeatable. Each is a 'pafname, filext, filepath, ... ,' for a JBrowse2 paf track",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--pafreferencemeta",
        help="Repeatable. Each is a 'pafname,refpath,refname' Every pafname must have one or more",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
    )
    parser.add_argument("--outdir", help="Output directory", required=True)
    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2")
    return parser


def _split_meta(values):
    """Split every non-blank comma separated entry into its list of fields.

    Entries are stripped before the emptiness check so that a whitespace-only
    value is dropped instead of producing a useless one-field record.
    """
    stripped = (value.strip() for value in values)
    return [value.split(",") for value in stripped if value]


def _is_uri(path):
    """Return True when ``path`` is an http(s) URL rather than a local file."""
    return path.startswith(("http://", "https://"))


def _paf_conf(track_list, trackname):
    """Return the ``conf`` entry for a paf track, exiting if it has no fasta.

    A paf track named ``foo`` needs a sibling track named ``foo_paf.fasta``.
    Exits with status 3 when no such track is present.
    """
    refname = trackname + "_paf.fasta"
    refdat = [t[2] for t in track_list if len(t) > 2 and t[2] == refname]
    if not refdat:
        log.warning(
            "No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf",
            refname,
            trackname,
        )
        sys.exit(3)
    return {"options": {"paf": {"genome": refdat, "genome_label": trackname}}}


def _ensure_index(jc, track, index_dir, suffix, samtools_flag, announce=False):
    """Return the index path for a bam/cram track, building it when needed.

    The fourth ``--trackmeta`` field is optional; when it is missing or does
    not exist on disk, ``samtools index`` is run through the connector and the
    index is written to ``index_dir`` as ``<trackname><suffix>``.
    """
    # os.path.exists("") is False, so a missing field falls through to indexing.
    index_path = track[3] if len(track) > 3 else ""
    if not os.path.exists(index_path):
        index_path = os.path.realpath(os.path.join(index_dir, track[2] + suffix))
        cmd = [
            "samtools",
            "index",
            samtools_flag,
            "-o",
            index_path,
            os.path.realpath(track[0]),
        ]
        if announce:
            sys.stdout.write("#### calling %s" % " ".join(cmd))
        jc.subprocess_check_call(cmd)
    return index_path


def _record_session_keys(session, keys, trext):
    """Register the track keys of one track in the default session data."""
    bucket = "default_on" if trext in _DEFAULT_ON_EXTS else "default_off"
    for key in keys or []:
        session["visibility"][bucket].append(key)
        if trext in _STYLED_EXTS:
            session["style"][key] = {
                "type": (
                    "LinearVariantDisplay" if trext == "vcf" else "LinearBasicDisplay"
                ),
                "trackShowLabels": False,
                "trackShowDescriptions": False,
            }
        session["tracks"].append(key)


def _add_track(jc, track, track_list, assref_name, session):
    """Configure one ``--trackmeta`` entry and hand it to the connector.

    ``track`` holds ``path, extension, name`` and, optionally, an index path
    for bam/cram files.
    """
    tpath, trext, trackname = track[:3]
    track_conf = {
        "trackfiles": [],
        "category": "autogenerated",
        "assemblyNames": assref_name,
        "dataset_id": trackname,
    }
    if trext == "paf":
        track_conf["conf"] = _paf_conf(track_list, trackname)
    elif trext == "bam":
        ipath = _ensure_index(jc, track, jc.outdir, ".bai", "-b", announce=True)
        # The leading space in the bam value is kept exactly as the script
        # has always emitted it.
        track_conf["conf"] = {
            "options": {"bam": {"bam_index": " %s ~ %s," % (tpath, ipath)}}
        }
    elif trext == "cram":
        ipath = _ensure_index(jc, track, "./", ".crai", "-c")
        track_conf["conf"] = {
            "options": {"cram": {"cram_index": "%s ~ %s," % (tpath, ipath)}}
        }
    track_conf["path"] = tpath
    track_conf["format"] = trext
    track_conf["name"] = trackname
    track_conf["label"] = trackname
    useuri = "yes" if _is_uri(tpath) else "no"
    track_conf["trackfiles"].append((tpath, trext, useuri, trackname, {}))
    _record_session_keys(session, jc.process_annotations(track_conf), trext)


def main(argv=None):
    """Build a JBrowse2 configuration from reference and track metadata.

    ``argv`` defaults to ``sys.argv[1:]``. When no fasta / fasta.gz reference
    is supplied, a message is written to stderr and nothing is built.
    """
    args = _build_parser().parse_args(argv)
    # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key'
    track_list = _split_meta(args.trackmeta)
    genomes = [
        {
            "path": ref[0],
            "label": ref[2],
            "useuri": "yes" if _is_uri(ref[0]) else "no",
            "meta": {"name": ref[2], "dataset_dname": ref[2]},
        }
        for ref in _split_meta(args.referencemeta)
        if len(ref) > 2 and ref[1] in _GENOME_EXTS
    ]
    if not genomes:
        # NOTE(review): the original nesting of this message was ambiguous;
        # it is now reported whenever no usable reference exists.
        sys.stderr.write(_NOTHING_TO_DO)
        return

    jc = jbC(outdir=args.outdir, jbrowse2path=args.jbrowse2path)
    assref_name = jc.process_genomes(genomes)
    session = {
        "tracks": [],
        "style": {},
        "style_labels": {},
        "visibility": {"default_on": [], "default_off": []},
    }
    default_session_data = {assref_name: session}

    # foo.paf must have a foo_paf.fasta or fasta.gz to match
    for track in track_list:
        _add_track(jc, track, track_list, assref_name, session)

    jc.add_general_configuration({})
    trackconf = jc.config_json.get("tracks", [])
    for gnome in jc.genome_names:
        trackconf += jc.tracksToAdd[gnome]
        log.debug("++++ adding trackconf=%s for gnome %s", trackconf, gnome)
    jc.config_json["tracks"] = trackconf
    assconf = jc.config_json.get("assemblies", [])
    assconf += jc.assemblies
    jc.config_json["assemblies"] = assconf
    log.debug("+++assemblies=%s, gnames=%s", assconf, jc.genome_names)
    jc.write_config()

    default_session_data["session_name"] = args.sessName
    # Text indexing (jc.text_index) stays disabled, as in the original script.
    jc.add_default_session(default_session_data)


if __name__ == "__main__":
    main()