comparison sm_api_wrapper.py @ 0:0c3f56c85e98 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/main/tools/swissmodel_modelling_api commit 43b5bef8757185b4c077effd0bad846f25d408db
author iuc
date Thu, 11 Dec 2025 19:32:14 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0c3f56c85e98
1 """Wrapper for the SWISS-MODEL API."""
2
3 import argparse
4 import json
5 import os
6 import sys
7 import time
8 from urllib.parse import urlsplit
9
10 import requests
11
12
class _SmApiWhisperer:
    """Parent class for talking to the SWISS-MODEL API.

    Subclasses set ``PROJECT_TYPE`` (the path appended to the SWISS-MODEL
    base URL on submission) and implement :meth:`get_json_payload`.
    """

    PROJECT_TYPE = ""

    def __init__(self, targets, token, project_title="Untitled Project"):
        """Store the description of a modelling project.

        :param targets: Target sequences, sent as ``target_sequences``.
        :param token: API token used in the ``Authorization`` header.
        :param project_title: Title submitted together with the project.
        """
        # Filled in by submit_request() once the server accepted the job.
        self.project_id = None
        self.project_title = project_title
        self.targets = targets
        self.token = token

    def get_json_payload(self):
        """Return the mode-specific JSON payload as a dictionary.

        Needs to be implemented per project type.

        :raises NotImplementedError: Always, in this parent class.
        """
        raise NotImplementedError

    def submit_request(self):
        """Send off a request to the SM API.

        On success the project ID from the response is stored in
        ``self.project_id``.

        :returns: HTTP status code of the submission response.
        :raises RuntimeError: If the server answers with an error status.

        Exits the interpreter with status 3 if the server can not be
        reached or does not answer in time.
        """
        json_payload = self.get_json_payload()
        json_payload["project_title"] = self.project_title
        try:
            response = requests.post(
                f"https://swissmodel.expasy.org/{self.PROJECT_TYPE}",
                headers={"Authorization": f"Token {self.token}"},
                json=json_payload,
                timeout=60,
            )
        except (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
        ):
            # Covers connect timeouts as well as read timeouts and refused
            # or dropped connections, all of which mean "try again later".
            print(
                "SWISS-MODEL seems to be temporarily unavailable",
                file=sys.stderr,
            )
            sys.exit(3)
        if not response.ok:
            raise RuntimeError(
                f"Submitting modelling job failed ({response.status_code})"
            )
        self.project_id = response.json()["project_id"]

        return response.status_code

    def wait(self, poll_interval=17):
        """Poll the API for job to be finished.

        :param poll_interval: Seconds to sleep between two status requests.
        :returns: Decoded JSON of the last "full-details" response, whose
            ``status`` is either ``COMPLETED`` or ``FAILED``.
        :raises RuntimeError: If a status request returns an error status.
        """
        # Sleep at the end of each round, there is a chance that this project
        # is already available from cache.
        while True:
            response = requests.get(
                f"https://swissmodel.expasy.org/project/{self.project_id}/"
                "models/full-details/",
                headers={"Authorization": f"Token {self.token}"},
                timeout=360,
            )
            # Fail with a clear message instead of a JSON/KeyError later on.
            if not response.ok:
                raise RuntimeError(
                    "Fetching status of modelling job failed "
                    f"({response.status_code})"
                )
            details = response.json()
            if details["status"].upper() in ("COMPLETED", "FAILED"):
                return details
            # Wait for some time before the next request
            time.sleep(poll_interval)

    def fetch_results(
        self, response_object, output_dir, fetch_modelcif=True, fetch_pdb=True
    ):
        """Get results of the modelling job.

        A ``JSON`` directory is always created inside ``output_dir``. For a
        completed job, the JSON description of every model is stored there
        and, on request, coordinate files are downloaded into ``ModelCIF``
        and ``PDB`` sub-directories.

        :param response_object: Decoded JSON as returned by :meth:`wait`.
        :param output_dir: Directory to store the results in; created if it
            does not exist yet.
        :param fetch_modelcif: Download the file behind ``modelcif_url``.
        :param fetch_pdb: Download the file behind ``coordinates_url``.
        :raises RuntimeError: If downloading a model file fails.
        """

        def _store_model_json(model_json, outdir):
            """Dump the JSON of a single model to ``JSON/model_<id>.json``."""
            fname = f"model_{model_json['model_id']}.json"
            with open(
                os.path.join(outdir, "JSON", fname), "w", encoding="utf8"
            ) as jfh:
                json.dump(model_json, jfh)

        def _fetch_file(url, file_type, outdir):
            """Download ``url`` into the ``file_type`` sub-directory."""
            # stream=True lets iter_content() write the body chunk by chunk
            # instead of buffering the whole file in memory first.
            with requests.get(url, stream=True, timeout=360) as response:
                if not response.ok:
                    raise RuntimeError(
                        f"Fetching {file_type} output failed ("
                        + f"{response.status_code})."
                    )
                os.makedirs(os.path.join(outdir, file_type), exist_ok=True)
                fname = f"model_{os.path.basename(urlsplit(url).path)}"
                with open(
                    os.path.join(outdir, file_type, fname), "wb"
                ) as mfh:
                    for chunk in response.iter_content(chunk_size=8192):
                        mfh.write(chunk)

        # Make sure a JSON directory exists, also for failed jobs. Do not
        # trip over a directory left behind by the caller or an earlier run.
        os.makedirs(os.path.join(output_dir, "JSON"), exist_ok=True)
        # Compare case-insensitively, the same way wait() does.
        if response_object["status"].upper() == "COMPLETED":
            for model in response_object["models"]:
                _store_model_json(model, output_dir)
                if fetch_modelcif:
                    _fetch_file(model["modelcif_url"], "ModelCIF", output_dir)
                if fetch_pdb:
                    _fetch_file(model["coordinates_url"], "PDB", output_dir)
118
119
class _AutoModelWhisperer(_SmApiWhisperer):
    """Client for SWISS-MODEL projects of type "automodel"."""

    PROJECT_TYPE = "automodel"

    def get_json_payload(self):
        """Return the automodel payload, just the target sequences."""
        payload = {"target_sequences": self.targets}
        return payload
128
129
class _AlignmentWhisperer(_SmApiWhisperer):
    """Client for SWISS-MODEL projects of type "alignment"."""

    PROJECT_TYPE = "alignment"

    def __init__(
        self,
        targets,
        token,
        template_sequence,
        template_seqres_offset,
        pdb_id,
        auth_asym_id,
        assembly_id,
        project_title="Untitled Project",
    ):
        # All of these values end up in the request payload, so the long
        # argument list stays and Pylint is told to accept it.
        # pylint: disable=too-many-arguments,too-many-positional-arguments
        """Set up alignment mode; the PDB ID is stored in lower case."""
        super().__init__(targets, token, project_title=project_title)
        self.template_sequence = template_sequence
        self.template_seqres_offset = template_seqres_offset
        self.pdb_id = pdb_id.lower()
        self.auth_asym_id = auth_asym_id
        self.assembly_id = assembly_id

    def get_json_payload(self):
        """Return the alignment payload: targets plus template details."""
        payload = {}
        payload["assembly_id"] = self.assembly_id
        payload["auth_asym_id"] = self.auth_asym_id
        payload["pdb_id"] = self.pdb_id
        payload["target_sequences"] = self.targets
        payload["template_seqres_offset"] = self.template_seqres_offset
        payload["template_sequence"] = self.template_sequence

        return payload
168
169
class _UserTemplateWhisperer(_SmApiWhisperer):
    """Client for SWISS-MODEL projects of type "user_template"."""

    PROJECT_TYPE = "user_template"

    def __init__(
        self,
        targets,
        token,
        template_file,
        project_title="Untitled Project",
    ):
        """Set up user template mode, remembering the template file path."""
        super().__init__(targets, token, project_title=project_title)
        self.template_file = template_file

    def get_json_payload(self):
        """Return the payload with the template file's text embedded."""
        payload = {
            "project_title": self.project_title,
            "target_sequences": self.targets,
        }
        # The template is read as UTF-8 text and sent inline.
        with open(self.template_file, encoding="utf8") as template_handle:
            payload["template_coordinates"] = template_handle.read()

        return payload
196
197
def _defastarise_targets(sequences):
    """In case some of the targets carry FastA headers, remove them.

    Every input string is split on whitespace. If there is more than one
    token and the first one starts with ``>`` (or ``__gt__``), that token
    is dropped as header. The remaining tokens are joined without
    separator.

    Splitting on any whitespace (not only single blanks) also handles
    header and sequence separated by newlines or tabs, wrapped sequence
    lines and leading/trailing whitespace.

    :param sequences: Iterable of sequence strings, optionally with header.
    :returns: List with exactly one cleaned sequence per input string, in
        input order. A string consisting of a single token is returned as
        that token, even if it looks like a header.
    """
    targets = []
    for seq in sequences:
        tokens = seq.split()
        if len(tokens) > 1 and tokens[0].startswith((">", "__gt__")):
            tokens = tokens[1:]
        # Empty or whitespace-only input yields "" so that the result keeps
        # one entry per input.
        targets.append("".join(tokens))

    return targets
212
213
def _parse_args():
    """Get command line arguments.

    Parses ``sys.argv`` and validates the result: options required by the
    selected project type must be present and non-empty, positional
    arguments and the project title must not be empty.

    :returns: The populated :class:`argparse.Namespace`.
    :raises RuntimeError: If a positional argument has an unexpected type.

    Validation failures print a message to stderr and exit with status 2.
    """

    def _bail(message):
        """Report an invalid command line and exit with status 2."""
        print(message, file=sys.stderr)
        sys.exit(2)

    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument(
        "-d",
        "--project-title",
        help="Title for the modelling project",
        metavar="<TITLE>",
        # Same default as the API classes; without it an explicit None
        # would be handed on and override their default title.
        default="Untitled Project",
    )
    parser.add_argument(
        "-m",
        "--no-modelcif",
        help="Do not download models in ModelCIF format.",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "-l",
        "--fetch-pdb",
        help="Download models in PDB legacy format.",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "-t",
        "--template-sequence",
        help="The template sequence used for alignment mode",
        metavar="<SEQUENCE>",
    )
    # ToDo: do we need the offset from the user? Doesn't interactive alignment
    #       mode compute it?
    parser.add_argument(
        "-o",
        "--template-seqres-offset",
        help="Offset of the template sequence segment compared to the full "
        "template sequence",
        metavar="<NUMBER>",
        type=int,
    )
    parser.add_argument(
        "-p",
        "--pdb-id",
        help="PDB ID (SMTL ID) for the template used in alignment mode",
        metavar="<PDB ID>",
    )
    parser.add_argument(
        "-c",
        "--auth-asym-id",
        help="The chain name to be used in alignment mode",
        metavar="<CHAIN NAME>",
    )
    parser.add_argument(
        "-a",
        "--assembly-id",
        help="ID of the assembly of the SMTL template to be used in alignment "
        "mode",
        metavar="<NUMBER>",
        type=int,
    )
    parser.add_argument(
        "-f",
        "--template-file",
        help="PDB formatted file to serve as template for modelling",
        metavar="<PDB FILE>",
    )
    parser.add_argument(
        "project_type",
        choices=("alignment", "automodel", "usertemplate"),
        help="Kind of project ('alignment', 'automodel', 'usertemplate')",
        metavar="<PROJECT TYPE>",
    )
    # Metavars of the free-form positional arguments, reused for the error
    # messages of the checks below.
    metas = {
        "outdir": "<OUTPUT DIRECTORY>",
        "target_sequences": "<SEQUENCE[S]>",
        "token": "<TOKEN>",
    }
    parser.add_argument(
        "token",
        help="Authentication token for SWISS-MODEL",
        metavar=metas["token"],
    )
    parser.add_argument(
        "outdir",
        help="Directory to store results in",
        metavar=metas["outdir"],
    )
    parser.add_argument(
        "target_sequences",
        help="Target sequence to be modelled; to add multiple sequences, "
        "delimit with a space",
        metavar=metas["target_sequences"],
        nargs=argparse.REMAINDER,
    )

    opts = parser.parse_args()

    # Make sure arguments for the different modelling modes are there
    req_opts = {
        "alignment": [
            "assembly_id",
            "auth_asym_id",
            "pdb_id",
            "template_seqres_offset",
            "template_sequence",
        ],
        "automodel": [],
        "usertemplate": ["template_file"],
    }
    # check mandatory arguments
    for req in req_opts[opts.project_type]:
        value = getattr(opts, req)
        flag = f"--{req.replace('_', '-')}"
        # Test for None explicitly, 0 is a valid value for integer options.
        if value is None:
            _bail(
                f"Option '{flag}' missing for '{opts.project_type}' mode"
            )
        if isinstance(value, str) and not value:
            _bail(f"Option '{flag}' can not be an empty string")
    # check positional arguments
    for req, mta in metas.items():
        value = getattr(opts, req)
        if isinstance(value, str):
            if not value:
                _bail(f"Argument of '{mta}' can not be an empty string")
        elif isinstance(value, list):
            # neither an empty list nor empty list items are accepted
            if not value or not all(value):
                _bail(f"Argument of '{mta}' can not be empty")
        else:
            raise RuntimeError(
                f"Value with unknown type '{type(value).__name__}' found for "
                + f"'{mta}'"
            )
    # check optional arguments
    for opt in ["project_title"]:
        value = getattr(opts, opt)
        if value is not None and not value:
            _bail(
                f"Option '--{opt.replace('_', '-')}' can not have an empty "
                + "string as value"
            )

    return opts
374
375
def _main():
    """Script entry point: parse arguments, run the job, store results.

    Builds the API client matching the requested project type, submits the
    job, waits for it to end and fetches the results into the output
    directory. Ends with exit status 0.
    """
    opts = _parse_args()

    targets = _defastarise_targets(opts.target_sequences)
    mode = opts.project_type.lower()

    # pick the client class for the requested kind of project
    if mode == "automodel":
        client = _AutoModelWhisperer(
            targets, opts.token, project_title=opts.project_title
        )
    elif mode == "alignment":
        # the template may carry a FastA header, too
        cleaned = _defastarise_targets([opts.template_sequence])
        assert len(cleaned) == 1
        client = _AlignmentWhisperer(
            targets,
            opts.token,
            cleaned[0],
            opts.template_seqres_offset,
            opts.pdb_id,
            opts.auth_asym_id,
            opts.assembly_id,
            project_title=opts.project_title,
        )
    elif mode == "usertemplate":
        client = _UserTemplateWhisperer(
            targets,
            opts.token,
            opts.template_file,
            project_title=opts.project_title,
        )
    else:
        raise RuntimeError(
            f"Not a suitable project type: '{opts.project_type}'"
        )

    # submit, block until the server reports an end state, then download
    client.submit_request()
    job_details = client.wait()
    client.fetch_results(
        job_details,
        opts.outdir,
        fetch_modelcif=not opts.no_modelcif,
        fetch_pdb=opts.fetch_pdb,
    )

    sys.exit(0)
423
424
# Only run the command line tool when executed as a script, not on import.
if __name__ == "__main__":
    _main()
427
428 # LocalWords: Pylint