Mercurial > repos > cpt > cpt_mist3_wrapper
comparison cpt_mist3/mist3.py @ 0:1a9603d09814 draft default tip
Uploaded
| author | cpt |
|---|---|
| date | Fri, 17 Jun 2022 02:58:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1a9603d09814 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse | |
| 3 import time | |
| 4 import tempfile | |
| 5 import pprint | |
| 6 import shutil | |
| 7 import os | |
| 8 import math | |
| 9 from string import Template | |
| 10 from Bio import SeqIO | |
| 11 import subprocess | |
| 12 import sys | |
| 13 import logging | |
| 14 | |
| 15 FORMAT = "[%(levelname)s:%(filename)s:%(lineno)s:%(funcName)s()] %(message)s" | |
| 16 logging.basicConfig(level=logging.INFO, format=FORMAT) | |
| 17 log = logging.getLogger("mist") | |
| 18 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) | |
| 19 | |
| 20 MONTAGE_BORDER = 80 | |
| 21 IMAGE_BORDER = 2 | |
| 22 | |
| 23 MONTAGE_BORDER_COORD = "%sx%s" % (MONTAGE_BORDER, MONTAGE_BORDER) | |
| 24 IMAGE_BORDER_COORD = "%sx%s" % (IMAGE_BORDER, IMAGE_BORDER) | |
| 25 | |
| 26 DOUBLE_IMAGE_BORDER_COORD = "%sx%s" % (2 * IMAGE_BORDER, 2 * IMAGE_BORDER) | |
| 27 | |
| 28 MONTAGE_BORDER_COLOUR = "grey70" | |
| 29 IMAGE_BORDER_COLOUR = "purple" | |
| 30 LABEL_COLOUR = "grey22" | |
| 31 TICK_LENGTH = 0.2 * MONTAGE_BORDER | |
| 32 | |
| 33 TYPEFONT = "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf" | |
| 34 | |
| 35 CREDITS = ( | |
| 36 "CPTs MISTv3\\n" | |
| 37 "GPLv3 (C) 2015 Helena Rasche <esr\@tamu.edu>\\n" | |
| 38 "Dot plots by Gepard Dot Plotter by Dr. Jan Krumsiek" | |
| 39 ) | |
| 40 | |
| 41 | |
| 42 class FancyRecord(object): | |
| 43 def __init__(self, record, tmpdir): | |
| 44 self.temp = tempfile.NamedTemporaryFile(mode='w', dir=tmpdir, delete=False, suffix=".fa") | |
| 45 self.temp_path = self.temp.name | |
| 46 self.id = self.temp_path.rsplit("/")[-1] | |
| 47 self.record = record | |
| 48 self.header = record.id | |
| 49 | |
| 50 # Description include record.id | |
| 51 sd = record.description.strip().split(" ") | |
| 52 if len(sd) > 1: | |
| 53 self.description = " ".join(sd[1:]) | |
| 54 else: | |
| 55 self.description = "" | |
| 56 | |
| 57 self.length = len(record.seq) | |
| 58 | |
| 59 # Serialize to disk | |
| 60 self._write(self.temp) | |
| 61 self.temp.flush() | |
| 62 self.temp.close() | |
| 63 | |
| 64 def _write(self, handle): | |
| 65 SeqIO.write([self.record], handle, "fasta") | |
| 66 | |
| 67 | |
| 68 class Subplot(object): | |
| 69 def __init__(self, i, j, tmpdir, zoom): | |
| 70 self.i = i | |
| 71 self.j = j | |
| 72 # Mist information | |
| 73 self.tmpdir = tmpdir | |
| 74 self.zoom = zoom | |
| 75 | |
| 76 self.original_path = None | |
| 77 self.thumb_path = None | |
| 78 self.annotated_original_path = None | |
| 79 | |
| 80 def safe(self, string): | |
| 81 return "".join( | |
| 82 [ | |
| 83 c | |
| 84 for c in string.strip() | |
| 85 if c.isalpha() or c.isdigit() or c == " " or c in ("[", "]", "-", "_") | |
| 86 ] | |
| 87 ).rstrip() | |
| 88 | |
| 89 def move_to_dir(self, files_path): | |
| 90 """Move a specific image that's part of a subplot to an output | |
| 91 directory and return the name | |
| 92 """ | |
| 93 destination_fn = ( | |
| 94 self.safe("%s_vs_%s_[annotated]" % (self.i.header, self.j.header)) + ".png" | |
| 95 ) | |
| 96 destination = os.path.join(files_path, destination_fn) | |
| 97 log.debug("cp %s %s", self.annotated_original_path, destination) | |
| 98 if ( | |
| 99 self.annotated_original_path is not None | |
| 100 and os.path.exists(self.annotated_original_path) | |
| 101 and not os.path.exists(destination) | |
| 102 ): | |
| 103 shutil.move(self.annotated_original_path, destination) | |
| 104 return destination_fn | |
| 105 | |
| 106 def get_description(self): | |
| 107 return "%s v %s" % (self.i.header, self.j.header) | |
| 108 | |
| 109 def __repr__(self): | |
| 110 return "Subplot [%s]" % self.get_description() | |
| 111 | |
| 112 def run_gepard(self, matrix, window, global_rescale="35%"): | |
| 113 """Run gepard on two sequences, with a specified output file | |
| 114 """ | |
| 115 log.info("Running Gepard on %s", self.get_description()) | |
| 116 | |
| 117 destination_fn = ( | |
| 118 self.safe("%s_vs_%s_orig" % (self.i.header, self.j.header)) + ".png" | |
| 119 ) | |
| 120 self.original_path = os.path.join(self.tmpdir, destination_fn) | |
| 121 | |
| 122 cmd = [ | |
| 123 "java", | |
| 124 "-jar", | |
| 125 os.path.join(SCRIPT_DIR, "gepard.jar"), | |
| 126 "--seq1", | |
| 127 self.j.temp_path, | |
| 128 "--seq2", | |
| 129 self.i.temp_path, | |
| 130 "--matrix", | |
| 131 matrix + ".mat", | |
| 132 "--outfile", | |
| 133 self.original_path, | |
| 134 "--word", | |
| 135 str(window), | |
| 136 "--zoom", | |
| 137 str(self.zoom), | |
| 138 "--silent", | |
| 139 ] | |
| 140 log.debug(subprocess.list2cmdline(cmd)) | |
| 141 #log.info(subprocess.check_output("convert -list type")) | |
| 142 #exit(2) | |
| 143 failure_count = 0 | |
| 144 while True: | |
| 145 try: | |
| 146 subprocess.check_call(cmd) | |
| 147 break | |
| 148 except subprocess.CalledProcessError: | |
| 149 failure_count += 1 | |
| 150 log.warn("sp.CPE FC %s", failure_count) | |
| 151 if failure_count > 3: | |
| 152 break | |
| 153 time.sleep(1) | |
| 154 | |
| 155 # Generate border/individualised images | |
| 156 log.debug(" Annotating") | |
| 157 destination_fn = ( | |
| 158 self.safe("%s_vs_%s_annotated" % (self.i.header, self.j.header)) + ".png" | |
| 159 ) | |
| 160 self.annotated_original_path = os.path.join(self.tmpdir, destination_fn) | |
| 161 self.annotate_image(self.original_path, self.annotated_original_path) | |
| 162 | |
| 163 # Generate thumbnail version of base image | |
| 164 log.debug(" Resizing") | |
| 165 destination_fn = ( | |
| 166 self.safe("%s_vs_%s_thumb" % (self.i.header, self.j.header)) + ".png" | |
| 167 ) | |
| 168 self.thumb_path = os.path.join(self.tmpdir, destination_fn) | |
| 169 Misty.resize_image(global_rescale, self.original_path, self.thumb_path) | |
| 170 | |
| 171 def get_thumb_dims(self): | |
| 172 """ | |
| 173 For NxN sized images, this avoids running `identify` when it won't be | |
| 174 used (e.g i=3,j=4) | |
| 175 """ | |
| 176 if not hasattr(self, "thumb_dims") or self.thumb_dims is None: | |
| 177 self.thumb_dims = Misty.obtain_image_dimensions(self.thumb_path) | |
| 178 return self.thumb_dims | |
| 179 | |
| 180 def label_formatter(self, bases): | |
| 181 if bases > 1000000: | |
| 182 label = "%s Mbp" % int(bases / 1000000) | |
| 183 elif bases > 1000: | |
| 184 label = "%s kbp" % int(bases / 1000) | |
| 185 else: | |
| 186 label = "%s b" % bases | |
| 187 return label | |
| 188 | |
| 189 def annotate_image(self, infile, outfile): | |
| 190 original_dims = Misty.obtain_image_dimensions(infile) | |
| 191 | |
| 192 half_height = (original_dims[1] / 2) + MONTAGE_BORDER + IMAGE_BORDER | |
| 193 half_width = (original_dims[0] / 2) + MONTAGE_BORDER + IMAGE_BORDER | |
| 194 | |
| 195 restoreBorder = MONTAGE_BORDER | |
| 196 | |
| 197 def char_width(font_size): | |
| 198 """approximate pixel width of a single character at Xpt font | |
| 199 | |
| 200 Assumes that 40pt font results in 20px characters | |
| 201 """ | |
| 202 return int(float(font_size) / 2) | |
| 203 | |
| 204 def char_height(font_size): | |
| 205 """approximate pixel height of a single character at Xpt font | |
| 206 | |
| 207 Assumes that 40pt font results in 30px characters | |
| 208 """ | |
| 209 return int(float(font_size) * 30 / 40) | |
| 210 | |
| 211 def est_pixels(string, font_size): | |
| 212 """guess pixel width of a string at a given font size | |
| 213 """ | |
| 214 return char_width(font_size) * len(string) | |
| 215 | |
| 216 j_ticks = int(Misty.BestTick(self.j.length, 5)) | |
| 217 i_ticks = int(Misty.BestTick(self.i.length, 5)) | |
| 218 | |
| 219 # Convert definitions | |
| 220 GREY_FILL = ["-fill", LABEL_COLOUR] | |
| 221 NO_FILL = ["-fill", "none"] | |
| 222 NO_STROKE = ["-stroke", "none"] | |
| 223 GREY_STROKE = ["-stroke", LABEL_COLOUR, "-strokewidth", "2"] | |
| 224 GFNS = GREY_FILL + NO_STROKE | |
| 225 NFGS = NO_FILL + GREY_STROKE | |
| 226 # Font for labels | |
| 227 FONT_SPEC = GFNS + ["-font", TYPEFONT] | |
| 228 FONT_10pt = FONT_SPEC + ["-pointsize", "10"] | |
| 229 FONT_20pt = FONT_SPEC + ["-pointsize", "20"] | |
| 230 FONT_30pt = FONT_SPEC + ["-pointsize", "30"] | |
| 231 | |
| 232 cmd = [ | |
| 233 "convert", | |
| 234 infile, | |
| 235 "-bordercolor", | |
| 236 IMAGE_BORDER_COLOUR, | |
| 237 "-border", | |
| 238 IMAGE_BORDER_COORD, | |
| 239 "-bordercolor", | |
| 240 MONTAGE_BORDER_COLOUR, | |
| 241 "-border", | |
| 242 MONTAGE_BORDER_COORD, | |
| 243 ] | |
| 244 | |
| 245 # Rotate -90, apply row header at bottom. | |
| 246 primary_header = self.i.header | |
| 247 secondary_head = self.i.description | |
| 248 cmd += ( | |
| 249 ["-rotate", "-90",] | |
| 250 + FONT_30pt | |
| 251 + [ | |
| 252 # Side label (i/row) | |
| 253 "-annotate", | |
| 254 "+%s+%s" | |
| 255 % ( | |
| 256 half_height - 0.5 * est_pixels(self.i.header, 30), | |
| 257 original_dims[0] | |
| 258 + MONTAGE_BORDER | |
| 259 + 2 * IMAGE_BORDER | |
| 260 + char_height(30 + 20) | |
| 261 + 10 | |
| 262 # 30 = primary header | |
| 263 # 20 = tick labels | |
| 264 ), | |
| 265 primary_header, | |
| 266 ] | |
| 267 ) | |
| 268 | |
| 269 if est_pixels(self.i.description, 10) < original_dims[1] and secondary_head != "": | |
| 270 cmd += FONT_10pt + [ | |
| 271 # Side label (i/row) | |
| 272 "-annotate", | |
| 273 "+%s+%s" | |
| 274 % ( | |
| 275 half_height - 0.5 * est_pixels(self.i.description, 10), | |
| 276 original_dims[0] | |
| 277 + MONTAGE_BORDER | |
| 278 + 2 * IMAGE_BORDER | |
| 279 + char_height(30 + 20 + 10 + 4) | |
| 280 # 30 = primary header | |
| 281 # 20 = tick labels | |
| 282 # 10 = secondary header height | |
| 283 # 4 = line spacing | |
| 284 ), | |
| 285 secondary_head, | |
| 286 ] | |
| 287 | |
| 288 # Apply row ticks labels at bottom | |
| 289 for z in range(0, self.i.length, i_ticks): | |
| 290 | |
| 291 image_side_percentage = float(z) / self.i.length | |
| 292 x = ( | |
| 293 (image_side_percentage * original_dims[1]) | |
| 294 + MONTAGE_BORDER | |
| 295 + IMAGE_BORDER | |
| 296 ) | |
| 297 y = MONTAGE_BORDER + original_dims[0] + (2 * IMAGE_BORDER) | |
| 298 | |
| 299 # Apply ticks | |
| 300 cmd += NFGS | |
| 301 cmd += [ | |
| 302 "-draw", | |
| 303 "line %s,%s %s,%s" % (x, y, x, y + TICK_LENGTH), | |
| 304 ] | |
| 305 | |
| 306 # Keep text from running off the edge. | |
| 307 space_to_end_of_image = (1 - image_side_percentage) * original_dims[1] | |
| 308 if space_to_end_of_image - est_pixels(self.label_formatter(z), 20) < 0: | |
| 309 continue | |
| 310 | |
| 311 # Text label | |
| 312 cmd += FONT_20pt | |
| 313 cmd += ["-annotate", "+%s+%s" % (x + 5, y + 15), self.label_formatter(z)] | |
| 314 | |
| 315 # Rotate back to final rotation | |
| 316 primary_header = self.j.header | |
| 317 secondary_head = self.j.description | |
| 318 cmd += ( | |
| 319 [ | |
| 320 "-rotate", | |
| 321 "90", | |
| 322 # Top label (j/column) | |
| 323 ] | |
| 324 + FONT_30pt | |
| 325 + [ | |
| 326 "-annotate", | |
| 327 "+%s+%s" | |
| 328 % ( | |
| 329 half_width - 0.5 * est_pixels(self.j.header, 30), | |
| 330 MONTAGE_BORDER - char_height(20 + 10 + 8), | |
| 331 ), | |
| 332 primary_header, | |
| 333 ] | |
| 334 + FONT_10pt | |
| 335 + [ | |
| 336 # Credits | |
| 337 "-annotate", | |
| 338 "+%s+%s" % (2, MONTAGE_BORDER + original_dims[1] + 2 * IMAGE_BORDER), | |
| 339 "\\n" + CREDITS, | |
| 340 ] | |
| 341 ) | |
| 342 | |
| 343 if est_pixels(self.j.description, 10) < original_dims[0] and secondary_head != "": | |
| 344 cmd += FONT_10pt + [ | |
| 345 "-annotate", | |
| 346 "+%s+%s" | |
| 347 % ( | |
| 348 half_width - 0.5 * est_pixels(self.j.description, 10), | |
| 349 MONTAGE_BORDER - char_height(20 + 4) | |
| 350 # 4 = line spacing | |
| 351 ), | |
| 352 secondary_head, | |
| 353 ] | |
| 354 | |
| 355 # Apply col ticks along top | |
| 356 for z in range(0, self.j.length, j_ticks): | |
| 357 image_side_percentage = float(z) / self.j.length | |
| 358 x = ( | |
| 359 (image_side_percentage * original_dims[0]) | |
| 360 + MONTAGE_BORDER | |
| 361 + IMAGE_BORDER | |
| 362 ) | |
| 363 y = MONTAGE_BORDER - 1 | |
| 364 | |
| 365 # Ticks | |
| 366 cmd += NFGS | |
| 367 cmd += [ | |
| 368 "-draw", | |
| 369 "line %s,%s %s,%s" % (x, y, x, y - TICK_LENGTH), | |
| 370 ] | |
| 371 | |
| 372 # Keep text from running off the edge. | |
| 373 space_to_end_of_image = (1 - image_side_percentage) * original_dims[0] | |
| 374 if space_to_end_of_image - est_pixels(self.label_formatter(z), 20) < 0: | |
| 375 continue | |
| 376 | |
| 377 # Text labels | |
| 378 cmd += FONT_20pt | |
| 379 cmd += ["-annotate", "+%s+%s" % (x + 5, y), self.label_formatter(z)] | |
| 380 | |
| 381 cmd.append(outfile) | |
| 382 #tmpFile = open(outfile, "w") | |
| 383 #tmpFile.close() | |
| 384 log.info(subprocess.check_output( ["cp", infile, outfile] )) | |
| 385 log.info(subprocess.list2cmdline(cmd)) | |
| 386 log.info(subprocess.check_output( "ls" )) | |
| 387 log.info(self.tmpdir) | |
| 388 log.info(subprocess.check_output( ["ls", self.tmpdir])) | |
| 389 log.info(outfile[2:]) | |
| 390 log.info("Above was ls\n") | |
| 391 try: | |
| 392 subprocess.check_output(cmd)# + [" 2>&1"]) | |
| 393 except: | |
| 394 log.info("Excepted") | |
| 395 | |
| 396 | |
| 397 | |
| 398 class Misty(object): | |
| 399 """MIST Class for building MIST Plots | |
| 400 """ | |
| 401 | |
| 402 def __init__(self, window=10, zoom=50, matrix="edna", files_path="mist_images"): | |
| 403 self.tmpdir = tempfile.mkdtemp(prefix="cpt.mist3.", dir=".") | |
| 404 self.window = str(window) | |
| 405 self.zoom = zoom | |
| 406 self.matrix = matrix | |
| 407 self.records = [] | |
| 408 self.matrix_data = [] | |
| 409 | |
| 410 # Image storage | |
| 411 self.files_path = files_path | |
| 412 if not os.path.exists(self.files_path): | |
| 413 os.makedirs(self.files_path) | |
| 414 | |
| 415 def _get_record(self, record_id): | |
| 416 for i, record in enumerate(self.records): | |
| 417 if record.id == record_id: | |
| 418 return record | |
| 419 | |
| 420 def _get_record_idx(self, record_id): | |
| 421 for i, record in enumerate(self.records): | |
| 422 if record.id == record_id: | |
| 423 return i | |
| 424 | |
| 425 raise RuntimeError("Could not find record ID=%s" % record_id) | |
| 426 | |
| 427 def register_all_files(self, file_list): | |
| 428 for fasta_file in file_list: | |
| 429 for record in SeqIO.parse(fasta_file, "fasta"): | |
| 430 self.register_record(record) | |
| 431 | |
| 432 def register_record(self, record): | |
| 433 self.records.append(FancyRecord(record, self.tmpdir)) | |
| 434 | |
| 435 def set_matrix(self, matrix): | |
| 436 self.matrix_data = matrix | |
| 437 for i in range(len(self.matrix_data)): | |
| 438 record_i = self._get_record(self.matrix_data[i][0]["i"]) | |
| 439 for j in range(len(self.matrix_data[i])): | |
| 440 record_j = self._get_record(self.matrix_data[i][j]["j"]) | |
| 441 self.matrix_data[i][j]["subplot"] = Subplot( | |
| 442 record_i, record_j, self.tmpdir, self.zoom | |
| 443 ) | |
| 444 | |
| 445 # More processing? | |
| 446 logging.debug("\n" + pprint.pformat(matrix)) | |
| 447 | |
| 448 def generate_matrix(self, mtype="complete"): | |
| 449 matrix = [] | |
| 450 if mtype == "complete": | |
| 451 for i in self.records: | |
| 452 row = [] | |
| 453 for j in self.records: | |
| 454 row.append({"i": i.id, "j": j.id}) | |
| 455 matrix.append(row) | |
| 456 elif mtype == "1vn": | |
| 457 if len(self.records) < 2: | |
| 458 raise RuntimeError("1vN not available for fewer than two sequences") | |
| 459 else: | |
| 460 row = [] | |
| 461 for j in self.records[1:]: | |
| 462 row.append({"i": self.records[0].id, "j": j.id}) | |
| 463 matrix.append(row) | |
| 464 return matrix | |
| 465 | |
| 466 @classmethod | |
| 467 def obtain_image_dimensions(cls, path): | |
| 468 cmd = ["identify", path] | |
| 469 output = subprocess.check_output(cmd, universal_newlines=True) | |
| 470 size = output.split(" ")[3] | |
| 471 (w, h) = size[0 : size.index("+")].split("x") | |
| 472 return (int(w), int(h)) | |
| 473 | |
| 474 @classmethod | |
| 475 def BestTick(cls, largest, mostticks): | |
| 476 # http://stackoverflow.com/a/361687 | |
| 477 minimum = largest / mostticks | |
| 478 magnitude = 10 ** math.floor(math.log(minimum) / math.log(10)) | |
| 479 residual = minimum / magnitude | |
| 480 if residual > 5: | |
| 481 tick = 10 * magnitude | |
| 482 elif residual > 2: | |
| 483 tick = 5 * magnitude | |
| 484 elif residual > 1: | |
| 485 tick = 2 * magnitude | |
| 486 else: | |
| 487 tick = magnitude | |
| 488 return tick | |
| 489 | |
| 490 @classmethod | |
| 491 def resize_image(cls, scale, from_file, to_file): | |
| 492 cmd = ["convert", "-resize", scale, from_file, to_file] | |
| 493 log.debug(" ".join(cmd)) | |
| 494 subprocess.check_call(cmd) | |
| 495 | |
| 496 def get_image_map(self): | |
| 497 image_template = Template( | |
| 498 '<area shape="rect" coords="${x1},${y1},${x2},${y2}" alt="${alt}" href="${href}" />' | |
| 499 ) | |
| 500 imagemap = [] | |
| 501 | |
| 502 j_widths = [] | |
| 503 i_height = [] | |
| 504 | |
| 505 for j in range(len(self.matrix_data[0])): | |
| 506 j_widths.append(self.matrix_data[0][j]["subplot"].get_thumb_dims()[0]) | |
| 507 | |
| 508 for i in range(len(self.matrix_data)): | |
| 509 i_height.append(self.matrix_data[i][0]["subplot"].get_thumb_dims()[1]) | |
| 510 | |
| 511 log.debug(pprint.pformat(j_widths)) | |
| 512 log.debug(pprint.pformat(i_height)) | |
| 513 | |
| 514 def cur_y(i_idx): | |
| 515 return ( | |
| 516 MONTAGE_BORDER | |
| 517 + sum(i_height[0:i_idx]) | |
| 518 + (2 * IMAGE_BORDER * (1 + i_idx)) | |
| 519 ) | |
| 520 | |
| 521 def cur_x(j_idx): | |
| 522 return ( | |
| 523 MONTAGE_BORDER | |
| 524 + sum(j_widths[0:j_idx]) | |
| 525 + (2 * IMAGE_BORDER * (1 + j_idx)) | |
| 526 ) | |
| 527 | |
| 528 for j in range(len(self.matrix_data[0])): | |
| 529 for i in range(len(self.matrix_data)): | |
| 530 current = self.matrix_data[i][j]["subplot"] | |
| 531 # Move to final resting place | |
| 532 new_image_location = current.move_to_dir(self.files_path) | |
| 533 | |
| 534 # Build imagemagp string | |
| 535 imagemap.append( | |
| 536 image_template.substitute( | |
| 537 { | |
| 538 # Start at +image border so the border isn't included in | |
| 539 # start of box | |
| 540 "x1": cur_x(j), | |
| 541 "y1": cur_y(i), | |
| 542 "x2": cur_x(j) + j_widths[j], | |
| 543 "y2": cur_y(i) + i_height[i], | |
| 544 "alt": current.get_description(), | |
| 545 "href": new_image_location, | |
| 546 } | |
| 547 ) | |
| 548 ) | |
| 549 return "\n".join(imagemap) | |
| 550 | |
| 551 def _generate_montage(self): | |
| 552 image_list = [] | |
| 553 for i in range(len(self.matrix_data)): | |
| 554 for j in range(len(self.matrix_data[i])): | |
| 555 subplot = self.matrix_data[i][j]["subplot"] | |
| 556 image_list.append(subplot.thumb_path) | |
| 557 | |
| 558 # Montage step | |
| 559 global MONTAGE_BORDER | |
| 560 global MONTAGE_BORDER_COORD | |
| 561 for rec in self.records: | |
| 562 MONTAGE_BORDER = max(MONTAGE_BORDER, (len(rec.id) * 12) + 4) | |
| 563 MONTAGE_BORDER_COORD = "%sx%s" % (MONTAGE_BORDER, MONTAGE_BORDER) | |
| 564 | |
| 565 m0 = os.path.join(self.tmpdir, "m0.png") | |
| 566 # log.info(subprocess.check_output( ["cp", image_list[0], m0] )) | |
| 567 cmd = ["montage"] + image_list | |
| 568 cmd += [ | |
| 569 "-tile", | |
| 570 "%sx%s" % (len(self.matrix_data[0]), len(self.matrix_data)), | |
| 571 "-geometry", | |
| 572 "+0+0", | |
| 573 "-border", | |
| 574 str(IMAGE_BORDER), | |
| 575 "-bordercolor", | |
| 576 IMAGE_BORDER_COLOUR, | |
| 577 "-font", | |
| 578 TYPEFONT, | |
| 579 m0, | |
| 580 ] | |
| 581 | |
| 582 log.debug(" ".join(cmd)) | |
| 583 try: | |
| 584 subprocess.check_call(cmd) | |
| 585 except: | |
| 586 log.debug("Excepted, 2") | |
| 587 # Add grey borders | |
| 588 montage_path = os.path.join(self.tmpdir, "montage.png") | |
| 589 cmd = [ | |
| 590 "convert", | |
| 591 m0, | |
| 592 "-bordercolor", | |
| 593 IMAGE_BORDER_COLOUR, | |
| 594 "-border", | |
| 595 IMAGE_BORDER_COORD, | |
| 596 "-bordercolor", | |
| 597 MONTAGE_BORDER_COLOUR, | |
| 598 "-border", | |
| 599 MONTAGE_BORDER_COORD, | |
| 600 montage_path, | |
| 601 ] | |
| 602 | |
| 603 log.debug(" ".join(cmd)) | |
| 604 try: | |
| 605 subprocess.check_call(cmd) | |
| 606 except: | |
| 607 log.debug("Excepted, 2") | |
| 608 os.unlink(m0) | |
| 609 return montage_path | |
| 610 | |
| 611 def _annotate_montage(self, base_path): | |
| 612 # Calculate some expected dimension | |
| 613 cumulative_width = 0 | |
| 614 cumulative_height = 0 | |
| 615 for i in range(len(self.matrix_data)): | |
| 616 for j in range(len(self.matrix_data[i])): | |
| 617 subplot = self.matrix_data[i][j]["subplot"] | |
| 618 | |
| 619 if i == 0: | |
| 620 cumulative_width += subplot.get_thumb_dims()[0] + IMAGE_BORDER * 2 | |
| 621 | |
| 622 if j == 0: | |
| 623 cumulative_height += subplot.get_thumb_dims()[1] + IMAGE_BORDER * 2 | |
| 624 | |
| 625 convert_arguments_top = [] | |
| 626 convert_arguments_left = [] | |
| 627 left_offset = cumulative_width + MONTAGE_BORDER | |
| 628 | |
| 629 current_sum_width = MONTAGE_BORDER | |
| 630 current_sum_height = MONTAGE_BORDER | |
| 631 | |
| 632 convert_arguments_top+= [ | |
| 633 "-rotate", | |
| 634 "-90" | |
| 635 ] | |
| 636 # Top side | |
| 637 for j in range(len(self.matrix_data[0])): | |
| 638 subplot = self.matrix_data[0][j]["subplot"] | |
| 639 convert_arguments_top += [ | |
| 640 "-fill", | |
| 641 LABEL_COLOUR, | |
| 642 "-annotate", | |
| 643 "-%s+%s" % (0, str(cumulative_width - current_sum_width -(subplot.get_thumb_dims()[0]/2) + (2 * MONTAGE_BORDER) + IMAGE_BORDER)), | |
| 644 subplot.j.header, | |
| 645 ] | |
| 646 current_sum_width += subplot.get_thumb_dims()[0] + (2 * IMAGE_BORDER) | |
| 647 log.debug("CSW %s", current_sum_width) | |
| 648 convert_arguments_top+= [ | |
| 649 "-rotate", | |
| 650 "90" | |
| 651 ] | |
| 652 # Left side | |
| 653 #convert_arguments_left += [ | |
| 654 # "-rotate", | |
| 655 # "90" | |
| 656 #] | |
| 657 for i in range(len(self.matrix_data)): | |
| 658 subplot = self.matrix_data[i][0]["subplot"] | |
| 659 convert_arguments_left += [ | |
| 660 "-fill", | |
| 661 LABEL_COLOUR, | |
| 662 "-annotate", | |
| 663 "+2+%s" % str(current_sum_height + (subplot.get_thumb_dims()[1]/2.0) + IMAGE_BORDER), | |
| 664 "\n" + subplot.i.header, | |
| 665 ] | |
| 666 current_sum_height += subplot.get_thumb_dims()[1] + (2 * IMAGE_BORDER) | |
| 667 log.debug("CSH %s", current_sum_height) | |
| 668 | |
| 669 cmd = [ | |
| 670 "convert", | |
| 671 base_path, | |
| 672 # "-rotate", | |
| 673 # "-90", | |
| 674 "-pointsize", | |
| 675 "20", | |
| 676 "-font", | |
| 677 TYPEFONT, | |
| 678 ] | |
| 679 cmd += convert_arguments_left | |
| 680 # cmd += ["-rotate", "90"] | |
| 681 cmd += convert_arguments_top | |
| 682 | |
| 683 output_path = os.path.join(self.tmpdir, "large.png") | |
| 684 cmd += [ | |
| 685 "-pointsize", | |
| 686 "14", | |
| 687 "-annotate", | |
| 688 "+2+%s" % str(current_sum_height + 40), | |
| 689 CREDITS, | |
| 690 output_path, | |
| 691 ] | |
| 692 log.debug(" ".join(cmd)) | |
| 693 try: | |
| 694 subprocess.check_call(cmd) | |
| 695 except: | |
| 696 log.debug("Excepted, 3") | |
| 697 #subprocess.check_output(cmd) | |
| 698 return output_path | |
| 699 | |
| 700 def run(self): | |
| 701 # We want the final thumbnail for the overall image to have a max width/height | |
| 702 # of 700px for nice display | |
| 703 total_seqlen_j = 0 | |
| 704 total_seqlen_i = 0 | |
| 705 for j in range(len(self.matrix_data[0])): | |
| 706 total_seqlen_j += self.matrix_data[0][j]["subplot"].j.length | |
| 707 | |
| 708 for i in range(len(self.matrix_data)): | |
| 709 total_seqlen_i += self.matrix_data[i][0]["subplot"].i.length | |
| 710 total_seqlen = max(total_seqlen_i, total_seqlen_j) | |
| 711 rescale = 200 * (700.0 / (float(total_seqlen) / self.zoom)) | |
| 712 rescale_p = "%s%%" % rescale | |
| 713 | |
| 714 # Generate gepard plots for each of the sub-images | |
| 715 for i in range(len(self.matrix_data)): | |
| 716 for j in range(len(self.matrix_data[i])): | |
| 717 subplot = self.matrix_data[i][j]["subplot"] | |
| 718 | |
| 719 # generates _orig and _thumb versions | |
| 720 subplot.run_gepard(self.matrix, self.window, global_rescale=rescale_p) | |
| 721 | |
| 722 base_montage = self._generate_montage() | |
| 723 annotated_montage = self._annotate_montage(base_montage) | |
| 724 | |
| 725 final_montage_path = os.path.join(self.files_path, "large.png") | |
| 726 shutil.move(annotated_montage, final_montage_path) | |
| 727 return final_montage_path | |
| 728 | |
| 729 | |
| 730 def mist_wrapper( | |
| 731 files, | |
| 732 window=10, | |
| 733 zoom=50, | |
| 734 matrix="edna", | |
| 735 plot_type="complete", | |
| 736 files_path="mist_images", | |
| 737 ): | |
| 738 html_page = """ | |
| 739 <!DOCTYPE html> | |
| 740 <html> | |
| 741 <body> | |
| 742 <h1>Mist Results</h1> | |
| 743 <p>Each section of mist output is now clickable to view a higher resolution version of that subsection</p> | |
| 744 <img src="large.png" usemap="#mistmap"> | |
| 745 <map name="mistmap"> | |
| 746 %s | |
| 747 </map> | |
| 748 </body> | |
| 749 </html> | |
| 750 """ | |
| 751 | |
| 752 m = Misty(window=window, zoom=zoom, matrix=matrix, files_path=files_path) | |
| 753 | |
| 754 # There is only one special case, so we handle that separately. Every other | |
| 755 # plot type wants ALL of the sequences available. | |
| 756 if ( | |
| 757 plot_type == "2up" | |
| 758 and len(files) != 2 | |
| 759 and matrix not in ("protidentity", "blosum62") | |
| 760 ): | |
| 761 idx = 0 | |
| 762 # Pull top two sequences. | |
| 763 for fasta_file in files: | |
| 764 for record in SeqIO.parse(fasta_file, "fasta"): | |
| 765 m.register_record(record) | |
| 766 | |
| 767 # Exit after we've seen two sequences | |
| 768 idx += 1 | |
| 769 if idx == 2: | |
| 770 break | |
| 771 else: | |
| 772 m.register_all_files(files) | |
| 773 | |
| 774 # Generate the matrix appropriate to this plot type. There are two matrix | |
| 775 # types: 1vN and complete. 1vN is just a line, complete is a complete | |
| 776 # square. | |
| 777 if plot_type == "complete": | |
| 778 # ALL sequences are used. | |
| 779 m.set_matrix(m.generate_matrix(mtype="complete")) | |
| 780 elif plot_type == "2up": | |
| 781 # Just two sequences. | |
| 782 if len(files) == 2 and matrix in ("protidentity", "blosum62"): | |
| 783 m.set_matrix(m.generate_matrix(mtype="complete")) | |
| 784 else: | |
| 785 # Co-op the 1vn to do a single plot, rather than a "complete" plot | |
| 786 m.set_matrix(m.generate_matrix(mtype="1vn")) | |
| 787 elif plot_type == "1vn": | |
| 788 m.set_matrix(m.generate_matrix(mtype="1vn")) | |
| 789 else: | |
| 790 raise ValueError("Unknown plot type %s" % plot_type) | |
| 791 | |
| 792 m.run() | |
| 793 # image_map will be returned from MIST | |
| 794 image_map = m.get_image_map() | |
| 795 | |
| 796 return html_page % image_map | |
| 797 | |
| 798 | |
| 799 if __name__ == "__main__": | |
| 800 parser = argparse.ArgumentParser(description="MIST v3", epilog="") | |
| 801 parser.add_argument( | |
| 802 "files", nargs="+", type=argparse.FileType("r"), help="Fasta sequences" | |
| 803 ) | |
| 804 | |
| 805 parser.add_argument("--zoom", type=int, help="# bases / pixel", default=50) | |
| 806 parser.add_argument("--window", type=int, help="Window size", default=10) | |
| 807 parser.add_argument( | |
| 808 "--matrix", | |
| 809 type=str, | |
| 810 choices=["ednaorig", "pam250", "edna", "protidentity", "blosum62"], | |
| 811 help="# bases / pixel", | |
| 812 default="edna", | |
| 813 ) | |
| 814 | |
| 815 parser.add_argument( | |
| 816 "--plot_type", | |
| 817 choices=["2up", "1vn", "complete"], | |
| 818 help="Plot type", | |
| 819 default="complete", | |
| 820 ) | |
| 821 | |
| 822 parser.add_argument( | |
| 823 "--files_path", type=str, help="Image directory", default="mist_images" | |
| 824 ) | |
| 825 | |
| 826 args = parser.parse_args() | |
| 827 print(mist_wrapper(**vars(args))) |
