comparison scripts/hyphy_summary.py @ 29:86e10267b4cc draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit ec5db8349483b2cd46f9da23abe6cefcf65bc714"
author iuc
date Sat, 03 Jul 2021 09:00:32 +0000
parents 50c12e254867
children
comparison
equal deleted inserted replaced
28:50c12e254867 29:86e10267b4cc
105 def cfel_summary(self): 105 def cfel_summary(self):
106 self.cfel = self._load_json(self.arguments.cfel) 106 self.cfel = self._load_json(self.arguments.cfel)
107 if self.cfel is None: 107 if self.cfel is None:
108 return 108 return
109 node_tags = {} 109 node_tags = {}
110 _ = self._newick_parser(self.cfel['input']['trees']['0'], False, node_tags, self.cfel)['json'] 110 _ = newick_parser(self.cfel['input']['trees']['0'], False, node_tags, self.cfel, self.arguments, self.labels)['json']
111 if self.summary_json is not None: 111 if self.summary_json is not None:
112 omegas = {} 112 omegas = {}
113 T = {} 113 T = {}
114 for k in [[k.split('*')[1], v[0][0]] for k, v in self.cfel['fits']['Global MG94xREV']['Rate Distributions'].items()]: 114 for k in [[k.split('*')[1], v[0][0]] for k, v in self.cfel['fits']['Global MG94xREV']['Rate Distributions'].items()]:
115 if k[0] != 'background': 115 if k[0] != 'background':
189 189
190 def def_value(): 190 def def_value():
191 return defaultdict(int) 191 return defaultdict(int)
192 compressed_subs = {} 192 compressed_subs = {}
193 node_tags = {} 193 node_tags = {}
194 the_tree = self._newick_parser(self.slac['input']['trees']['0'], False, node_tags, self.slac)['json'] 194 the_tree = newick_parser(self.slac['input']['trees']['0'], False, node_tags, self.slac, self.arguments, self.labels)
195 root_node = None 195 root_node = None
196 if self.summary_json is not None: 196 if self.summary_json is not None:
197 for branch, info in self.slac['branch attributes']['0'].items(): 197 for branch, info in self.slac['branch attributes']['0'].items():
198 if branch in node_tags: 198 if branch in node_tags:
199 node_tags[branch].append(info['Global MG94xREV']) 199 node_tags[branch].append(info['Global MG94xREV'])
208 counts_aa_site = {} 208 counts_aa_site = {}
209 gs = self._get_genomic_annotation(i) 209 gs = self._get_genomic_annotation(i)
210 if gs[0] >= 0: 210 if gs[0] >= 0:
211 self.labels[root_node] = self.slac['branch attributes']['0'][root_node]['codon'][0][i] 211 self.labels[root_node] = self.slac['branch attributes']['0'][root_node]['codon'][0][i]
212 try: 212 try:
213 self._traverse_tree_in_order(the_tree, self.slac['branch attributes']['0'], i, None, root_node) 213 traverse_tree_in_order(the_tree, self.labels, self.slac['branch attributes']['0'], i, None, root_node)
214 except Exception: 214 except Exception:
215 raise 215 pass
216 compressed_subs[gs[0]] = self.labels 216 compressed_subs[gs[0]] = self.labels
217 for k in set([k[0] for k in node_tags.values()]): 217 for k in set([k[0] for k in node_tags.values()]):
218 if len(k): 218 if len(k):
219 counts_codon_site[k] = defaultdict(int) 219 counts_codon_site[k] = defaultdict(int)
220 counts_aa_site[k] = defaultdict(int) 220 counts_aa_site[k] = defaultdict(int)
431 gene_name = 'Not mapped' 431 gene_name = 'Not mapped'
432 else: 432 else:
433 gene_name = 'N/A' 433 gene_name = 'N/A'
434 return (genomic_site_coord, gene_name, gene_site) 434 return (genomic_site_coord, gene_name, gene_site)
435 435
436 def _traverse_tree_in_order(self, node, slac_data, i, parent_tag, root):
437 node_tag = None
438 if node is None:
439 return
440 try:
441 nn = root if node['name'] == 'root' else node['name']
442 except Exception:
443 raise
444 if nn in slac_data:
445 node_tag = slac_data[nn]['codon'][0][i]
446 if (parent_tag != node_tag):
447 self.labels[nn] = node_tag
448 self.labels[node['name']] = node_tag
449 if 'children' in node:
450 for c in node['children']:
451 if c is not None:
452 if 'name' in c:
453 self._traverse_tree_in_order(c, slac_data, i, node_tag, root)
454
455 def _match_node_names(self, qry_node, ref_node, mapping):
456 if 'children' in qry_node and 'children' in ref_node:
457 mapping[ref_node['name']] = qry_node['name']
458 if len(qry_node['children']) != len(ref_node['children']):
459 raise Exception('Internal topology mismatch')
460 for i, n in enumerate(ref_node['children']):
461 self._match_node_names(qry_node['children'][i], n, mapping)
462 elif 'children' in qry_node:
463 raise Exception('Topology mismatch')
464 elif 'children' in ref_node:
465 raise Exception('Topology mismatch')
466 else:
467 if qry_node['name'] != ref_node['name']:
468 raise Exception('Leaf name mismatch')
469
470 def _get_incoming_labels(self): 436 def _get_incoming_labels(self):
471 json_data = self._load_json(self.arguments.labels) 437 json_data = self._load_json(self.arguments.labels)
472 self.incoming_labels = json_data 438 self.incoming_labels = json_data
473 439
474 def _newick_parser(self, nwk_str, bootstrap_values, track_tags, json_map): 440
475 clade_stack = [] 441 def traverse_tree_in_order(node, labels, slac_data, i, parent_tag, root):
476 automaton_state = 0 442 node_tag = None
477 current_node_name = '' 443 if 'name' not in node:
478 current_node_attribute = '' 444 nn = root
479 current_node_annotation = '' 445 else:
480 quote_delimiter = None 446 nn = root if node["name"] == 'root' else node["name"]
481 name_quotes = {"'": 1, '"': 1} 447 if nn in slac_data:
482 448 node_tag = slac_data[nn]["codon"][0][i]
483 def add_new_tree_level(): 449 if (parent_tag != node_tag):
484 new_level = {'name': None} 450 labels[nn] = node_tag
485 the_parent = clade_stack[len(clade_stack) - 1] 451 labels[node["name"]] = node_tag
486 if ('children' not in the_parent): 452 if "children" in node:
487 the_parent['children'] = [] 453 for c in node["children"]:
488 clade_stack.append(new_level) 454 traverse_tree_in_order(c, labels, slac_data, i, node_tag, root)
489 the_parent['children'].append(clade_stack[len(clade_stack) - 1]) 455
490 clade_stack[len(clade_stack) - 1]['original_child_order'] = len(the_parent['children']) 456
491 457 def newick_parser(nwk_str, bootstrap_values, track_tags, json_map, import_settings, tags):
492 def finish_node_definition(): 458 clade_stack = []
493 nonlocal current_node_name 459 automaton_state = 0
494 nonlocal current_node_annotation 460 current_node_name = ""
495 nonlocal current_node_attribute 461 current_node_attribute = ""
496 this_node = clade_stack.pop() 462 current_node_annotation = ""
497 if (bootstrap_values and 'children' in this_node): 463 quote_delimiter = None
498 this_node['bootstrap_values'] = current_node_name 464 name_quotes = {
465 "'": 1,
466 '"': 1
467 }
468
469 def add_new_tree_level():
470 new_level = {
471 "name": None
472 }
473 the_parent = clade_stack[len(clade_stack) - 1]
474 if "children" not in the_parent:
475 the_parent["children"] = []
476
477 clade_stack.append(new_level)
478 the_parent["children"].append(clade_stack[len(clade_stack) - 1])
479 clade_stack[len(clade_stack) - 1]["original_child_order"] = len(the_parent["children"])
480
481 def finish_node_definition():
482 nonlocal current_node_name
483 nonlocal current_node_annotation
484 nonlocal current_node_attribute
485
486 this_node = clade_stack.pop()
487 if (bootstrap_values and "children" in this_node):
488 this_node["bootstrap_values"] = current_node_name
489 else:
490 this_node["name"] = current_node_name
491
492 this_node["attribute"] = current_node_attribute
493 this_node["annotation"] = current_node_annotation
494
495 try:
496
497 if 'children' not in this_node:
498 node_tag = import_settings.default_tag
499 if json_map:
500 tn = json_map["branch attributes"]["0"][this_node["name"]]
501 else:
502 tn = this_node
503 nn = tn["original name"] if "original name" in tn else tn["name"]
504 for k, v in tags.items():
505 if nn.find(k) >= 0:
506 node_tag = v
507 break
499 else: 508 else:
500 this_node['name'] = current_node_name 509 counts = {}
501 this_node['attribute'] = current_node_attribute 510 node_tag = ""
502 this_node['annotation'] = current_node_annotation 511 for n in this_node['children']:
503 try: 512 counts[n["tag"]] = 1 + (counts[n["tag"]] if n["tag"] in counts else 0)
504 if 'children' not in this_node: 513 if len(counts) == 1:
505 node_tag = self.arguments.default_tag 514 node_tag = list(counts.keys())[0]
506 if json_map: 515
507 tn = json_map['branch attributes']['0'][this_node['name']] 516 this_node["tag"] = node_tag
517 except Exception as e:
518 pass
519 print(e)
520
521 if track_tags is not None:
522 track_tags[this_node["name"]] = [this_node["tag"], 'children' in this_node]
523
524 current_node_name = ""
525 current_node_attribute = ""
526 current_node_annotation = ""
527
528 def generate_error(location):
529 return {
530 'json': None,
531 'error':
532 "Unexpected '%s' in '%s[ERROR HERE]%s'" % (nwk_str[location], nwk_str[location - 20:location + 1], nwk_str[location + 1:location + 20])
533 }
534
535 tree_json = {
536 "name": "root"
537 }
538
539 clade_stack.append(tree_json)
540
541 space = re.compile(r"\s")
542
543 for char_index in range(len(nwk_str)):
544 try:
545 current_char = nwk_str[char_index]
546 if automaton_state == 0:
547 # look for the first opening parenthesis
548 if (current_char == "("):
549 add_new_tree_level()
550 automaton_state = 1
551 elif automaton_state == 1 or automaton_state == 3:
552 # case 1: // name
553 # case 3: { // branch length
554 # reading name
555 if (current_char == ":"):
556 automaton_state = 3
557 elif current_char == "," or current_char == ")":
558 try:
559 finish_node_definition()
560 automaton_state = 1
561 if (current_char == ","):
562 add_new_tree_level()
563 except Exception as e:
564 return generate_error(char_index)
565 print(e)
566
567 elif (current_char == "("):
568 if len(current_node_name) > 0:
569 return generate_error(char_index)
508 else: 570 else:
509 tn = this_node
510 nn = tn['original name'] if 'original name' in tn else tn['name']
511 for k, v in self.incoming_labels.items():
512 if nn.find(k) >= 0:
513 node_tag = v
514 break
515 else:
516 counts = {}
517 node_tag = ''
518 for n in this_node['children']:
519 counts[n['tag']] = 1 + (counts[n['tag']] if n['tag'] in counts else 0)
520 if len(counts) == 1:
521 node_tag = list(counts.keys())[0]
522 this_node['tag'] = node_tag
523 except Exception:
524 raise
525 if track_tags is not None:
526 track_tags[this_node['name']] = [this_node['tag'], 'children' in this_node]
527 current_node_name = ''
528 current_node_attribute = ''
529 current_node_annotation = ''
530
531 def generate_error(location):
532 unexpected = nwk_str[location]
533 before = nwk_str[location - 20:location + 1]
534 after = nwk_str[location + 1:location + 20]
535 return {
536 'json': None,
537 'error': 'Unexpected %s in %s [ERROR HERE] %s' % (unexpected, before, after)
538 }
539 tree_json = {'name': 'root'}
540 clade_stack.append(tree_json)
541 space = re.compile(r'\s')
542 for char_index in range(len(nwk_str)):
543 try:
544 current_char = nwk_str[char_index]
545 if automaton_state == 0:
546 # look for the first opening parenthesis
547 if (current_char == '('):
548 add_new_tree_level() 571 add_new_tree_level()
549 automaton_state = 1 572
550 elif automaton_state == 1 or automaton_state == 3: 573 elif (current_char in name_quotes):
551 # case 1: // name 574 if automaton_state == 1 and len(current_node_name) == 0 and len(current_node_attribute) == 0 and len(current_node_annotation) == 0:
552 # case 3: { // branch length 575 automaton_state = 2
553 # reading name 576 quote_delimiter = current_char
554 if (current_char == ':'): 577 continue
555 automaton_state = 3 578 return generate_error(char_index)
556 elif current_char == ',' or current_char == ')': 579 else:
557 try: 580 if (current_char == "["):
558 finish_node_definition() 581 if len(current_node_annotation):
559 automaton_state = 1
560 if (current_char == ','):
561 add_new_tree_level()
562 except Exception:
563 return generate_error(char_index)
564 elif (current_char == '('):
565 if len(current_node_name) > 0:
566 return generate_error(char_index) 582 return generate_error(char_index)
567 else: 583 else:
568 add_new_tree_level() 584 automaton_state = 4
569 elif (current_char in name_quotes): 585 else:
570 if automaton_state == 1 and len(current_node_name) == 0 and len(current_node_attribute) == 0 and len(current_node_annotation) == 0: 586 if (automaton_state == 3):
571 automaton_state = 2 587 current_node_attribute += current_char
572 quote_delimiter = current_char 588 else:
589 if (space.search(current_char)):
590 continue
591 if (current_char == ""):
592 char_index = len(nwk_str)
593 break
594 current_node_name += current_char
595 elif automaton_state == 2:
596 # inside a quoted expression
597 if (current_char == quote_delimiter):
598 if (char_index < len(nwk_str - 1)):
599 if (nwk_str[char_index + 1] == quote_delimiter):
600 char_index += 1
601 current_node_name += quote_delimiter
573 continue 602 continue
603
604 quote_delimiter = 0
605 automaton_state = 1
606 continue
607 else:
608 current_node_name += current_char
609 elif automaton_state == 4:
610 # inside a comment / attribute
611 if (current_char == "]"):
612 automaton_state = 3
613 else:
614 if (current_char == "["):
574 return generate_error(char_index) 615 return generate_error(char_index)
575 else: 616 current_node_annotation += current_char
576 if (current_char == '['): 617 except Exception as e:
577 if len(current_node_annotation): 618 return generate_error(char_index)
578 return generate_error(char_index) 619 print(e)
579 else: 620
580 automaton_state = 4 621 if (len(clade_stack) != 1):
581 else: 622 return generate_error(len(nwk_str) - 1)
582 if (automaton_state == 3): 623
583 current_node_attribute += current_char 624 if (len(current_node_name)):
584 else: 625 tree_json['name'] = current_node_name
585 if (space.search(current_char)): 626
586 continue 627 return {
587 if (current_char == ';'): 628 'json': tree_json,
588 char_index = len(nwk_str) 629 'error': None
589 break 630 }
590 current_node_name += current_char
591 elif automaton_state == 2:
592 # inside a quoted expression
593 if (current_char == quote_delimiter):
594 if (char_index < len(nwk_str - 1)):
595 if (nwk_str[char_index + 1] == quote_delimiter):
596 char_index += 1
597 current_node_name += quote_delimiter
598 continue
599 quote_delimiter = 0
600 automaton_state = 1
601 continue
602 else:
603 current_node_name += current_char
604 elif automaton_state == 4:
605 # inside a comment / attribute
606 if (current_char == ']'):
607 automaton_state = 3
608 else:
609 if (current_char == '['):
610 return generate_error(char_index)
611 current_node_annotation += current_char
612 except Exception:
613 return generate_error(char_index)
614
615 if (len(clade_stack) != 1):
616 return generate_error(len(nwk_str) - 1)
617
618 if (len(current_node_name)):
619 tree_json['name'] = current_node_name
620
621 return {
622 'json': tree_json,
623 'error': None
624 }
625 631
626 632
627 if __name__ == '__main__': 633 if __name__ == '__main__':
628 parser = argparse.ArgumentParser(description='Summarize selection analysis results.') 634 parser = argparse.ArgumentParser(description='Summarize selection analysis results.')
629 parser.add_argument('--combined', help='Combined reference and query alignment from TN-93', required=False, type=str) 635 parser.add_argument('--combined', help='Combined reference and query alignment from TN-93', required=False, type=str)