# HG changeset patch
# User nml
# Date 1576594432 18000
# Node ID f8847f5a5491b8d0740fe6edc00cb2b96a9e12bd
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
diff -r 000000000000 -r f8847f5a5491 patrist.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patrist.py Tue Dec 17 09:53:52 2019 -0500
@@ -0,0 +1,151 @@
+import argparse
+import os
+import sys
+
+from Bio import Phylo
+
+
+def walk_up(tips, curnode, pathlen, cutoff):
+ """
+ Recursive function for traversing up a tree.
+ """
+ pathlen += curnode.branch_length
+ if pathlen < cutoff:
+ if curnode.is_terminal():
+ tips.append((curnode.name, pathlen))
+ else:
+ for c in curnode.clades:
+ tips = walk_up(tips, c, pathlen, cutoff)
+ return tips
+
+
+def walk_trunk(curnode, cutoff, parents):
+ """
+ Find all tips in the tree that are within a threshold distance
+ of a reference tip.
+ """
+ # first go down to parent and up other branch
+ tips = []
+ pathlen = curnode.branch_length # 0.0184788
+ p = parents[curnode]
+
+ for c in p.clades:
+ if c == curnode:
+ continue
+ if c.is_terminal():
+ if pathlen + c.branch_length < cutoff:
+ tips.append((c.name, pathlen+c.branch_length))
+ else:
+ tips.extend(walk_up([], c, pathlen, cutoff))
+
+ # next walk down trunk until path length exceeds cutoff or hit root
+ while p in parents:
+ curnode = p
+ pathlen += p.branch_length # + 0.0104047
+ p = parents[curnode]
+ if pathlen >= cutoff:
+ break
+ for c in p.clades:
+ if c == curnode:
+ continue
+ if c.is_terminal():
+ if pathlen + c.branch_length < cutoff: # + 0.0503079
+ tips.append((c.name, pathlen+c.branch_length))
+ else:
+ tips.extend(walk_up([], c, pathlen, cutoff))
+ return tips
+
+
+def find_short_edges(tree, cutoff, keep_ties=True,
+ minimize=False, returnlist=False):
+ """
+ Find the shortest edge from the earliest sequence of a patient to a
+ any sequence from any other patient.
+ minimize = keep only edge from earliest seq to the closest other seq
+ keep_ties = [to be used in conjunction with minimize]
+ report all edges with the same minimum distance
+ """
+
+ # generate dictionary of child->parent associations
+ parents = {}
+ for clade in tree.find_clades(order='level'):
+ for child in clade:
+ parents.update({child: clade})
+
+ tips = tree.get_terminals()
+ res = {}
+ for tip1 in tips:
+ # find the shortest distance in sequences that "cluster" with this one
+ min_dist = 99999.
+ tip2 = []
+ for tipname, dist in walk_trunk(tip1, cutoff, parents):
+ if minimize and dist < min_dist:
+ min_dist = dist
+ tip2 = [[tipname, dist]]
+ else:
+ tip2.append([tipname, dist])
+
+ t1 = tip1.name
+ for t2, dist in tip2:
+ # sort tip names in lexico order
+ key = (t1, t2) if t1 < t2 else (t2, t1)
+ if key in res:
+ continue
+ res.update({key: dist})
+ if minimize and keep_ties:
+ # output only one edge
+ break
+
+ if returnlist:
+ reslist = []
+ for key, dist in res.iteritems():
+ reslist.append((key[0], key[1], dist))
+ return reslist
+
+ return res
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description='Generate clusters of tips from a tree that have'
+ ' a path length within a maximum distance of each other.'
+ )
+ parser.add_argument('tree', help=' file '
+ 'containing Newick tree string.')
+ parser.add_argument('cutoff', type=float, help='Maximum '
+ 'patristic distance.')
+ parser.add_argument('outfile', default=None, help='