annotate patrist.py @ 0:f8847f5a5491 draft default tip

"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
author nml
date Tue, 17 Dec 2019 09:53:52 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
1 import argparse
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
2 import os
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
3 import sys
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
4
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
5 from Bio import Phylo
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
6
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
7
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
def walk_up(tips, curnode, pathlen, cutoff):
    """
    Collect the terminal descendants of a clade that lie within a cutoff.

    Starting from `curnode`, branch lengths are accumulated onto `pathlen`
    while descending through `clades`.  A subtree is abandoned as soon as
    the accumulated length is no longer strictly below `cutoff`.  Every
    terminal that is reached is appended to `tips` as a
    (name, accumulated length) tuple, in depth-first, left-to-right order.

    The list passed in as `tips` is extended in place and also returned.
    """
    # Explicit stack of (node, length accumulated before entering node).
    pending = [(curnode, pathlen)]
    while pending:
        node, travelled = pending.pop()
        travelled += node.branch_length
        if travelled >= cutoff:
            # too far away already; nothing below this node can qualify
            continue
        if node.is_terminal():
            tips.append((node.name, travelled))
            continue
        # push children right-to-left so the leftmost child is visited first
        for child in reversed(node.clades):
            pending.append((child, travelled))
    return tips
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
20
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
21
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
def _collect_sibling_tips(tips, parent, skip, pathlen, cutoff):
    """
    Append to `tips` every tip under `parent` (other than via `skip`)
    whose distance, starting from `pathlen`, stays below `cutoff`.
    """
    for c in parent.clades:
        if c == skip:
            # this is the branch we arrived from
            continue
        if c.is_terminal():
            dist = pathlen + c.branch_length
            if dist < cutoff:
                tips.append((c.name, dist))
        else:
            tips.extend(walk_up([], c, pathlen, cutoff))


def walk_trunk(curnode, cutoff, parents):
    """
    Find all tips in the tree that are within a threshold distance
    of a reference tip.

    curnode = the reference tip
    cutoff = path lengths must be strictly below this value
    parents = dictionary mapping each child clade to its parent clade;
              the root is the only clade that is not a key

    Returns a list of (tip name, path length) tuples.  An empty list is
    returned when `curnode` has no entry in `parents` (e.g. the tree
    consists of a single node), since there is no other tip to reach.
    """
    tips = []
    p = parents.get(curnode)
    if p is None:
        # reference tip is the root itself: nothing else to compare against
        return tips

    # length of the path from the reference tip down to node p
    pathlen = curnode.branch_length

    while True:
        # go up every other branch hanging off the current trunk node
        _collect_sibling_tips(tips, p, curnode, pathlen, cutoff)

        # next walk down trunk until path length exceeds cutoff or hit root
        if p not in parents:
            break
        curnode = p
        pathlen += curnode.branch_length
        p = parents[curnode]
        if pathlen >= cutoff:
            break

    return tips
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
57
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
58
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
def find_short_edges(tree, cutoff, keep_ties=True,
                     minimize=False, returnlist=False):
    """
    Find pairs of tips in a tree that are separated by a path length
    below a cutoff.

    For every terminal of `tree`, walk_trunk() is used to gather the other
    tips whose path length from it is below `cutoff`.  Each pair is stored
    once, keyed by the two tip names in lexicographic order; the first
    distance recorded for a pair is kept.

    minimize = keep only the edge(s) from each tip to its closest
               neighbour(s)
    keep_ties = [to be used in conjunction with minimize]
                report all edges with the same minimum distance; when
                False, at most one nearest neighbour is considered per tip
    returnlist = return a list of (tip1, tip2, dist) tuples instead of a
                 dictionary {(tip1, tip2): dist}
    """

    # generate dictionary of child->parent associations
    parents = {}
    for clade in tree.find_clades(order='level'):
        for child in clade:
            parents[child] = clade

    res = {}
    for tip1 in tree.get_terminals():
        # sequences that "cluster" with this one
        neighbours = list(walk_trunk(tip1, cutoff, parents))

        if minimize and neighbours:
            # keep only the neighbours at the shortest distance ...
            min_dist = min(dist for _, dist in neighbours)
            neighbours = [(name, dist) for name, dist in neighbours
                          if dist == min_dist]
            if not keep_ties:
                # ... and only the first of those when ties are not wanted
                neighbours = neighbours[:1]

        t1 = tip1.name
        for t2, dist in neighbours:
            # sort tip names in lexico order
            key = (t1, t2) if t1 < t2 else (t2, t1)
            # an edge already reported from the other end is left untouched
            res.setdefault(key, dist)

    if returnlist:
        # dict.items() works on both Python 2 and 3 (iteritems is 2-only)
        return [(key[0], key[1], dist) for key, dist in res.items()]

    return res
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
106
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
107
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
def main():
    """
    Command-line entry point.

    Parses the arguments, then for every Newick tree in the input file
    writes one CSV row per pair of tips returned by find_short_edges().
    Exits with a non-zero status if the cutoff is not greater than 0, or
    if the output file already exists and --overwrite was not given.
    """
    parser = argparse.ArgumentParser(
        description='Generate clusters of tips from a tree that have'
                    ' a path length within a maximum distance of each other.'
    )
    parser.add_argument('tree', help='<input> file '
                                     'containing Newick tree string.')
    parser.add_argument('cutoff', type=float, help='Maximum '
                                                   'patristic distance.')
    parser.add_argument('outfile', default=None, help='<output> file to '
                                                      'write results '
                                                      'in CSV format.')
    parser.add_argument('--minimize', help='Report no more than '
                                           'one nearest neighbour per tip.',
                        action='store_true')
    parser.add_argument('--keep_ties', help='If more than one '
                                            'tip has the same patristic distance, '
                                            'report all as nearest neighbours.',
                        action='store_true')
    parser.add_argument('--overwrite', help='Overwrite existing '
                                            'output file.',
                        action='store_true')
    args = parser.parse_args()

    # Explicit check instead of assert: asserts are stripped under -O.
    # Written as "not > 0" so that NaN is rejected as well.
    if not args.cutoff > 0:
        parser.error('Cutoff %f must be greater than 0.' % (args.cutoff))

    if os.path.exists(args.outfile) and not args.overwrite:
        # message goes to stderr and the exit status is non-zero
        sys.exit('Output file %s already exists, use --overwrite.'
                 % args.outfile)

    # the with-block closes the file even if parsing a tree fails
    with open(args.outfile, 'w') as outfile:
        # NOTE(review): the header names five columns but each row below
        # has four (no is.tie value) -- confirm the intended format.
        outfile.write('tree,tip1,tip2,dist,is.tie\n')

        trees = Phylo.parse(args.tree, 'newick')
        for treenum, tree in enumerate(trees):
            # forward the command-line switches so they take effect
            results = find_short_edges(tree, args.cutoff,
                                       keep_ties=args.keep_ties,
                                       minimize=args.minimize)
            for key, dist in results.items():
                outfile.write('%d,%s,%s,%f\n'
                              % (treenum, key[0], key[1], dist))
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
148
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
149
f8847f5a5491 "planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff changeset
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()