Mercurial > repos > nml > patrist
annotate patrist.py @ 0:f8847f5a5491 draft default tip
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
author | nml |
---|---|
date | Tue, 17 Dec 2019 09:53:52 -0500 |
parents | |
children |
rev | line source |
---|---|
0
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
1 import argparse |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
2 import os |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
3 import sys |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
4 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
5 from Bio import Phylo |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
6 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
7 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
def walk_up(tips, curnode, pathlen, cutoff):
    """
    Collect the tips found at or below `curnode` that lie within `cutoff`.

    `pathlen` is the distance accumulated before reaching `curnode`; the
    branch length of `curnode` itself is added here.  Every terminal node
    whose accumulated distance is strictly less than `cutoff` is appended
    to `tips` as a `(name, distance)` tuple.  A subtree is abandoned as
    soon as the accumulated distance is no longer below the cutoff.

    The list passed in as `tips` is extended in place and also returned.
    """
    travelled = pathlen + curnode.branch_length

    # Written as "not <" (rather than ">=") so that a NaN distance is
    # rejected exactly as a plain `travelled < cutoff` test would reject it.
    if not travelled < cutoff:
        return tips

    if curnode.is_terminal():
        tips.append((curnode.name, travelled))
        return tips

    for child in curnode.clades:
        tips = walk_up(tips, child, travelled, cutoff)
    return tips
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
20 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
21 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
def _collect_side_branches(tips, parent, skip, pathlen, cutoff):
    """
    Append to `tips` the tips hanging off `parent` that are within `cutoff`.

    Every child of `parent` except `skip` (the child we arrived from) is
    examined; `pathlen` is the distance already travelled from the
    reference tip to `parent`.
    """
    for c in parent.clades:
        if c == skip:
            continue
        if c.is_terminal():
            dist = pathlen + c.branch_length
            if dist < cutoff:
                tips.append((c.name, dist))
        else:
            tips.extend(walk_up([], c, pathlen, cutoff))


def walk_trunk(curnode, cutoff, parents):
    """
    Find all tips in the tree that are within a threshold distance
    of a reference tip.

    curnode = the reference tip
    cutoff = a tip is reported only if its path length from the reference
             tip is strictly less than this value
    parents = dictionary mapping each non-root node to its parent node

    Returns a list of (tip name, path length) tuples.  The reference tip
    itself is never reported.  A node with no entry in `parents` (e.g. the
    only node of a single-tip tree) has no neighbours, so an empty list is
    returned for it.
    """
    tips = []
    if curnode not in parents:
        # nothing to walk towards; avoids a KeyError on parents[curnode]
        return tips

    # first go down to parent and up other branch
    pathlen = curnode.branch_length
    p = parents[curnode]
    _collect_side_branches(tips, p, curnode, pathlen, cutoff)

    # next walk down trunk until path length exceeds cutoff or hit root
    while p in parents:
        curnode = p
        pathlen += p.branch_length
        p = parents[curnode]
        if pathlen >= cutoff:
            # everything beyond this point is farther away still
            break
        _collect_side_branches(tips, p, curnode, pathlen, cutoff)
    return tips
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
57 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
58 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
def find_short_edges(tree, cutoff, keep_ties=True,
                     minimize=False, returnlist=False):
    """
    Find the edges between tips whose path length is below `cutoff`.

    tree = tree object providing find_clades() and get_terminals()
    cutoff = maximum path length (exclusive) for an edge to be reported
    minimize = for each tip, keep only the edge(s) to its closest
               neighbour instead of every neighbour within the cutoff
    keep_ties = [to be used in conjunction with minimize]
                if True, report all neighbours that share the same
                minimum distance; if False, report only the first one
    returnlist = return a list of (tip1, tip2, dist) tuples instead of a
                 dictionary keyed by (tip1, tip2)

    Tip names within each edge are ordered lexicographically so that an
    edge discovered from either end is stored only once.
    """

    # generate dictionary of child->parent associations
    parents = {}
    for clade in tree.find_clades(order='level'):
        for child in clade:
            parents[child] = clade

    res = {}
    for tip1 in tree.get_terminals():
        # all tips that "cluster" with this one, as (name, dist) tuples
        neighbours = walk_trunk(tip1, cutoff, parents)

        if minimize and neighbours:
            # keep only the neighbour(s) at the shortest distance; anything
            # farther away must not be reported when minimizing
            min_dist = min(dist for _, dist in neighbours)
            neighbours = [(name, dist) for name, dist in neighbours
                          if dist == min_dist]
            if not keep_ties:
                # output only one edge for this tip
                neighbours = neighbours[:1]

        t1 = tip1.name
        for t2, dist in neighbours:
            # sort tip names in lexico order
            key = (t1, t2) if t1 < t2 else (t2, t1)
            # an edge already recorded from the other end is left untouched
            res.setdefault(key, dist)

    if returnlist:
        # dict.items() rather than the Python 2-only dict.iteritems()
        return [(key[0], key[1], dist) for key, dist in res.items()]

    return res
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
106 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
107 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
def main():
    """
    Command-line entry point.

    Parses the arguments, reads every Newick tree in the input file and
    writes one CSV row per pair of tips whose path length is below the
    cutoff.  Exits with a usage error if the cutoff is not positive, and
    with status 1 if the output file exists and --overwrite was not given.
    """
    parser = argparse.ArgumentParser(
        description='Generate clusters of tips from a tree that have'
                    ' a path length within a maximum distance of each other.'
    )
    parser.add_argument('tree', help='<input> file '
                                     'containing Newick tree string.')
    parser.add_argument('cutoff', type=float, help='Maximum '
                                                   'patristic distance.')
    parser.add_argument('outfile', default=None, help='<output> file to '
                                                      'write results '
                                                      'in CSV format.')
    parser.add_argument('--minimize', help='Report no more than '
                                           'one nearest neighbour per tip.',
                        action='store_true')
    parser.add_argument('--keep_ties', help='If more than one '
                                            'tip has the same patristic distance, '
                                            'report all as nearest neighbours.',
                        action='store_true')
    parser.add_argument('--overwrite', help='Overwrite existing '
                                            'output file.',
                        action='store_true')
    args = parser.parse_args()

    # Explicit check instead of `assert`, which is removed under `python -O`.
    # "not >" also rejects NaN.
    if not args.cutoff > 0:
        parser.error('Cutoff %f must be greater than 0.' % (args.cutoff))

    if os.path.exists(args.outfile) and not args.overwrite:
        print ('Output file', args.outfile, 'already exists, use --overwrite.')
        # non-zero status so that callers can tell nothing was written
        sys.exit(1)

    # the context manager closes the file even if parsing a tree fails
    with open(args.outfile, 'w') as outfile:
        # NOTE(review): the header names five columns but each row below
        # has four (no is.tie value is ever written) - confirm which side
        # downstream consumers rely on before changing either.
        outfile.write('tree,tip1,tip2,dist,is.tie\n')

        trees = Phylo.parse(args.tree, 'newick')
        for treenum, tree in enumerate(trees):
            # forward the command-line switches; they were parsed but
            # previously never reached find_short_edges
            results = find_short_edges(tree, args.cutoff,
                                       keep_ties=args.keep_ties,
                                       minimize=args.minimize)
            for key, dist in results.items():
                outfile.write('%d,%s,%s,%f\n' % (treenum, key[0], key[1], dist))
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
148 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
149 |
f8847f5a5491
"planemo upload for repository https://github.com/phac-nml/patrist commit f64cb2a8399f83d8c025c8efdc3c3eec72922a7d"
nml
parents:
diff
changeset
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()