Mercurial > repos > earlhaminst > ete
annotate ete_genetree_splitter.py @ 8:16e925bf567e draft
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit ed32f2e6d8174873cefcbe141084f857f84b0586"
author | earlhaminst |
---|---|
date | Thu, 31 Oct 2019 07:48:59 -0400 |
parents | 6a5282f71f82 |
children | b29ee6a16524 |
rev | line source |
---|---|
3
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
1 from __future__ import print_function |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
2 |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
3 import optparse |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
4 |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
5 from ete3 import PhyloTree |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
6 |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
7 |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
def main():
    """Split a gene tree into subtrees at its duplication nodes.

    Command-line entry point. Reads the gene tree named by ``--genetree``,
    optionally reconciles it with the species tree named by
    ``--speciestree`` (only when ``--gainlose`` is given), and writes every
    subtree returned by ``split_by_dups()`` to ``<n>_genetree.nhx`` in the
    current working directory, where ``n`` counts from 1.

    Exits through ``parser.error`` when ``--genetree`` is missing, or when
    ``--gainlose`` is given without ``--speciestree``.
    """
    usage = "usage: %prog --genetree <genetree-file> --speciestree <speciestree-file> [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--genetree', help='GeneTree in nhx format')
    parser.add_option('--speciestree', help='Species Tree in nhx format')
    parser.add_option('--species_format', type='int', default=8, help='Species Tree input format (0-9)')
    parser.add_option('--gene_node', type='int', default=0, help='Gene node format 0=gene_species, 1=species_gene')
    parser.add_option('--gainlose', action='store_true', default=False, help='Find out gene gain/lose')
    parser.add_option('--output_format', type='int', default=9, help='GeneTree output format (0-9)')
    options, _ = parser.parse_args()

    # Validate the whole command line before touching any file, so that a
    # missing option is reported immediately instead of after tree parsing.
    if options.genetree is None:
        parser.error("--genetree option must be specified, GeneTree in nhx format")
    if options.gainlose and options.speciestree is None:
        parser.error("--speciestree option must be specified, species tree in nhx format")

    with open(options.genetree, 'r') as genetree_file:
        contents = genetree_file.read()

    # Remove empty NHX features that can be produced by TreeBest but break ete3
    contents = contents.replace('[&&NHX]', '')

    # Reads the single gene tree from the cleaned Newick/NHX text.
    genetree = PhyloTree(contents)

    # Sets the species naming function for gene_species leaf names; for any
    # other --gene_node value no naming function is set here.
    if options.gene_node == 0:
        genetree.set_species_naming_function(parse_sp_name)

    # Reconcile the species tree with the gene tree to help find out gene gain/lose
    if options.gainlose:
        speciestree = PhyloTree(options.speciestree, format=options.species_format)

        # Remove '*' from species names; it comes from species trees
        # configured for TreeBest.
        for leaf in speciestree:
            leaf.name = leaf.name.strip('*')

        # reconcile() returns a pair; only the reconciled tree is used below.
        genetree, _ = genetree.reconcile(speciestree)

    # split_by_dups() yields the subtrees obtained by splitting the current
    # tree at its duplication nodes; each one goes to its own numbered file.
    for cluster_id, node in enumerate(genetree.split_by_dups(), 1):
        outfile = str(cluster_id) + '_genetree.nhx'
        with open(outfile, 'w') as out:
            out.write(node.write(format=options.output_format))
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
55 |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
56 |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
def parse_sp_name(node_name):
    """Return the species part of a ``gene_species`` leaf name.

    The name is split on "_" and the second field is returned, so
    ``"gene_species"`` yields ``"species"`` and ``"a_b_c"`` yields ``"b"``.

    Args:
        node_name: leaf name as a string.

    Returns:
        The text between the first and second "_" (or up to the end of the
        name when there is only one "_").

    Raises:
        IndexError: if ``node_name`` contains no "_"; the message names the
            offending node so the malformed leaf can be located.
    """
    fields = node_name.split("_")
    if len(fields) < 2:
        # Same exception type as plain indexing would raise, but with a
        # message that identifies the bad leaf name.
        raise IndexError(
            "cannot extract species from node name %r: expected the "
            "gene_species layout" % (node_name,)
        )
    return fields[1]
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
59 |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
60 |
077021c45b96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit a22e605b871c2185e98d89598aebb2fa3a82bb8f
earlhaminst
parents:
diff
changeset
|
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()