Previous changeset 0:4f3fe44edb3f (2012-03-29) Next changeset 2:1f80b01e1490 (2012-06-06) |
Commit message:
Initial commit of metaphlan_to_phyloxml converter. |
added:
metaphlan_to_phyloxml.py metaphlan_to_phyloxml.xml |
b |
diff -r 4f3fe44edb3f -r 016f6375aadc metaphlan_to_phyloxml.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metaphlan_to_phyloxml.py Tue Apr 03 16:30:27 2012 -0600 |
[ |
#!/usr/bin/env python

"""
Read MetaPhlAn output summarizing taxonomic distribution and write it in
PhyloXML format.

usage: %prog metaphlan.txt phylo.xml
"""

import sys
from xml.sax.saxutils import escape

# MetaPhlAn output looks like:
# k__Bacteria 99.07618
# k__Archaea 0.92382
# k__Bacteria|p__Proteobacteria 82.50732
# k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria 81.64905

# Maps the three-character prefix of each clade name to the rank name
# written into the <rank> element.
rank_map = {
    'k__': 'kingdom',
    'p__': 'phylum',
    'c__': 'class',
    'o__': 'order',
    'f__': 'family',
    'g__': 'genus',
    's__': 'species',
}


class Node(object):
    """Node in a taxonomy.

    rank     -- rank name taken from ``rank_map`` (None for the root)
    name     -- clade name without its rank prefix (None for the root)
    value    -- abundance attached to this node, or None if never set
    children -- dict mapping child name -> child Node
    """

    def __init__(self, rank=None, name=None):
        self.rank = rank
        self.name = name
        self.value = None
        self.children = dict()

    @staticmethod
    def from_metaphlan_file(file):
        """
        Build a tree from MetaPhlAn output.

        `file` is any iterable of text lines, each holding a `|`-separated
        lineage and an abundance separated by whitespace. Blank lines and
        lines starting with `#` are skipped. Returns the unnamed root Node.

        Raises ValueError for a line that does not have exactly two fields
        and KeyError for a clade whose prefix is not in `rank_map`.
        """
        root = Node()
        for line_number, line in enumerate(file, 1):
            stripped = line.strip()
            # A clade always starts with a rank prefix, so a leading '#'
            # can only be a comment/header line.
            if not stripped or stripped.startswith("#"):
                continue
            fields = stripped.split()
            if len(fields) != 2:
                raise ValueError(
                    "line %d: expected '<lineage> <abundance>', got %r"
                    % (line_number, stripped))
            taxa, abundance = fields
            root.add(taxa.split("|"), abundance)
        return root

    def add(self, parts, value):
        """
        Parts is a list of node names (each with its rank prefix). Descend
        through them, creating missing nodes on the way, and attach the
        value to the node for the last part. An empty list attaches the
        value to this node.

        The list passed in is left unmodified. Raises KeyError when a part
        has a prefix that is not in `rank_map`.
        """
        node = self
        for part in parts:
            rank = rank_map[part[:3]]
            name = part[3:]
            child = node.children.get(name)
            if child is None:
                child = Node(rank, name)
                node.children[name] = child
            node = child
        node.value = value

    def __str__(self):
        """Return the subtree as nested `(child,child):name` text."""
        # The root has no name; render it as an empty string instead of
        # failing on None.
        name = self.name if self.name is not None else ""
        if self.children:
            inner = ",".join(str(child) for child in self.children.values())
            return "(" + inner + "):" + name
        return name

    def to_phyloxml(self, out):
        """
        Write this node and its descendants to `out` (any object with a
        `write` method) as nested PhyloXML <clade> elements.
        """
        out.write("<clade>\n")
        if self.name:
            # Escape so that '&', '<' or '>' in a name cannot break the XML.
            safe_name = escape(self.name)
            out.write("<name>%s</name>\n" % safe_name)
            out.write(
                "<taxonomy><scientific_name>%s</scientific_name>"
                "<rank>%s</rank></taxonomy>\n" % (safe_name, self.rank))
        if self.value is not None:
            out.write(
                "<property datatype='xsd:float' ref='metaphlan:abundance' "
                "applies_to='node'>%s</property>\n" % escape(str(self.value)))
        for child in self.children.values():
            child.to_phyloxml(out)
        out.write("</clade>\n")


def main(argv=None):
    """Convert the MetaPhlAn file argv[1] into the PhyloXML file argv[2]."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit("usage: metaphlan_to_phyloxml.py metaphlan.txt phylo.xml")

    # Parse the input completely before touching the output, so a bad input
    # does not leave a truncated XML file behind.
    with open(argv[1]) as infile:
        tree = Node.from_metaphlan_file(infile)

    with open(argv[2], 'w') as out:
        out.write('<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd">\n')
        out.write('<phylogeny rooted="true">\n')
        tree.to_phyloxml(out)
        out.write('</phylogeny>\n')
        out.write('</phyloxml>\n')


if __name__ == "__main__":
    main()
b |
diff -r 4f3fe44edb3f -r 016f6375aadc metaphlan_to_phyloxml.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metaphlan_to_phyloxml.xml Tue Apr 03 16:30:27 2012 -0600 |
b |
<tool id="meta_to_phylo" name="MetaPhlAn to PhyloXML" version="1.0.0">
    <!-- Galaxy wrapper around metaphlan_to_phyloxml.py: one tabular input, one XML output. -->
    <description>Converter</description>
    <!-- Paths are quoted so that spaces or shell metacharacters in them cannot split the arguments. -->
    <command interpreter="python">
metaphlan_to_phyloxml.py '$input' '$output'
    </command>
    <inputs>
        <param name="input" type="data" format="tabular" label="Input MetaPhlAn File"/>
    </inputs>
    <outputs>
        <data format="xml" name="output" label="${tool.name} on ${on_string}" />
    </outputs>
    <tests>
    </tests>
    <help>
MetaPhlAn to PhyloXML Converter

Reads a MetaPhlAn result file, in which each line holds a ``|``-separated
lineage (for example ``k__Bacteria|p__Proteobacteria``) followed by its
abundance, and writes the taxonomy as a PhyloXML tree. Each clade carries its
name and rank, and the abundance is stored as a ``metaphlan:abundance``
property on the corresponding node.
    </help>
</tool>