# HG changeset patch # User qfab # Date 1401855138 14400 # Node ID edf75201278569b59cd8c8c4bd67ac6168f45e91 # Parent 6a7d52777409fd310d6591c6dcaccef6e3f30664 Uploaded diff -r 6a7d52777409 -r edf752012785 metagenomics_datatypes/datatypes_conf.xml --- a/metagenomics_datatypes/datatypes_conf.xml Tue Jun 03 23:25:25 2014 -0400 +++ b/metagenomics_datatypes/datatypes_conf.xml Wed Jun 04 00:12:18 2014 -0400 @@ -12,9 +12,6 @@ - - - @@ -23,7 +20,5 @@ - - diff -r 6a7d52777409 -r edf752012785 metagenomics_datatypes/metagenomics.py --- a/metagenomics_datatypes/metagenomics.py Tue Jun 03 23:25:25 2014 -0400 +++ b/metagenomics_datatypes/metagenomics.py Wed Jun 04 00:12:18 2014 -0400 @@ -313,72 +313,3 @@ Tabular.__init__( self, **kwd ) self.column_names = ['name'] self.columns = 1 - -class Newick( Text ): - """ - The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses. - http://evolution.genetics.washington.edu/phylip/newicktree.html - http://en.wikipedia.org/wiki/Newick_format - Example: - (B,(A,C,E),D); - or example with branch lengths: - (B:6.0,(A:5.0,C:3.0,E:4.0):5.0,D:11.0); - or an example with embedded comments but no branch lengths: - ((a [&&PRIME S=x], b [&&PRIME S=y]), c [&&PRIME S=z]); - Example with named interior noe: - (B:6.0,(A:5.0,C:3.0,E:4.0)Ancestor1:5.0,D:11.0); - """ - file_ext = 'tre' - - def __init__(self, **kwd): - Text.__init__( self, **kwd ) - - def sniff( self, filename ): ## TODO - """ - Determine whether the file is in Newick format - Note: Last non-space char of a tree should be a semicolon: ';' - Usually the first char will be a open parenthesis: '(' - (,,(,)); no nodes are named - (A,B,(C,D)); leaf nodes are named - (A,B,(C,D)E)F; all nodes are named - (:0.1,:0.2,(:0.3,:0.4):0.5); all but root node have a distance to parent - (:0.1,:0.2,(:0.3,:0.4):0.5):0.0; all have a distance to parent - (A:0.1,B:0.2,(C:0.3,D:0.4):0.5); distances and leaf names (popular) - (A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F; distances and all names - ((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A; a tree rooted on a leaf node (rare) - """ - if not os.path.exists(filename): - return False - try: - ## For now, guess this is a Newick file if it starts with a '(' and ends with a ';' - flen = os.path.getsize(filename) - fh = open( filename ) - len = min(flen,2000) - # check end of the file for a semicolon - fh.seek(-len,os.SEEK_END) - buf = fh.read(len).strip() - buf = buf.strip() - if not buf.endswith(';'): - return False - # See if this starts with a open parenthesis - if len < flen: - fh.seek(0) - buf = fh.read(len).strip() - if buf.startswith('('): - return True - except: - pass - finally: - close(fh) - return False - -class Nhx( Newick ): - """ - New Hampshire eXtended Newick with embedded - The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses. - http://evolution.genetics.washington.edu/phylip/newicktree.html - http://en.wikipedia.org/wiki/Newick_format - Example: - (gene1_Hu[&&NHX:S=Hu_Homo_sapiens], (gene2_Hu[&&NHX:S=Hu_Homo_sapiens], gene2_Mu[&&NHX:S=Mu_Mus_musculus])); - """ - file_ext = 'nhx'