Mercurial > repos > jjohnson > mothur_toolsuite
diff mothur/lib/galaxy/datatypes/metagenomics.py @ 17:57df76d861e4
Modifications for ToolShed proprietary data types
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 17 Jan 2012 11:08:15 -0600 |
parents | 541e3c97c240 |
children | bfbaf823be4c |
line wrap: on
line diff
--- a/mothur/lib/galaxy/datatypes/metagenomics.py Fri Dec 09 12:12:16 2011 -0600 +++ b/mothur/lib/galaxy/datatypes/metagenomics.py Tue Jan 17 11:08:15 2012 -0600 @@ -4,18 +4,19 @@ for Mothur """ -import data -import logging, os, sys, time, tempfile, shutil, string, glob, re +import logging, os, os.path, sys, time, tempfile, shutil, string, glob, re import galaxy.model +from galaxy.datatypes import data +from galaxy.datatypes.sniff import * from galaxy.datatypes import metadata from galaxy.datatypes import tabular from galaxy.datatypes import sequence from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.data import Text from galaxy.datatypes.tabular import Tabular from galaxy.datatypes.sequence import Fasta from galaxy import util from galaxy.datatypes.images import Html -from sniff import * log = logging.getLogger(__name__) @@ -342,7 +343,7 @@ Tabular.__init__( self, **kwd ) self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] -class DistanceMatrix(data.Text): +class DistanceMatrix( Text ): file_ext = 'dist' """Add metadata elements""" MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) @@ -403,7 +404,7 @@ """Initialize secondary structure map datatype""" Tabular.__init__( self, **kwd ) def init_meta( self, dataset, copy_from=None ): - data.Text.init_meta( self, dataset, copy_from=copy_from ) + Text.init_meta( self, dataset, copy_from=copy_from ) def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): dataset.metadata.sequences = 0 @@ -543,7 +544,7 @@ self.column_names = ['name'] self.columns = 1 -class Oligos( data.Text ): +class Oligos( Text ): file_ext = 'oligos' def sniff( self, filename ): @@ -699,7 +700,7 @@ self.masked = True self.filtered = True -class LaneMask(data.Text): +class LaneMask(Text): file_ext = 'filter' def sniff( self, filename ): @@ -872,7 +873,7 @@ Tabular.__init__( self, **kwd ) self.column_names = 
['taxlevel','rankID','taxon','daughterlevels','total']
 
-class Phylip(data.Text):
+class Phylip(Text):
     file_ext = 'phy'
 
     def sniff( self, filename ):
@@ -1033,6 +1034,124 @@
             out = "Can't create peek %s" % str( exc )
         return out
 
+class Newick( Text ):
+    """
+    The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses.
+    http://evolution.genetics.washington.edu/phylip/newicktree.html
+    http://en.wikipedia.org/wiki/Newick_format
+    Example:
+    (B,(A,C,E),D);
+    or example with branch lengths:
+    (B:6.0,(A:5.0,C:3.0,E:4.0):5.0,D:11.0);
+    or an example with embedded comments but no branch lengths:
+    ((a [&&PRIME S=x], b [&&PRIME S=y]), c [&&PRIME S=z]);
+    Example with named interior node:
+    (B:6.0,(A:5.0,C:3.0,E:4.0)Ancestor1:5.0,D:11.0);
+    """
+    file_ext = 'tre'
+
+    def __init__(self, **kwd):
+        Text.__init__( self, **kwd )
+
+    def sniff( self, filename ): ## TODO
+        """
+        Determine whether the file is in Newick format; returns True or False and never raises
+        Note: Last non-space char of a tree should be a semicolon: ';'
+        Usually the first char will be a open parenthesis: '('
+        (,,(,));                               no nodes are named
+        (A,B,(C,D));                           leaf nodes are named
+        (A,B,(C,D)E)F;                         all nodes are named
+        (:0.1,:0.2,(:0.3,:0.4):0.5);           all but root node have a distance to parent
+        (:0.1,:0.2,(:0.3,:0.4):0.5):0.0;       all have a distance to parent
+        (A:0.1,B:0.2,(C:0.3,D:0.4):0.5);       distances and leaf names (popular)
+        (A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;     distances and all names
+        ((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;    a tree rooted on a leaf node (rare)
+        """
+        if not os.path.exists(filename):
+            return False
+        fh = None  # stays None if getsize()/open() fails, so the finally clause can skip close()
+        try:
+            ## For now, guess this is a Newick file if it starts with a '(' and ends with a ';'
+            flen = os.path.getsize(filename)
+            fh = open( filename )
+            nbytes = min(flen,2000)  # inspect at most 2000 bytes at either end of the file
+            # check end of the file for a semicolon
+            fh.seek(-nbytes,os.SEEK_END)
+            buf = fh.read(nbytes).strip()
+            if not buf.endswith(';'):
+                return False
+            # See if this starts with a open parenthesis
+            if nbytes < flen:
+                fh.seek(0)
+                buf = fh.read(nbytes).strip()
+            if buf.startswith('('):
+                return True
+        except Exception:  # sniffing is best-effort: any read error means "not Newick"
+            pass
+        finally:
+            if fh is not None:
+                fh.close()
+        return False
+
+class Nhx( Newick ):
+    """
+    New Hampshire eXtended: Newick with embedded [&&NHX:...] annotations
+    The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses.
+    http://evolution.genetics.washington.edu/phylip/newicktree.html
+    http://en.wikipedia.org/wiki/Newick_format
+    Example:
+    (gene1_Hu[&&NHX:S=Hu_Homo_sapiens], (gene2_Hu[&&NHX:S=Hu_Homo_sapiens], gene2_Mu[&&NHX:S=Mu_Mus_musculus]));
+    """
+    file_ext = 'nhx'
+
+class Nexus( Text ):
+    """
+    http://en.wikipedia.org/wiki/Nexus_file
+    Example:
+    #NEXUS
+    BEGIN TAXA;
+    Dimensions NTax=4;
+    TaxLabels fish frog snake mouse;
+    END;
+
+    BEGIN CHARACTERS;
+    Dimensions NChar=20;
+    Format DataType=DNA;
+    Matrix
+    fish ACATA GAGGG TACCT CTAAG
+    frog ACATA GAGGG TACCT CTAAG
+    snake ACATA GAGGG TACCT CTAAG
+    mouse ACATA GAGGG TACCT CTAAG
+    END;
+
+    BEGIN TREES;
+    Tree best=(fish, (frog, (snake, mouse)));
+    END;
+    """
+    file_ext = 'nex'
+
+    def __init__(self, **kwd):
+        Text.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in nexus format; returns True or False and never raises
+        First line should be:
+        #NEXUS
+        """
+        fh = None  # stays None if open() fails, so the finally clause can skip close()
+        try:
+            fh = open( filename )
+            line = fh.readline().strip()
+            if line == '#NEXUS':
+                return True
+        except Exception:  # sniffing is best-effort: any read error means "not Nexus"
+            pass
+        finally:
+            if fh is not None:
+                fh.close()
+        return False
+
 ## Qiime Classes
 
 
@@ -1165,7 +1284,7 @@
     """
     file_ext = 'qiimeparams'
 
-class QiimePrefs(data.Text):
+class QiimePrefs(Text):
     """
     A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py.
     Example: