diff mothur/lib/galaxy/datatypes/metagenomics.py @ 0:3202a38e44d9

Migrated tool version 1.15.1 from old tool shed archive to new tool shed repository
author jjohnson
date Tue, 07 Jun 2011 17:32:23 -0400
parents
children fcc0778f6987
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/lib/galaxy/datatypes/metagenomics.py	Tue Jun 07 17:32:23 2011 -0400
@@ -0,0 +1,831 @@
+"""
+metagenomics datatypes
+James E Johnson - University of Minnesota
+for Mothur
+"""
+
+import data
+import logging, os, sys, time, tempfile, shutil, string, glob, re
+import galaxy.model
+from galaxy.datatypes import metadata
+from galaxy.datatypes import tabular
+from galaxy.datatypes import sequence
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes.sequence import Fasta
+from galaxy import util
+from galaxy.datatypes.images import Html
+from sniff import *
+
+log = logging.getLogger(__name__)
+
+
+## Mothur Classes 
+
+class Otu( data.Text ):
+    file_ext = 'otu'
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in OTU (operational taxonomic unit) format
+        """
+        try:
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                line = line.strip()
+                if not line:
+                    break #EOF
+                if line:
+                    if line[0] != '@':
+                        linePieces = line.split('\t')
+                        if len(linePieces) < 2:
+                            return False
+                        try:
+                            check = int(linePieces[1])
+                            if check + 2 != len(linePieces):
+                                return False
+                        except ValueError:
+                            return False
+                        count += 1
+                        if count == 5:
+                            return True
+            fh.close()
+            if count < 5 and count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
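+
+# Illustrative sketch only (example values assumed, not taken from real mothur
+# output): a line accepted by the sniffer above has a label, the number of
+# OTUs, and then exactly that many tab-separated columns, e.g.
+#
+#   0.03<TAB>3<TAB>seq1,seq2<TAB>seq3<TAB>seq5
+#
+# Galaxy normally calls sniff() through its upload machinery, but it can also
+# be exercised directly, e.g. Otu().sniff( path ), for a quick check.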
+
+class OtuList( Otu ):
+    file_ext = 'list'
+
+class Sabund( Otu ):
+    file_ext = 'sabund'
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in mothur sabund (species abundance) format
+        label<TAB>count[<TAB>value(1..n)]
+        """
+        try:
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                line = line.strip()
+                if not line:
+                    break #EOF
+                if line:
+                    if line[0] != '@':
+                        linePieces = line.split('\t')
+                        if len(linePieces) < 2:
+                            return False
+                        try:
+                            check = int(linePieces[1])
+                            if check + 2 != len(linePieces):
+                                return False
+                            for i in range( 2, len(linePieces)):
+                                ival = int(linePieces[i])
+                        except ValueError:
+                            return False
+                        count += 1
+                        if count >= 5:
+                            return True
+            fh.close()
+            if count < 5 and count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
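+
+# Minimal example of the sabund layout checked above (made-up values): the
+# second field says how many integer abundance values follow on the line.
+#
+#   0.03<TAB>3<TAB>10<TAB>4<TAB>1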
+
+class Rabund( Sabund ):
+    file_ext = 'rabund'
+
+
+class SharedRabund( Rabund ):
+    file_ext = 'shared'
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in OTU (operational taxonomic unit) shared format
+        label<TAB>group<TAB>count[<TAB>value(1..n)]
+        """
+        try:
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                line = line.strip()
+                if not line:
+                    break #EOF
+                if line:
+                    if line[0] != '@':
+                        linePieces = line.split('\t')
+                        if len(linePieces) < 3:
+                            return False
+                        try:
+                            check = int(linePieces[2])
+                            if check + 3 != len(linePieces):
+                                return False
+                            for i in range( 3, len(linePieces)):
+                                ival = int(linePieces[i])
+                        except ValueError:
+                            return False
+                        count += 1
+                        if count >= 5:
+                            return True
+            fh.close()
+            if count < 5 and count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
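+
+# Minimal example of a shared line as parsed above (made-up values): the third
+# field gives the number of integer OTU counts that follow for the named group.
+#
+#   0.03<TAB>groupA<TAB>3<TAB>10<TAB>4<TAB>1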
+
+class RelAbund( Rabund ):
+    file_ext = 'relabund'
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in OTU (operational taxonomic unit) relative abundance format
+        label<TAB>group<TAB>count[<TAB>value(1..n)]
+        """
+        try:
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                line = line.strip()
+                if not line:
+                    break #EOF
+                if line:
+                    if line[0] != '@':
+                        linePieces = line.split('\t')
+                        if len(linePieces) < 3:
+                            return False
+                        try:
+                            check = int(linePieces[2])
+                            if check + 3 != len(linePieces):
+                                return False
+                            for i in range( 3, len(linePieces)):
+                                fval = float(linePieces[i])
+                        except ValueError:
+                            return False
+                        count += 1
+                        if count >= 5:
+                            return True
+            fh.close()
+            if count < 5 and count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
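+
+# Minimal example of a relabund line as parsed above (made-up values): the same
+# shape as the shared format, but the trailing values are floats.
+#
+#   0.03<TAB>groupA<TAB>3<TAB>0.52<TAB>0.31<TAB>0.17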
+
+class SecondaryStructureMap(Tabular):
+    file_ext = 'map'
+    def __init__(self, **kwd):
+        """Initialize secondary structure map datatype"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['Map']
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a secondary structure map format
+        A single column of integers; each value indicates the row that the current row maps to.
+        Consistency check: if structMap[10] = 380 then structMap[380] must equal 10.
+        """
+        try:
+            fh = open( filename )
+            line_num = 0
+            rowidxmap = {}
+            while True:
+                line = fh.readline()
+                line_num += 1
+                line = line.strip()
+                if not line:
+                    break #EOF
+                if line:
+                    try:
+                        pointer = int(line)
+                        if pointer > 0:
+                            if pointer > line_num:
+                                rowidxmap[line_num] = pointer
+                            elif pointer < line_num and rowidxmap.get(pointer) != line_num:
+                                return False
+                    except ValueError:
+                        return False
+            fh.close()
+            if line_num > 1:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
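+
+# Illustrative map column (made-up rows): row 1 maps to row 3 and row 3 maps
+# back to row 1, which is the symmetry the sniffer above verifies; rows with
+# a zero are skipped.
+#
+#   3
+#   0
+#   1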
+
+class SequenceAlignment( Fasta ):
+    file_ext = 'align'
+    def __init__(self, **kwd):
+        """Initialize SequenceAlignment datatype"""
+        Fasta.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in Mothur align fasta format
+        Each sequence line must be the same length
+        """
+        
+        try:
+            fh = open( filename )
+            seq_len = -1
+            while True:
+                line = fh.readline()
+                if not line:
+                    break #EOF
+                line = line.strip()
+                if line: #first non-empty line
+                    if line.startswith( '>' ):
+                        #The next line.strip() must not be '', nor start with '>'
+                        line = fh.readline().strip()
+                        if line == '' or line.startswith( '>' ):
+                            break
+                        if seq_len < 0:
+                            seq_len = len(line)
+                        elif seq_len != len(line):
+                            return False
+                    else:
+                        break #we found a non-empty line, but it is not a fasta header
+            if seq_len > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
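+
+# Sketch of the aligned-fasta shape this sniffer expects (sequences invented):
+# every sequence line following a '>' header has the same length, with gap
+# characters padding where needed.
+#
+#   >seq1
+#   ATG--CATTA
+#   >seq2
+#   ATGCA--TTA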
+
+class AlignCheck( Tabular ):
+    file_ext = 'align.check'
+    def __init__(self, **kwd):
+        """Initialize AlignCheck datatype"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total']
+        self.column_types = ['str','int','int','int','int','int','int','int']
+        self.comment_lines = 1
+
+    def set_meta( self, dataset, overwrite = True, **kwd ):
+        # Tabular.set_meta( self, dataset, overwrite = overwrite, first_line_is_header = True, skip = 1 )
+        data_lines = 0
+        if dataset.has_data():
+            dataset_fh = open( dataset.file_name )
+            while True:
+                line = dataset_fh.readline()
+                if not line: break
+                data_lines += 1
+            dataset_fh.close()
+        dataset.metadata.comment_lines = 1
+        dataset.metadata.data_lines = data_lines - 1 if data_lines > 0 else 0
+        dataset.metadata.column_names = self.column_names
+        dataset.metadata.column_types = self.column_types
+
+class AlignReport(Tabular):
+    """
+QueryName	QueryLength	TemplateName	TemplateLength	SearchMethod	SearchScore	AlignmentMethod	QueryStart	QueryEnd	TemplateStart	TemplateEnd	PairwiseAlignmentLength	GapsInQuery	GapsInTemplate	LongestInsert	SimBtwnQuery&Template
+AY457915	501		82283		1525		kmer		89.07		needleman	5		501		1		499		499			2		0		0		97.6
+    """
+    file_ext = 'align.report'
+    def __init__(self, **kwd):
+        """Initialize AlignReport datatype"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['QueryName','QueryLength','TemplateName','TemplateLength','SearchMethod','SearchScore',
+                             'AlignmentMethod','QueryStart','QueryEnd','TemplateStart','TemplateEnd',
+                             'PairwiseAlignmentLength','GapsInQuery','GapsInTemplate','LongestInsert','SimBtwnQuery&Template'
+                             ]
+
+class BellerophonChimera( Tabular ):
+    file_ext = 'bellerophon.chimera'
+    def __init__(self, **kwd):
+        """Initialize BellerophonChimera datatype"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['Name','Score','Left','Right']
+
+class SecondaryStructureMatch(Tabular):
+    """
+	name	pound	dash	plus	equal	loop	tilde	total
+	9_1_12	42	68	8	28	275	420	872
+	9_1_14	36	68	6	26	266	422	851
+	9_1_15	44	68	8	28	276	418	873
+	9_1_16	34	72	6	30	267	430	860
+	9_1_18	46	80	2	36	261	
+    """
+    def __init__(self, **kwd):
+        """Initialize SecondaryStructureMatch datatype"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total']
+
+class DistanceMatrix(data.Text):
+    file_ext = 'dist'
+    """Add metadata elements"""
+    MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 )
+
+
+class LowerTriangleDistanceMatrix(DistanceMatrix):
+    file_ext = 'lower.dist'
+    def __init__(self, **kwd):
+        """Initialize lower-triangle distance matrix datatype"""
+        DistanceMatrix.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a lower-triangle distance matrix (phylip) format
+        The first line has the number of sequences in the matrix.
+        The remaining lines have the sequence name followed by a list of distances from all preceding sequences
+                5
+                U68589
+                U68590	0.3371
+                U68591	0.3609	0.3782
+                U68592	0.4155	0.3197	0.4148
+                U68593	0.2872	0.1690	0.3361	0.2842
+        """
+        try:
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                line = line.strip()
+                if not line:
+                    break #EOF
+                if line:
+                    if line[0] != '@':
+                        linePieces = line.split('\t')
+                        if len(linePieces) != 3:
+                            return False
+                        try:
+                            check = float(linePieces[2])
+                        except ValueError:
+                            return False
+                        count += 1
+                        if count == 5:
+                            return True
+            fh.close()
+            if count < 5 and count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
+
+class SquareDistanceMatrix(DistanceMatrix,Tabular):
+    file_ext = 'square.dist'
+    sequence_count = -1
+
+    def __init__(self, **kwd):
+        """Initialize square distance matrix datatype"""
+        Tabular.__init__( self, **kwd )
+    def init_meta( self, dataset, copy_from=None ):
+        data.Text.init_meta( self, dataset, copy_from=copy_from )
+    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
+        dataset.metadata.sequence_count = 0
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a square distance matrix (Column-formatted distance matrix) format
+        The first line has the number of sequences in the matrix.
+        The following lines have the sequence name in the first column plus a column for the distance to each sequence 
+        in the row order in which they appear in the matrix.
+               3
+               U68589  0.0000  0.3371  0.3610
+               U68590  0.3371  0.0000  0.3783
+               U68591  0.3610  0.3783  0.0000
+        """
+        try:
+            fh = open( filename )
+            count = 0
+            line = fh.readline()
+            line = line.strip()
+            sequence_count = int(line)
+            col_cnt = sequence_count + 1
+            while True:
+                line = fh.readline()
+                line = line.strip()
+                if not line:
+                    break #EOF
+                if line:
+                    if line[0] != '@':
+                        linePieces = line.split('\t')
+                        if len(linePieces) != col_cnt :
+                            return False
+                        try:
+                            for i in range(1, col_cnt):
+                                check = float(linePieces[i])
+                        except ValueError:
+                            return False
+                        count += 1
+                        if count == 5:
+                            return True
+            fh.close()
+            if count < 5 and count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
+
+class PairwiseDistanceMatrix(DistanceMatrix,Tabular):
+    file_ext = 'pair.dist'
+    def __init__(self, **kwd):
+        """Initialize pairwise distance matrix datatype"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['Sequence','Sequence','Distance']
+        self.column_types = ['str','str','float']
+        self.comment_lines = 1
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a pairwise distance matrix (Column-formatted distance matrix) format
+        The first and second columns have the sequence names and the third column is the distance between those sequences.
+        """
+        try:
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                line = line.strip()
+                if not line:
+                    break #EOF
+                if line:
+                    if line[0] != '@':
+                        linePieces = line.split('\t')
+                        if len(linePieces) != 3:
+                            return False
+                        try:
+                            check = float(linePieces[2])
+                        except ValueError:
+                            return False
+                        count += 1
+                        if count == 5:
+                            return True
+            fh.close()
+            if count < 5 and count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
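+
+# Column-formatted distance example (names and values made up): two sequence
+# names followed by the distance between them, one pair per line.
+#
+#   U68589<TAB>U68590<TAB>0.3371
+#   U68589<TAB>U68591<TAB>0.3610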
+
+class Alignment(Tabular):
+    file_ext = 'align'
+    def __init__(self, **kwd):
+        """Initialize Alignment datatype"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total']
+
+class AlignCheck(Tabular):
+    file_ext = 'align.check'
+    def __init__(self, **kwd):
+        """Initialize AlignCheck datatype"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total']
+
+class Names(Tabular):
+    file_ext = 'names'
+    def __init__(self, **kwd):
+        """Name file shows the relationship between a representative sequence (col 1) and the sequences it represents (col 2)"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['name','representatives']
+
+class Summary(Tabular):
+    file_ext = 'summary'
+    def __init__(self, **kwd):
+        """Summary of each sequence: start and end positions, number of bases, ambiguous bases and longest homopolymer"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['seqname','start','end','nbases','ambigs','polymer']
+
+class Group(Tabular):
+    file_ext = 'groups'
+    def __init__(self, **kwd):
+        """Group file assigns each sequence (col 1) to a group (col 2)"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['name','group']
+
+class AccNos(Tabular):
+    file_ext = 'accnos'
+    def __init__(self, **kwd):
+        """A list of names"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['name']
+
+class Oligos( data.Text ):
+    file_ext = 'oligos'
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a mothur oligos file (forward/reverse primers and barcodes)
+        """
+        try:
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                line = line.strip()
+                if not line:
+                    break #EOF
+                else:
+                    if line[0] != '#':
+                        linePieces = line.split('\t')
+                        if len(linePieces) == 2 and re.match('forward|reverse',linePieces[0]):
+                            count += 1
+                        elif len(linePieces) == 3 and re.match('barcode',linePieces[0]):
+                            count += 1
+                        else:
+                            return False
+                        if count > 20:
+                            return True
+            if count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
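+
+# Sketch of an oligos file matching the checks above (primer sequences and
+# sample name are assumed): primer lines have two columns, barcode lines three.
+#
+#   forward<TAB>CCGTCAATTC
+#   reverse<TAB>ATTACCGCGG
+#   barcode<TAB>AACGTA<TAB>sampleA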
+
+class Frequency(Tabular):
+    file_ext = 'freq'
+    def __init__(self, **kwd):
+        """Frequency at each position, used for chimera analysis"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['position','frequency']
+        self.column_types = ['int','float']
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a frequency tabular format for chimera analysis
+        #1.14.0
+        0	0.000
+        1	0.000
+        ...
+        155	0.975
+        """
+        try:
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                line = line.strip()
+                if not line:
+                    break #EOF
+                else:
+                    if line[0] != '#':
+                        try:
+                            linePieces = line.split('\t')
+                            i = int(linePieces[0])
+                            f = float(linePieces[1])
+                            count += 1
+                        except:
+                            return False
+                        if count > 20:
+                            return True
+            if count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
+
+class Quantile(Tabular):
+    file_ext = 'quan'
+    MetadataElement( name="filtered", default=False, no_value=False, optional=True , desc="Quantiles calculated using a frequency filter", readonly=True)
+    MetadataElement( name="masked", default=False, no_value=False, optional=True , desc="Quantiles calculated using a mask", readonly=True)
+    def __init__(self, **kwd):
+        """Quantiles for chimera analysis"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['num','ten','twentyfive','fifty','seventyfive','ninetyfive','ninetynine']
+        self.column_types = ['int','float','float','float','float','float','float']
+    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
+        log.info( "Mothur Quantile set_meta %s" % kwd)
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a quantiles tabular format for chimera analysis
+        1	0	0	0	0	0	0
+        2       0.309198        0.309198        0.37161 0.37161 0.37161 0.37161
+        3       0.510982        0.563213        0.693529        0.858939        1.07442 1.20608
+        ...
+        """
+        try:
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                line = line.strip()
+                if not line:
+                    break #EOF
+                else:
+                    if line[0] != '#':
+                        try:
+                            linePieces = line.split('\t')
+                            i = int(linePieces[0])
+                            f = float(linePieces[1])
+                            f = float(linePieces[2])
+                            f = float(linePieces[3])
+                            f = float(linePieces[4])
+                            f = float(linePieces[5])
+                            f = float(linePieces[6])
+                            count += 1
+                        except:
+                            return False
+                        if count > 10:
+                            return True
+            if count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
+
+class FilteredQuantile(Quantile):
+    file_ext = 'filtered.quan'
+    def __init__(self, **kwd):
+        """Quantiles for chimera analysis"""
+        Quantile.__init__( self, **kwd )
+        self.filtered = True
+
+class MaskedQuantile(Quantile):
+    file_ext = 'masked.quan'
+    def __init__(self, **kwd):
+        """Quantiles for chimera analysis"""
+        Quantile.__init__( self, **kwd )
+        self.masked = True
+        self.filtered = False
+
+class FilteredMaskedQuantile(Quantile):
+    file_ext = 'filtered.masked.quan'
+    def __init__(self, **kwd):
+        """Quantiles for chimera analysis"""
+        Quantile.__init__( self, **kwd )
+        self.masked = True
+        self.filtered = True
+
+class LaneMask(data.Text):
+    file_ext = 'filter'
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a lane mask filter:  1 line consisting of zeros and ones.
+        """
+        try:
+            fh = open( filename )
+            while True:
+                buff = fh.read(1000)
+                if not buff:
+                    break #EOF
+                else:
+                    if not re.match('^[01]+$',buff):
+                        return False
+            return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
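+
+# A lane mask/filter is a single line of 0s and 1s, one character per alignment
+# column (shortened, made-up example):
+#
+#   000011111111110000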
+
+class SequenceTaxonomy(Tabular):
+    file_ext = 'taxonomy'
+    def __init__(self, **kwd):
+        """Sequence name (col 1) with its taxonomy assignment (col 2)"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['name','taxonomy']
+
+class ConsensusTaxonomy(Tabular):
+    file_ext = 'cons.taxonomy'
+    def __init__(self, **kwd):
+        """Consensus taxonomy (col 3) and sequence count (col 2) for each OTU (col 1)"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['OTU','count','taxonomy']
+
+class TaxonomySummary(Tabular):
+    file_ext = 'tax.summary'
+    def __init__(self, **kwd):
+        """A Summary of taxon classification"""
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['taxlevel','rankID','taxon','daughterlevels','total']
+
+class Phylip(data.Text):
+    file_ext = 'phy'
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in Phylip format (Interleaved or Sequential)
+        The first line of the input file contains the number of species and the
+        number of characters, in free format, separated by blanks (not by
+        commas). The information for each species follows, starting with a
+        ten-character species name (which can include punctuation marks and blanks),
+        and continuing with the characters for that species.
+        http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles
+        Interleaved Example:
+            6   39
+        Archaeopt CGATGCTTAC CGCCGATGCT
+        HesperorniCGTTACTCGT TGTCGTTACT
+        BaluchitheTAATGTTAAT TGTTAATGTT
+        B. virginiTAATGTTCGT TGTTAATGTT
+        BrontosaurCAAAACCCAT CATCAAAACC
+        B.subtilisGGCAGCCAAT CACGGCAGCC
+        
+        TACCGCCGAT GCTTACCGC
+        CGTTGTCGTT ACTCGTTGT
+        AATTGTTAAT GTTAATTGT
+        CGTTGTTAAT GTTCGTTGT
+        CATCATCAAA ACCCATCAT
+        AATCACGGCA GCCAATCAC
+        """
+        try:
+            fh = open( filename )
+            # counts line
+            line = fh.readline().strip()
+            linePieces = line.split()
+            count = int(linePieces[0])
+            seq_len = int(linePieces[1])
+            # data lines
+            """
+            TODO check data lines
+            while True:
+                line = fh.readline()
+                # name is the first 10 characters
+                name = line[0:10]
+                seq = line[10:].strip()
+                # nucleic base or amino acid 1-char designators (spaces allowed)
+                bases = ''.join(seq.split())
+                # float per base (each separated by space)
+            """
+            return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
+
+
+## Qiime Classes
+
+class MetadataMapping(Tabular):
+    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
+    file_ext = 'mapping'
+
+    def __init__(self, **kwd):
+        """
+        http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview
+        Information about the samples necessary to perform the data analysis. 
+        # self.column_names = ['#SampleID','BarcodeSequence','LinkerPrimerSequence','Description']
+        """
+        Tabular.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a qiime mapping file
+        Just checking for an appropriate header line for now, could be improved
+        """
+        try:
+            pat = '#SampleID(\t[a-zA-Z][a-zA-Z0-9_]*)*\tDescription'
+            fh = open( filename )
+            while True:
+                line = fh.readline()
+                if not line:
+                    break #EOF
+                if re.match(pat,line):
+                    return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
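+
+    # For reference, a header the pattern above accepts looks like this (the
+    # middle column names follow the commented-out defaults in __init__):
+    #
+    #   #SampleID<TAB>BarcodeSequence<TAB>LinkerPrimerSequence<TAB>Description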
+
+    def set_column_names(self, dataset):
+        if dataset.has_data():
+            dataset_fh = open( dataset.file_name )
+            line = dataset_fh.readline()
+            if line.startswith('#SampleID'):
+                dataset.metadata.column_names = line.strip().split('\t')
+            dataset_fh.close()
+
+    def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ):
+        Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
+        self.set_column_names(dataset)
+
+if __name__ == '__main__':
+    import doctest, sys
+    doctest.testmod(sys.modules[__name__])
+