diff mothur/lib/galaxy/datatypes/metagenomics.py @ 2:e990ac8a0f58

Migrated tool version 1.19.0 from old tool shed archive to new tool shed repository
author jjohnson
date Tue, 07 Jun 2011 17:39:06 -0400
parents fcc0778f6987
children 7bfe1f843858
line wrap: on
line diff
--- a/mothur/lib/galaxy/datatypes/metagenomics.py	Tue Jun 07 17:35:35 2011 -0400
+++ b/mothur/lib/galaxy/datatypes/metagenomics.py	Tue Jun 07 17:39:06 2011 -0400
@@ -712,12 +712,91 @@
         return False
 
 class SequenceTaxonomy(Tabular):
-    file_ext = 'taxonomy'
+    file_ext = 'seq.taxonomy'
+    """
+        A table with 2 columns:
+        - SequenceName
+        - Taxonomy (semicolon-separated taxonomy in descending order)
+        Example:
+          X56533.1        Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma;
+          X97975.1        Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida;
+          AF052717.1      Eukaryota;Parabasalidea;
+    """
     def __init__(self, **kwd):
-        """A list of names"""
         Tabular.__init__( self, **kwd )
         self.column_names = ['name','taxonomy']
 
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a SequenceTaxonomy
+        """
+        try:
+            pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;])+$'
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                if not line:
+                    break #EOF
+                line = line.strip()
+                if line:
+                    fields = line.split('\t')
+                    if len(fields) != 2:
+                        return False
+                    if not re.match(pat,fields[1]):
+                        return False
+                    count += 1
+                    if count > 10:
+                        break
+            if count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
+
+class RDPSequenceTaxonomy(SequenceTaxonomy):
+    file_ext = 'rdp.taxonomy'
+    """
+        A table with 2 columns:
+        - SequenceName
+        - Taxonomy (semicolon-separated taxonomy in descending order; RDP requires exactly 6 levels)
+        Example:
+          AB001518.1      Bacteria;Bacteroidetes;Sphingobacteria;Sphingobacteriales;unclassified_Sphingobacteriales;
+          AB001724.1      Bacteria;Cyanobacteria;Cyanobacteria;Family_II;GpIIa;
+          AB001774.1      Bacteria;Chlamydiae;Chlamydiae;Chlamydiales;Chlamydiaceae;Chlamydophila;
+    """
+    def sniff( self, filename ):
+        """
+        Determines whether the file is an RDPSequenceTaxonomy (taxonomy column with exactly 6 levels)
+        """
+        try:
+            pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;]){6}$'
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                if not line:
+                    break #EOF
+                line = line.strip()
+                if line:
+                    fields = line.split('\t')
+                    if len(fields) != 2:
+                        return False
+                    if not re.match(pat,fields[1]):
+                        return False
+                    count += 1
+                    if count > 10:
+                        break
+            if count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
+
 class ConsensusTaxonomy(Tabular):
     file_ext = 'cons.taxonomy'
     def __init__(self, **kwd):
@@ -845,9 +924,9 @@
 
 ## Qiime Classes
 
-class MetadataMapping(Tabular):
+class QiimeMetadataMapping(Tabular):
     MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
-    file_ext = 'mapping'
+    file_ext = 'qiimemapping'
 
     def __init__(self, **kwd):
         """
@@ -887,6 +966,144 @@
         Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
         self.set_column_names(dataset)
 
+class QiimeOTU(Tabular):
+    """
+    Associates OTUs with sequence IDs
+    Example:
+    0	FLP3FBN01C2MYD	FLP3FBN01B2ALM
+    1	FLP3FBN01DF6NE	FLP3FBN01CKW1J	FLP3FBN01CHVM4
+    2	FLP3FBN01AXQ2Z
+    """
+    file_ext = 'qiimeotu'
+
+class QiimeOTUTable(Tabular):
+    """
+        #Full OTU Counts
+        #OTU ID	PC.354	PC.355	PC.356	Consensus Lineage
+        0	0	1	0	Root;Bacteria;Firmicutes;"Clostridia";Clostridiales
+        1	1	3	1	Root;Bacteria
+        2	0	2	2	Root;Bacteria;Bacteroidetes
+    """
+    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
+    file_ext = 'qiimeotutable'
+    def init_meta( self, dataset, copy_from=None ):
+        tabular.Tabular.init_meta( self, dataset, copy_from=copy_from )
+    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
+        self.set_column_names(dataset) 
+    def set_column_names(self, dataset):
+        if dataset.has_data():
+            dataset_fh = open( dataset.file_name )
+            line = dataset_fh.readline()
+            line = dataset_fh.readline()
+            if line.startswith('#OTU ID'):
+                dataset.metadata.column_names = line.strip().split('\t');
+            dataset_fh.close()
+            dataset.metadata.comment_lines = 2
+
+class QiimeDistanceMatrix(Tabular):
+    """
+        	PC.354	PC.355	PC.356
+        PC.354	0.0	3.177	1.955	
+        PC.355	3.177	0.0	3.444
+        PC.356	1.955	3.444	0.0
+    """
+    file_ext = 'qiimedistmat'
+    def init_meta( self, dataset, copy_from=None ):
+        tabular.Tabular.init_meta( self, dataset, copy_from=copy_from )
+    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
+        self.set_column_names(dataset) 
+    def set_column_names(self, dataset):
+        if dataset.has_data():
+            dataset_fh = open( dataset.file_name )
+            line = dataset_fh.readline()
+            # first line contains the names
+            dataset.metadata.column_names = line.strip().split('\t');
+            dataset_fh.close()
+            dataset.metadata.comment_lines = 1
+
+class QiimePCA(Tabular):
+    """
+    Principal Coordinate Analysis Data
+    Lists the principal coordinate (PC) axes (columns) for each sample (rows).
+    Pairs of PCs can then be graphed to view the relationships between samples.
+    The bottom of the output file contains the eigenvalues and % variation explained for each PC.
+    Example:
+    pc vector number	1	2	3
+    PC.354	-0.309063936588	0.0398252112257	0.0744672231759
+    PC.355	-0.106593922619	0.141125998277	0.0780204374172
+    PC.356	-0.219869362955	0.00917241121781	0.0357281314115
+    
+    
+    eigvals	0.480220500471	0.163567082874	0.125594470811
+    % variation explained	51.6955484555	17.6079322939
+    """
+    file_ext = 'qiimepca'
+
+class QiimeParams(Tabular):
+    """
+###pick_otus_through_otu_table.py parameters###
+
+# OTU picker parameters
+pick_otus:otu_picking_method    uclust
+pick_otus:clustering_algorithm  furthest
+
+# Representative set picker parameters
+pick_rep_set:rep_set_picking_method     first
+pick_rep_set:sort_by    otu
+    """
+    file_ext = 'qiimeparams'
+
+class QiimePrefs(data.Text):
+    """
+    A text file containing the coloring preferences used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py.
+    Example:
+{
+'background_color':'black',
+
+'sample_coloring':
+        {
+                'Treatment':
+                {
+                        'column':'Treatment',
+                        'colors':(('red',(0,100,100)),('blue',(240,100,100)))
+                },
+                'DOB':
+                {
+                        'column':'DOB',
+                        'colors':(('red',(0,100,100)),('blue',(240,100,100)))
+                }
+        },
+'MONTE_CARLO_GROUP_DISTANCES':
+        {
+                'Treatment': 10,
+                'DOB': 10
+        }
+}
+    """
+    file_ext = 'qiimeprefs'
+
+class QiimeTaxaSummary(Tabular):
+    """
+        Taxon	PC.354	PC.355	PC.356
+        Root;Bacteria;Actinobacteria	0.0	0.177	0.955	
+        Root;Bacteria;Firmicutes	0.177	0.0	0.444
+        Root;Bacteria;Proteobacteria	0.955	0.444	0.0
+    """
+    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
+    file_ext = 'qiimetaxsummary'
+
+    def set_column_names(self, dataset):
+        if dataset.has_data():
+            dataset_fh = open( dataset.file_name )
+            line = dataset_fh.readline()
+            if line.startswith('Taxon'):
+                dataset.metadata.column_names = line.strip().split('\t');
+            dataset_fh.close()
+
+    def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ):
+        Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
+        self.set_column_names(dataset)
+
 if __name__ == '__main__':
     import doctest, sys
     doctest.testmod(sys.modules[__name__])