comparison mothur/lib/galaxy/datatypes/metagenomics.py @ 17:57df76d861e4

Modifications for ToolShed proprietary data types
author Jim Johnson <jj@umn.edu>
date Tue, 17 Jan 2012 11:08:15 -0600
parents 541e3c97c240
children bfbaf823be4c
comparison
equal deleted inserted replaced
16:541e3c97c240 17:57df76d861e4
2 metagenomics datatypes 2 metagenomics datatypes
3 James E Johnson - University of Minnesota 3 James E Johnson - University of Minnesota
4 for Mothur 4 for Mothur
5 """ 5 """
6 6
7 import data 7 import logging, os, os.path, sys, time, tempfile, shutil, string, glob, re
8 import logging, os, sys, time, tempfile, shutil, string, glob, re
9 import galaxy.model 8 import galaxy.model
9 from galaxy.datatypes import data
10 from galaxy.datatypes.sniff import *
10 from galaxy.datatypes import metadata 11 from galaxy.datatypes import metadata
11 from galaxy.datatypes import tabular 12 from galaxy.datatypes import tabular
12 from galaxy.datatypes import sequence 13 from galaxy.datatypes import sequence
13 from galaxy.datatypes.metadata import MetadataElement 14 from galaxy.datatypes.metadata import MetadataElement
15 from galaxy.datatypes.data import Text
14 from galaxy.datatypes.tabular import Tabular 16 from galaxy.datatypes.tabular import Tabular
15 from galaxy.datatypes.sequence import Fasta 17 from galaxy.datatypes.sequence import Fasta
16 from galaxy import util 18 from galaxy import util
17 from galaxy.datatypes.images import Html 19 from galaxy.datatypes.images import Html
18 from sniff import *
19 20
20 log = logging.getLogger(__name__) 21 log = logging.getLogger(__name__)
21 22
22 23
23 ## Mothur Classes 24 ## Mothur Classes
340 def __init__(self, **kwd): 341 def __init__(self, **kwd):
341 """Initialize SecondaryStructureMatch datatype""" 342 """Initialize SecondaryStructureMatch datatype"""
342 Tabular.__init__( self, **kwd ) 343 Tabular.__init__( self, **kwd )
343 self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] 344 self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total']
344 345
345 class DistanceMatrix(data.Text): 346 class DistanceMatrix( Text ):
346 file_ext = 'dist' 347 file_ext = 'dist'
347 """Add metadata elements""" 348 """Add metadata elements"""
348 MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) 349 MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 )
349 350
350 351
401 402
402 def __init__(self, **kwd): 403 def __init__(self, **kwd):
403 """Initialize secondary structure map datatype""" 404 """Initialize secondary structure map datatype"""
404 Tabular.__init__( self, **kwd ) 405 Tabular.__init__( self, **kwd )
405 def init_meta( self, dataset, copy_from=None ): 406 def init_meta( self, dataset, copy_from=None ):
406 data.Text.init_meta( self, dataset, copy_from=copy_from ) 407 Text.init_meta( self, dataset, copy_from=copy_from )
407 def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): 408 def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
408 dataset.metadata.sequences = 0 409 dataset.metadata.sequences = 0
409 410
410 def sniff( self, filename ): 411 def sniff( self, filename ):
411 """ 412 """
541 """A list of names""" 542 """A list of names"""
542 Tabular.__init__( self, **kwd ) 543 Tabular.__init__( self, **kwd )
543 self.column_names = ['name'] 544 self.column_names = ['name']
544 self.columns = 1 545 self.columns = 1
545 546
546 class Oligos( data.Text ): 547 class Oligos( Text ):
547 file_ext = 'oligos' 548 file_ext = 'oligos'
548 549
549 def sniff( self, filename ): 550 def sniff( self, filename ):
550 """ 551 """
551 Determines whether the file is a otu (operational taxonomic unit) format 552 Determines whether the file is a otu (operational taxonomic unit) format
697 """Quantiles for chimera analysis""" 698 """Quantiles for chimera analysis"""
698 Quantile.__init__( self, **kwd ) 699 Quantile.__init__( self, **kwd )
699 self.masked = True 700 self.masked = True
700 self.filtered = True 701 self.filtered = True
701 702
702 class LaneMask(data.Text): 703 class LaneMask(Text):
703 file_ext = 'filter' 704 file_ext = 'filter'
704 705
705 def sniff( self, filename ): 706 def sniff( self, filename ):
706 """ 707 """
707 Determines whether the file is a lane mask filter: 1 line consisting of zeros and ones. 708 Determines whether the file is a lane mask filter: 1 line consisting of zeros and ones.
870 def __init__(self, **kwd): 871 def __init__(self, **kwd):
871 """A Summary of taxon classification""" 872 """A Summary of taxon classification"""
872 Tabular.__init__( self, **kwd ) 873 Tabular.__init__( self, **kwd )
873 self.column_names = ['taxlevel','rankID','taxon','daughterlevels','total'] 874 self.column_names = ['taxlevel','rankID','taxon','daughterlevels','total']
874 875
875 class Phylip(data.Text): 876 class Phylip(Text):
876 file_ext = 'phy' 877 file_ext = 'phy'
877 878
878 def sniff( self, filename ): 879 def sniff( self, filename ):
879 """ 880 """
880 Determines whether the file is in Phylip format (Interleaved or Sequential) 881 Determines whether the file is in Phylip format (Interleaved or Sequential)
1031 out = "".join( out ) 1032 out = "".join( out )
1032 except Exception, exc: 1033 except Exception, exc:
1033 out = "Can't create peek %s" % str( exc ) 1034 out = "Can't create peek %s" % str( exc )
1034 return out 1035 return out
1035 1036
class Newick( Text ):
    """
    Newick tree datatype.

    The Newick Standard for representing trees in computer-readable form makes
    use of the correspondence between trees and nested parentheses.
    http://evolution.genetics.washington.edu/phylip/newicktree.html
    http://en.wikipedia.org/wiki/Newick_format
    Example:
    (B,(A,C,E),D);
    or example with branch lengths:
    (B:6.0,(A:5.0,C:3.0,E:4.0):5.0,D:11.0);
    or an example with embedded comments but no branch lengths:
    ((a [&&PRIME S=x], b [&&PRIME S=y]), c [&&PRIME S=z]);
    Example with named interior node:
    (B:6.0,(A:5.0,C:3.0,E:4.0)Ancestor1:5.0,D:11.0);
    """
    file_ext = 'tre'

    def __init__(self, **kwd):
        """Initialize the Newick datatype by delegating to Text."""
        Text.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Guess whether the file is in Newick format.

        This is a heuristic, not a parser (a full syntax check is still TODO):
        the file is accepted when, ignoring surrounding whitespace, its first
        character is an open parenthesis '(' and its last character is a
        semicolon ';'.  Only the first and last 2000 bytes are examined.

        Trees this accepts:
        (,,(,));                               no nodes are named
        (A,B,(C,D));                           leaf nodes are named
        (:0.1,:0.2,(:0.3,:0.4):0.5);           all but root node have a distance to parent
        (:0.1,:0.2,(:0.3,:0.4):0.5):0.0;       all have a distance to parent
        (A:0.1,B:0.2,(C:0.3,D:0.4):0.5);       distances and leaf names (popular)
        (A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;     distances and all names
        ((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;    a tree rooted on a leaf node (rare)

        Returns True when the heuristic matches, False otherwise, including
        when the file is missing, empty or unreadable.
        """
        chunk_size = 2000
        try:
            file_size = os.path.getsize( filename )
            read_size = min( file_size, chunk_size )
            # Binary mode: seeking relative to the end of the file is only
            # portable on binary streams, and it avoids decode errors on
            # files that are not text at all.
            with open( filename, 'rb' ) as fh:
                # Check the end of the file for the terminating semicolon.
                fh.seek( -read_size, os.SEEK_END )
                tail = fh.read( read_size ).strip()
                if not tail.endswith( b';' ):
                    return False
                if read_size < file_size:
                    # The tail chunk did not cover the whole file, so read
                    # the beginning separately.
                    fh.seek( 0 )
                    head = fh.read( read_size ).strip()
                else:
                    head = tail
                # See if the tree starts with an open parenthesis.
                return head.startswith( b'(' )
        except (IOError, OSError):
            # Sniffing is best-effort: an unreadable file is simply not Newick.
            return False
class Nhx( Newick ):
    """
    New Hampshire eXtended (NHX) tree datatype.

    A Newick tree (see http://en.wikipedia.org/wiki/Newick_format and
    http://evolution.genetics.washington.edu/phylip/newicktree.html) whose
    nodes carry embedded [&&NHX:...] tags, as in the example below.
    This class only changes the file extension; everything else, including
    sniffing, is inherited from Newick.
    Example:
    (gene1_Hu[&&NHX:S=Hu_Homo_sapiens], (gene2_Hu[&&NHX:S=Hu_Homo_sapiens], gene2_Mu[&&NHX:S=Mu_Mus_musculus]));
    """
    file_ext = 'nhx'
class Nexus( Text ):
    """
    Nexus file datatype.

    http://en.wikipedia.org/wiki/Nexus_file
    Example:
    #NEXUS
    BEGIN TAXA;
          Dimensions NTax=4;
          TaxLabels fish frog snake mouse;
    END;

    BEGIN CHARACTERS;
          Dimensions NChar=20;
          Format DataType=DNA;
          Matrix
            fish   ACATA GAGGG TACCT CTAAG
            frog   ACATA GAGGG TACCT CTAAG
            snake  ACATA GAGGG TACCT CTAAG
            mouse  ACATA GAGGG TACCT CTAAG
    END;

    BEGIN TREES;
          Tree best=(fish, (frog, (snake, mouse)));
    END;
    """
    file_ext = 'nex'

    def __init__(self, **kwd):
        """Initialize the Nexus datatype by delegating to Text."""
        Text.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determine whether the file is in Nexus format.

        Only the first line is inspected; ignoring surrounding whitespace it
        must be exactly:
        #NEXUS

        Returns True on a match, False otherwise, including when the file
        cannot be opened or read.
        """
        try:
            # Binary mode so that a non-text file cannot trigger a decode
            # error; the context manager closes the handle exactly once and
            # never touches an unbound name when open() itself fails.
            with open( filename, 'rb' ) as fh:
                first_line = fh.readline().strip()
        except (IOError, OSError):
            # Sniffing is best-effort: an unreadable file is simply not Nexus.
            return False
        return first_line == b'#NEXUS'
1036 1155
1037 ## Qiime Classes 1156 ## Qiime Classes
1038 1157
1039 class QiimeMetadataMapping(Tabular): 1158 class QiimeMetadataMapping(Tabular):
1040 MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) 1159 MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
1163 pick_rep_set:rep_set_picking_method first 1282 pick_rep_set:rep_set_picking_method first
1164 pick_rep_set:sort_by otu 1283 pick_rep_set:sort_by otu
1165 """ 1284 """
1166 file_ext = 'qiimeparams' 1285 file_ext = 'qiimeparams'
1167 1286
1168 class QiimePrefs(data.Text): 1287 class QiimePrefs(Text):
1169 """ 1288 """
1170 A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py. 1289 A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py.
1171 Example: 1290 Example:
1172 { 1291 {
1173 'background_color':'black', 1292 'background_color':'black',