Mercurial > repos > jjohnson > mothur_toolsuite
comparison mothur/lib/galaxy/datatypes/metagenomics.py @ 17:57df76d861e4
Modifications for ToolShed proprietary data types
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 17 Jan 2012 11:08:15 -0600 |
parents | 541e3c97c240 |
children | bfbaf823be4c |
comparison
equal
deleted
inserted
replaced
16:541e3c97c240 | 17:57df76d861e4 |
---|---|
2 metagenomics datatypes | 2 metagenomics datatypes |
3 James E Johnson - University of Minnesota | 3 James E Johnson - University of Minnesota |
4 for Mothur | 4 for Mothur |
5 """ | 5 """ |
6 | 6 |
7 import data | 7 import logging, os, os.path, sys, time, tempfile, shutil, string, glob, re |
8 import logging, os, sys, time, tempfile, shutil, string, glob, re | |
9 import galaxy.model | 8 import galaxy.model |
9 from galaxy.datatypes import data | |
10 from galaxy.datatypes.sniff import * | |
10 from galaxy.datatypes import metadata | 11 from galaxy.datatypes import metadata |
11 from galaxy.datatypes import tabular | 12 from galaxy.datatypes import tabular |
12 from galaxy.datatypes import sequence | 13 from galaxy.datatypes import sequence |
13 from galaxy.datatypes.metadata import MetadataElement | 14 from galaxy.datatypes.metadata import MetadataElement |
15 from galaxy.datatypes.data import Text | |
14 from galaxy.datatypes.tabular import Tabular | 16 from galaxy.datatypes.tabular import Tabular |
15 from galaxy.datatypes.sequence import Fasta | 17 from galaxy.datatypes.sequence import Fasta |
16 from galaxy import util | 18 from galaxy import util |
17 from galaxy.datatypes.images import Html | 19 from galaxy.datatypes.images import Html |
18 from sniff import * | |
19 | 20 |
20 log = logging.getLogger(__name__) | 21 log = logging.getLogger(__name__) |
21 | 22 |
22 | 23 |
23 ## Mothur Classes | 24 ## Mothur Classes |
340 def __init__(self, **kwd): | 341 def __init__(self, **kwd): |
341 """Initialize SecondaryStructureMatch datatype""" | 342 """Initialize SecondaryStructureMatch datatype""" |
342 Tabular.__init__( self, **kwd ) | 343 Tabular.__init__( self, **kwd ) |
343 self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] | 344 self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] |
344 | 345 |
345 class DistanceMatrix(data.Text): | 346 class DistanceMatrix( Text ): |
346 file_ext = 'dist' | 347 file_ext = 'dist' |
347 """Add metadata elements""" | 348 """Add metadata elements""" |
348 MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) | 349 MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) |
349 | 350 |
350 | 351 |
401 | 402 |
402 def __init__(self, **kwd): | 403 def __init__(self, **kwd): |
403 """Initialize secondary structure map datatype""" | 404 """Initialize secondary structure map datatype""" |
404 Tabular.__init__( self, **kwd ) | 405 Tabular.__init__( self, **kwd ) |
405 def init_meta( self, dataset, copy_from=None ): | 406 def init_meta( self, dataset, copy_from=None ): |
406 data.Text.init_meta( self, dataset, copy_from=copy_from ) | 407 Text.init_meta( self, dataset, copy_from=copy_from ) |
407 def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): | 408 def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): |
408 dataset.metadata.sequences = 0 | 409 dataset.metadata.sequences = 0 |
409 | 410 |
410 def sniff( self, filename ): | 411 def sniff( self, filename ): |
411 """ | 412 """ |
541 """A list of names""" | 542 """A list of names""" |
542 Tabular.__init__( self, **kwd ) | 543 Tabular.__init__( self, **kwd ) |
543 self.column_names = ['name'] | 544 self.column_names = ['name'] |
544 self.columns = 1 | 545 self.columns = 1 |
545 | 546 |
546 class Oligos( data.Text ): | 547 class Oligos( Text ): |
547 file_ext = 'oligos' | 548 file_ext = 'oligos' |
548 | 549 |
549 def sniff( self, filename ): | 550 def sniff( self, filename ): |
550 """ | 551 """ |
551 Determines whether the file is an otu (operational taxonomic unit) format | 552 Determines whether the file is an otu (operational taxonomic unit) format |
697 """Quantiles for chimera analysis""" | 698 """Quantiles for chimera analysis""" |
698 Quantile.__init__( self, **kwd ) | 699 Quantile.__init__( self, **kwd ) |
699 self.masked = True | 700 self.masked = True |
700 self.filtered = True | 701 self.filtered = True |
701 | 702 |
702 class LaneMask(data.Text): | 703 class LaneMask(Text): |
703 file_ext = 'filter' | 704 file_ext = 'filter' |
704 | 705 |
705 def sniff( self, filename ): | 706 def sniff( self, filename ): |
706 """ | 707 """ |
707 Determines whether the file is a lane mask filter: 1 line consisting of zeros and ones. | 708 Determines whether the file is a lane mask filter: 1 line consisting of zeros and ones. |
870 def __init__(self, **kwd): | 871 def __init__(self, **kwd): |
871 """A Summary of taxon classification""" | 872 """A Summary of taxon classification""" |
872 Tabular.__init__( self, **kwd ) | 873 Tabular.__init__( self, **kwd ) |
873 self.column_names = ['taxlevel','rankID','taxon','daughterlevels','total'] | 874 self.column_names = ['taxlevel','rankID','taxon','daughterlevels','total'] |
874 | 875 |
875 class Phylip(data.Text): | 876 class Phylip(Text): |
876 file_ext = 'phy' | 877 file_ext = 'phy' |
877 | 878 |
878 def sniff( self, filename ): | 879 def sniff( self, filename ): |
879 """ | 880 """ |
880 Determines whether the file is in Phylip format (Interleaved or Sequential) | 881 Determines whether the file is in Phylip format (Interleaved or Sequential) |
1031 out = "".join( out ) | 1032 out = "".join( out ) |
1032 except Exception, exc: | 1033 except Exception, exc: |
1033 out = "Can't create peek %s" % str( exc ) | 1034 out = "Can't create peek %s" % str( exc ) |
1034 return out | 1035 return out |
1035 | 1036 |
1037 class Newick( Text ): | |
1038 """ | |
1039 The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses. | |
1040 http://evolution.genetics.washington.edu/phylip/newicktree.html | |
1041 http://en.wikipedia.org/wiki/Newick_format | |
1042 Example: | |
1043 (B,(A,C,E),D); | |
1044 or example with branch lengths: | |
1045 (B:6.0,(A:5.0,C:3.0,E:4.0):5.0,D:11.0); | |
1046 or an example with embedded comments but no branch lengths: | |
1047 ((a [&&PRIME S=x], b [&&PRIME S=y]), c [&&PRIME S=z]); | |
1048 Example with named interior node: | |
1049 (B:6.0,(A:5.0,C:3.0,E:4.0)Ancestor1:5.0,D:11.0); | |
1050 """ | |
1051 file_ext = 'tre' | |
1052 | |
1053 def __init__(self, **kwd): | |
1054 Text.__init__( self, **kwd ) | |
1055 | |
1056 def sniff( self, filename ): ## TODO | |
1057 """ | |
1058 Determine whether the file is in Newick format | |
1059 Note: Last non-space char of a tree should be a semicolon: ';' | |
1060 Usually the first char will be an open parenthesis: '(' | |
1061 (,,(,)); no nodes are named | |
1062 (A,B,(C,D)); leaf nodes are named | |
1063 (A,B,(C,D)E)F; all nodes are named | |
1064 (:0.1,:0.2,(:0.3,:0.4):0.5); all but root node have a distance to parent | |
1065 (:0.1,:0.2,(:0.3,:0.4):0.5):0.0; all have a distance to parent | |
1066 (A:0.1,B:0.2,(C:0.3,D:0.4):0.5); distances and leaf names (popular) | |
1067 (A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F; distances and all names | |
1068 ((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A; a tree rooted on a leaf node (rare) | |
1069 """ | |
1070 if not os.path.exists(filename): | |
1071 return False | |
1072 try: | |
1073 ## For now, guess this is a Newick file if it starts with a '(' and ends with a ';' | |
1074 flen = os.path.getsize(filename) | |
1075 fh = open( filename ) | |
1076 len = min(flen,2000) | |
1077 # check end of the file for a semicolon | |
1078 fh.seek(-len,os.SEEK_END) | |
1079 buf = fh.read(len).strip() | |
1080 buf = buf.strip() | |
1081 if not buf.endswith(';'): | |
1082 return False | |
1083 # See if this starts with an open parenthesis | |
1084 if len < flen: | |
1085 fh.seek(0) | |
1086 buf = fh.read(len).strip() | |
1087 if buf.startswith('('): | |
1088 return True | |
1089 except: | |
1090 pass | |
1091 finally: | |
1092 close(fh) | |
1093 return False | |
1094 | |
1095 class Nhx( Newick ): | |
1096 """ | |
1097 New Hampshire eXtended Newick with embedded [&&NHX:...] annotations | |
1098 The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses. | |
1099 http://evolution.genetics.washington.edu/phylip/newicktree.html | |
1100 http://en.wikipedia.org/wiki/Newick_format | |
1101 Example: | |
1102 (gene1_Hu[&&NHX:S=Hu_Homo_sapiens], (gene2_Hu[&&NHX:S=Hu_Homo_sapiens], gene2_Mu[&&NHX:S=Mu_Mus_musculus])); | |
1103 """ | |
1104 file_ext = 'nhx' | |
1105 | |
1106 class Nexus( Text ): | |
1107 """ | |
1108 http://en.wikipedia.org/wiki/Nexus_file | |
1109 Example: | |
1110 #NEXUS | |
1111 BEGIN TAXA; | |
1112 Dimensions NTax=4; | |
1113 TaxLabels fish frog snake mouse; | |
1114 END; | |
1115 | |
1116 BEGIN CHARACTERS; | |
1117 Dimensions NChar=20; | |
1118 Format DataType=DNA; | |
1119 Matrix | |
1120 fish ACATA GAGGG TACCT CTAAG | |
1121 frog ACATA GAGGG TACCT CTAAG | |
1122 snake ACATA GAGGG TACCT CTAAG | |
1123 mouse ACATA GAGGG TACCT CTAAG | |
1124 END; | |
1125 | |
1126 BEGIN TREES; | |
1127 Tree best=(fish, (frog, (snake, mouse))); | |
1128 END; | |
1129 """ | |
1130 file_ext = 'nex' | |
1131 | |
1132 def __init__(self, **kwd): | |
1133 Text.__init__( self, **kwd ) | |
1134 | |
1135 def sniff( self, filename ): | |
1136 """ | |
1137 Determines whether the file is in nexus format | |
1138 First line should be: | |
1139 #NEXUS | |
1140 """ | |
1141 try: | |
1142 fh = open( filename ) | |
1143 count = 0 | |
1144 line = fh.readline() | |
1145 line = line.strip() | |
1146 if line and line == '#NEXUS': | |
1147 fh.close() | |
1148 return True | |
1149 except: | |
1150 pass | |
1151 finally: | |
1152 fh.close() | |
1153 return False | |
1154 | |
1036 | 1155 |
1037 ## Qiime Classes | 1156 ## Qiime Classes |
1038 | 1157 |
1039 class QiimeMetadataMapping(Tabular): | 1158 class QiimeMetadataMapping(Tabular): |
1040 MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) | 1159 MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) |
1163 pick_rep_set:rep_set_picking_method first | 1282 pick_rep_set:rep_set_picking_method first |
1164 pick_rep_set:sort_by otu | 1283 pick_rep_set:sort_by otu |
1165 """ | 1284 """ |
1166 file_ext = 'qiimeparams' | 1285 file_ext = 'qiimeparams' |
1167 | 1286 |
1168 class QiimePrefs(data.Text): | 1287 class QiimePrefs(Text): |
1169 """ | 1288 """ |
1170 A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py. | 1289 A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py. |
1171 Example: | 1290 Example: |
1172 { | 1291 { |
1173 'background_color':'black', | 1292 'background_color':'black', |