annotate lib/galaxy/datatypes/wsf.py @ 14:8ae67e9fb6ff

Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
author miller-lab
date Fri, 28 Sep 2012 11:35:56 -0400
parents 4b6590dd7250
children cba0d7a63b82
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
1 """
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
2 SnpFile datatype
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
3 """
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
4
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
5 import galaxy.datatypes.data
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
6 import tempfile
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
7 import os
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
8 import simplejson
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
9 from galaxy import util
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
10 from galaxy.datatypes.sniff import *
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
11 from galaxy.datatypes.tabular import Tabular
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
12 from galaxy.datatypes.images import Html
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
13 from galaxy.datatypes import metadata
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
14 from galaxy.datatypes.metadata import MetadataElement
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
15
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
class Wped(Html):
    """Composite datatype (extension ``gd_ped``) with a pedigree and a map file.

    Two composite files are registered, ``%s.ped`` and ``%s.map``; both are
    registered with ``substitute_name_with_metadata='base_name'``.
    """

    allow_datatype_change = False
    composite_type = 'basic'
    file_ext = 'gd_ped'

    MetadataElement(
        name="base_name",
        desc="base name for all transformed versions of this genetic dataset",
        default='WpedData',
        readonly=True,
        set_in_upload=True
    )

    def __init__(self, **kwd):
        """Initialise the Html base class, then register the composite files."""
        Html.__init__(self, **kwd)
        # Registration order is preserved: pedigree file first, map file second.
        component_files = (
            ('%s.ped', 'Pedigree File'),
            ('%s.map', 'Map File'),
        )
        for filename_pattern, label in component_files:
            self.add_composite_file(
                filename_pattern,
                description=label,
                substitute_name_with_metadata='base_name',
                is_binary=False
            )
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
27
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
class Individuals(Tabular):
    """Tabular datatype (extension ``gd_indivs``) with three fixed column labels."""

    file_ext = 'gd_indivs'

    def __init__(self, **kwd):
        """Initialise the Tabular base class and set the fixed column labels."""
        Tabular.__init__(self, **kwd)
        self.column_names = ['Column', 'Name', 'Alias']

    def display_peek(self, dataset):
        """Return the result of Tabular.make_html_table using the fixed column labels."""
        labels = self.column_names
        return Tabular.make_html_table(self, dataset, column_names=labels)
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
36
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
class DatasetComments(object):
    """Collect the leading block of comment lines from a dataset's file.

    Reading starts at the first line of ``dataset.file_name`` and stops at the
    first line that does not start with ``comment_string``.  The comment
    prefix is stripped from each collected line; the line ending is kept.

    The ``dataset`` object must provide ``has_data()`` and ``file_name``.
    """

    def __init__(self, dataset, comment_string='#'):
        """Store the arguments and read the comment block immediately.

        dataset        -- object providing ``has_data()`` and ``file_name``
        comment_string -- prefix that marks a comment line (default ``'#'``)
        """
        self.dataset = dataset
        self.comment_string = comment_string
        self.comment_string_len = len(comment_string)
        self._comments = []
        self._read_comments()

    def _read_comments(self):
        """Fill ``self._comments`` from the head of the dataset file.

        Reading is best effort: if the file cannot be opened, read or decoded,
        whatever was collected so far is kept and no exception propagates.
        """
        if not self.dataset.has_data():
            return

        import sys

        # 'U' gives universal newlines on Python 2.  Python 3 has universal
        # newlines by default and rejects the 'U' flag from 3.11 onwards.
        mode = 'rU' if sys.version_info[0] < 3 else 'r'

        try:
            # The context manager closes the handle even when the loop exits
            # early or an error is raised while reading.
            with open(self.dataset.file_name, mode) as handle:
                for line in handle:
                    if not line.startswith(self.comment_string):
                        break
                    self._comments.append(line[self.comment_string_len:])
        except (EnvironmentError, UnicodeError):
            # Unreadable or undecodable file: treat as "no further comments".
            pass

    def __str__(self):
        """Return all collected comment lines concatenated into one string."""
        return "".join(self._comments)

    @property
    def comments(self):
        """List of collected comment lines with the comment prefix removed."""
        return self._comments
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
63
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
class DatasetCommentMetadata(object):
    """Decode the leading comment block of a dataset as a JSON object.

    The comment lines are gathered with DatasetComments, joined and parsed
    with simplejson.  ``comment_metadata`` is always a dict: it stays empty
    when the comment block is not valid JSON or when the decoded value is not
    a JSON object.
    """

    def __init__(self, dataset, comment_string='#'):
        """Read the dataset's comment block and decode it immediately.

        dataset        -- passed through to DatasetComments
        comment_string -- prefix that marks a comment line (default ``'#'``)
        """
        self.dataset_comments = DatasetComments(dataset, comment_string)
        self._comment_metadata = {}
        self._decode_dataset_comments()

    def _decode_dataset_comments(self):
        """Parse the joined comment text and keep the result if it is a dict."""
        dataset_comment_string = str(self.dataset_comments)
        try:
            decoded = simplejson.loads(dataset_comment_string)
        except ValueError:
            # simplejson.JSONDecodeError derives from ValueError; catching the
            # base class also covers simplejson releases that raise a plain
            # ValueError and have no JSONDecodeError attribute.
            return
        # A comment block such as "#1" or "#[1, 2]" is valid JSON but not an
        # object; users of comment_metadata call .copy() and .get() on it, so
        # anything other than a dict is ignored.
        if isinstance(decoded, dict):
            self._comment_metadata = decoded

    @property
    def comment_metadata(self):
        """The decoded metadata dict (empty if nothing usable was found)."""
        return self._comment_metadata
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
80
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
class AnnotatedTabular(Tabular):
    """Tabular file whose optional leading comment block holds JSON that is imported into metadata."""

    MetadataElement(
        name="comment_metadata",
        desc="comment metadata",
        param=metadata.DictParameter,
        visible=False,
        readonly=True
    )

    def set_meta(self, dataset, overwrite=True, **kwd):
        """Run Tabular.set_meta, then load comment metadata if none is set yet.

        When ``dataset.metadata.comment_metadata`` is None, the comment block
        is decoded with DatasetCommentMetadata, a copy of the result is stored
        on the dataset and set_dataset_metadata_from_comments is invoked.
        """
        Tabular.set_meta(
            self,
            dataset,
            overwrite=overwrite,
            max_data_lines=None,
            max_guess_type_data_lines=1000,
            **kwd
        )
        if dataset.metadata.comment_metadata is not None:
            # Comment metadata already present: leave it untouched.
            return
        decoded = DatasetCommentMetadata(dataset)
        dataset.metadata.comment_metadata = decoded.comment_metadata.copy()
        self.set_dataset_metadata_from_comments(dataset)

    def set_dataset_metadata_from_comments(self, dataset):
        """Hook called by set_meta after comment metadata is stored; does nothing here."""
        pass

    def set_peek(self, dataset, line_count=None, is_multi_byte=False):
        """Set the peek with unlimited width, skipping lines that start with '#'.

        The call is dispatched with ``super(Tabular, self)``, i.e. to the
        set_peek that follows Tabular in the method resolution order.
        """
        super(Tabular, self).set_peek(
            dataset,
            line_count=line_count,
            is_multi_byte=is_multi_byte,
            WIDTH='unlimited',
            skipchars=['#']
        )

    def display_peek(self, dataset):
        """Return the formatted HTML of the peek, skipping '#' lines."""
        return Tabular.make_html_table(self, dataset, skipchars=['#'])
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
101
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
class Fake(AnnotatedTabular):
    """AnnotatedTabular that fills column, species and dbkey metadata from comment metadata."""

    MetadataElement(name="scaffold", desc="scaffold column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="pos", desc="pos column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="ref", desc="ref column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="rPos", desc="rPos column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="species", desc="species", default='', no_value='', visible=False, readonly=True)

    def set_dataset_metadata_from_comments(self, dataset):
        """Apply every comment-derived metadata setter, in a fixed order."""
        self.set_dataset_column_names_metadata(dataset)
        self.set_dataset_columnParameter_metadata(dataset)
        self.set_dataset_species_metadata(dataset)
        self.set_dataset_dbkey_metadata(dataset)

    def set_dataset_column_names_metadata(self, dataset):
        """Copy the 'column_names' entry into metadata when it is a list."""
        column_names = dataset.metadata.comment_metadata.get('column_names', None)
        if isinstance(column_names, list):
            dataset.metadata.column_names = column_names[:]

    def set_dataset_columnParameter_metadata(self, dataset):
        """Set each ColumnParameter metadata element named in the comment metadata.

        A value that cannot be converted to an integer is treated as 0.  The
        element is only assigned when the resulting number lies between 0 and
        ``dataset.metadata.columns`` inclusive.
        """
        for name, spec in dataset.metadata.spec.items():
            if not isinstance(spec.param, metadata.ColumnParameter):
                continue
            raw_value = dataset.metadata.comment_metadata.get(name, None)
            if raw_value is None:
                continue
            try:
                column = int(raw_value)
            except (TypeError, ValueError, OverflowError):
                # Only conversion failures are handled; OverflowError covers
                # a float infinity.  Unrelated errors are no longer swallowed.
                column = 0
            if 0 <= column <= dataset.metadata.columns:
                setattr(dataset.metadata, name, column)

    def set_dataset_species_metadata(self, dataset):
        """Copy the 'species' entry into metadata when it is a string."""
        species = dataset.metadata.comment_metadata.get('species', None)
        if isinstance(species, basestring):
            dataset.metadata.species = species

    def set_dataset_dbkey_metadata(self, dataset):
        """Set dbkey from the 'dbkey' entry ('?' when absent) if it is a string."""
        dbkey = dataset.metadata.comment_metadata.get('dbkey', '?')
        if isinstance(dbkey, basestring):
            dataset.metadata.dbkey = dbkey
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
141
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
class GDSnp(Fake):
    """ Webb's SNP file format """

    file_ext = 'gd_snp'

    MetadataElement(name="individual_names", desc="individual names", visible=False, readonly=True)
    MetadataElement(name="individual_columns", desc="individual columns", visible=False, readonly=True)

    def set_dataset_metadata_from_comments(self, dataset):
        """Apply the Fake metadata setters, then the per-individual metadata."""
        Fake.set_dataset_metadata_from_comments(self, dataset)
        self.set_dataset_individual_metadata(dataset)

    def set_dataset_individual_metadata(self, dataset):
        """Fill individual_names / individual_columns from the 'individuals' entry.

        The entry is used only when it is a list; each item must itself be a
        two-element list ``[name, column]``, other items are skipped.  A name
        that is not a string becomes ''.  A column that cannot be converted to
        an integer is treated as 0, and only columns in the range
        1..``dataset.metadata.columns`` are kept.  Metadata is assigned only
        when at least one individual was kept.
        """
        individual_list = dataset.metadata.comment_metadata.get('individuals', None)
        if not isinstance(individual_list, list):
            individual_list = []

        individual_names = []
        individual_columns = []

        for individual in individual_list:
            if not isinstance(individual, list) or len(individual) != 2:
                continue
            name, col = individual
            if not isinstance(name, basestring):
                name = ''
            try:
                column = int(col)
            except (TypeError, ValueError, OverflowError):
                # Only conversion failures are handled; OverflowError covers
                # a float infinity.  Unrelated errors are no longer swallowed.
                column = 0
            if 0 < column <= dataset.metadata.columns:
                individual_names.append(name)
                individual_columns.append(column)

        if individual_names:
            # Both lists are local to this call, so no defensive copy is needed.
            dataset.metadata.individual_names = individual_names
            dataset.metadata.individual_columns = individual_columns
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
178
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
class GDSap(Fake):
    """ Webb's SAP file format """

    file_ext = 'gd_sap'

    # Column-valued metadata elements declared in addition to those of Fake.
    MetadataElement(
        name="kegg_gene",
        desc="KEGG gene code column",
        param=metadata.ColumnParameter,
        default=0
    )
    MetadataElement(
        name="kegg_path",
        desc="KEGG pathway code/name column",
        param=metadata.ColumnParameter,
        default=0
    )
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
185