12
|
1 """
|
|
2 SnpFile datatype
|
|
3 """
|
|
4
|
|
5 import galaxy.datatypes.data
|
|
6 import tempfile
|
|
7 import os
|
|
8 import simplejson
|
|
9 from galaxy import util
|
|
10 from galaxy.datatypes.sniff import *
|
|
11 from galaxy.datatypes.tabular import Tabular
|
|
12 from galaxy.datatypes.images import Html
|
|
13 from galaxy.datatypes import metadata
|
|
14 from galaxy.datatypes.metadata import MetadataElement
|
|
15
|
|
class Wped(Html):
    """Composite ``gd_ped`` datatype made of a pedigree file and a map file.

    Both component files are registered with a ``%s`` name pattern and
    ``substitute_name_with_metadata='base_name'``; presumably the placeholder
    is filled from the ``base_name`` metadata value -- confirm against
    ``add_composite_file``.
    """

    file_ext = 'gd_ped'
    composite_type = 'basic'
    allow_datatype_change = False

    MetadataElement(
        name="base_name",
        desc="base name for all transformed versions of this genetic dataset",
        default='WpedData',
        readonly=True,
        set_in_upload=True
    )

    def __init__(self, **kwd):
        """Initialise the Html base and register the two text component files."""
        Html.__init__(self, **kwd)
        # (file name pattern, human readable description), in registration order.
        components = (
            ('%s.ped', 'Pedigree File'),
            ('%s.map', 'Map File'),
        )
        for pattern, label in components:
            self.add_composite_file(
                pattern,
                description=label,
                substitute_name_with_metadata='base_name',
                is_binary=False
            )
|
|
27
|
|
class Individuals(Tabular):
    """Tabular ``gd_indivs`` datatype with a fixed set of column headings."""

    file_ext = 'gd_indivs'

    def __init__(self, **kwd):
        """Initialise the Tabular base and set the fixed column headings."""
        Tabular.__init__(self, **kwd)
        # Headings passed to the HTML peek table in display_peek().
        self.column_names = ['Column', 'Name', 'Alias']

    def display_peek(self, dataset):
        """Return the result of ``Tabular.make_html_table`` using this datatype's column names."""
        headings = self.column_names
        return Tabular.make_html_table(self, dataset, column_names=headings)
|
|
36
|
|
class DatasetComments(object):
    """Leading comment block of a dataset file.

    On construction the dataset's file is scanned from the top; every
    consecutive line that starts with ``comment_string`` is collected (with
    the prefix removed, line ending kept) until the first line that does not
    start with it.

    dataset -- object providing ``has_data()`` and ``file_name``
    comment_string -- prefix that marks a comment line (default ``'#'``)
    """

    def __init__(self, dataset, comment_string='#'):
        self.dataset = dataset
        self.comment_string = comment_string
        self.comment_string_len = len(comment_string)
        self._comments = []
        self._read_comments()

    def _read_comments(self):
        """Collect the leading comment lines into ``self._comments``.

        Reading is best effort: an unreadable or undecodable file leaves
        whatever was collected so far (possibly nothing) and never raises.
        """
        if not self.dataset.has_data():
            return

        # The 'U' flag requests universal newlines on Python 2; Python 3 does
        # that by default in text mode and rejects 'U' outright from 3.11 on.
        import sys
        mode = 'rU' if sys.version_info[0] < 3 else 'r'

        try:
            # The context manager closes the handle even when we stop early.
            with open(self.dataset.file_name, mode) as handle:
                for line in handle:
                    if not line.startswith(self.comment_string):
                        # The comment block ends at the first other line.
                        break
                    self._comments.append(line[self.comment_string_len:])
        except (IOError, OSError, ValueError):
            # I/O failure or undecodable content (UnicodeDecodeError is a
            # ValueError): keep what was read so far, as comments are optional.
            pass

    def __str__(self):
        """Return the collected comment lines concatenated into one string."""
        return "".join(self._comments)

    @property
    def comments(self):
        """List of collected comment lines, prefix stripped."""
        return self._comments
|
|
63
|
|
class DatasetCommentMetadata(object):
    """Metadata dictionary decoded from a dataset's leading comment block.

    The comment text gathered by ``DatasetComments`` is parsed as JSON.
    ``comment_metadata`` is the decoded JSON object, or an empty dict when
    the text is not valid JSON or does not decode to an object.

    dataset -- dataset whose comment block is read
    comment_string -- prefix that marks a comment line (default ``'#'``)
    """

    def __init__(self, dataset, comment_string='#'):
        self.dataset_comments = DatasetComments(dataset, comment_string)
        self._comment_metadata = {}
        self._decode_dataset_comments()

    def _decode_dataset_comments(self):
        """Parse the comment block as JSON, keeping it only if it is a dict."""
        dataset_comment_string = str(self.dataset_comments)
        try:
            decoded = simplejson.loads(dataset_comment_string)
        except simplejson.JSONDecodeError:
            # No comment block, or one that is not JSON: keep the empty dict.
            return
        # Valid JSON can also be a list, number or string; users of this
        # class in this file call .copy() and .get() on the result, so
        # anything other than an object is treated as "no metadata".
        if isinstance(decoded, dict):
            self._comment_metadata = decoded

    @property
    def comment_metadata(self):
        """Decoded metadata dict (empty when nothing usable was found)."""
        return self._comment_metadata
|
|
80
|
|
class AnnotatedTabular(Tabular):
    """Tabular file with optional comment block containing JSON to be imported into metadata."""

    MetadataElement(
        name="comment_metadata",
        desc="comment metadata",
        param=metadata.DictParameter,
        visible=False,
        readonly=True
    )

    def set_meta(self, dataset, overwrite=True, **kwd):
        """Set the tabular metadata, then import the comment metadata once.

        The comment block is only decoded while ``comment_metadata`` is still
        ``None``; after that the stored value is left untouched.
        """
        Tabular.set_meta(
            self,
            dataset,
            overwrite=overwrite,
            max_data_lines=None,
            max_guess_type_data_lines=1000,
            **kwd
        )
        if dataset.metadata.comment_metadata is not None:
            return
        decoded = DatasetCommentMetadata(dataset)
        dataset.metadata.comment_metadata = decoded.comment_metadata.copy()
        # NOTE(review): the hook is assumed to run only on this first import
        # (i.e. under the same guard as the assignment above) -- confirm.
        self.set_dataset_metadata_from_comments(dataset)

    def set_dataset_metadata_from_comments(self, dataset):
        """Hook for subclasses; the base implementation does nothing."""
        pass

    def set_peek(self, dataset, line_count=None, is_multi_byte=False):
        """Set the peek with unlimited width, passing ``'#'`` as a skip character."""
        # super(Tabular, ...) starts the lookup *after* Tabular in the MRO, so
        # Tabular's own set_peek is bypassed; presumably because the
        # WIDTH/skipchars keywords are only accepted further up -- confirm.
        parent = super(Tabular, self)
        parent.set_peek(
            dataset,
            line_count=line_count,
            is_multi_byte=is_multi_byte,
            WIDTH='unlimited',
            skipchars=['#']
        )

    def display_peek(self, dataset):
        """Returns formatted html of peek"""
        skipped = ['#']
        return Tabular.make_html_table(self, dataset, skipchars=skipped)
|
|
101
|
|
class Fake(AnnotatedTabular):
    """Annotated tabular base whose metadata is filled from the comment block.

    Column names, every ``ColumnParameter`` metadata element (scaffold, pos,
    ref, rPos and any declared by subclasses), species and dbkey are read
    from ``dataset.metadata.comment_metadata``.
    """

    MetadataElement(name="scaffold", desc="scaffold column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="pos", desc="pos column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="ref", desc="ref column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="rPos", desc="rPos column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="species", desc="species", default='', no_value='', visible=False, readonly=True)

    def set_dataset_metadata_from_comments(self, dataset):
        """Apply every comment-derived metadata setter to ``dataset``."""
        self.set_dataset_column_names_metadata(dataset)
        self.set_dataset_columnParameter_metadata(dataset)
        self.set_dataset_species_metadata(dataset)
        self.set_dataset_dbkey_metadata(dataset)

    def set_dataset_column_names_metadata(self, dataset):
        """Copy the ``column_names`` list from the comment metadata, if it is a list."""
        value_from_comment_metadata = dataset.metadata.comment_metadata.get('column_names', None)
        if isinstance(value_from_comment_metadata, list):
            dataset.metadata.column_names = value_from_comment_metadata[:]

    def set_dataset_columnParameter_metadata(self, dataset):
        """Set each ``ColumnParameter`` element named in the comment metadata.

        A value that cannot be converted to an integer is treated as 0; the
        element is only assigned when the number lies in
        ``0 .. dataset.metadata.columns``.
        """
        for name, spec in dataset.metadata.spec.items():
            if not isinstance(spec.param, metadata.ColumnParameter):
                continue
            value_from_comment_metadata = dataset.metadata.comment_metadata.get(name, None)
            if value_from_comment_metadata is None:
                continue
            try:
                i = int(value_from_comment_metadata)
            except (TypeError, ValueError, OverflowError):
                # Non-numeric JSON value (or an infinite float): fall back to 0.
                i = 0
            if 0 <= i <= dataset.metadata.columns:
                setattr(dataset.metadata, name, i)

    def set_dataset_species_metadata(self, dataset):
        """Set ``species`` from the comment metadata when it is a string."""
        value_from_comment_metadata = dataset.metadata.comment_metadata.get('species', None)
        if isinstance(value_from_comment_metadata, basestring):
            dataset.metadata.species = value_from_comment_metadata

    def set_dataset_dbkey_metadata(self, dataset):
        """Set ``dbkey`` from the comment metadata, using ``'?'`` when the key is absent."""
        value_from_comment_metadata = dataset.metadata.comment_metadata.get('dbkey', '?')
        if isinstance(value_from_comment_metadata, basestring):
            dataset.metadata.dbkey = value_from_comment_metadata
|
|
141
|
|
class GDSnp(Fake):
    """ Webb's SNP file format """

    file_ext = 'gd_snp'

    MetadataElement(name="individual_names", desc="individual names", visible=False, readonly=True)
    MetadataElement(name="individual_columns", desc="individual columns", visible=False, readonly=True)

    def set_dataset_metadata_from_comments(self, dataset):
        """Apply the inherited setters, then the per-individual metadata."""
        Fake.set_dataset_metadata_from_comments(self, dataset)
        self.set_dataset_individual_metadata(dataset)

    def set_dataset_individual_metadata(self, dataset):
        """Set ``individual_names`` / ``individual_columns`` from the comments.

        The ``individuals`` entry of the comment metadata is read as a list of
        two-element ``[name, column]`` lists. Entries of any other shape are
        skipped, a non-string name becomes ``''``, and an entry is kept only
        when its column converts to an integer in
        ``1 .. dataset.metadata.columns``. Nothing is assigned when no entry
        qualifies.
        """
        individual_list = dataset.metadata.comment_metadata.get('individuals', None)
        if not isinstance(individual_list, list):
            individual_list = []

        individual_names = []
        individual_columns = []

        for individual in individual_list:
            if not isinstance(individual, list) or len(individual) != 2:
                continue
            name, col = individual
            if not isinstance(name, basestring):
                name = ''
            try:
                c = int(col)
            except (TypeError, ValueError, OverflowError):
                # Unusable column value: 0 fails the range check below.
                c = 0
            if 0 < c <= dataset.metadata.columns:
                individual_names.append(name)
                individual_columns.append(c)

        if individual_names:
            # Both lists are freshly built locals, so no defensive copy is needed.
            dataset.metadata.individual_names = individual_names
            dataset.metadata.individual_columns = individual_columns
|
|
178
|
|
class GDSap(Fake):
    """ Webb's SAP file format """

    file_ext = 'gd_sap'

    # Column-role metadata declared in addition to the elements inherited from Fake.
    MetadataElement(
        name="kegg_gene",
        desc="KEGG gene code column",
        param=metadata.ColumnParameter,
        default=0
    )
    MetadataElement(
        name="kegg_path",
        desc="KEGG pathway code/name column",
        param=metadata.ColumnParameter,
        default=0
    )
|
|
185
|