Mercurial > repos > miller-lab > genome_diversity
comparison lib/galaxy/datatypes/wsf.py @ 17:a3af29edcce2
Uploaded Miller Lab Devshed version a51c894f5bed
author | miller-lab |
---|---|
date | Fri, 28 Sep 2012 11:57:18 -0400 |
parents | 4b6590dd7250 |
children | cba0d7a63b82 |
comparison
equal
deleted
inserted
replaced
16:be0e2223c531 | 17:a3af29edcce2 |
---|---|
1 """ | |
2 SnpFile datatype | |
3 """ | |
4 | |
5 import galaxy.datatypes.data | |
6 import tempfile | |
7 import os | |
8 import simplejson | |
9 from galaxy import util | |
10 from galaxy.datatypes.sniff import * | |
11 from galaxy.datatypes.tabular import Tabular | |
12 from galaxy.datatypes.images import Html | |
13 from galaxy.datatypes import metadata | |
14 from galaxy.datatypes.metadata import MetadataElement | |
15 | |
class Wped(Html):
    """Composite Html-based datatype made of a ``.ped`` and a ``.map`` file."""
    allow_datatype_change = False
    composite_type = 'basic'
    file_ext = 'gd_ped'

    MetadataElement(name="base_name", desc="base name for all transformed versions of this genetic dataset", default='WpedData', readonly=True, set_in_upload=True)

    def __init__(self, **kwd):
        """Initialise the Html base and register both composite member files."""
        Html.__init__(self, **kwd)
        # (filename pattern, description) pairs, registered in this order.
        # Every member is declared with
        # substitute_name_with_metadata='base_name' and is_binary=False.
        members = (
            ('%s.ped', 'Pedigree File'),
            ('%s.map', 'Map File'),
        )
        for pattern, label in members:
            self.add_composite_file(pattern, description=label, substitute_name_with_metadata='base_name', is_binary=False)
26 self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = False ) | |
27 | |
class Individuals(Tabular):
    """Tabular datatype (extension ``gd_indivs``) with fixed column headings."""
    file_ext = 'gd_indivs'

    def __init__(self, **kwd):
        """Initialise the Tabular base, then set the three column headings."""
        Tabular.__init__(self, **kwd)
        self.column_names = ['Column', 'Name', 'Alias']

    def display_peek(self, dataset):
        """Return ``Tabular.make_html_table`` output for *dataset*, passing
        ``self.column_names`` as the column names."""
        headings = self.column_names
        return Tabular.make_html_table(self, dataset, column_names=headings)
36 | |
class DatasetComments(object):
    """Collect the leading comment lines of a dataset's file.

    Only the unbroken run of lines at the top of the file that start with
    ``comment_string`` is kept; reading stops at the first line that does not
    start with it.  The prefix is stripped from each stored line, the line
    ending is kept.
    """

    def __init__(self, dataset, comment_string='#'):
        """Read the comment header of *dataset* immediately.

        dataset        -- object providing ``has_data()`` and ``file_name``
        comment_string -- prefix marking a comment line (default ``'#'``)
        """
        self.dataset = dataset
        self.comment_string = comment_string
        self.comment_string_len = len(comment_string)
        self._comments = []
        self._read_comments()

    def _read_comments(self):
        """Fill ``self._comments`` from the top of the dataset file.

        This is best effort: if the file cannot be opened or read, whatever
        was collected before the failure is kept and no exception escapes.
        """
        if not self.dataset.has_data():
            return

        import sys
        # The 'U' flag requests universal newlines on Python 2.  Python 3
        # does that by default and rejects 'U' from 3.11 on with ValueError,
        # which the previous bare ``except`` silently swallowed.
        mode = 'rU' if sys.version_info[0] < 3 else 'r'

        try:
            # ``with`` guarantees the handle is closed, including on the
            # early ``break`` and on read errors.
            with open(self.dataset.file_name, mode) as handle:
                for line in handle:
                    if not line.startswith(self.comment_string):
                        break
                    self._comments.append(line[self.comment_string_len:])
        except (IOError, OSError, UnicodeError):
            # Unreadable or undecodable file: keep the comments read so far.
            pass

    def __str__(self):
        """Return all collected comment lines concatenated in file order."""
        return "".join(self._comments)

    @property
    def comments(self):
        """List of comment lines with the comment prefix removed."""
        return self._comments
63 | |
class DatasetCommentMetadata(object):
    """Decode the JSON held in a dataset's leading comment block.

    The comment lines are gathered with ``DatasetComments``, concatenated and
    parsed as JSON.  ``comment_metadata`` is always a dict: it stays empty
    when the comment block is not valid JSON or does not decode to a JSON
    object.
    """

    def __init__(self, dataset, comment_string='#'):
        """Read and decode the comment block of *dataset*.

        dataset        -- dataset object handed on to ``DatasetComments``
        comment_string -- prefix marking a comment line (default ``'#'``)
        """
        self.dataset_comments = DatasetComments(dataset, comment_string)
        self._comment_metadata = {}
        self._decode_dataset_comments()

    def _decode_dataset_comments(self):
        """Parse the joined comment text and store it if it is a JSON object."""
        dataset_comment_string = str(self.dataset_comments)
        try:
            decoded = simplejson.loads(dataset_comment_string)
        except simplejson.JSONDecodeError:
            # No comment block, or one that is not JSON: keep the empty dict.
            return
        # Users of this value in this module call ``.copy()`` and ``.get()``
        # on it, so a top-level list, number or string is ignored rather
        # than stored.
        if isinstance(decoded, dict):
            self._comment_metadata = decoded

    @property
    def comment_metadata(self):
        """Dict decoded from the comment block (empty if none was usable)."""
        return self._comment_metadata
80 | |
class AnnotatedTabular(Tabular):
    """ Tabular file with optional comment block containing JSON to be imported into metadata """
    MetadataElement(name="comment_metadata", desc="comment metadata", param=metadata.DictParameter, visible=False, readonly=True)

    def set_meta(self, dataset, overwrite=True, **kwd):
        """Run Tabular's metadata detection, then import the comment-block JSON.

        The comment block is decoded only while ``comment_metadata`` is still
        ``None``; a copy of the decoded dict is stored on the dataset and
        ``set_dataset_metadata_from_comments`` is then called.
        """
        Tabular.set_meta(self, dataset, overwrite=overwrite, max_data_lines=None, max_guess_type_data_lines=1000, **kwd)
        if dataset.metadata.comment_metadata is not None:
            return
        decoded = DatasetCommentMetadata(dataset).comment_metadata
        dataset.metadata.comment_metadata = decoded.copy()
        # NOTE(review): assumed to run only on this first-time path (inside
        # the ``is None`` guard) -- confirm against the repository copy.
        self.set_dataset_metadata_from_comments(dataset)

    def set_dataset_metadata_from_comments(self, dataset):
        """Hook for subclasses; the base implementation does nothing."""

    def set_peek(self, dataset, line_count=None, is_multi_byte=False):
        """Set the peek with ``WIDTH='unlimited'`` and ``skipchars=['#']``.

        The call goes to the ``set_peek`` that follows ``Tabular`` in the
        method resolution order, not to ``Tabular.set_peek`` itself.
        """
        peek_options = dict(
            line_count=line_count,
            is_multi_byte=is_multi_byte,
            WIDTH='unlimited',
            skipchars=['#'],
        )
        super(Tabular, self).set_peek(dataset, **peek_options)

    def display_peek(self, dataset):
        """Return the formatted HTML of the peek, built with ``skipchars=['#']``."""
        return Tabular.make_html_table(self, dataset, skipchars=['#'])
101 | |
class Fake(AnnotatedTabular):
    """AnnotatedTabular with scaffold/pos/ref/rPos column metadata and a species.

    Metadata values are filled in from the decoded comment-block dictionary
    stored in ``dataset.metadata.comment_metadata``.
    """
    MetadataElement(name="scaffold", desc="scaffold column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="pos", desc="pos column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="ref", desc="ref column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="rPos", desc="rPos column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="species", desc="species", default='', no_value='', visible=False, readonly=True)

    def set_dataset_metadata_from_comments(self, dataset):
        """Copy column names, column parameters, species and dbkey from the comments."""
        self.set_dataset_column_names_metadata(dataset)
        self.set_dataset_columnParameter_metadata(dataset)
        self.set_dataset_species_metadata(dataset)
        self.set_dataset_dbkey_metadata(dataset)

    def set_dataset_column_names_metadata(self, dataset):
        """Set ``column_names`` from the ``'column_names'`` entry when it is a list."""
        value_from_comment_metadata = dataset.metadata.comment_metadata.get('column_names', None)
        if isinstance(value_from_comment_metadata, list):
            dataset.metadata.column_names = value_from_comment_metadata[:]

    def set_dataset_columnParameter_metadata(self, dataset):
        """Set every ColumnParameter metadata element named in the comments.

        A value that cannot be converted to an integer is treated as 0.  The
        result is stored only when it lies in ``0 .. dataset.metadata.columns``.
        """
        comment_metadata = dataset.metadata.comment_metadata
        for name, spec in dataset.metadata.spec.items():
            if not isinstance(spec.param, metadata.ColumnParameter):
                continue
            value_from_comment_metadata = comment_metadata.get(name, None)
            if value_from_comment_metadata is None:
                continue
            try:
                i = int(value_from_comment_metadata)
            except (TypeError, ValueError, OverflowError):
                # Non-numeric text, containers, NaN or infinity: fall back
                # to 0 instead of hiding every exception with a bare except.
                i = 0
            if 0 <= i <= dataset.metadata.columns:
                setattr(dataset.metadata, name, i)

    def set_dataset_species_metadata(self, dataset):
        """Set ``species`` from the ``'species'`` entry when it is a string."""
        value_from_comment_metadata = dataset.metadata.comment_metadata.get('species', None)
        if isinstance(value_from_comment_metadata, basestring):
            dataset.metadata.species = value_from_comment_metadata

    def set_dataset_dbkey_metadata(self, dataset):
        """Set ``dbkey`` from the ``'dbkey'`` entry, using ``'?'`` when it is absent."""
        value_from_comment_metadata = dataset.metadata.comment_metadata.get('dbkey', '?')
        if isinstance(value_from_comment_metadata, basestring):
            dataset.metadata.dbkey = value_from_comment_metadata
141 | |
class GDSnp(Fake):
    """ Webb's SNP file format """
    file_ext = 'gd_snp'

    MetadataElement(name="individual_names", desc="individual names", visible=False, readonly=True)
    MetadataElement(name="individual_columns", desc="individual columns", visible=False, readonly=True)

    def set_dataset_metadata_from_comments(self, dataset):
        """Apply the metadata handled by ``Fake``, then the per-individual metadata."""
        Fake.set_dataset_metadata_from_comments(self, dataset)
        self.set_dataset_individual_metadata(dataset)

    def set_dataset_individual_metadata(self, dataset):
        """Set ``individual_names`` / ``individual_columns`` from the comments.

        The ``'individuals'`` entry is expected to be a list of two-item
        lists ``[name, column]``; entries of any other shape are skipped.  A
        non-string name becomes ``''``.  An entry is kept only when its
        column converts to an integer in ``1 .. dataset.metadata.columns``.
        Nothing is assigned when no entry is kept.
        """
        individual_list = dataset.metadata.comment_metadata.get('individuals', None)
        if not isinstance(individual_list, list):
            individual_list = []

        individual_names = []
        individual_columns = []

        for individual in individual_list:
            if not isinstance(individual, list) or len(individual) != 2:
                continue
            name, col = individual
            if not isinstance(name, basestring):
                name = ''
            try:
                c = int(col)
            except (TypeError, ValueError, OverflowError):
                # Unconvertible column: 0 fails the range test below, so the
                # entry is dropped.  Narrowed from a bare except.
                c = 0
            if 0 < c <= dataset.metadata.columns:
                individual_names.append(name)
                individual_columns.append(c)

        if individual_names:
            dataset.metadata.individual_names = individual_names[:]
            dataset.metadata.individual_columns = individual_columns[:]
178 | |
class GDSap(Fake):
    """ Webb's SAP file format

    Declares two further column metadata elements on top of those of
    ``Fake``: ``kegg_gene`` and ``kegg_path``.
    """
    file_ext = 'gd_sap'

    MetadataElement(name="kegg_gene", desc="KEGG gene code column", param=metadata.ColumnParameter, default=0)
    MetadataElement(name="kegg_path", desc="KEGG pathway code/name column", param=metadata.ColumnParameter, default=0)
185 |