Mercurial > repos > immport-devteam > flow_datatypes
comparison flow.py @ 0:ed90d166300e draft default tip
Uploaded
author | immport-devteam |
---|---|
date | Mon, 27 Feb 2017 15:18:40 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ed90d166300e |
---|---|
1 # -*- coding: utf-8 -*- | |
2 ###################################################################### | |
3 # Copyright (c) 2016 Northrop Grumman. | |
4 # All rights reserved. | |
5 ###################################################################### | |
6 | |
7 """ | |
8 Flow analysis datatypes. | |
9 """ | |
10 | |
11 import gzip | |
12 import json | |
13 import logging | |
14 import os | |
15 import re | |
16 import subprocess | |
17 import tempfile | |
18 import rpy2.interactive as r | |
19 import rpy2.interactive.packages | |
20 | |
21 from galaxy.datatypes.binary import Binary | |
22 from galaxy.datatypes.tabular import Tabular | |
23 from galaxy.datatypes.data import get_file_peek, Text | |
24 from galaxy.datatypes.metadata import MetadataElement | |
25 from galaxy.util import nice_size, string_as_bool | |
26 from . import data | |
27 | |
28 log = logging.getLogger(__name__) | |
29 | |
30 | |
def is_number(s):
    """Return True if *s* can be converted to a float, False otherwise.

    Anything ``float()`` accepts counts as a number, which includes
    strings such as "1e3", "nan" and "inf".  Values that ``float()``
    rejects by type (for example ``None``) are reported as not numeric
    instead of raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: unparsable string; TypeError: unsupported type.
        return False
    return True
37 | |
38 | |
class FCS(Binary):
    """Class describing an FCS binary file."""
    file_ext = "fcs"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        For a purged dataset both fields report that the file is gone;
        otherwise the peek is a fixed label and the blurb is the
        human-readable file size.  ``is_multi_byte`` is accepted for
        interface compatibility and is not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = "Binary FCS file"
            # nice_size is imported directly from galaxy.util at the top
            # of this module, so there is no need to reach it via `data`.
            dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek, or a size-based label if it is unavailable."""
        try:
            return dataset.peek
        except Exception:
            return "Binary FCSfile (%s)" % nice_size(dataset.get_size())

    def sniff(self, filename):
        """
        Checking if the file is in FCS format. Should read FCS2.0, FCS3.0
        and FCS3.1

        The check is delegated to flowCore's ``isFCSfile`` through rpy2.
        Any failure (flowCore not installed, R error, unreadable file)
        is treated as "not an FCS file" so the sniffer never raises.
        """
        try:
            # Loading the R package is part of the guarded section: a
            # missing package must not break datatype detection.
            r.packages.importr("flowCore")
            rlib = r.packages.packages
            fcsobject = rlib.flowCore.isFCSfile(filename)
            # isFCSfile returns an R vector; take its first element.
            return bool(list(fcsobject)[0])
        except Exception:
            log.debug("FCS sniff failed for %s", filename, exc_info=True)
            return False

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'


# Register the FCS class as a sniffable binary format under the "fcs" name.
Binary.register_sniffable_binary_format("fcs", "fcs", FCS)
74 | |
class FlowText(Tabular):
    """Class describing a Flow Text file."""
    file_ext = "flowtext"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset gets "file gone" messages; otherwise the peek
        is a fixed label and the blurb is the human-readable file size.
        ``is_multi_byte`` is accepted for interface compatibility and is
        not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = "Text Flow file"
            # nice_size is imported directly from galaxy.util at the top
            # of this module, so there is no need to reach it via `data`.
            dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek, or a size-based label if it is unavailable."""
        try:
            return dataset.peek
        except Exception:
            return "Text Flow file (%s)" % nice_size(dataset.get_size())

    def sniff(self, filename):
        """Quick test on file formatting and values.

        The first line is skipped (treated as a header); the file
        matches when every tab-separated field of the second line is
        numeric.
        """
        with open(filename, "r") as f:
            f.readline()  # header line, content not checked
            values = f.readline().strip().split("\t")
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
106 | |
107 | |
class FlowClustered(Tabular):
    """Class describing a Flow Text that has been clustered through FLOCK"""
    file_ext = "flowclr"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset gets "file gone" messages; otherwise the peek
        is a fixed label and the blurb is the human-readable file size.
        ``is_multi_byte`` is accepted for interface compatibility and is
        not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = "Text Flow Clustered file"
            # nice_size is imported directly from galaxy.util at the top
            # of this module, so there is no need to reach it via `data`.
            dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek, or a size-based label if it is unavailable."""
        try:
            return dataset.peek
        except Exception:
            return "Flow Text Clustered file (%s)" % nice_size(dataset.get_size())

    def sniff(self, filename):
        """Quick test on headers and values.

        The file matches when the last header column is "Population"
        and every tab-separated field of the second line is numeric.
        """
        with open(filename, "r") as f:
            last_header = f.readline().strip().split("\t")[-1]
            if last_header != "Population":
                return False
            values = f.readline().strip().split("\t")
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
141 | |
142 | |
class FlowMFI(Tabular):
    """Class describing a Flow MFI file"""
    file_ext = "flowmfi"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset gets "file gone" messages; otherwise the peek
        is a fixed label and the blurb is the human-readable file size.
        ``is_multi_byte`` is accepted for interface compatibility and is
        not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = "MFI Flow file"
            # nice_size is imported directly from galaxy.util at the top
            # of this module, so there is no need to reach it via `data`.
            dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek, or a size-based label if it is unavailable."""
        try:
            return dataset.peek
        except Exception:
            return "MFI Flow file (%s)" % nice_size(dataset.get_size())

    def sniff(self, filename):
        """Quick test on file formatting and values.

        The file matches when the first header column is "Population"
        and every tab-separated field of the second line is numeric.
        """
        with open(filename, "r") as f:
            first_header = f.readline().strip().split("\t")[0]
            if first_header != "Population":
                return False
            values = f.readline().strip().split("\t")
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
176 | |
177 | |
class FlowStats1(Tabular):
    """Class describing a Flow Stats file"""
    file_ext = "flowstat1"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset gets "file gone" messages; otherwise the peek
        is a fixed label and the blurb is the human-readable file size.
        ``is_multi_byte`` is accepted for interface compatibility and is
        not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = "Flow Stats1 file"
            # nice_size is imported directly from galaxy.util at the top
            # of this module, so there is no need to reach it via `data`.
            dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek, or a size-based label if it is unavailable."""
        try:
            return dataset.peek
        except Exception:
            return "Flow Stats1 file (%s)" % nice_size(dataset.get_size())

    def sniff(self, filename):
        """Quick test on file formatting.

        The file matches when the first header column is "FileID".
        Only the header line is inspected.
        """
        with open(filename, "r") as f:
            first_header = f.readline().strip().split("\t")[0]
        return first_header == "FileID"

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
207 | |
208 | |
class FlowStats2(Tabular):
    """Class describing a Flow Stats file"""
    file_ext = "flowstat2"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset gets "file gone" messages; otherwise the peek
        is a fixed label and the blurb is the human-readable file size.
        ``is_multi_byte`` is accepted for interface compatibility and is
        not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = "Flow Stats2 file"
            # nice_size is imported directly from galaxy.util at the top
            # of this module, so there is no need to reach it via `data`.
            dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek, or a size-based label if it is unavailable."""
        try:
            return dataset.peek
        except Exception:
            return "Flow Stats2 file (%s)" % nice_size(dataset.get_size())

    def sniff(self, filename):
        """Quick test on file formatting.

        The file matches when the last header column is "SampleName".
        Only the header line is inspected.
        """
        with open(filename, "r") as f:
            last_header = f.readline().strip().split("\t")[-1]
        return last_header == "SampleName"

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
238 | |
239 | |
class FlowStats3(Tabular):
    """Class describing a Flow Stats file"""
    file_ext = "flowstat3"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset gets "file gone" messages; otherwise the peek
        is a fixed label and the blurb is the human-readable file size.
        ``is_multi_byte`` is accepted for interface compatibility and is
        not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = "Flow Stats3 file"
            # nice_size is imported directly from galaxy.util at the top
            # of this module, so there is no need to reach it via `data`.
            dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek, or a size-based label if it is unavailable."""
        try:
            return dataset.peek
        except Exception:
            return "Flow Stats3 file (%s)" % nice_size(dataset.get_size())

    def sniff(self, filename):
        """Quick test on file formatting and values.

        The file matches when the last header column is
        "Percentage_stdev" and every tab-separated field of the second
        line is numeric.
        """
        with open(filename, "r") as f:
            last_header = f.readline().strip().split("\t")[-1]
            if last_header != "Percentage_stdev":
                return False
            values = f.readline().strip().split("\t")
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
273 | |
274 | |
class FlowScore(Tabular):
    """Class describing a Flow Score file"""
    file_ext = "flowscore"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset gets "file gone" messages; otherwise the peek
        is a fixed label and the blurb is the human-readable file size.
        ``is_multi_byte`` is accepted for interface compatibility and is
        not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = "Flow Score file"
            # nice_size is imported directly from galaxy.util at the top
            # of this module, so there is no need to reach it via `data`.
            dataset.blurb = nice_size(dataset.get_size())

    def display_peek(self, dataset):
        """Return the stored peek, or a size-based label if it is unavailable."""
        try:
            return dataset.peek
        except Exception:
            return "Flow Score file (%s)" % nice_size(dataset.get_size())

    def sniff(self, filename):
        """Quick test on file formatting and values.

        The file matches when the first header column is
        "Population_ID" and every tab-separated field of the second
        line is numeric.
        """
        with open(filename, "r") as f:
            first_header = f.readline().strip().split("\t")[0]
            if first_header != "Population_ID":
                return False
            values = f.readline().strip().split("\t")
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'