0
|
1 # -*- coding: utf-8 -*-
|
|
2 ######################################################################
|
|
3 # Copyright (c) 2016 Northrop Grumman.
|
|
4 # All rights reserved.
|
|
5 ######################################################################
|
|
6
|
|
7 """
|
|
8 Flow analysis datatypes.
|
|
9 """
|
|
10
|
|
11 import gzip
|
|
12 import json
|
|
13 import logging
|
|
14 import os
|
|
15 import re
|
|
16 import subprocess
|
|
17 import tempfile
|
|
18 import rpy2.interactive as r
|
|
19 import rpy2.interactive.packages
|
|
20
|
|
21 from galaxy.datatypes.binary import Binary
|
|
22 from galaxy.datatypes.tabular import Tabular
|
|
23 from galaxy.datatypes.data import get_file_peek, Text
|
|
24 from galaxy.datatypes.metadata import MetadataElement
|
|
25 from galaxy.util import nice_size, string_as_bool
|
|
26 from . import data
|
|
27
|
|
28 log = logging.getLogger(__name__)
|
|
29
|
|
30
|
|
def is_number(s):
    """Return True if ``s`` can be converted by ``float()``, otherwise False.

    Anything ``float()`` accepts counts as a number (for example "1.5",
    "1e3", "nan", or an actual int/float).  Inputs that ``float()`` rejects,
    whether a malformed string or an unsupported type such as ``None``,
    yield False rather than an exception.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: string that is not a number.
        # TypeError: object float() cannot convert at all (None, list, ...).
        return False
    return True
|
|
37
|
|
38
|
|
class FCS(Binary):
    """Class describing an FCS binary file"""
    file_ext = "fcs"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A dataset that has not been purged gets a fixed description and its
        size as formatted by ``data.nice_size``; a purged dataset gets
        placeholder text.  ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Binary FCS file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a size-based description if that fails."""
        try:
            return dataset.peek
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed by this fallback.
            return "Binary FCS file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        Checking if the file is in FCS format. Should read FCS2.0, FCS3.0
        and FCS3.1

        Delegates to the R ``flowCore`` function ``isFCSfile`` and returns
        the first element of its result.  Returns False if loading flowCore
        or calling ``isFCSfile`` raises.
        """
        try:
            # Importing flowCore inside the try means a missing or broken R
            # package makes the sniffer answer False instead of raising.
            r.packages.importr("flowCore")
            rlib = r.packages.packages
            fcsobject = rlib.flowCore.isFCSfile(filename)
            return list(fcsobject)[0]
        except Exception:
            return False

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'


# Register the FCS class with Binary as a sniffable binary format.
Binary.register_sniffable_binary_format("fcs", "fcs", FCS)
|
|
74
|
|
class FlowText(Tabular):
    """Class describing a Flow Text file"""
    file_ext = "flowtext"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A dataset that has not been purged gets a fixed description and its
        size as formatted by ``data.nice_size``; a purged dataset gets
        placeholder text.  ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Text Flow file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a size-based description if that fails."""
        try:
            return dataset.peek
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed by this fallback.
            return "Text Flow file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values.

        Skips the first line and returns True only if every tab-separated
        field of the second line parses as a number.  A file without a
        second line, or one that cannot be decoded as text, is rejected.
        """
        try:
            with open(filename, "r") as f:
                f.readline()  # first line is skipped, only the second is checked
                values = f.readline().strip().split("\t")
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a flow text file.
            return False
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
|
|
106
|
|
107
|
|
class FlowClustered(Tabular):
    """Class describing a Flow Text that has been clustered through FLOCK"""
    file_ext = "flowclr"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A dataset that has not been purged gets a fixed description and its
        size as formatted by ``data.nice_size``; a purged dataset gets
        placeholder text.  ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Text Flow Clustered file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a size-based description if that fails."""
        try:
            return dataset.peek
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed by this fallback.
            return "Flow Text Clustered file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on headers and values.

        Returns True only if the last tab-separated header field is exactly
        "Population" and every field of the second line parses as a number.
        A file that cannot be decoded as text is rejected.
        """
        try:
            with open(filename, "r") as f:
                last_header = f.readline().strip().split("\t")[-1]
                if last_header != "Population":
                    return False
                values = f.readline().strip().split("\t")
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a clustered file.
            return False
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
|
|
141
|
|
142
|
|
class FlowMFI(Tabular):
    """Class describing a Flow MFI file"""
    file_ext = "flowmfi"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A dataset that has not been purged gets a fixed description and its
        size as formatted by ``data.nice_size``; a purged dataset gets
        placeholder text.  ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "MFI Flow file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a size-based description if that fails."""
        try:
            return dataset.peek
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed by this fallback.
            return "MFI Flow file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values.

        Returns True only if the first tab-separated header field is exactly
        "Population" and every field of the second line parses as a number.
        A file that cannot be decoded as text is rejected.
        """
        try:
            with open(filename, "r") as f:
                first_header = f.readline().strip().split("\t")[0]
                if first_header != "Population":
                    return False
                values = f.readline().strip().split("\t")
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not an MFI file.
            return False
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
|
|
176
|
|
177
|
|
class FlowStats1(Tabular):
    """Class describing a Flow Stats file"""
    file_ext = "flowstat1"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A dataset that has not been purged gets a fixed description and its
        size as formatted by ``data.nice_size``; a purged dataset gets
        placeholder text.  ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Stats1 file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a size-based description if that fails."""
        try:
            return dataset.peek
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed by this fallback.
            return "Flow Stats1 file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting.

        Returns True only if the first tab-separated field of the first
        line is exactly "FileID".  A file that cannot be decoded as text is
        rejected.
        """
        try:
            with open(filename, "r") as f:
                first_header = f.readline().strip().split("\t")[0]
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a stats file.
            return False
        return first_header == "FileID"

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
|
|
207
|
|
208
|
|
class FlowStats2(Tabular):
    """Class describing a Flow Stats file"""
    file_ext = "flowstat2"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A dataset that has not been purged gets a fixed description and its
        size as formatted by ``data.nice_size``; a purged dataset gets
        placeholder text.  ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Stats2 file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a size-based description if that fails."""
        try:
            return dataset.peek
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed by this fallback.
            return "Flow Stats2 file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting.

        Returns True only if the last tab-separated field of the first line
        is exactly "SampleName".  A file that cannot be decoded as text is
        rejected.
        """
        try:
            with open(filename, "r") as f:
                last_header = f.readline().strip().split("\t")[-1]
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a stats file.
            return False
        return last_header == "SampleName"

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
|
|
238
|
|
239
|
|
class FlowStats3(Tabular):
    """Class describing a Flow Stats file"""
    file_ext = "flowstat3"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A dataset that has not been purged gets a fixed description and its
        size as formatted by ``data.nice_size``; a purged dataset gets
        placeholder text.  ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Stats3 file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a size-based description if that fails."""
        try:
            return dataset.peek
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed by this fallback.
            return "Flow Stats3 file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values.

        Returns True only if the last tab-separated header field is exactly
        "Percentage_stdev" and every field of the second line parses as a
        number.  A file that cannot be decoded as text is rejected.
        """
        try:
            with open(filename, "r") as f:
                last_header = f.readline().strip().split("\t")[-1]
                if last_header != "Percentage_stdev":
                    return False
                values = f.readline().strip().split("\t")
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a stats file.
            return False
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
|
|
273
|
|
274
|
|
class FlowScore(Tabular):
    """Class describing a Flow Score file"""
    file_ext = "flowscore"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A dataset that has not been purged gets a fixed description and its
        size as formatted by ``data.nice_size``; a purged dataset gets
        placeholder text.  ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Score file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a size-based description if that fails."""
        try:
            return dataset.peek
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed by this fallback.
            return "Flow Score file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values.

        Returns True only if the first tab-separated header field is exactly
        "Population_ID" and every field of the second line parses as a
        number.  A file that cannot be decoded as text is rejected.
        """
        try:
            with open(filename, "r") as f:
                first_header = f.readline().strip().split("\t")[0]
                if first_header != "Population_ID":
                    return False
                values = f.readline().strip().split("\t")
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a score file.
            return False
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
|