comparison flow.py @ 0:ed90d166300e draft default tip

Uploaded
author immport-devteam
date Mon, 27 Feb 2017 15:18:40 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ed90d166300e
1 # -*- coding: utf-8 -*-
2 ######################################################################
3 # Copyright (c) 2016 Northrop Grumman.
4 # All rights reserved.
5 ######################################################################
6
7 """
8 Flow analysis datatypes.
9 """
10
11 import gzip
12 import json
13 import logging
14 import os
15 import re
16 import subprocess
17 import tempfile
18 import rpy2.interactive as r
19 import rpy2.interactive.packages
20
21 from galaxy.datatypes.binary import Binary
22 from galaxy.datatypes.tabular import Tabular
23 from galaxy.datatypes.data import get_file_peek, Text
24 from galaxy.datatypes.metadata import MetadataElement
25 from galaxy.util import nice_size, string_as_bool
26 from . import data
27
28 log = logging.getLogger(__name__)
29
30
def is_number(s):
    """Return True if *s* can be converted to a float, False otherwise.

    Used by the sniffers in this module to check that the fields of a data
    row are numeric.

    :param s: value to test (typically a string field from a tabular file).
    :returns: True when ``float(s)`` succeeds, False otherwise.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: text that is not a number (including the empty string).
        # TypeError: objects float() cannot handle at all, e.g. None.
        return False
    return True
37
38
class FCS(Binary):
    """Class describing an FCS binary file"""
    file_ext = "fcs"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of *dataset*.

        For a dataset that has not been purged the peek is a fixed label and
        the blurb is the human-readable file size; otherwise both are set to
        "purged" placeholder messages. ``is_multi_byte`` is accepted but not
        used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Binary FCS file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a generic label with the file size."""
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are allowed to propagate.
            return "Binary FCSfile (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        Checking if the file is in FCS format. Should read FCS2.0, FCS3.0
        and FCS3.1

        The check is delegated to ``isFCSfile`` from the R package flowCore
        (called through rpy2). Any failure, including a failure to load
        flowCore itself, is reported as "not an FCS file" rather than raised.
        """
        try:
            # Loading the R package is inside the try block so that a missing
            # flowCore installation makes the sniffer answer False instead of
            # raising.
            r.packages.importr("flowCore")
            rlib = r.packages.packages
            fcsobject = rlib.flowCore.isFCSfile(filename)
            # isFCSfile returns a vector-like R object; the first element is
            # the answer for the single file passed in. Coerce to a plain bool.
            return bool(list(fcsobject)[0])
        except Exception:
            return False

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'
# Make the FCS datatype known to Galaxy's binary sniffing machinery.
# NOTE(review): arguments are presumably (datatype name, file extension,
# datatype class) -- confirm against the Binary API of the target Galaxy.
Binary.register_sniffable_binary_format("fcs", "fcs", FCS)
74
class FlowText(Tabular):
    """Class describing a Flow Text file"""
    file_ext = "flowtext"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of *dataset*.

        For a dataset that has not been purged the peek is a fixed label and
        the blurb is the human-readable file size; otherwise both are set to
        "purged" placeholder messages. ``is_multi_byte`` is accepted but not
        used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Text Flow file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a generic label with the file size."""
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are allowed to propagate.
            return "Text Flow file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values.

        Skips the first line (treated as a header) and returns True only if
        every tab-separated field of the second line is numeric.
        """
        with open(filename, "r") as f:
            f.readline()  # header line: content is not inspected
            values = f.readline().strip().split("\t")
        # A missing second line yields [""], which is not numeric -> False.
        return all(is_number(val) for val in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
106
107
class FlowClustered(Tabular):
    """Class describing a Flow Text that has been clustered through FLOCK"""
    file_ext = "flowclr"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of *dataset*.

        For a dataset that has not been purged the peek is a fixed label and
        the blurb is the human-readable file size; otherwise both are set to
        "purged" placeholder messages. ``is_multi_byte`` is accepted but not
        used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Text Flow Clustered file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a generic label with the file size."""
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are allowed to propagate.
            return "Flow Text Clustered file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on headers and values.

        Returns True only if the last header column is "Population" and
        every tab-separated field of the second line is numeric.
        """
        with open(filename, "r") as f:
            header = f.readline().strip().split("\t")
            if header[-1] != "Population":
                return False
            values = f.readline().strip().split("\t")
        # A missing second line yields [""], which is not numeric -> False.
        return all(is_number(val) for val in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
141
142
class FlowMFI(Tabular):
    """Class describing a Flow MFI file"""
    file_ext = "flowmfi"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of *dataset*.

        For a dataset that has not been purged the peek is a fixed label and
        the blurb is the human-readable file size; otherwise both are set to
        "purged" placeholder messages. ``is_multi_byte`` is accepted but not
        used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = "MFI Flow file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a generic label with the file size."""
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are allowed to propagate.
            return "MFI Flow file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values.

        Returns True only if the first header column is "Population" and
        every tab-separated field of the second line is numeric.
        """
        with open(filename, "r") as f:
            header = f.readline().strip().split("\t")
            if header[0] != "Population":
                return False
            values = f.readline().strip().split("\t")
        # A missing second line yields [""], which is not numeric -> False.
        return all(is_number(val) for val in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
176
177
class FlowStats1(Tabular):
    """Class describing a Flow Stats file whose first header column is "FileID\""""
    file_ext = "flowstat1"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of *dataset*.

        For a dataset that has not been purged the peek is a fixed label and
        the blurb is the human-readable file size; otherwise both are set to
        "purged" placeholder messages. ``is_multi_byte`` is accepted but not
        used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Stats1 file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a generic label with the file size."""
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are allowed to propagate.
            return "Flow Stats1 file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on the header line.

        Returns True only if the first tab-separated header column is
        "FileID". Data rows are not inspected.
        """
        with open(filename, "r") as f:
            first_header = f.readline().strip().split("\t")[0]
        return first_header == "FileID"

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
207
208
class FlowStats2(Tabular):
    """Class describing a Flow Stats file whose last header column is "SampleName\""""
    file_ext = "flowstat2"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of *dataset*.

        For a dataset that has not been purged the peek is a fixed label and
        the blurb is the human-readable file size; otherwise both are set to
        "purged" placeholder messages. ``is_multi_byte`` is accepted but not
        used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Stats2 file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a generic label with the file size."""
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are allowed to propagate.
            return "Flow Stats2 file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on the header line.

        Returns True only if the last tab-separated header column is
        "SampleName". Data rows are not inspected.
        """
        with open(filename, "r") as f:
            smp_name = f.readline().strip().split("\t")[-1]
        return smp_name == "SampleName"

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
238
239
class FlowStats3(Tabular):
    """Class describing a Flow Stats file whose last header column is "Percentage_stdev\""""
    file_ext = "flowstat3"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of *dataset*.

        For a dataset that has not been purged the peek is a fixed label and
        the blurb is the human-readable file size; otherwise both are set to
        "purged" placeholder messages. ``is_multi_byte`` is accepted but not
        used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Stats3 file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a generic label with the file size."""
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are allowed to propagate.
            return "Flow Stats3 file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values.

        Returns True only if the last header column is "Percentage_stdev"
        and every tab-separated field of the second line is numeric.
        """
        with open(filename, "r") as f:
            header = f.readline().strip().split("\t")
            if header[-1] != "Percentage_stdev":
                return False
            values = f.readline().strip().split("\t")
        # A missing second line yields [""], which is not numeric -> False.
        return all(is_number(val) for val in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
273
274
class FlowScore(Tabular):
    """Class describing a Flow Score file"""
    file_ext = "flowscore"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of *dataset*.

        For a dataset that has not been purged the peek is a fixed label and
        the blurb is the human-readable file size; otherwise both are set to
        "purged" placeholder messages. ``is_multi_byte`` is accepted but not
        used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Score file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a generic label with the file size."""
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are allowed to propagate.
            return "Flow Score file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values.

        Returns True only if the first header column is "Population_ID" and
        every tab-separated field of the second line is numeric.
        """
        with open(filename, "r") as f:
            header = f.readline().strip().split("\t")
            if header[0] != "Population_ID":
                return False
            values = f.readline().strip().split("\t")
        # A missing second line yields [""], which is not numeric -> False.
        return all(is_number(val) for val in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'