annotate flow.py @ 0:ed90d166300e draft default tip

Uploaded
author immport-devteam
date Mon, 27 Feb 2017 15:18:40 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
1 # -*- coding: utf-8 -*-
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
2 ######################################################################
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
3 # Copyright (c) 2016 Northrop Grumman.
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
4 # All rights reserved.
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
5 ######################################################################
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
6
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
7 """
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
8 Flow analysis datatypes.
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
9 """
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
10
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
11 import gzip
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
12 import json
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
13 import logging
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
14 import os
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
15 import re
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
16 import subprocess
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
17 import tempfile
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
18 import rpy2.interactive as r
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
19 import rpy2.interactive.packages
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
20
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
21 from galaxy.datatypes.binary import Binary
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
22 from galaxy.datatypes.tabular import Tabular
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
23 from galaxy.datatypes.data import get_file_peek, Text
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
24 from galaxy.datatypes.metadata import MetadataElement
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
25 from galaxy.util import nice_size, string_as_bool
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
26 from . import data
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
27
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
29
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
30
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
def is_number(s):
    """Return True if ``s`` can be converted by ``float()``, else False.

    Anything ``float()`` accepts counts as a number (for example ``"3"``,
    ``"-1.5"`` or ``"1e5"``). Empty strings and non-numeric text return
    False. Values ``float()`` rejects with ``TypeError`` (such as ``None``
    or a list) also return False instead of raising, so callers can use
    this as a plain predicate on arbitrary field values.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
37
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
38
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
class FCS(Binary):
    """Class describing an FCS binary file"""
    file_ext = "fcs"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of ``dataset``.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the human-readable file size. For a purged dataset both
        are set to fixed "missing file" messages. ``is_multi_byte`` is
        accepted for signature compatibility and is not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Binary FCS file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a label with the file size as fallback."""
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return "Binary FCSfile (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        Checking if the file is in FCS format. Should read FCS2.0, FCS3.0
        and FCS3.1

        Delegates to the R function ``flowCore::isFCSfile`` through rpy2 and
        returns its first element as a bool. Any failure (including the
        flowCore package not being importable) is logged at debug level and
        reported as False.
        """
        try:
            # The import is inside the ``try`` so that a missing or broken
            # flowCore installation yields False rather than an exception.
            r.packages.importr("flowCore")
            rlib = r.packages.packages
            fcsobject = rlib.flowCore.isFCSfile(filename)
            return bool(list(fcsobject)[0])
        except Exception:
            log.debug("FCS sniff failed for %s", filename, exc_info=True)
            return False

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'


# Register FCS with Galaxy's Binary datatype so it takes part in sniffing.
Binary.register_sniffable_binary_format("fcs", "fcs", FCS)
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
74
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
class FlowText(Tabular):
    """Class describing a Flow Text file"""
    file_ext = "flowtext"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of ``dataset``.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the human-readable file size. For a purged dataset both
        are set to fixed "missing file" messages. ``is_multi_byte`` is
        accepted for signature compatibility and is not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Text Flow file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a label with the file size as fallback."""
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return "Text Flow file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values

        Skips the first line (treated as a header) and returns True only
        if every tab-separated field of the second line is numeric. A file
        with no second line returns False, because the single empty field
        is not a number.
        """
        try:
            with open(filename, "r") as f:
                f.readline()  # header line, content not checked
                values = f.readline().strip().split("\t")
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a flow text file.
            return False
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
106
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
107
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
class FlowClustered(Tabular):
    """Class describing a Flow Text that has been clustered through FLOCK"""
    file_ext = "flowclr"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of ``dataset``.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the human-readable file size. For a purged dataset both
        are set to fixed "missing file" messages. ``is_multi_byte`` is
        accepted for signature compatibility and is not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Text Flow Clustered file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a label with the file size as fallback."""
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return "Flow Text Clustered file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on headers and values

        Returns True only if the last tab-separated header field is
        ``Population`` and every field of the second line is numeric.
        """
        try:
            with open(filename, "r") as f:
                header = f.readline().strip().split("\t")
                values = f.readline().strip().split("\t")
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a clustered file.
            return False
        if header[-1] != "Population":
            return False
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
141
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
142
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
class FlowMFI(Tabular):
    """Class describing a Flow MFI file"""
    file_ext = "flowmfi"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of ``dataset``.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the human-readable file size. For a purged dataset both
        are set to fixed "missing file" messages. ``is_multi_byte`` is
        accepted for signature compatibility and is not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "MFI Flow file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a label with the file size as fallback."""
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return "MFI Flow file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values

        Returns True only if the first tab-separated header field is
        ``Population`` and every field of the second line is numeric.
        """
        try:
            with open(filename, "r") as f:
                header = f.readline().strip().split("\t")
                values = f.readline().strip().split("\t")
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not an MFI file.
            return False
        if header[0] != "Population":
            return False
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
176
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
177
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
class FlowStats1(Tabular):
    """Class describing a Flow Stats file"""
    file_ext = "flowstat1"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of ``dataset``.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the human-readable file size. For a purged dataset both
        are set to fixed "missing file" messages. ``is_multi_byte`` is
        accepted for signature compatibility and is not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Stats1 file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a label with the file size as fallback."""
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return "Flow Stats1 file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values

        Returns True only if the first tab-separated field of the header
        line is ``FileID``. No data values are inspected.
        """
        try:
            with open(filename, "r") as f:
                first_header = f.readline().strip().split("\t")[0]
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a stats file.
            return False
        return first_header == "FileID"

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
207
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
208
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
class FlowStats2(Tabular):
    """Class describing a Flow Stats file"""
    file_ext = "flowstat2"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of ``dataset``.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the human-readable file size. For a purged dataset both
        are set to fixed "missing file" messages. ``is_multi_byte`` is
        accepted for signature compatibility and is not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Stats2 file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a label with the file size as fallback."""
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return "Flow Stats2 file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values

        Returns True only if the last tab-separated field of the header
        line is ``SampleName``. No data values are inspected.
        """
        try:
            with open(filename, "r") as f:
                smp_name = f.readline().strip().split("\t")[-1]
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a stats file.
            return False
        return smp_name == "SampleName"

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
238
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
239
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
class FlowStats3(Tabular):
    """Class describing a Flow Stats file"""
    file_ext = "flowstat3"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of ``dataset``.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the human-readable file size. For a purged dataset both
        are set to fixed "missing file" messages. ``is_multi_byte`` is
        accepted for signature compatibility and is not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Stats3 file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a label with the file size as fallback."""
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return "Flow Stats3 file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values

        Returns True only if the last tab-separated header field is
        ``Percentage_stdev`` and every field of the second line is numeric.
        """
        try:
            with open(filename, "r") as f:
                header = f.readline().strip().split("\t")
                values = f.readline().strip().split("\t")
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a stats file.
            return False
        if header[-1] != "Percentage_stdev":
            return False
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
273
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
274
ed90d166300e Uploaded
immport-devteam
parents:
diff changeset
class FlowScore(Tabular):
    """Class describing a Flow Score file"""
    file_ext = "flowscore"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of ``dataset``.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the human-readable file size. For a purged dataset both
        are set to fixed "missing file" messages. ``is_multi_byte`` is
        accepted for signature compatibility and is not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Flow Score file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a label with the file size as fallback."""
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return "Flow Score file (%s)" % (data.nice_size(dataset.get_size()))

    def sniff(self, filename):
        """Quick test on file formatting and values

        Returns True only if the first tab-separated header field is
        ``Population_ID`` and every field of the second line is numeric.
        """
        try:
            with open(filename, "r") as f:
                header = f.readline().strip().split("\t")
                values = f.readline().strip().split("\t")
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is not a score file.
            return False
        if header[0] != "Population_ID":
            return False
        return all(is_number(value) for value in values)

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/tab-separated-values'