comparison vkmz.py @ 6:35b984684450 draft

planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
author eslerm
date Tue, 10 Jul 2018 17:58:35 -0400
parents 04079c34452a
children
comparison
equal deleted inserted replaced
5:04079c34452a 6:35b984684450
1 ''' 1 import argparse
2 based on the BMRB compound database which can be found at: 2 import csv
3 http://www.bmrb.wisc.edu/ftp/pub/bmrb/relational_tables/metabolomics/Chem_comp.csv 3 import math
4 '''
5
6 import re 4 import re
7 import argparse
8 import multiprocessing
9 from multiprocessing import Pool
10 import csv
11
12 import numpy as np
13 import math
14 import pandas as pd
15 from plotly import __version__
16 import plotly.offline as py
17 import plotly.graph_objs as go
18 5
19 parser = argparse.ArgumentParser() 6 parser = argparse.ArgumentParser()
20 inputSubparser = parser.add_subparsers(help='Select input type:', dest='input-type') 7 inputSubparser = parser.add_subparsers(help='Select input type:', dest='input-type')
21 parse_tsv = inputSubparser.add_parser('tsv', help='Use tabular data as input.') 8 parse_tsv = inputSubparser.add_parser('tsv', help='Use tabular data as input.')
22 parse_tsv.add_argument('--input', '-i', required=True, help='Path to tabular file. Must include columns: sample ID, mz, polarity, intensity, & retention time.') 9 parse_tsv.add_argument('--input', '-i', required=True, help='Path to tabular file. Must include columns: sample ID, mz, polarity, intensity, & retention time.')
23 parse_tsv.add_argument('--no-plot', '-np', action='store_true', help='Disable plot generation.')
24 parse_xcms = inputSubparser.add_parser('xcms', help='Use XCMS data as input.') 10 parse_xcms = inputSubparser.add_parser('xcms', help='Use XCMS data as input.')
25 parse_xcms.add_argument('--data-matrix', '-xd', required=True, nargs='?', type=str, help='Path to XCMS dataMatrix file.') 11 parse_xcms.add_argument('--data-matrix', '-xd', required=True, nargs='?', type=str, help='Path to XCMS data matrix file.')
26 parse_xcms.add_argument('--sample-metadata', '-xs', required=True, nargs='?', type=str, help='Path to XCMS sampleMetadata file.') 12 parse_xcms.add_argument('--sample-metadata', '-xs', required=True, nargs='?', type=str, help='Path to XCMS sample metadata file.')
27 parse_xcms.add_argument('--variable-metadata', '-xv', required=True, nargs='?', type=str, help='Path to XCMS variableMetadata file.') 13 parse_xcms.add_argument('--variable-metadata', '-xv', required=True, nargs='?', type=str, help='Path to XCMS variable metadata file.')
28 parse_xcms.add_argument('--no-plot', '-n', action='store_true', help='Disable plot generation.')
29 parse_plot = inputSubparser.add_parser('plot', help='Only plot data.')
30 parse_plot.add_argument('--input', '-i', required=True, nargs='?', type=str, help='Path to VKMZ generated tabular file.')
31 for inputSubparser in [parse_tsv, parse_xcms]: 14 for inputSubparser in [parse_tsv, parse_xcms]:
32 inputSubparser.add_argument('--output', '-o', nargs='?', type=str, required=True, help='Specify output file path.') 15 inputSubparser.add_argument('--output', '-o', nargs='?', type=str, required=True, help='Specify output file path.')
33 inputSubparser.add_argument('--error', '-e', nargs='?', type=float, required=True, help='Mass error of mass spectrometer in PPM') 16 inputSubparser.add_argument('--error', '-e', nargs='?', type=float, required=True, help='Mass error of mass spectrometer in parts-per-million.')
34 inputSubparser.add_argument('--database', '-d', nargs='?', default='databases/bmrb-light.tsv', help='Select database.') 17 inputSubparser.add_argument('--database', '-db', nargs='?', default='databases/bmrb-light.tsv', help='Select database of known formula masses.')
35 inputSubparser.add_argument('--directory', nargs='?', default='', type=str, help='Define directory of tool.') 18 inputSubparser.add_argument('--directory','-dir', nargs='?', default='', type=str, help='Define path of tool directory. Assumes relative path if unset.')
36 inputSubparser.add_argument('--polarity', '-p', choices=['positive','negative'], help='Force polarity mode. Ignore variables in input file.') 19 inputSubparser.add_argument('--polarity', '-p', choices=['positive','negative'], help='Force polarity mode to positive or negative. Overrides variables in input file.')
37 inputSubparser.add_argument('--no-adjustment', '-na', action='store_true', help='Use flag to turn off polarity based mass adjustment. This flag should always be used if reprocessing data generated by VKMZ.') 20 inputSubparser.add_argument('--neutral', '-n', action='store_true', help='Set neutral flag if masses in input data are neutral. No mass adjustmnet will be made.')
38 inputSubparser.add_argument('--unique', '-u', action='store_true', help='Set flag to only output features which have a single match.') 21 inputSubparser.add_argument('--unique', '-u', action='store_true', help='Set flag to remove features with multiple predictions.')
39 inputSubparser.add_argument('--multiprocessing', '-m', action='store_true', help='Set flag to turn on multiprocessing.')
40 inputSubparser.add_argument('--plottype', '-t', nargs='?', default='scatter-2d', choices=['scatter-2d', 'scatter-3d'], help='Select plot type.')
41 inputSubparser.add_argument('--size', '-s', nargs='?', default=5, type=int, help='Set maxium size of plot symbols.')
42 inputSubparser.add_argument('--size-algorithm', '-a', nargs='?', default=0, type=int, choices=[0,1], help="Symbol size algorithm selector. Algorithm 0 sets all symbols to the maxium size. Algorithm 2 determines a features symbol size by it's log intensity.")
43 args = parser.parse_args() 22 args = parser.parse_args()
44 23
45 vkInputType = getattr(args, "input-type") 24 # store input constants
46 25 INPUT_TYPE = getattr(args, "input-type")
47 # read inputs, arguments and define globals 26 POLARITY = getattr(args, "polarity")
48 vkError = getattr(args, "error") 27
49
50 vkPolarity = getattr(args, "polarity")
51
52 vkUnique = getattr(args, "unique")
53
54 vkMultiprocessing = getattr(args, "multiprocessing")
55
56 vkNoAdjustment = getattr(args, "no_adjustment")
57
58 vkDatabaseFile = getattr(args, "database")
59 vkDirectory = getattr(args, "directory")
60
61 vkMass = []
62 vkFormula = []
63 try:
64 with open(vkDirectory+vkDatabaseFile, 'r') as tsv:
65 next(tsv) # skip first row
66 for row in tsv:
67 mass, formula = row.split()
68 vkMass.append(mass)
69 vkFormula.append(formula)
70 except ValueError:
71 print('The %s database could not be loaded.' % vkDatabaseFile)
72 vkMaxIndex = len(vkMass)-1
73
74 vkOutput = getattr(args, "output")
75
76 vkPlotType = getattr(args, 'plottype')
77
78 vkSize = getattr(args, 'size')
79
80 vkSizeAlgo = getattr(args, 'size_algorithm')
81
82 # control predictions
83 def forecaster(vkInput):
84 if vkMultiprocessing:
85 try:
86 pool = Pool()
87 vkOutputList = pool.map(featurePrediction, vkInput)
88 except Exception as e:
89 print("Error during multirpocessing: "+str(e))
90 finally:
91 pool.close()
92 pool.join()
93 else:
94 vkOutputList = map(featurePrediction, vkInput)
95 vkOutputList = [x for x in vkOutputList if x is not None]
96 return(vkOutputList)
97
98 # predict feature formulas and creates output list
99 def featurePrediction(feature):
100 if vkNoAdjustment:
101 mass = feature[2]
102 else:
103 mass = adjust(feature[2], feature[1]) # mz & polarity
104 uncertainty = mass * vkError / 1e6
105 prediction = predict(mass, uncertainty, 0, vkMaxIndex)
106 if prediction != -1:
107 feature[2] = mass
108 predictions = predictNeighbors(mass, uncertainty, prediction)
109 if vkUnique and len(predictions) > 1:
110 return
111 feature.append(predictions) # feature[5]
112 predictionClosest = predictions[0]
113 formula = predictionClosest[1]
114 formulaList = re.findall('[A-Z][a-z]?|[0-9]+', formula)
115 formulaDictionary = {'C':0,'H':0,'O':0,'N':0}
116 i = 0;
117 while i < len(formulaList):
118 if formulaList[i] in formulaDictionary:
119 # if there is only one of this element
120 if i+1 == len(formulaList) or formulaList[i+1].isalpha():
121 formulaDictionary[formulaList[i]] = 1
122 else:
123 formulaDictionary[formulaList[i]] = formulaList[i+1]
124 i+=1
125 i+=1
126 predictionClosest.append(formulaDictionary)
127 hc = float(formulaDictionary['H'])/float(formulaDictionary['C'])
128 oc = float(formulaDictionary['O'])/float(formulaDictionary['C'])
129 nc = float(formulaDictionary['N'])/float(formulaDictionary['C'])
130 predictionClosestDelta = feature[5][0][2]
131 feature += [predictionClosestDelta, hc, oc, nc]
132 return(feature)
133
134 # adjust charged mass to a neutral mass
135 def adjust(mass, polarity):
136 # value to adjust by
137 proton = 1.007276
138 if polarity == 'positive':
139 mass += proton
140 elif polarity == 'negative':
141 mass -= proton
142 return mass
143
144 # Binary search to match observed mass to known mass within error
145 # https://en.wikipedia.org/wiki/Binary_search_tree
146 def predict(mass, uncertainty, left, right):
147 mid = ((right - left) / 2) + left
148 if left <= mid <= right and mid <= vkMaxIndex:
149 delta = float(vkMass[mid]) - mass
150 if uncertainty >= abs(delta):
151 return mid
152 elif uncertainty < delta:
153 return predict(mass, uncertainty, left, mid-1)
154 else:
155 return predict(mass, uncertainty, mid+1, right)
156 return -1
157
158 def plotData(vkData):
159 if vkSizeAlgo == 0:
160 for row in vkData:
161 row.append(vkSize)
162 else:
163 max_intensity = 0.0
164 for row in vkData:
165 intensity = row[4]
166 if intensity > max_intensity:
167 max_intensity = intensity
168 alpha = vkSize/math.log(max_intensity+1)
169 for row in vkData:
170 intensity = row[4]
171 row.append(alpha*math.log(intensity+1))
172 return vkData
173
174 # find and sort known masses within error limit of observed mass
175 def predictNeighbors(mass, uncertainty, prediction):
176 i = 0
177 neighbors = [[vkMass[prediction],vkFormula[prediction],(float(vkMass[prediction])-mass)],]
178 while prediction+i+1 <= vkMaxIndex:
179 neighbor = prediction+i+1
180 delta = float(vkMass[neighbor])-mass
181 if uncertainty >= abs(delta):
182 neighbors.append([vkMass[neighbor],vkFormula[neighbor],delta])
183 i += 1
184 else:
185 break
186 i = 0
187 while prediction+i-1 >= 0:
188 neighbor = prediction+i-1
189 delta = float(vkMass[neighbor])-mass
190 if uncertainty >= abs(delta):
191 neighbors.append([vkMass[neighbor],vkFormula[neighbor],(float(vkMass[neighbor])-mass)])
192 i -= 1
193 else:
194 break
195 neighbors = sorted(neighbors, key = (lambda delta: abs(delta[2])))
196 return neighbors
197
198 # write output file
199 def saveForcast(vkOutputList):
200 try:
201 with open(vkOutput+'.tsv', 'w') as f:
202 f.writelines(str("sample_id\tpolarity\tmz\tretention_time\tintensity\tpredictions\tdelta\tH:C\tO:C\tN:C\tsymbol_size") + '\n')
203 for feature in vkOutputList:
204 f.writelines(feature[0]+'\t'+feature[1]+'\t'+str(feature[2])+'\t'+str(feature[3])+'\t'+str(feature[4])+'\t'+str(feature[5])+'\t'+str(feature[6])+'\t'+str(feature[7])+'\t'+str(feature[8])+'\t'+str(feature[9])+'\t'+str(feature[10])+'\t'+'\n')
205 except ValueError:
206 print('"%s" could not be saved.' % filename)
207
208 def plotRatios(vkData):
209 max_rt = 0
210 max_hc = 0
211 max_oc = 0
212 max_nc = 0
213 for row in vkData:
214 if row[3] > max_rt:
215 max_rt = row[3]
216 if row[7] > max_hc:
217 max_hc = row[7]
218 if row[8] > max_oc:
219 max_oc = row[8]
220 if row[9] > max_nc:
221 max_nc = row[9]
222 labels = ['sampleID', 'polarity', 'mz', 'rt', 'intensity', 'predictions', 'delta', 'hc', 'oc', 'nc', 'symbol_size']
223 df = pd.DataFrame.from_records(vkData, columns=labels)
224 sampleIDs = df.sampleID.unique()
225 data = []
226 menus = []
227 i = 0
228 for sampleID in sampleIDs:
229 dfSample = df.loc[df['sampleID'] == sampleID]
230 size = dfSample.symbol_size
231 trace = go.Scatter(
232 x = dfSample.oc,
233 y = dfSample.hc,
234 text = dfSample.predictions.apply(lambda x: "Prediction: "+str(x[0][1])+"<br>mz: " +str(x[0][0])+"<br>Delta: "+str(x[0][2])),
235 line = dict(width = 0.5),
236 mode = 'markers',
237 marker = dict(
238 size = size,
239 sizemode = "area",
240 color = dfSample.rt,
241 colorscale = 'Viridis',
242 cmin = 0,
243 cmax = max_rt,
244 colorbar=dict(title='Retention Time (s)'),
245 line = dict(width = 0.5),
246 showscale = True
247 ),
248 opacity = 0.8
249 )
250 data.append(trace)
251 vision = []
252 j = 0
253 while j < len(sampleIDs):
254 if j != i:
255 vision.append(False)
256 else:
257 vision.append(True)
258 j += 1
259 menu = dict(
260 method = 'update',
261 label = sampleID,
262 args = [{'visible': vision}, {'title': sampleID}]
263 )
264 menus.append(menu)
265 i += 1
266 updatemenus = list([
267 dict(
268 active = -1,
269 buttons = menus
270 )
271 ])
272 layout = go.Layout(
273 title = "Van Krevelen Diagram",
274 showlegend = False,
275 xaxis = dict(
276 title = 'Oxygen to Carbon Ratio',
277 zeroline = False,
278 gridcolor = 'rgb(183,183,183)',
279 showline = True,
280 range = [0, max_oc]
281 ),
282 yaxis = dict(
283 title = 'Hydrogen to Carbon Ratio',
284 zeroline = False,
285 gridcolor = 'rgb(183,183,183)',
286 showline = True,
287 range = [0, max_hc]
288 ),
289 margin = dict(r=0, b=100, l=100, t=100),
290 updatemenus = updatemenus
291 )
292 fig = go.Figure(data=data, layout=layout)
293 py.plot(fig, auto_open=False, show_link=False, filename=vkOutput+'.html')
294
295 def polaritySanitizer(sample_polarity): 28 def polaritySanitizer(sample_polarity):
296 if sample_polarity.lower() in {'positive','pos','+'}: 29 if sample_polarity.lower() in {'positive','pos','+'}:
297 sample_polarity = 'positive' 30 sample_polarity = 'positive'
298 elif sample_polarity.lower() in {'negative', 'neg', '-'}: 31 elif sample_polarity.lower() in {'negative', 'neg', '-'}:
299 sample_polarity = 'negative' 32 sample_polarity = 'negative'
300 else: 33 else:
301 print('A sample has an unknown polarity type: %s. Polarity in the XCMS sample metadata should be set to "negative" or "positive".' % sample_polarity) 34 print('A sample has an unknown polarity type: %s. Polarity in the XCMS sample metadata should be set to "negative" or "positive".' % sample_polarity)
302 raise ValueError 35 raise ValueError
303 return sample_polarity 36 return sample_polarity
304 37
305 # main 38 # read input
306 if vkInputType == "tsv": 39 vkInput = [] # each element is a feature from input
307 vkInput = [] 40 if INPUT_TYPE == "tsv":
308 tsvFile = getattr(args, "input") 41 tsvFile = getattr(args, "input")
309 try: 42 try:
310 with open(tsvFile, 'r') as f: 43 with open(tsvFile, 'r') as f:
311 next(f) # skip hearder line 44 next(f) # skip hearder line
312 tsvData = csv.reader(f, delimiter='\t') 45 tsvData = csv.reader(f, delimiter='\t')
313 for row in tsvData: 46 for row in tsvData:
314 vkInput.append([row[0],polaritySanitizer(row[1]),float(row[2]),float(row[3]),float(row[4])]) 47 vkInput.append([row[0],polaritySanitizer(row[1]),float(row[2]),float(row[3]),float(row[4])])
315 except ValueError: 48 except ValueError:
316 print('The %s data file could not be read.' % tsvFile) 49 print('The %s data file could not be read.' % tsvFile)
317 vkData = forecaster(vkInput) 50 else: # INPUT_TYPE == "xcms"
318 vkData = plotData(vkData)
319 saveForcast(vkData)
320 plotRatios(vkData)
321 elif vkInputType == "xcms":
322 vkInput = []
323 xcmsSampleMetadataFile = getattr(args, "sample_metadata") 51 xcmsSampleMetadataFile = getattr(args, "sample_metadata")
324 try: 52 try:
325 polarity = {} 53 polarity = {}
326 with open(xcmsSampleMetadataFile, 'r') as f: 54 with open(xcmsSampleMetadataFile, 'r') as f:
327 xcmsSampleMetadata = csv.reader(f, delimiter='\t') 55 xcmsSampleMetadata = csv.reader(f, delimiter='\t')
328 next(xcmsSampleMetadata, None) # skip header 56 next(xcmsSampleMetadata, None) # skip header
329 for row in xcmsSampleMetadata: 57 for row in xcmsSampleMetadata:
330 sample = row[0] 58 sample = row[0]
331 if vkPolarity: 59 if POLARITY:
332 polarity[sample] = vkPolarity 60 polarity[sample] = POLARITY
333 else: 61 else:
334 sample_polarity = polaritySanitizer(row[2]) 62 sample_polarity = polaritySanitizer(row[2])
335 polarity[sample] = sample_polarity 63 polarity[sample] = sample_polarity
336 except ValueError: 64 except ValueError:
337 print('The %s data file could not be read. Check that polarity is set to "negative" or "positive"' % xcmsSampleMetadataFile) 65 print('The %s data file could not be read. Check that polarity is set to "negative" or "positive"' % xcmsSampleMetadataFile)
380 if sample != "": 108 if sample != "":
381 vkInput.append([sample, polarity[sample], mz[variable], rt[variable], float(intensity)]) 109 vkInput.append([sample, polarity[sample], mz[variable], rt[variable], float(intensity)])
382 i+=1 110 i+=1
383 except ValueError: 111 except ValueError:
384 print('The %s data file could not be read.' % xcmsDataMatrixFile) 112 print('The %s data file could not be read.' % xcmsDataMatrixFile)
385 vkData = forecaster(vkInput) 113
386 vkData = plotData(vkData) 114 # store||generate remaining constants
387 saveForcast(vkData) 115 OUTPUT = getattr(args, "output")
388 plotRatios(vkData) 116 MASS_ERROR = getattr(args, "error")
389 else: 117 UNIQUE = getattr(args, "unique")
390 vkData = [] 118 NEUTRAL = getattr(args, "neutral")
391 tsvPlotvFile = getattr(args, "input") 119 DATABASE = getattr(args, "database")
392 try: 120 DIRECTORY = getattr(args, "directory")
393 with open(tsvPlotFile, 'r') as f: 121 MASS = []
394 next(f) # skip header line 122 FORMULA = []
395 plotData = csv.reader(f, delimiter='\t') 123 try:
396 for row in plotData: 124 with open(DIRECTORY+DATABASE, 'r') as tsv:
397 vkData.append([row[0],row[1],float(row[2]),float(row[3]),float(row[4]),list(row[4]),float(row[5]),float(row[6]),float(row[7]),float(row[8])]) 125 for row in tsv:
398 except ValueError: 126 mass, formula = row.split()
399 print('The %s data file could not be read.' % tsvFile) 127 MASS.append(mass)
400 plotRatios(vkData) 128 FORMULA.append(formula)
401 129 except ValueError:
130 print('The %s database could not be loaded.' % DATABASE)
131 MAX_MASS_INDEX = len(MASS)-1
132
133 # adjust charged mass to a neutral mass
134 def adjust(mass, polarity):
135 # value to adjust by
136 proton = 1.007276
137 if polarity == 'positive':
138 mass -= proton
139 else: # sanitized to negative
140 mass += proton
141 return mass
142
143 # binary search to match a neutral mass to known mass within error
144 def predict(mass, uncertainty, left, right):
145 mid = ((right - left) / 2) + left
146 if left <= mid <= right and mid <= MAX_MASS_INDEX:
147 delta = float(MASS[mid]) - mass
148 if uncertainty >= abs(delta):
149 return mid
150 elif uncertainty < delta:
151 return predict(mass, uncertainty, left, mid-1)
152 else:
153 return predict(mass, uncertainty, mid+1, right)
154 return -1
155
156 # find and rank predictions which are adjacent to the index of an intial prediction
157 def predictNeighbors(mass, uncertainty, prediction):
158 i = 0
159 neighbors = [[MASS[prediction],FORMULA[prediction],(float(MASS[prediction])-mass)],]
160 while prediction+i+1 <= MAX_MASS_INDEX:
161 neighbor = prediction+i+1
162 delta = float(MASS[neighbor])-mass
163 if uncertainty >= abs(delta):
164 neighbors.append([MASS[neighbor],FORMULA[neighbor],delta])
165 i += 1
166 else:
167 break
168 i = 0
169 while prediction+i-1 >= 0:
170 neighbor = prediction+i-1
171 delta = float(MASS[neighbor])-mass
172 if uncertainty >= abs(delta):
173 neighbors.append([MASS[neighbor],FORMULA[neighbor],(float(MASS[neighbor])-mass)])
174 i -= 1
175 else:
176 break
177 neighbors = sorted(neighbors, key = (lambda delta: abs(delta[2])))
178 return neighbors
179
180 # predict formulas by the mass of a feature
181 def featurePrediction(feature):
182 if NEUTRAL:
183 mass = feature[2]
184 else:
185 mass = adjust(feature[2], feature[1]) # mz & polarity
186 uncertainty = mass * MASS_ERROR / 1e6
187 prediction = predict(mass, uncertainty, 0, MAX_MASS_INDEX)
188 if prediction != -1: # else feature if forgotten
189 predictions = predictNeighbors(mass, uncertainty, prediction)
190 if UNIQUE and len(predictions) > 1:
191 return
192 feature.append(predictions) # feature[5]
193 formula = predictions[0][1] # formula of prediction with lowest abs(delta)
194 formulaList = re.findall('[A-Z][a-z]?|[0-9]+', formula)
195 formulaDictionary = {'C':0,'H':0,'O':0,'N':0} # other elements are easy to add
196 i = 0;
197 while i < len(formulaList):
198 if formulaList[i] in formulaDictionary:
199 # if there is only one of this element
200 if i+1 == len(formulaList) or formulaList[i+1].isalpha():
201 formulaDictionary[formulaList[i]] = 1
202 else:
203 formulaDictionary[formulaList[i]] = formulaList[i+1]
204 i+=1
205 i+=1
206 hc = float(formulaDictionary['H'])/float(formulaDictionary['C'])
207 oc = float(formulaDictionary['O'])/float(formulaDictionary['C'])
208 nc = float(formulaDictionary['N'])/float(formulaDictionary['C'])
209 feature += [hc, oc, nc] # feature[6], [7], [8]
210 return(feature)
211
212 # write output file
213 def write(vkData):
214 json = ''
215 try:
216 # write tabular file and generate json for html output
217 with open(OUTPUT+'.tsv', 'w') as f:
218 f.writelines(str("sample_id\tpolarity\tmz\trt\tintensity\tpredictions\thc\toc\tnc") + '\n')
219 for feature in vkData:
220 f.writelines(feature[0]+'\t'+feature[1]+'\t'+str(feature[2])+'\t'+str(feature[3])+'\t'+str(feature[4])+'\t'+str(feature[5])+'\t'+str(feature[6])+'\t'+str(feature[7])+'\t'+str(feature[8])+'\n')
221 json += '{sample_id:\''+str(feature[0])+'\', polarity:\''+str(feature[1])+'\', mz:'+str(feature[2])+', rt:'+str(feature[3])+', intensity:'+str(feature[4])+', predictions:'+str(feature[5])+', hc:'+str(feature[6])+', oc:'+str(feature[7])+', nc:'+str(feature[8])+'},'
222 json = json[:-1] # remove final comma
223 # write html
224 try:
225 with open(DIRECTORY+'d3.html', 'r') as template, open(OUTPUT+'.html', 'w') as f:
226 for line in template:
227 line = re.sub('^var data.*$', 'var data = ['+json+']', line, flags=re.M)
228 f.write(line)
229 except ValueError:
230 print('"%s" could not be read or "%s" could not be written' % template, f)
231 except ValueError:
232 print('"%s" could not be saved.' % filename)
233
234 # main
235 vkData = map(featurePrediction, vkInput)
236 vkData = [x for x in vkData if x is not None]
237 # sort by intensity so D3 draws largest symbols first
238 vkData.sort(key=lambda x: x[4], reverse=True)
239 write(vkData)