comparison nist_wrapper.py @ 0:cce6989ed423

new NIST wrapper demo tools
author pieter.lukasse@wur.nl
date Thu, 22 Jan 2015 16:14:57 +0100
parents
children c3dc158717fc
comparison
equal deleted inserted replaced
-1:000000000000 0:cce6989ed423
1 #!/usr/bin/env python
2 # encoding: utf-8
3 '''
4 Module wrapping the NIST MSSEARCH application for matching
5 spectra against one or more spectra libraries.
6 '''
7 import csv
8 import sys
9 import fileinput
10 import urllib2
11 import time
12 import utils
13 import uuid
14 import os
15 import subprocess
16 from report_generator import ReportGenerator
17
18 __author__ = "Pieter Lukasse"
19 __contact__ = "pieterlukasse@gmail.com,pieter.lukasse@wur.nl"
20 __copyright__ = "Copyright, 2015"
21 __license__ = "Apache v2"
22
23
24
def _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine):
    r'''
    Create a private copy of the NIST installation, configured for one search.

    Executes the following steps:
    - copy the nist_home_dir folder to nist_home_dir + uuid_value
    - copy spectrum_file to <new home>/spectrum_file.msp
    - write <new home>/MSSEARCH/AUTOIMP.MSD -> pointing to
      C:\NIST<uuid>\MSSEARCH\temp.msd (in case of is_wine) or to
      <new home>\MSSEARCH\temp.msd
    - write <new home>/MSSEARCH/temp.msd -> pointing to
      C:\NIST<uuid>\spectrum_file.msp (in case of is_wine) or to
      <new home>\spectrum_file.msp, and the text "10 724" in the second row
    - write <new home>/MSSEARCH/nistms.INI as a copy of nist_ini_file in which
      every line containing "Library Directory=" or "Dir=" is replaced by one
      pointing into the new MSSEARCH folder

    @param uuid_value: suffix appended to nist_home_dir to name the copy
    @param nist_home_dir: NIST home; one trailing "/" or "\" is ignored
    @param nist_ini_file: template nistms.INI to adapt
    @param spectrum_file: query spectra file (copied as spectrum_file.msp)
    @param is_wine: True when NIST is run through wine
    @return: path of the new NIST home (nist_home_dir + uuid_value)
    @raise Exception: if is_wine and nist_home_dir does not end with drive_c/NIST
    '''
    if nist_home_dir.endswith("/") or nist_home_dir.endswith("\\"):
        nist_home_dir = nist_home_dir[:-1]

    # small validation for wine scenario
    if is_wine and not nist_home_dir.endswith("drive_c/NIST"):
        raise Exception('Error: invalid NIST home. For wine usage NIST home dir must be in the .wine folder and then in drive_c/NIST')

    new_nist_home = nist_home_dir + uuid_value
    utils.copy_dir(nist_home_dir, new_nist_home)
    utils.copy_file(spectrum_file, new_nist_home + "/spectrum_file.msp")

    # Home directory as it is written into the NIST control files: under wine
    # this is the Windows-style drive path, otherwise the real path.
    replacement_text = new_nist_home
    if is_wine:
        replacement_text = "C:\\NIST" + uuid_value

    mssearch_dir = new_nist_home + "/MSSEARCH"

    # Mode "w" truncates, so content inherited from the copied installation is
    # discarded and a missing file is simply created.
    with open(mssearch_dir + "/AUTOIMP.MSD", "w") as text_file:
        text_file.write(replacement_text + "\\MSSEARCH\\temp.msd")

    with open(mssearch_dir + "/temp.msd", "w") as text_file:
        text_file.write(replacement_text + "\\spectrum_file.msp\n")
        text_file.write("10 724")

    # TODO : this loop/replace below is a bit limited to specific variables...either test different NIST versions or make more generic (harder in case of wine, or we need extra "home in .INI file" parameter):
    with open(nist_ini_file) as ini_in:
        with open(mssearch_dir + "/nistms.INI", "w") as ini_out:
            for line in ini_in:
                if "Library Directory=" in line:
                    # NOTE(review): this entry gets the host path even under wine,
                    # unlike the "Dir=" entries below - confirm this is intended.
                    line = "Library Directory=" + new_nist_home + "\\MSSEARCH\\\n"
                if "Dir=" in line:
                    line = "Dir=" + replacement_text + "\\MSSEARCH\\\n"
                ini_out.write(line)

    return new_nist_home
85
def _run_NIST(new_nist_home, output_file, is_wine):
    '''
    Run the NIST search and copy its result file to output_file.

    - run : (wine) new_nist_home/MSSEARCH/nistms$.exe /INSTRUMENT /PAR=2
    - monitor : wait until new_nist_home/MSSEARCH/SRCREADY.TXT exists
    - when ready:
      > kill the NIST process (process group under wine, taskkill otherwise)
      > copy SRCRESLT.TXT to output_file

    @param new_nist_home: NIST home directory to run from
    @param output_file: destination for the SRCRESLT.TXT result file
    @param is_wine: True to launch through wine, False to launch directly
    '''
    # remove old file, if it is there:
    file_to_monitor = new_nist_home + "/MSSEARCH/SRCREADY.TXT"
    if os.path.exists(file_to_monitor):
        os.remove(file_to_monitor)

    exec_path = new_nist_home + "/MSSEARCH/nistms$.exe"

    pro = None
    devnull = None
    if is_wine:
        print("calling wine with " + exec_path)
        # Argument list without a shell: paths containing spaces or shell
        # metacharacters are passed through untouched.
        cmd = ["wine", exec_path, "/INSTRUMENT", "/PAR=2"]
        # Output is discarded instead of piped: a pipe nobody reads can fill
        # up and block the child.
        devnull = open(os.devnull, "w")
        try:
            # os.setsid() runs in the child after fork() and before exec(),
            # giving it a process group of its own that can be killed below.
            pro = subprocess.Popen(cmd, stdout=devnull, preexec_fn=os.setsid)
        except Exception:
            devnull.close()
            raise
    else:
        cmd = [
            exec_path,
            "/INSTRUMENT",
            "/PAR=2"]
        subprocess.call(cmd)

    try:
        # monitor process by checking state file:
        while not os.path.exists(file_to_monitor):
            time.sleep(2)
    finally:
        # nistms$ terminates by itself, nistms.exe is the one that stays open
        # as an orphan, so it is killed here (also when the wait is interrupted).
        if is_wine:
            try:
                os.killpg(pro.pid, 9)
                # reap the child so that no zombie is left behind
                pro.wait()
            finally:
                devnull.close()
        else:
            # windows case:
            proc_name = "nistms.exe"
            os.system("taskkill /f /im " + proc_name)

    # copy SRCRESLT.TXT to output_file
    result_file = new_nist_home + "/MSSEARCH/SRCRESLT.TXT"
    utils.copy_file(result_file, output_file)
154
155
def _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict):
    '''
    Render the HTML report and copy the static files it needs.

    According to the original design notes, the report shows essentially the
    same list as the tabular output file (rendered with the datatables jquery
    plugin), and clicking an entry displays the query spectrum and the hit
    spectrum in "head to tail" and "difference" mode.

    @param output_html_report: path of the HTML file to write
    @param output_html_report_files_path: folder receiving spectrum_gen.js and
           the lib and images folders
    @param hits_dict: hits data handed to ReportGenerator
    @param spectra_dict: query spectra data handed to ReportGenerator
    @return: None
    '''
    # Resolve the module folder to an absolute path: with a bare
    # os.path.dirname(__file__) a script started by file name only yields '',
    # which would turn the templates folder into the root path '/templates/'.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    templates_folder = module_dir + '/templates/'

    # generate HTML via the template engine; the file is closed even if
    # rendering fails
    with open(output_html_report, 'w') as html_file:
        html_render = ReportGenerator(module_dir, 'templates/main_template.html', hits_dict, spectra_dict)
        html_render.render(html_file)

    # copy necessary .js files as well:
    utils.copy_file(templates_folder + 'spectrum_gen.js', output_html_report_files_path + "/spectrum_gen.js")
    utils.copy_dir(templates_folder + 'lib', output_html_report_files_path + '/lib')
    utils.copy_dir(templates_folder + 'images', output_html_report_files_path + '/images')

    return None
185
def _get_extra_info_and_link_cols(data_found, data_type_found, query_link):
    '''
    Return the extra columns for a result row as a two-item list:
    - data_type_found, unchanged
    - an Excel HYPERLINK formula pointing to query_link

    @param data_found: accepted for interface compatibility only; experiment
           details such as 'organism' or 'tissue' are currently not part of
           the result
    @param data_type_found: value placed in the first column
    @param query_link: URL that executes the same query
    @return: list [data_type_found, hyperlink formula]
    '''
    # To let Excel interpret as link, use e.g. =HYPERLINK("http://stackoverflow.com", "friendly name"):
    hyperlink = "=HYPERLINK(\"" + query_link + "\", \"Link to entries found in DB \")"
    return [data_type_found, hyperlink]
222
223
224
225
226
227 # alternative: ?
228 # s = requests.Session()
229 # s.verify = False
230 # #s.auth = (token01, token02)
231 # resp = s.get(url, params={'name': 'anonymous'}, stream=True)
232 # content = resp.content
233 # # transform to dictionary:
234
235
236
def _save_data(data_rows, headers, out_csv):
    '''
    Writes tab-separated data to file
    @param data_rows: iterable of rows (each row a sequence of cell values)
    @param headers: sequence of column names, written as the first line
    @param out_csv: output csv file
    '''
    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        outfile_single_handle = open(out_csv, 'wb')
    else:
        outfile_single_handle = open(out_csv, 'w', newline='')

    # The with block guarantees the file is flushed and closed, also on error
    with outfile_single_handle:
        output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")

        # Write headers
        output_single_handle.writerow(headers)

        # Write one line for each row
        output_single_handle.writerows(data_rows)
254
def _get_metexp_URL(metexp_dblink_file):
    '''
    Read out and return the URL stored in the given file.

    The URL is the first line that is neither blank nor a comment (starting
    with '#'); it is returned without surrounding whitespace or line ending.

    @param metexp_dblink_file: file containing the URL
    @return: the URL, or None if the file holds no such line
    '''
    file_input = fileinput.input(metexp_dblink_file)
    try:
        for line in file_input:
            candidate = line.strip()
            # skip blank lines and comment lines
            if not candidate or line[0] == '#':
                continue
            # just return the first line that is not a comment line:
            return candidate
        return None
    finally:
        file_input.close()
267
268
def main():
    '''
    Wrapper main function

    The input expected is (sys.argv[1:]):
    NIST_HOME dir
    nistms.INI
    spectrum_file.msp
    NIST output file (copy of the raw NIST result file)
    final output file (hits saved as tabular file)
    (optional) htmlReportFile
    (optional) htmlReportFile.files_path

    The two optional arguments must be given together.
    '''
    seconds_start = int(round(time.time()))

    # Fail early with a clear message instead of an IndexError on sys.argv[7]
    if len(sys.argv) == 7:
        raise Exception('Error: htmlReportFile.files_path is required when htmlReportFile is given')

    nist_home_dir = sys.argv[1]
    nist_ini_file = sys.argv[2]
    spectrum_file = sys.argv[3]
    nist_output_file = sys.argv[4]
    final_output_file = sys.argv[5]

    # html report pars:
    make_report = len(sys.argv) > 7
    output_html_report = None
    output_html_report_files_path = None
    if make_report:
        output_html_report = sys.argv[6]
        output_html_report_files_path = sys.argv[7]

    # wine usage is detected from the NIST home path
    is_wine = "wine" in nist_home_dir

    uuid_value = str(uuid.uuid4())

    # prepare NIST environment for running:
    new_nist_home = _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine)

    # run NIST search command:
    _run_NIST(new_nist_home, nist_output_file, is_wine)

    # write output tabular:
    hits_dict = utils.get_nist_out_as_dict(nist_output_file)
    utils.save_dict_as_tsv(hits_dict, final_output_file)

    # create report:
    if make_report:
        spectra_dict = utils.get_spectra_file_as_dict(spectrum_file)
        _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict)

    seconds_end = int(round(time.time()))
    print("Took " + str(seconds_end - seconds_start) + " seconds")
321
322
323
# Script entry point: run the wrapper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()