0
|
1 '''
|
|
2 Created on Mar 6, 2012
|
|
3
|
|
4 @author: marcelk
|
|
5 '''
|
|
6 from GCMS import library_lookup, match_library
|
|
7 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611
|
|
8 import os
|
|
9 import shutil
|
|
10 import tempfile
|
|
11 import unittest
|
|
12
|
|
13
|
|
class Test(unittest.TestCase):
    '''
    Tests the 'library_lookup' Galaxy tool
    '''

    def setUp(self):
        '''
        Resolves the test data files (relative to this module's package) used by the tests
        '''
        self.ri_database = resource_filename(__name__, "data/RIDB_subset.txt")
        self.nist_output = resource_filename(__name__, "data/NIST_tabular.txt")
        self.ridb_poly_regress = resource_filename(__name__, "data/ridb_poly_regression.txt")
        self.ridb_linear_regress = resource_filename(__name__, "data/ridb_linear_regression.txt")

    def test_create_lookup_table(self):
        '''
        Tests the 'create_lookup_table' function
        '''
        column_type = 'Capillary'
        polarity = 'Semi-standard non-polar'
        lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
        # Every record stored under this key must be of the requested column type
        self.assertTrue(all(res[4] == 'Capillary' for res in lookup_dict['4177166']))
        self.assertEqual(['C51276336', '2,6-Dimethyl-octa-1,7-dien-3,6-diol', 'C10H18O2',
                          '1277', 'Capillary', 'Semi-standard non-polar', 'DB-5MS', '1',
                          'C51276336_DB-5MS', '', '', ''], lookup_dict['51276336'][1])

    def test_read_model(self):
        '''
        Tests reading the regression model data containing the parameters required for converting
        retention indices between GC-columns
        '''
        model, _ = library_lookup._read_model(self.ridb_poly_regress)
        # Order of values: coefficient 1 through 4, left limit, right limit
        # Polynomial model
        self.assertEqual([20.6155874639486, 0.945187096379008, 3.96480787567566e-05, -9.04377237159287e-09,
                          628.0, 2944.0, 405.0, 0, 0.998685262365514], model['HP-5']['SE-54'])
        self.assertEqual([-92.3963391356951, 1.26116176393346, -0.000191991657547972, 4.15387371263164e-08,
                          494.0, 2198.0, 407.0, 0, 0.996665023122993], model['Apiezon L']['Squalane'])
        # Linear model
        model, _ = library_lookup._read_model(self.ridb_linear_regress)
        self.assertEqual([2.81208738561543, 0.99482475526584, 628.0, 2944.0, 405.0, 0, 0.998643883946458],
                         model['HP-5']['SE-54'])
        self.assertEqual([19.979922768462, 0.993741869298272, 494.0, 2198.0, 407.0, 0, 0.99636062891041],
                         model['Apiezon L']['Squalane'])

    def test_apply_regression(self):
        '''
        Tests the regression model on some arbitrary retention indices
        '''
        poly_model, _ = library_lookup._read_model(self.ridb_poly_regress)
        linear_model, _ = library_lookup._read_model(self.ridb_linear_regress)
        retention_indices = [1000, 1010, 1020, 1030, 1040, 1050]
        converted_poly = []
        converted_linear = []
        for ri in retention_indices:
            converted_poly.append(library_lookup._apply_poly_regression('HP-5', 'DB-5', ri, poly_model))
            converted_linear.append(library_lookup._apply_linear_regression('HP-5', 'DB-5', ri, linear_model))

        self.assertEqual([1003.0566541860778, 1013.0979459524663, 1023.1358645806529, 1033.170466241159,
                          1043.2018071045052, 1053.2299433412131], converted_poly)
        self.assertEqual([1001.8127584915925, 1011.830140783027, 1021.8475230744615, 1031.864905365896,
                          1041.8822876573306, 1051.899669948765], converted_linear)

        # Test polynomial limit detection, the following RI falls outside of the possible limits
        ri = 3400
        converted_poly = library_lookup._apply_poly_regression('HP-5', 'DB-5', ri, poly_model)
        self.assertEqual(False, converted_poly)

    def test_preferred_hit(self):
        ''' Tests the matching of the hits with the preferred column, including regression '''
        model, method = library_lookup._read_model(self.ridb_poly_regress)
        column_type = 'Capillary'
        polarity = 'Semi-standard non-polar'
        lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
        hits = lookup_dict['150867']
        # No regression, should however consider order of given preference
        match = library_lookup._preferred(hits, ['SE-52', 'DB-5', 'HP-5'], column_type, polarity, model, method)
        expected = (['C150867', '(E)-phytol', 'C20H40O', '2110', 'Capillary',
                     'Semi-standard non-polar', 'SE-52', '', 'C150867_SE-52', '', '', ''], False)
        self.assertEqual(expected, match)

        # Perform regression by looking for 'OV-101' which isn't there. 'SE-52' has the best regression model
        # of the available columns
        match = library_lookup._preferred(hits, ['OV-101'], column_type, polarity, model, method)
        expected = (['C150867', '(E)-phytol', 'C20H40O', 2158.5769891569125, 'Capillary',
                     'Semi-standard non-polar', 'SE-52', '', 'C150867_SE-52', '', '', ''], 'SE-52')
        self.assertEqual(expected, match)

    def test_format_result(self):
        '''
        Tests the 'format_result' function
        '''
        column_type = 'Capillary'
        polarity = 'Semi-standard non-polar'

        # Look for DB-5, passing the polynomial regression model and its method
        pref_column = ['DB-5']
        model, method = library_lookup._read_model(self.ridb_poly_regress)
        lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
        data = library_lookup.format_result(lookup_dict, self.nist_output, pref_column, column_type,
                                            polarity, model, method)

        # remove non-hits from set:
        data = _get_hits_only(data)
        self.assertEqual(['C544354', 'Ethyl linoleate', 'C20H36O2', '2155', 'Capillary', 'Semi-standard non-polar',
                          'DB-5', '1', 'C544354_DB-5', '1810', 'None', '', '', '0'], data[20])
        self.assertEqual(111, len(data))

        # Look for both DB-5 and HP-5, this time without a regression model (False, None)
        pref_column = ['DB-5', 'HP-5']
        data = library_lookup.format_result(lookup_dict, self.nist_output, pref_column, column_type,
                                            polarity, False, None)
        # remove non-hits from set:
        data = _get_hits_only(data)
        self.assertEqual(['C502614', '.beta.-(E)-Farnesene', 'C15H24', '1508', 'Capillary', 'Semi-standard non-polar',
                          'DB-5', '1', 'C502614_DB-5', '942', 'None', '1482', '1522', '22'], data[50])
        self.assertEqual(106, len(data))

    def test_save_data(self):
        '''
        Tests the creation of the output tabular file
        '''
        temp_folder = tempfile.mkdtemp(prefix='gcms_combine_output_')
        try:
            saved_data = os.path.join(temp_folder, 'output.tsv')
            column_type = 'Capillary'
            polarity = 'Semi-standard non-polar'
            pref_column = ['DB-5']
            lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
            data = library_lookup.format_result(lookup_dict, self.nist_output, pref_column, column_type,
                                                polarity, False, None)
            library_lookup._save_data(data, saved_data)
            self.assertTrue(os.path.exists(saved_data))
        finally:
            # Always remove the temporary folder, also when one of the steps above fails
            shutil.rmtree(temp_folder)

    def test_match_library_get_lib_files(self):
        '''
        Tests the match_library.py functionality
        '''
        riqc_libs_dir = resource_filename(__name__, "../repositories/PRIMS-metabolomics/RI_DB_libraries")
        get_library_files_output = match_library.get_directory_files(riqc_libs_dir)
        self.assertEqual(1, len(get_library_files_output))
        self.assertEqual("Library_RI_DB_capillary_columns-noDuplicates", get_library_files_output[0][0])
        #TODO change assert below to assert that the result is a file, so the test can run on other dirs as well:
        #self.assertEqual("E:\\workspace\\PRIMS-metabolomics\\python-tools\\tools\\GCMS\\test\\data\\riqc_libs\\RI DB library (capillary columns) Dec.2012.txt", get_library_files_output[0][1])
        #self.assertEqual("RI DB library (capillary columns) Jan.2013", get_library_files_output[1][0])

        # A directory that does not exist must make get_directory_files raise. assertRaises is
        # used instead of try / bare except, because a bare except also swallows the
        # AssertionError of the "should not come here" check, so the test could never fail.
        self.assertRaises(Exception, match_library.get_directory_files, "/blah")
|
|
164
|
|
165 def _get_hits_only(data):
|
|
166 '''
|
|
167 removes items that have RI == 0.0 and Name == '' (these are dummy lines just for the output
|
|
168 '''
|
|
169 result = []
|
|
170 for item_idx in xrange(len(data)):
|
|
171 item = data[item_idx]
|
|
172 if item[1] != '' and item[3] > 0.0 :
|
|
173 result.append(item)
|
|
174
|
|
175 return result
|
|
176
|
|
177
|
|
if __name__ == "__main__":
    # To run a single test instead: import sys; sys.argv = ['', 'Test.testName']
    unittest.main()
|