Mercurial > repos > pieterlukasse > prims_metabolomics
annotate test/test_library_lookup.py @ 37:2398cbcac2cb
increased version nr
| author | pieter.lukasse@wur.nl |
|---|---|
| date | Fri, 19 Sep 2014 11:28:05 +0200 |
| parents | 60b53f2aa48a |
| children | eb0e25d06060 |
| rev | line source |
|---|---|
| 0 | 1 ''' |
| 2 Created on Mar 6, 2012 | |
| 3 | |
| 4 @author: marcelk | |
| 5 ''' | |
| 6 from GCMS import library_lookup, match_library | |
| 7 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611 | |
| 8 import os | |
| 9 import shutil | |
| 10 import tempfile | |
| 11 import unittest | |
| 12 | |
| 13 | |
class Test(unittest.TestCase):
    '''
    Tests the 'library_lookup' Galaxy tool
    '''

    def setUp(self):
        '''
        Resolves the paths of the test data files, relative to this module
        '''
        self.ri_database = resource_filename(__name__, "data/RIDB_subset.txt")
        self.nist_output = resource_filename(__name__, "data/NIST_tabular.txt")
        self.ridb_poly_regress = resource_filename(__name__, "data/ridb_poly_regression.txt")
        self.ridb_linear_regress = resource_filename(__name__, "data/ridb_linear_regression.txt")

    def test_create_lookup_table(self):
        '''
        Tests the 'create_lookup_table' function
        '''
        column_type = 'Capillary'
        polarity = 'Semi-standard non-polar'
        lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
        # Every record stored under this key must be of the requested column type
        self.assertTrue(all(res[4] == 'Capillary' for res in lookup_dict['4177166']))
        self.assertEqual(['C51276336', '2,6-Dimethyl-octa-1,7-dien-3,6-diol', 'C10H18O2',
                          '1277', 'Capillary', 'Semi-standard non-polar', 'DB-5MS', '1',
                          'C51276336_DB-5MS', '', '', ''], lookup_dict['51276336'][1])

    def test_read_model(self):
        '''
        Tests reading the regression model data containing the parameters required for converting
        retention indices between GC-columns
        '''
        model, _ = library_lookup._read_model(self.ridb_poly_regress)
        # Order of values: coefficient 1 through 4, left limit, right limit
        # Polynomial model
        self.assertEqual([20.6155874639486, 0.945187096379008, 3.96480787567566e-05, -9.04377237159287e-09,
                          628.0, 2944.0, 405.0, 0, 0.998685262365514], model['HP-5']['SE-54'])
        self.assertEqual([-92.3963391356951, 1.26116176393346, -0.000191991657547972, 4.15387371263164e-08,
                          494.0, 2198.0, 407.0, 0, 0.996665023122993], model['Apiezon L']['Squalane'])
        # Linear model
        model, _ = library_lookup._read_model(self.ridb_linear_regress)
        self.assertEqual([2.81208738561543, 0.99482475526584, 628.0, 2944.0, 405.0, 0, 0.998643883946458],
                         model['HP-5']['SE-54'])
        self.assertEqual([19.979922768462, 0.993741869298272, 494.0, 2198.0, 407.0, 0, 0.99636062891041],
                         model['Apiezon L']['Squalane'])

    def test_apply_regression(self):
        '''
        Tests the regression model on some arbitrary retention indices
        '''
        poly_model, _ = library_lookup._read_model(self.ridb_poly_regress)
        linear_model, _ = library_lookup._read_model(self.ridb_linear_regress)
        retention_indices = [1000, 1010, 1020, 1030, 1040, 1050]
        converted_poly = [library_lookup._apply_poly_regression('HP-5', 'DB-5', ri, poly_model)
                          for ri in retention_indices]
        converted_linear = [library_lookup._apply_linear_regression('HP-5', 'DB-5', ri, linear_model)
                            for ri in retention_indices]

        self.assertEqual([1003.0566541860778, 1013.0979459524663, 1023.1358645806529, 1033.170466241159,
                          1043.2018071045052, 1053.2299433412131], converted_poly)
        self.assertEqual([1001.8127584915925, 1011.830140783027, 1021.8475230744615, 1031.864905365896,
                          1041.8822876573306, 1051.899669948765], converted_linear)

        # Test polynomial limit detection, the following RI falls outside of the possible limits
        ri = 3400
        converted_poly = library_lookup._apply_poly_regression('HP-5', 'DB-5', ri, poly_model)
        self.assertEqual(False, converted_poly)

    def test_preferred_hit(self):
        ''' Tests the matching of the hits with the preferred column, including regression '''
        model, method = library_lookup._read_model(self.ridb_poly_regress)
        column_type = 'Capillary'
        polarity = 'Semi-standard non-polar'
        lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
        hits = lookup_dict['150867']
        # No regression, should however consider order of given preference
        match = library_lookup._preferred(hits, ['SE-52', 'DB-5', 'HP-5'], column_type, polarity, model, method)
        expected = (['C150867', '(E)-phytol', 'C20H40O', '2110', 'Capillary',
                     'Semi-standard non-polar', 'SE-52', '', 'C150867_SE-52', '', '', ''], False)
        self.assertEqual(expected, match)

        # Perform regression by looking for 'OV-101' which isn't there. 'SE-52' has the best regression model
        # of the available columns
        match = library_lookup._preferred(hits, ['OV-101'], column_type, polarity, model, method)
        expected = (['C150867', '(E)-phytol', 'C20H40O', 2158.5769891569125, 'Capillary',
                     'Semi-standard non-polar', 'SE-52', '', 'C150867_SE-52', '', '', ''], 'SE-52')
        self.assertEqual(expected, match)

    def test_format_result(self):
        '''
        Tests the 'format_result' function
        '''
        column_type = 'Capillary'
        polarity = 'Semi-standard non-polar'

        # Look for DB-5, passing the polynomial regression model
        pref_column = ['DB-5']
        model, method = library_lookup._read_model(self.ridb_poly_regress)
        lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
        data = library_lookup.format_result(lookup_dict, self.nist_output, pref_column, column_type,
                                            polarity, model, method)

        # remove non-hits from set:
        data = _get_hits_only(data)
        self.assertEqual(['C544354', 'Ethyl linoleate', 'C20H36O2', '2155', 'Capillary', 'Semi-standard non-polar',
                          'DB-5', '1', 'C544354_DB-5', '1810', 'None', '', '', '0'], data[20])
        self.assertEqual(111, len(data))

        # Look for both DB-5 and HP-5, this time passing False/None instead of a model and method
        pref_column = ['DB-5', 'HP-5']
        data = library_lookup.format_result(lookup_dict, self.nist_output, pref_column, column_type,
                                            polarity, False, None)
        # remove non-hits from set:
        data = _get_hits_only(data)
        self.assertEqual(['C502614', '.beta.-(E)-Farnesene', 'C15H24', '1508', 'Capillary', 'Semi-standard non-polar',
                          'DB-5', '1', 'C502614_DB-5', '942', 'None', '1482', '1522', '22'], data[50])
        self.assertEqual(106, len(data))

    def test_save_data(self):
        '''
        Tests the creation of the output tabular file
        '''
        temp_folder = tempfile.mkdtemp(prefix='gcms_combine_output_')
        try:
            saved_data = os.path.join(temp_folder, 'output.tsv')
            column_type = 'Capillary'
            polarity = 'Semi-standard non-polar'
            pref_column = ['DB-5']
            lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
            data = library_lookup.format_result(lookup_dict, self.nist_output, pref_column, column_type,
                                                polarity, False, None)
            library_lookup._save_data(data, saved_data)
            self.assertTrue(os.path.exists(saved_data))
        finally:
            # Always clean up, also when one of the steps above fails
            shutil.rmtree(temp_folder)

    def test_match_library_get_lib_files(self):
        '''
        Tests the match_library.py functionality
        '''
        riqc_libs_dir = resource_filename(__name__, "../repositories/PRIMS-metabolomics/RI_DB_libraries")
        get_library_files_output = match_library.get_directory_files(riqc_libs_dir)
        self.assertEqual(2, len(get_library_files_output))
        self.assertEqual("Library_RI_DB_capillary_columns-noDuplicates", get_library_files_output[0][0])
        # TODO: also assert that the second element of each entry is an existing file, so the
        # test can run on other dirs as well

        # A non-existing directory must make get_directory_files raise. The previous
        # try/bare-except construction swallowed its own AssertionError and could never fail.
        with self.assertRaises(Exception):
            match_library.get_directory_files("/blah")
| 164 | |
| 165 def _get_hits_only(data): | |
| 166 ''' | |
| 167 removes items that have RI == 0.0 and Name == '' (these are dummy lines just for the output | |
| 168 ''' | |
| 169 result = [] | |
| 170 for item_idx in xrange(len(data)): | |
| 171 item = data[item_idx] | |
| 172 if item[1] != '' and item[3] > 0.0 : | |
| 173 result.append(item) | |
| 174 | |
| 175 return result | |
| 176 | |
| 177 | |
if __name__ == "__main__":
    # Run all tests in this module. A single test can be selected on the
    # command line, e.g. 'Test.test_read_model'.
    unittest.main()
