Mercurial > repos > pieterlukasse > prims_metabolomics
annotate rankfilter_GCMS/test/test_pdfread.py @ 61:d685210eef3e
fix in pdftotabular tool
author | pieter.lukasse@wur.nl |
---|---|
date | Fri, 19 Dec 2014 15:30:13 +0100 |
parents | 637830ac8bcd |
children |
rev | line source |
---|---|
0 | 1 ''' |
2 Created on Mar 13, 2012 | |
3 | |
4 @author: marcelk | |
5 ''' | |
61 | 6 from GCMS.rankfilter_GCMS import pdfread, pdftotabular # @UnresolvedImport |
0 | 7 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611 |
8 import unittest | |
9 | |
10 | |
class Test(unittest.TestCase):
    '''
    Tests for the NIST report parsing and tabular conversion done by pdfread.
    '''

    def setUp(self):
        '''
        Resolve the path of the NIST test input that is shipped next to this module.
        '''
        self.nist_pdf = resource_filename(__name__, "data/NIST_test_PDF.txt")

    def test_getPDF(self):
        '''
        Tests the reading and parsing of a NIST PDF file
        '''
        hitlist, hitlist_missed = pdfread.getPDF(self.nist_pdf, True)

        # Transpose the per-key value sequences: after zip(*...) every entry of
        # `data` is the set of values found at one position across all keys.
        columns = [hitlist[key] for key in hitlist.keys()]
        data = [set(values) for values in zip(*columns)]

        expected_element = set(('12.3', 'Sucrose', '14', 'undef', 'standards 2009', '660', 'not_def',
                                '18495-0.142537-21284-2.26544e+07-135', '22.6544', '714'))
        self.assertIn(expected_element, data)
        self.assertNotEqual(len(hitlist_missed), 0)

        # Check for last (dummy) hit:
        # Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062.
        expected_element = set(['C21H52O6Si5', '30645-02-4', 'mainlib', '15.6', '(dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', '658', '12.9014', '37062'])
        self.assertIn(expected_element, data)

    def test_pdftotabular(self):
        '''
        Converts data/testfile_2.txt to tabular form and compares the result
        with the stored reference output data/testfile_2.tab_ref
        '''
        #pdftotabular.convert_pdftotext(resource_filename(__name__, "data/Coffee_suntory_without spectra.pdf"), "Coffee_suntory_without spectra2.txt")
        pdfread.convert_pdftotext2tabular(resource_filename(__name__, "data/testfile_2.txt"),
                                          resource_filename(__name__, "data/testfile_2.tab"),
                                          resource_filename(__name__, "data/testfile_2.log"), False)

        #read both the reference file and actual output files
        expected = _read_file(resource_filename(__name__, "data/testfile_2.tab_ref"))
        actual = _read_file(resource_filename(__name__, "data/testfile_2.tab"))

        #convert the read in files to lists we can compare
        expected = expected.split()
        actual = actual.split()

        # Compare the complete token lists. A pairwise zip() would stop at the
        # shorter list, so an empty or truncated output file would go unnoticed.
        self.assertEqual(expected, actual)
0 | 53 |
def _read_file(filename):
    '''
    Helper method to quickly read a file
    @param filename: path of the file to read
    @return: the complete content of the file as a single string
    '''
    with open(filename) as handle:
        return handle.read()


# Keep this guard at the very end of the module: unittest.main() runs the
# tests and then exits the interpreter, so anything defined below it (as
# _read_file used to be) would not exist yet when the tests execute.
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_getPDF']
    unittest.main()