comparison rankfilter_GCMS/test/test_pdfread.py @ 61:d685210eef3e

fix in pdftotabular tool
author pieter.lukasse@wur.nl
date Fri, 19 Dec 2014 15:30:13 +0100
parents 637830ac8bcd
children
comparison
equal deleted inserted replaced
60:35f506f30ae4 61:d685210eef3e
1 ''' 1 '''
2 Created on Mar 13, 2012 2 Created on Mar 13, 2012
3 3
4 @author: marcelk 4 @author: marcelk
5 ''' 5 '''
6 from GCMS.rankfilter_GCMS import pdfread # @UnresolvedImport 6 from GCMS.rankfilter_GCMS import pdfread, pdftotabular # @UnresolvedImport
7 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611 7 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611
8 import unittest 8 import unittest
9 9
10 10
11 class Test(unittest.TestCase): 11 class Test(unittest.TestCase):
12
12 13
13 def setUp(self): 14 def setUp(self):
14 self.nist_pdf = resource_filename(__name__, "data/NIST_test_PDF.txt") 15 self.nist_pdf = resource_filename(__name__, "data/NIST_test_PDF.txt")
15 16
16 def test_getPDF(self): 17 def test_getPDF(self):
18 Tests the reading and parsing of a NIST PDF file 19 Tests the reading and parsing of a NIST PDF file
19 ''' 20 '''
20 [hitlist, hitlist_missed] = pdfread.getPDF(self.nist_pdf, True) 21 [hitlist, hitlist_missed] = pdfread.getPDF(self.nist_pdf, True)
21 rows = [hitlist[row] for row in hitlist.keys()] 22 rows = [hitlist[row] for row in hitlist.keys()]
22 data = [set(row) for row in zip(*rows)] 23 data = [set(row) for row in zip(*rows)]
23 expected_element = set(('12.3', ' Sucrose ', '14', 'undef', ' standards 2009', ' 660', 'not_def', 24 expected_element = set(('12.3', 'Sucrose', '14', 'undef', 'standards 2009', '660', 'not_def',
24 '18495-0.142537-21284-2.26544e+07-135', '22.6544', ' 714')) 25 '18495-0.142537-21284-2.26544e+07-135', '22.6544', '714'))
25 self.failUnless(expected_element in data) 26 self.failUnless(expected_element in data)
26 self.failUnless(len(hitlist_missed) != 0) 27 self.failUnless(len(hitlist_missed) != 0)
27 ''' 28 '''
28 Check for last (dummy) hit: 29 Check for last (dummy) hit:
29 Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, LC21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062. 30 Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062.
30 ''' 31 '''
31 expected_element = set(['C21H52O6Si5', ' 30645-02-4', ' mainlib', '15.6', ' (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', ' 658', '12.9014', '37062']) 32 expected_element = set(['C21H52O6Si5', '30645-02-4', 'mainlib', '15.6', '(dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', '658', '12.9014', '37062'])
32 self.failUnless(expected_element in data) 33 self.failUnless(expected_element in data)
33 34
35
def test_pdftotabular(self):
    '''
    Runs convert_pdftotext2tabular on data/testfile_2.txt and checks that
    data/testfile_2.tab holds the same whitespace separated values as the
    stored reference file data/testfile_2.tab_ref
    '''
    #pdftotabular.convert_pdftotext(resource_filename(__name__, "data/Coffee_suntory_without spectra.pdf"), "Coffee_suntory_without spectra2.txt")
    # NOTE(review): pdftotabular is imported by this module, yet the conversion
    # is called on pdfread -- confirm which module is meant to be exercised.
    pdfread.convert_pdftotext2tabular(resource_filename(__name__, "data/testfile_2.txt"),
                                      resource_filename(__name__, "data/testfile_2.tab"),
                                      resource_filename(__name__, "data/testfile_2.log"), False)

    #read both the reference file and actual output files
    expected = _read_file(resource_filename(__name__, "data/testfile_2.tab_ref"))
    actual = _read_file(resource_filename(__name__, "data/testfile_2.tab"))

    #convert the read in files to lists we can compare
    expected = expected.split()
    actual = actual.split()

    # Compare the complete lists in one go: pairing the values with zip()
    # stops at the shorter list, so an empty or truncated output file would
    # pass unnoticed. assertEqual also reports the first differing element.
    self.assertEqual(expected, actual)
34 53
def _read_file(filename):
    '''
    Helper method to quickly read a file
    @param filename: path of the file to read
    @return: the complete content of the file as one string
    '''
    with open(filename) as handle:
        return handle.read()


# The helper above has to be defined before the tests are started:
# unittest.main() runs the tests and then exits the interpreter, so a
# definition placed below this guard would not exist yet while the tests
# run when this module is executed as a script.
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_getPDF']
    unittest.main()