diff rankfilter_GCMS/test/test_pdfread.py @ 61:d685210eef3e

fix in pdftotabular tool
author pieter.lukasse@wur.nl
date Fri, 19 Dec 2014 15:30:13 +0100
parents 637830ac8bcd
children
line wrap: on
line diff
--- a/rankfilter_GCMS/test/test_pdfread.py	Fri Dec 19 11:30:22 2014 +0100
+++ b/rankfilter_GCMS/test/test_pdfread.py	Fri Dec 19 15:30:13 2014 +0100
@@ -3,13 +3,14 @@
 
 @author: marcelk
 '''
-from GCMS.rankfilter_GCMS import pdfread  # @UnresolvedImport
+from GCMS.rankfilter_GCMS import pdfread, pdftotabular  # @UnresolvedImport
 from pkg_resources import resource_filename  # @UnresolvedImport # pylint: disable=E0611
 import unittest
 
 
 class Test(unittest.TestCase):
 
+
     def setUp(self):
         self.nist_pdf = resource_filename(__name__, "data/NIST_test_PDF.txt")
 
@@ -20,18 +21,44 @@
         [hitlist, hitlist_missed] = pdfread.getPDF(self.nist_pdf, True)
         rows = [hitlist[row] for row in hitlist.keys()]
         data = [set(row) for row in zip(*rows)]
-        expected_element = set(('12.3', ' Sucrose ', '14', 'undef', ' standards 2009', ' 660', 'not_def',
-        '18495-0.142537-21284-2.26544e+07-135', '22.6544', ' 714'))
+        expected_element = set(('12.3', 'Sucrose', '14', 'undef', 'standards 2009', '660', 'not_def',
+        '18495-0.142537-21284-2.26544e+07-135', '22.6544', '714'))
         self.failUnless(expected_element in data)
         self.failUnless(len(hitlist_missed) != 0)
         '''
         Check for last (dummy) hit:  
-        Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, LC21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062.
+        Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062.
         '''
-        expected_element = set(['C21H52O6Si5', ' 30645-02-4', ' mainlib', '15.6', ' (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', ' 658', '12.9014', '37062'])
+        expected_element = set(['C21H52O6Si5', '30645-02-4', 'mainlib', '15.6', '(dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', '658', '12.9014', '37062'])
         self.failUnless(expected_element in data)
         
+        
+    def test_pdftotabular(self):
+        #pdftotabular.convert_pdftotext(resource_filename(__name__, "data/Coffee_suntory_without spectra.pdf"), "Coffee_suntory_without spectra2.txt")
+        pdfread.convert_pdftotext2tabular(resource_filename(__name__, "data/testfile_2.txt"),
+                                          resource_filename(__name__, "data/testfile_2.tab"),
+                                          resource_filename(__name__, "data/testfile_2.log"), False)
+        
+        # read both the reference file and the actual output file
+        expected = _read_file(resource_filename(__name__, "data/testfile_2.tab_ref"))
+        actual = _read_file(resource_filename(__name__, "data/testfile_2.tab"))
+        
+        # split both texts on whitespace into token lists we can compare
+        expected = expected.split()
+        actual = actual.split()
+
+        for exp, act in zip(expected, actual):
+            # compare pairwise; NOTE(review): zip() stops at the shorter list, so missing or extra tokens go undetected
+            self.failUnlessEqual(exp, act)
 
 if __name__ == "__main__":
     #import sys;sys.argv = ['', 'Test.test_getPDF']
     unittest.main()
+    
+def _read_file(filename):  # NOTE(review): defined after unittest.main(), which exits by default, so unbound when run as a script
+    '''
+    Return the full contents of the given file as a single string.
+    @param filename: path of the file to open and read
+    '''
+    with open(filename) as handle:
+        return handle.read()