# view rankfilter_GCMS/test/test_pdfread.py @ 61:d685210eef3e
#
# fix in pdftotabular tool
# author pieter.lukasse@wur.nl
# date Fri, 19 Dec 2014 15:30:13 +0100
# parents 637830ac8bcd
# children
# line wrap: on
# line source

'''
Created on Mar 13, 2012

@author: marcelk
'''
from GCMS.rankfilter_GCMS import pdfread, pdftotabular  # @UnresolvedImport
from pkg_resources import resource_filename  # @UnresolvedImport # pylint: disable=E0611
import unittest


class Test(unittest.TestCase):
    '''
    Tests for pdfread.getPDF and pdfread.convert_pdftotext2tabular, using the
    input and reference files found in the "data" directory next to this module.
    '''

    def setUp(self):
        '''
        Resolves the location of the NIST test input file
        '''
        self.nist_pdf = resource_filename(__name__, "data/NIST_test_PDF.txt")

    def test_getPDF(self):
        '''
        Tests the reading and parsing of a NIST PDF file
        '''
        [hitlist, hitlist_missed] = pdfread.getPDF(self.nist_pdf, True)
        rows = [hitlist[row] for row in hitlist.keys()]
        # zip(*rows) groups the i-th entry of every hitlist value together; each
        # group is turned into a set so it can be matched regardless of order
        data = [set(row) for row in zip(*rows)]
        expected_element = set(('12.3', 'Sucrose', '14', 'undef', 'standards 2009', '660', 'not_def',
        '18495-0.142537-21284-2.26544e+07-135', '22.6544', '714'))
        self.assertTrue(expected_element in data,
                        "expected hit not found in parsed data: %r" % (expected_element,))
        self.assertTrue(len(hitlist_missed) != 0)
        # Check for last (dummy) hit:
        # Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062.
        expected_element = set(['C21H52O6Si5', '30645-02-4', 'mainlib', '15.6', '(dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', '658', '12.9014', '37062'])
        self.assertTrue(expected_element in data,
                        "expected hit not found in parsed data: %r" % (expected_element,))

    def test_pdftotabular(self):
        '''
        Converts data/testfile_2.txt with pdfread.convert_pdftotext2tabular and
        compares the whitespace separated tokens of the resulting
        data/testfile_2.tab with those of the reference data/testfile_2.tab_ref
        '''
        #pdftotabular.convert_pdftotext(resource_filename(__name__, "data/Coffee_suntory_without spectra.pdf"), "Coffee_suntory_without spectra2.txt")
        pdfread.convert_pdftotext2tabular(resource_filename(__name__, "data/testfile_2.txt"),
                                          resource_filename(__name__, "data/testfile_2.tab"),
                                          resource_filename(__name__, "data/testfile_2.log"), False)

        #read both the reference file  and actual output files
        expected = _read_file(resource_filename(__name__, "data/testfile_2.tab_ref"))
        actual = _read_file(resource_filename(__name__, "data/testfile_2.tab"))

        #convert the read in files to lists we can compare
        expected = expected.split()
        actual = actual.split()

        for exp, act in zip(expected, actual):
            # compare values
            self.assertEqual(exp, act)

        # zip() stops at the shortest list, so without this check an empty or
        # truncated output file would pass the loop above unnoticed
        self.assertEqual(len(expected), len(actual),
                         "number of values in output differs from reference")

def _read_file(filename):
    '''
    Helper method to quickly read a file
    @param filename: path of the file to read
    @return: the complete content of the file as a single string
    '''
    with open(filename) as handle:
        return handle.read()


# Keep this guard at the very end of the module: unittest.main() runs the tests
# and then exits, so anything defined below it would not exist yet when the
# tests run from the command line.
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_getPDF']
    unittest.main()