Mercurial > repos > pieterlukasse > prims_metabolomics
view rankfilter_GCMS/test/test_pdfread.py @ 61:d685210eef3e
fix in pdftotabular tool
author | pieter.lukasse@wur.nl |
---|---|
date | Fri, 19 Dec 2014 15:30:13 +0100 |
parents | 637830ac8bcd |
children |
line wrap: on
line source
'''
Created on Mar 13, 2012

Unit tests for the NIST PDF reading / tabular conversion code in
GCMS.rankfilter_GCMS.pdfread.

@author: marcelk
'''
import unittest

from pkg_resources import resource_filename  # @UnresolvedImport # pylint: disable=E0611

from GCMS.rankfilter_GCMS import pdfread, pdftotabular  # @UnresolvedImport


def _read_file(filename):
    '''
    Helper method to quickly read a file
    @param filename: path of the file to read
    @return: the complete contents of the file as a single string
    '''
    with open(filename) as handle:
        return handle.read()


class Test(unittest.TestCase):
    '''
    Tests for pdfread.getPDF and pdfread.convert_pdftotext2tabular, run
    against the data files packaged next to this test module.
    '''

    def setUp(self):
        '''
        Resolve the path of the packaged NIST test input used by test_getPDF
        '''
        self.nist_pdf = resource_filename(__name__, "data/NIST_test_PDF.txt")

    def test_getPDF(self):
        '''
        Tests the reading and parsing of a NIST PDF file
        '''
        [hitlist, hitlist_missed] = pdfread.getPDF(self.nist_pdf, True)

        # Transpose the values stored in hitlist so that every resulting set
        # holds one entry taken from each value.
        # NOTE(review): presumably hitlist maps column names to per-hit
        # value lists, making each set one hit record - confirm in pdfread.
        columns = [hitlist[key] for key in hitlist.keys()]
        data = [set(record) for record in zip(*columns)]

        expected_element = set(('12.3', 'Sucrose', '14', 'undef', 'standards 2009',
                                '660', 'not_def',
                                '18495-0.142537-21284-2.26544e+07-135',
                                '22.6544', '714'))
        self.assertTrue(expected_element in data)
        self.assertTrue(len(hitlist_missed) != 0)

        # Check for last (dummy) hit:
        #   Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L
        #   C21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4;
        #   Lib: mainlib; ID: 37062.
        expected_element = set(['C21H52O6Si5', '30645-02-4', 'mainlib', '15.6',
                                '(dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5',
                                '7298-1-9580-1.29014e+07-9', '658', '12.9014',
                                '37062'])
        self.assertTrue(expected_element in data)

    def test_pdftotabular(self):
        '''
        Converts the packaged pdftotext output to tabular format and compares
        the result, token by token, with the packaged reference file
        '''
        #pdftotabular.convert_pdftotext(resource_filename(__name__, "data/Coffee_suntory_without spectra.pdf"), "Coffee_suntory_without spectra2.txt")
        input_file = resource_filename(__name__, "data/testfile_2.txt")
        output_file = resource_filename(__name__, "data/testfile_2.tab")
        log_file = resource_filename(__name__, "data/testfile_2.log")
        reference_file = resource_filename(__name__, "data/testfile_2.tab_ref")

        pdfread.convert_pdftotext2tabular(input_file, output_file, log_file, False)

        # read both the reference file and the actual output file and split
        # them on whitespace into token lists we can compare
        expected = _read_file(reference_file).split()
        actual = _read_file(output_file).split()

        for exp, act in zip(expected, actual):
            # compare values
            self.assertEqual(exp, act)

        # zip() stops at the shorter list, so without this check a truncated
        # or empty output file would pass the loop above unnoticed.
        self.assertEqual(len(expected), len(actual))


# Keep this guard at the very end of the module: unittest.main() runs the
# tests and exits, so every helper the tests use must already be defined.
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_getPDF']
    unittest.main()