Mercurial > repos > pieterlukasse > prims_metabolomics
diff rankfilter_GCMS/test/test_pdfread.py @ 61:d685210eef3e
fix in pdftotabular tool
author | pieter.lukasse@wur.nl |
---|---|
date | Fri, 19 Dec 2014 15:30:13 +0100 |
parents | 637830ac8bcd |
children |
line wrap: on
line diff
--- a/rankfilter_GCMS/test/test_pdfread.py Fri Dec 19 11:30:22 2014 +0100
+++ b/rankfilter_GCMS/test/test_pdfread.py Fri Dec 19 15:30:13 2014 +0100
@@ -3,13 +3,14 @@
 
 @author: marcelk
 '''
-from GCMS.rankfilter_GCMS import pdfread # @UnresolvedImport
+from GCMS.rankfilter_GCMS import pdfread, pdftotabular # @UnresolvedImport
 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611
 import unittest
 
 
 class Test(unittest.TestCase):
 
+
     def setUp(self):
         self.nist_pdf = resource_filename(__name__, "data/NIST_test_PDF.txt")
 
@@ -20,18 +21,44 @@
         [hitlist, hitlist_missed] = pdfread.getPDF(self.nist_pdf, True)
         rows = [hitlist[row] for row in hitlist.keys()]
         data = [set(row) for row in zip(*rows)]
-        expected_element = set(('12.3', ' Sucrose ', '14', 'undef', ' standards 2009', ' 660', 'not_def',
-                                '18495-0.142537-21284-2.26544e+07-135', '22.6544', ' 714'))
+        expected_element = set(('12.3', 'Sucrose', '14', 'undef', 'standards 2009', '660', 'not_def',
+                                '18495-0.142537-21284-2.26544e+07-135', '22.6544', '714'))
         self.failUnless(expected_element in data)
         self.failUnless(len(hitlist_missed) != 0)
         '''
         Check for last (dummy) hit:
-        Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, LC21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062.
+        Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062.
         '''
-        expected_element = set(['C21H52O6Si5', ' 30645-02-4', ' mainlib', '15.6', ' (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', ' 658', '12.9014', '37062'])
+        expected_element = set(['C21H52O6Si5', '30645-02-4', 'mainlib', '15.6', '(dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', '658', '12.9014', '37062'])
         self.failUnless(expected_element in data)
+
+    def test_pdftotabular(self):
+        #pdftotabular.convert_pdftotext(resource_filename(__name__, "data/Coffee_suntory_without spectra.pdf"), "Coffee_suntory_without spectra2.txt")
+        pdfread.convert_pdftotext2tabular(resource_filename(__name__, "data/testfile_2.txt"),
+                                          resource_filename(__name__, "data/testfile_2.tab"),
+                                          resource_filename(__name__, "data/testfile_2.log"), False)
+
+        #read both the reference file and actual output files
+        expected = _read_file(resource_filename(__name__, "data/testfile_2.tab_ref"))
+        actual = _read_file(resource_filename(__name__, "data/testfile_2.tab"))
+
+        #convert the read in files to lists we can compare
+        expected = expected.split()
+        actual = actual.split()
+
+        for exp, act in zip(expected, actual):
+            # compare values
+            self.failUnlessEqual(exp, act)
 
 
 if __name__ == "__main__":
     #import sys;sys.argv = ['', 'Test.test_getPDF']
     unittest.main()
+
+def _read_file(filename):
+    '''
+    Helper method to quickly read a file
+    @param filename:
+    '''
+    with open(filename) as handle:
+        return handle.read()