Mercurial > repos > pieterlukasse > prims_metabolomics
comparison rankfilter_GCMS/test/test_pdfread.py @ 61:d685210eef3e
fix in pdftotabular tool
author | pieter.lukasse@wur.nl |
---|---|
date | Fri, 19 Dec 2014 15:30:13 +0100 |
parents | 637830ac8bcd |
children |
comparison
equal
deleted
inserted
replaced
60:35f506f30ae4 | 61:d685210eef3e |
---|---|
1 ''' | 1 ''' |
2 Created on Mar 13, 2012 | 2 Created on Mar 13, 2012 |
3 | 3 |
4 @author: marcelk | 4 @author: marcelk |
5 ''' | 5 ''' |
6 from GCMS.rankfilter_GCMS import pdfread # @UnresolvedImport | 6 from GCMS.rankfilter_GCMS import pdfread, pdftotabular # @UnresolvedImport |
7 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611 | 7 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611 |
8 import unittest | 8 import unittest |
9 | 9 |
10 | 10 |
11 class Test(unittest.TestCase): | 11 class Test(unittest.TestCase): |
12 | |
12 | 13 |
13 def setUp(self): | 14 def setUp(self): |
14 self.nist_pdf = resource_filename(__name__, "data/NIST_test_PDF.txt") | 15 self.nist_pdf = resource_filename(__name__, "data/NIST_test_PDF.txt") |
15 | 16 |
16 def test_getPDF(self): | 17 def test_getPDF(self): |
17 ''' | 18 ''' |
18 Tests the reading and parsing of a NIST PDF file | 19 Tests the reading and parsing of a NIST PDF file |
19 ''' | 20 ''' |
20 [hitlist, hitlist_missed] = pdfread.getPDF(self.nist_pdf, True) | 21 [hitlist, hitlist_missed] = pdfread.getPDF(self.nist_pdf, True) |
21 rows = [hitlist[row] for row in hitlist.keys()] | 22 rows = [hitlist[row] for row in hitlist.keys()] |
22 data = [set(row) for row in zip(*rows)] | 23 data = [set(row) for row in zip(*rows)] |
23 expected_element = set(('12.3', ' Sucrose ', '14', 'undef', ' standards 2009', ' 660', 'not_def', | 24 expected_element = set(('12.3', 'Sucrose', '14', 'undef', 'standards 2009', '660', 'not_def', |
24 '18495-0.142537-21284-2.26544e+07-135', '22.6544', ' 714')) | 25 '18495-0.142537-21284-2.26544e+07-135', '22.6544', '714')) |
25 self.failUnless(expected_element in data) | 26 self.failUnless(expected_element in data) |
26 self.failUnless(len(hitlist_missed) != 0) | 27 self.failUnless(len(hitlist_missed) != 0) |
27 ''' | 28 ''' |
28 Check for last (dummy) hit: | 29 Check for last (dummy) hit: |
29 Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, LC21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062. | 30 Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062. |
30 ''' | 31 ''' |
31 expected_element = set(['C21H52O6Si5', ' 30645-02-4', ' mainlib', '15.6', ' (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', ' 658', '12.9014', '37062']) | 32 expected_element = set(['C21H52O6Si5', '30645-02-4', 'mainlib', '15.6', '(dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', '658', '12.9014', '37062']) |
32 self.failUnless(expected_element in data) | 33 self.failUnless(expected_element in data) |
33 | 34 |
35 | |
36 def test_pdftotabular(self): | |
37 #pdftotabular.convert_pdftotext(resource_filename(__name__, "data/Coffee_suntory_without spectra.pdf"), "Coffee_suntory_without spectra2.txt") | |
38 pdfread.convert_pdftotext2tabular(resource_filename(__name__, "data/testfile_2.txt"), | |
39 resource_filename(__name__, "data/testfile_2.tab"), | |
40 resource_filename(__name__, "data/testfile_2.log"), False) | |
41 | |
42 #read both the reference file and actual output files | |
43 expected = _read_file(resource_filename(__name__, "data/testfile_2.tab_ref")) | |
44 actual = _read_file(resource_filename(__name__, "data/testfile_2.tab")) | |
45 | |
46 #convert the read in files to lists we can compare | |
47 expected = expected.split() | |
48 actual = actual.split() | |
49 | |
50 for exp, act in zip(expected, actual): | |
51 # compare values | |
52 self.failUnlessEqual(exp, act) | |
34 | 53 |
35 if __name__ == "__main__": | 54 if __name__ == "__main__": |
36 #import sys;sys.argv = ['', 'Test.test_getPDF'] | 55 #import sys;sys.argv = ['', 'Test.test_getPDF'] |
37 unittest.main() | 56 unittest.main() |
57 | |
58 def _read_file(filename): | |
59 ''' | |
60 Helper method to quickly read a file | |
61 @param filename: | |
62 ''' | |
63 with open(filename) as handle: | |
64 return handle.read() |