annotate rankfilter_GCMS/test/test_pdfread.py @ 61:d685210eef3e

fix in pdftotabular tool
author pieter.lukasse@wur.nl
date Fri, 19 Dec 2014 15:30:13 +0100
parents 637830ac8bcd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
1 '''
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
2 Created on Mar 13, 2012
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
3
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
4 @author: marcelk
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
5 '''
61
d685210eef3e fix in pdftotabular tool
pieter.lukasse@wur.nl
parents: 26
diff changeset
6 from GCMS.rankfilter_GCMS import pdfread, pdftotabular # @UnresolvedImport
0
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
7 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
8 import unittest
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
9
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
10
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
class Test(unittest.TestCase):
    '''
    Tests for parsing a NIST PDF text export (pdfread.getPDF) and for
    converting such a text file to a tabular file
    (pdfread.convert_pdftotext2tabular).
    '''

    def setUp(self):
        '''
        Resolve the location of the NIST test input inside the test data folder
        '''
        self.nist_pdf = resource_filename(__name__, "data/NIST_test_PDF.txt")

    def test_getPDF(self):
        '''
        Tests the reading and parsing of a NIST PDF file
        '''
        hitlist, hitlist_missed = pdfread.getPDF(self.nist_pdf, True)

        # zip(*rows) regroups the i-th entries of every hitlist value; each
        # group becomes a set so the comparison ignores the field order.
        rows = [hitlist[key] for key in hitlist.keys()]
        data = [set(row) for row in zip(*rows)]

        expected_element = set(('12.3', 'Sucrose', '14', 'undef', 'standards 2009', '660', 'not_def',
                                '18495-0.142537-21284-2.26544e+07-135', '22.6544', '714'))
        self.assertIn(expected_element, data)
        self.assertNotEqual(len(hitlist_missed), 0)

        # Check for last (dummy) hit:
        # Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062.
        expected_element = set(['C21H52O6Si5', '30645-02-4', 'mainlib', '15.6',
                                '(dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5',
                                '7298-1-9580-1.29014e+07-9', '658', '12.9014', '37062'])
        self.assertIn(expected_element, data)

    def test_pdftotabular(self):
        '''
        Converts data/testfile_2.txt to data/testfile_2.tab and compares the
        result, token by token, with the reference file data/testfile_2.tab_ref
        '''
        # NOTE(review): the input .txt was presumably produced from the PDF with
        # the call below - confirm before removing this line.
        #pdftotabular.convert_pdftotext(resource_filename(__name__, "data/Coffee_suntory_without spectra.pdf"), "Coffee_suntory_without spectra2.txt")
        pdfread.convert_pdftotext2tabular(resource_filename(__name__, "data/testfile_2.txt"),
                                          resource_filename(__name__, "data/testfile_2.tab"),
                                          resource_filename(__name__, "data/testfile_2.log"), False)

        # read both the reference file and actual output files
        expected = _read_file(resource_filename(__name__, "data/testfile_2.tab_ref"))
        actual = _read_file(resource_filename(__name__, "data/testfile_2.tab"))

        # Compare the whitespace-separated tokens as complete lists: a pairwise
        # zip() loop stops at the shorter list, so a truncated or empty output
        # file would pass unnoticed.
        self.assertEqual(expected.split(), actual.split())
0
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
53
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _read_file(filename):
    '''
    Helper method to quickly read a file
    @param filename: path of the file to read
    @return: the complete contents of the file as a single string
    '''
    with open(filename) as handle:
        return handle.read()


# The helper above has to be defined before this guard: unittest.main() runs
# the tests and then exits, so a definition placed after the call would not
# exist yet when the tests run from the command line.
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_getPDF']
    unittest.main()