diff rankfilter_GCMS/pdftotabular.py @ 61:d685210eef3e

fix in pdftotabular tool
author pieter.lukasse@wur.nl
date Fri, 19 Dec 2014 15:30:13 +0100
parents 9d5f4f5f764b
children
line wrap: on
line diff
--- a/rankfilter_GCMS/pdftotabular.py	Fri Dec 19 11:30:22 2014 +0100
+++ b/rankfilter_GCMS/pdftotabular.py	Fri Dec 19 15:30:13 2014 +0100
@@ -27,8 +27,11 @@
     @param output_file: output text file for the hits    
     '''
     
+    # The "-layout" option in the pdftotext call below maintains (as best as possible) the original
+    # physical layout of the text. The default is to 'undo' the physical layout (columns, hyphenation,
+    # etc.) and output the text in reading order.
     try:
-        call(["pdftotext", filename, output_file])
+        call(["pdftotext", "-layout", filename, output_file])
     except:
         raise Exception("Error while trying to convert PDF to text")