comparison rankfilter_GCMS/pdftotabular.py @ 0:9d5f4f5f764b

Initial commit to toolshed
author pieter.lukasse@wur.nl
date Thu, 16 Jan 2014 13:10:00 +0100
parents
children d685210eef3e
comparison
equal deleted inserted replaced
-1:000000000000 0:9d5f4f5f764b
1 """
2 Copyright (C) 2013, Pieter Lukasse, Plant Research International, Wageningen
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this software except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15
16 """
17
18 import sys
19 import pdfread
20 from subprocess import call
21
22
def convert_pdftotext(filename, output_file):
    '''
    Converts PDF file to text by running the external "pdftotext" program.

    @param filename: PDF file to parse
    @param output_file: output text file for the hits
    @raise Exception: if "pdftotext" could not be started, or if it exited
                      with a non-zero status
    '''
    command = ["pdftotext", filename, output_file]

    try:
        # call() only raises when the program cannot be launched at all;
        # a failed conversion is reported through the exit status instead,
        # so that value has to be inspected explicitly below.
        exit_status = call(command)
    except OSError as err:
        # Only launch failures are translated; KeyboardInterrupt/SystemExit
        # are deliberately left to propagate unchanged.
        raise Exception("Error while trying to convert PDF to text: %s" % err)

    if exit_status != 0:
        raise Exception("Error while trying to convert PDF to text: "
                        "pdftotext exited with status %d" % exit_status)
34
35
36
37
if __name__ == '__main__':
    # Command line: <pdf file> <arg 2> <arg 3>
    # The second and third arguments are handed straight to
    # pdfread.convert_pdftotext2tabular; their meaning is defined there
    # (presumably output locations -- TODO confirm against pdfread).
    source_pdf = sys.argv[1]

    # The intermediate text file path is the PDF path with ".txt" appended.
    text_path = "%s.txt" % source_pdf
    convert_pdftotext(source_pdf, text_path)

    pdfread.convert_pdftotext2tabular(
        text_path,
        sys.argv[2],
        sys.argv[3],
        False,
    )