comparison sparql_import.py @ 3:4b4bbcf5db31 draft

Uploaded
author saml
date Wed, 21 Nov 2012 12:46:24 -0500
parents 62cfd14e2520
children
comparison
equal deleted inserted replaced
2:62cfd14e2520 3:4b4bbcf5db31
8 from xml.etree import ElementTree as et 8 from xml.etree import ElementTree as et
9 from optparse import OptionParser 9 from optparse import OptionParser
10 import urllib, sys, re 10 import urllib, sys, re
11 11
12 # ----------------------- 12 # -----------------------
13 # Option parsing
14 # -----------------------
15
16 parser = OptionParser()
17 parser.add_option("-u", "--url",
18 help = "The URL to the SPARQL endpoint")
19 parser.add_option("-q", "--sparql_query",
20 help = "A SPARQL query to send to a SPARQL endpoint")
21 parser.add_option("-o", "--output_file",
22 help = "An output file for storing the results")
23 (options, args) = parser.parse_args()
24
25 if not options.url:
26 sys.exit("You have to specify an URL! Use the -h flag to view command line options!")
27 if not options.sparql_query:
28 sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!")
29 if not options.output_file:
30 sys.exit("You have to specify an output file! Use the -h flag to view command line options!")
31
32 if len(options.sparql_query) < 9:
33 sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query)
34
35 if not re.match("^http", options.url):
36 sys.exit("The URL has to start with 'http://'! Please try again!")
37
38 # -----------------------
39 # The main code 13 # The main code
40 # ----------------------- 14 # -----------------------
41 15
42 def main(): 16 def main():
17 # Parse command line options
18 (options, args) = parse_options()
19
43 # Extract command line options 20 # Extract command line options
44 sparql_query = options.sparql_query 21 sparql_query = options.sparql_query
45 sparql_query = sparql_query.replace("__oc__","{") 22 sparql_query = restore_escaped_chars( sparql_query )
46 sparql_query = sparql_query.replace("__ob__","[")
47 sparql_query = sparql_query.replace("__cc__","}")
48 sparql_query = sparql_query.replace("__cb__","]")
49 sparql_query = sparql_query.replace("__cr__"," ")
50 sparql_query = sparql_query.replace("__cn__"," ")
51 sparql_query = urllib.quote_plus(sparql_query) 23 sparql_query = urllib.quote_plus(sparql_query)
52 url = options.url 24 url = options.url
53
54 output_file = options.output_file 25 output_file = options.output_file
55 26
56 # Create SPARQL query URL 27 # Create SPARQL query URL
57 sparql_query_url = url + "?query=" + sparql_query 28 sparql_query_url = url + "?query=" + sparql_query
58 29
64 # Convert to tabular format 35 # Convert to tabular format
65 if "<sparql" in results: 36 if "<sparql" in results:
66 xmldata = extract_xml( results ) 37 xmldata = extract_xml( results )
67 tabular = xml_to_tabular( xmldata ) 38 tabular = xml_to_tabular( xmldata )
68 else: 39 else:
69 sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*80 + "\n" + results) 40 sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*10 + "\n" + results)
70 41
71 # Print to file 42 # Print to file
72 of = open(output_file, "w") 43 of = open(output_file, "w")
73 of.write(tabular) 44 of.write(tabular)
74 of.close() 45 of.close()
88 tree = et.ElementTree(root) 59 tree = et.ElementTree(root)
89 tabular = "" 60 tabular = ""
90 61
91 results = root.getchildren()[1] 62 results = root.getchildren()[1]
92 for result in results: 63 for result in results:
93 line_bits = [binding.getchildren()[0].text for binding in result.getchildren()] 64 line_bits = ['<' + binding.getchildren()[0].text + '>' for binding in result.getchildren()]
94 line = "\t".join(line_bits) 65 line = "\t".join(line_bits)
95 tabular += line + "\n" 66 tabular += line + "\n"
96 return tabular 67 return tabular
97 68
def restore_escaped_chars(sparql_query):
    """Return *sparql_query* with its ``__xx__`` placeholder tokens decoded.

    Every token in the table below is replaced by the character paired
    with it. The substitutions run one after another in table order, each
    over the whole string.
    """
    token_table = (
        ("__oc__", "{"),
        ("__ob__", "["),
        ("__cc__", "}"),
        ("__cb__", "]"),
        # Both of the next two tokens collapse to a single space
        # (presumably carriage-return / newline placeholders -- confirm
        # against whatever produces the escaped query).
        ("__cr__", " "),
        ("__cn__", " "),
        ("__at__", "@"),
    )
    restored = sparql_query
    for token, literal in token_table:
        restored = restored.replace(token, literal)
    return restored
78
def parse_options(argv=None):
    """Parse and validate the command line options.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), ``optparse`` reads ``sys.argv[1:]``, so existing
            callers that pass nothing behave as before.

    Returns:
        The ``(options, args)`` pair produced by ``OptionParser.parse_args``.
        ``options`` carries ``url``, ``sparql_query`` and ``output_file``.

    Exits:
        Calls ``sys.exit`` with an explanatory message when the URL, the
        query or the output file is missing, when the query is shorter than
        nine characters, or when the URL does not start with ``http://`` or
        ``https://``.
    """
    parser = OptionParser()
    parser.add_option("-u", "--url",
                      help="The URL to the SPARQL endpoint")
    parser.add_option("-q", "--sparql_query",
                      help="A SPARQL query to send to a SPARQL endpoint")
    parser.add_option("-o", "--output_file",
                      help="An output file for storing the results")
    (options, args) = parser.parse_args(argv)

    # All three options are mandatory; stop at the first one that is missing.
    if not options.url:
        sys.exit("You have to specify an URL! Use the -h flag to view command line options!")
    if not options.sparql_query:
        sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!")
    if not options.output_file:
        sys.exit("You have to specify an output file! Use the -h flag to view command line options!")

    # Reject queries too short to be meaningful; echo the query back so the
    # user can see what was actually received.
    if len(options.sparql_query) < 9:
        sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query)

    # Require a real scheme prefix. Merely testing for a leading "http"
    # would also let through values such as "httpfoo".
    if not options.url.startswith(("http://", "https://")):
        sys.exit("The URL has to start with 'http://' or 'https://'! Please try again!")

    return options, args
103
98 if __name__ == '__main__': 104 if __name__ == '__main__':
99 main() 105 main()