Mercurial > repos > saml > semweb_tools
comparison sparql_import.py @ 3:4b4bbcf5db31 draft
Uploaded
author | saml |
---|---|
date | Wed, 21 Nov 2012 12:46:24 -0500 |
parents | 62cfd14e2520 |
children |
comparison
equal
deleted
inserted
replaced
2:62cfd14e2520 | 3:4b4bbcf5db31 |
---|---|
8 from xml.etree import ElementTree as et | 8 from xml.etree import ElementTree as et |
9 from optparse import OptionParser | 9 from optparse import OptionParser |
10 import urllib, sys, re | 10 import urllib, sys, re |
11 | 11 |
12 # ----------------------- | 12 # ----------------------- |
13 # Option parsing | |
14 # ----------------------- | |
15 | |
16 parser = OptionParser() | |
17 parser.add_option("-u", "--url", | |
18 help = "The URL to the SPARQL endpoint") | |
19 parser.add_option("-q", "--sparql_query", | |
20 help = "A SPARQL query to send to a SPARQL endpoint") | |
21 parser.add_option("-o", "--output_file", | |
22 help = "An output file for storing the results") | |
23 (options, args) = parser.parse_args() | |
24 | |
25 if not options.url: | |
26 sys.exit("You have to specify an URL! Use the -h flag to view command line options!") | |
27 if not options.sparql_query: | |
28 sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!") | |
29 if not options.output_file: | |
30 sys.exit("You have to specify an output file! Use the -h flag to view command line options!") | |
31 | |
32 if len(options.sparql_query) < 9: | |
33 sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query) | |
34 | |
35 if not re.match("^http", options.url): | |
36 sys.exit("The URL has to start with 'http://'! Please try again!") | |
37 | |
38 # ----------------------- | |
39 # The main code | 13 # The main code |
40 # ----------------------- | 14 # ----------------------- |
41 | 15 |
42 def main(): | 16 def main(): |
17 # Parse command line options | |
18 (options, args) = parse_options() | |
19 | |
43 # Extract command line options | 20 # Extract command line options |
44 sparql_query = options.sparql_query | 21 sparql_query = options.sparql_query |
45 sparql_query = sparql_query.replace("__oc__","{") | 22 sparql_query = restore_escaped_chars( sparql_query ) |
46 sparql_query = sparql_query.replace("__ob__","[") | |
47 sparql_query = sparql_query.replace("__cc__","}") | |
48 sparql_query = sparql_query.replace("__cb__","]") | |
49 sparql_query = sparql_query.replace("__cr__"," ") | |
50 sparql_query = sparql_query.replace("__cn__"," ") | |
51 sparql_query = urllib.quote_plus(sparql_query) | 23 sparql_query = urllib.quote_plus(sparql_query) |
52 url = options.url | 24 url = options.url |
53 | |
54 output_file = options.output_file | 25 output_file = options.output_file |
55 | 26 |
56 # Create SPARQL query URL | 27 # Create SPARQL query URL |
57 sparql_query_url = url + "?query=" + sparql_query | 28 sparql_query_url = url + "?query=" + sparql_query |
58 | 29 |
64 # Convert to tabular format | 35 # Convert to tabular format |
65 if "<sparql" in results: | 36 if "<sparql" in results: |
66 xmldata = extract_xml( results ) | 37 xmldata = extract_xml( results ) |
67 tabular = xml_to_tabular( xmldata ) | 38 tabular = xml_to_tabular( xmldata ) |
68 else: | 39 else: |
69 sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*80 + "\n" + results) | 40 sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*10 + "\n" + results) |
70 | 41 |
71 # Print to file | 42 # Print to file |
72 of = open(output_file, "w") | 43 of = open(output_file, "w") |
73 of.write(tabular) | 44 of.write(tabular) |
74 of.close() | 45 of.close() |
88 tree = et.ElementTree(root) | 59 tree = et.ElementTree(root) |
89 tabular = "" | 60 tabular = "" |
90 | 61 |
91 results = root.getchildren()[1] | 62 results = root.getchildren()[1] |
92 for result in results: | 63 for result in results: |
93 line_bits = [binding.getchildren()[0].text for binding in result.getchildren()] | 64 line_bits = ['<' + binding.getchildren()[0].text + '>' for binding in result.getchildren()] |
94 line = "\t".join(line_bits) | 65 line = "\t".join(line_bits) |
95 tabular += line + "\n" | 66 tabular += line + "\n" |
96 return tabular | 67 return tabular |
97 | 68 |
69 def restore_escaped_chars( sparql_query ): | |
70 sparql_query = sparql_query.replace("__oc__","{") | |
71 sparql_query = sparql_query.replace("__ob__","[") | |
72 sparql_query = sparql_query.replace("__cc__","}") | |
73 sparql_query = sparql_query.replace("__cb__","]") | |
74 sparql_query = sparql_query.replace("__cr__"," ") | |
75 sparql_query = sparql_query.replace("__cn__"," ") | |
76 sparql_query = sparql_query.replace("__at__","@") | |
77 return sparql_query | |
78 | |
79 def parse_options(): | |
80 parser = OptionParser() | |
81 parser.add_option("-u", "--url", | |
82 help = "The URL to the SPARQL endpoint") | |
83 parser.add_option("-q", "--sparql_query", | |
84 help = "A SPARQL query to send to a SPARQL endpoint") | |
85 parser.add_option("-o", "--output_file", | |
86 help = "An output file for storing the results") | |
87 (options, args) = parser.parse_args() | |
88 | |
89 if not options.url: | |
90 sys.exit("You have to specify an URL! Use the -h flag to view command line options!") | |
91 if not options.sparql_query: | |
92 sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!") | |
93 if not options.output_file: | |
94 sys.exit("You have to specify an output file! Use the -h flag to view command line options!") | |
95 | |
96 if len(options.sparql_query) < 9: | |
97 sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query) | |
98 | |
99 if not re.match("^http", options.url): | |
100 sys.exit("The URL has to start with 'http://'! Please try again!") | |
101 | |
102 return options, args | |
103 | |
98 if __name__ == '__main__': | 104 if __name__ == '__main__': |
99 main() | 105 main() |