Mercurial > repos > saml > semweb_tools
comparison sparql_import.py @ 2:62cfd14e2520 draft
Uploaded
author | saml |
---|---|
date | Wed, 21 Nov 2012 12:21:20 -0500 |
parents | |
children | 4b4bbcf5db31 |
comparison
equal
deleted
inserted
replaced
1:5972a5799e8f | 2:62cfd14e2520 |
---|---|
1 #!/usr/bin/python | |
2 # -------------------------------------------------------- | |
3 # A Galaxy plugin for querying external SPARQL Endpoints | |
4 # Samuel Lampa, samuel.lampa@gmail.com | |
5 # Created: 2012-11-16 | |
6 # -------------------------------------------------------- | |
7 | |
8 from xml.etree import ElementTree as et | |
9 from optparse import OptionParser | |
10 import urllib, sys, re | |
11 | |
12 # ----------------------- | |
13 # Option parsing | |
14 # ----------------------- | |
15 | |
# Declare the three command line options; each one takes a plain string value.
parser = OptionParser()
parser.add_option("-u", "--url",
                  help="The URL to the SPARQL endpoint")
parser.add_option("-q", "--sparql_query",
                  help="A SPARQL query to send to a SPARQL endpoint")
parser.add_option("-o", "--output_file",
                  help="An output file for storing the results")
# NOTE: this runs at module level, so importing this file already reads
# sys.argv and may terminate the interpreter through the checks below.
(options, args) = parser.parse_args()

# All three options are mandatory; stop with a readable message when one of
# them is missing instead of failing later with a traceback.
if not options.url:
    sys.exit("You have to specify an URL! Use the -h flag to view command line options!")
if not options.sparql_query:
    sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!")
if not options.output_file:
    sys.exit("You have to specify an output file! Use the -h flag to view command line options!")

# Reject queries shorter than 9 characters, echoing the query for inspection.
if len(options.sparql_query) < 9:
    sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query)

# Require a real HTTP(S) scheme prefix. Checking only for a leading "http"
# would also let through values such as "httpfoo" that are not URLs at all.
if not options.url.lower().startswith(("http://", "https://")):
    sys.exit("The URL has to start with 'http://' or 'https://'! Please try again!")
37 | |
38 # ----------------------- | |
39 # The main code | |
40 # ----------------------- | |
41 | |
def main():
    '''Run the SPARQL query from the command line and store it as tabular text.

    Reads the module-level ``options``, sends the query to the endpoint as
    an HTTP GET request, converts the returned SPARQL result XML to
    tab-separated text and writes that text to the output file.

    Exits with an error message (printing the raw response) when the
    response does not contain any "<sparql" markup.
    '''
    # Turn the __xx__ placeholders back into the characters they stand for.
    # Presumably they are inserted by Galaxy's parameter sanitizing -- verify
    # against the tool wrapper. __cr__ and __cn__ (presumably carriage return
    # and newline) are turned into plain spaces.
    sparql_query = options.sparql_query
    for placeholder, character in (("__oc__", "{"), ("__ob__", "["),
                                   ("__cc__", "}"), ("__cb__", "]"),
                                   ("__cr__", " "), ("__cn__", " ")):
        sparql_query = sparql_query.replace(placeholder, character)

    # NOTE: urllib.quote_plus and urllib.urlopen are the Python 2 locations
    # of these functions; this script targets Python 2.
    encoded_query = urllib.quote_plus(sparql_query)

    # Pick the separator that keeps the URL well-formed when the endpoint
    # URL already carries a query string of its own.
    url = options.url
    if url.endswith(("?", "&")):
        separator = ""
    elif "?" in url:
        separator = "&"
    else:
        separator = "?"
    sparql_query_url = url + separator + "query=" + encoded_query

    # Read from the SPARQL endpoint; always release the connection, even
    # when reading fails half-way.
    sparql_endpoint = urllib.urlopen(sparql_query_url)
    try:
        results = sparql_endpoint.read()
    finally:
        sparql_endpoint.close()

    if "<sparql" not in results:
        sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*80 + "\n" + results)

    # Convert to tabular format
    tabular = xml_to_tabular(extract_xml(results))

    # Under Python 2 ElementTree returns unicode objects for non-ASCII text,
    # and writing those to a plain file fails with UnicodeEncodeError.
    # Encode explicitly; byte strings pass through untouched.
    if not isinstance(tabular, str):
        tabular = tabular.encode("utf-8")

    # Write the result; the with-block closes the file on errors too.
    with open(options.output_file, "w") as of:
        of.write(tabular)
75 | |
76 # ----------------------- | |
77 # Helper methods | |
78 # ----------------------- | |
79 | |
def extract_xml(content):
    '''Return the part of ``content`` starting at the first "<sparql" tag.

    Everything in front of that tag (for example an XML declaration) is
    dropped; everything after it, including line breaks, is kept.

    Raises ValueError when ``content`` contains no "<sparql" tag.
    '''
    # re.DOTALL lets "." match newlines, so the match runs to the very end
    # of the document instead of stopping at the first line break.
    match = re.search(r"<sparql.*", content, re.DOTALL)
    if match is None:
        raise ValueError("No <sparql> element found in the given content")
    return match.group(0)
84 | |
def _local_name(element):
    '''Return the tag name of ``element`` without its "{namespace}" prefix.'''
    return element.tag.rsplit("}", 1)[-1]


def xml_to_tabular(xmldata):
    '''Convert SPARQL result set XML format to tabular text.

    Every <result> element becomes one output line: the text of each
    binding's value, separated by tabs and terminated by a newline.

    When the <head> element lists variables, the values of a row are put in
    that variable order and a variable without a binding in that row yields
    an empty field, so columns stay aligned across rows. Without usable
    variable names the bindings are emitted in document order.

    Returns an empty string when the document has no <results> element or
    the element holds no <result> children.
    '''
    root = et.fromstring(xmldata)

    # Locate <head> and <results> by tag name rather than by position, and
    # iterate elements directly (getchildren() is deprecated).
    variables = []
    results = None
    for child in root:
        name = _local_name(child)
        if name == "head":
            variables = [var.get("name") for var in child
                         if _local_name(var) == "variable"]
        elif name == "results":
            results = child
    if results is None:
        return ""

    lines = []
    for result in results:
        names = []
        values = []
        for binding in result:
            value_nodes = list(binding)
            # An empty value element has text None; emit an empty field.
            text = value_nodes[0].text if value_nodes else None
            names.append(binding.get("name"))
            values.append(text or "")
        # Align to the declared variables only when every binding can be
        # mapped onto one of them; otherwise keep the document order.
        if variables and all(name in variables for name in names):
            by_name = dict(zip(names, values))
            values = [by_name.get(variable, "") for variable in variables]
        lines.append("\t".join(values) + "\n")
    return "".join(lines)
97 | |
# Start the tool only when this file is executed as a script.
if '__main__' == __name__:
    main()