2
|
1 #!/usr/bin/python
|
|
2 # --------------------------------------------------------
|
|
3 # A Galaxy plugin for querying external SPARQL Endpoints
|
|
4 # Samuel Lampa, samuel.lampa@gmail.com
|
|
5 # Created: 2012-11-16
|
|
6 # --------------------------------------------------------
|
|
7
|
|
8 from xml.etree import ElementTree as et
|
|
9 from optparse import OptionParser
|
|
10 import urllib, sys, re
|
|
11
|
|
12 # -----------------------
|
|
13 # Option parsing
|
|
14 # -----------------------
|
|
15
|
|
# Command line interface: all three options are mandatory, so each one is
# checked explicitly after parsing and the script exits with a message when
# it is missing. This runs at module level; the resulting `options` object is
# read later by main().
parser = OptionParser()
parser.add_option(
    "-u", "--url",
    help="The URL to the SPARQL endpoint")
parser.add_option(
    "-q", "--sparql_query",
    help="A SPARQL query to send to a SPARQL endpoint")
parser.add_option(
    "-o", "--output_file",
    help="An output file for storing the results")
(options, args) = parser.parse_args()

if not options.url:
    sys.exit("You have to specify an URL! Use the -h flag to view command line options!")
if not options.sparql_query:
    sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!")
if not options.output_file:
    sys.exit("You have to specify an output file! Use the -h flag to view command line options!")

# Reject queries shorter than 9 characters and echo the query back so the
# user can see what was actually received.
if len(options.sparql_query) < 9:
    sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query)

# Require a full scheme prefix. Checking only for a leading "http" would let
# values such as "httpfoo" through, and the message must also tell the user
# that "https://" endpoints are accepted.
if not options.url.startswith(("http://", "https://")):
    sys.exit("The URL has to start with 'http://' or 'https://'! Please try again!")
|
|
37
|
|
38 # -----------------------
|
|
39 # The main code
|
|
40 # -----------------------
|
|
41
|
|
def main():
    """Send the SPARQL query to the endpoint and store the result as a table.

    Reads ``url``, ``sparql_query`` and ``output_file`` from the module-level
    ``options`` object, requests ``<url>?query=<encoded query>``, converts
    the returned SPARQL XML result set to tab-separated text with
    ``extract_xml`` and ``xml_to_tabular``, and writes that text to the
    output file as UTF-8.

    Exits through ``sys.exit`` with a message when the endpoint cannot be
    read or when the response contains no ``<sparql`` tag.
    """
    # Imported locally so this function runs on both Python 2 and Python 3,
    # which keep these helpers in different modules.
    import io
    try:
        from urllib.parse import quote_plus
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib import quote_plus, urlopen

    # Turn the placeholder tokens in the query back into real characters.
    # NOTE(review): "__cr__" and "__cn__" look like escapes for carriage
    # return and newline and are both mapped to a single space here --
    # confirm this is intended (a "#" comment in a query would then run to
    # the end of the query).
    placeholders = (
        ("__oc__", "{"),
        ("__ob__", "["),
        ("__cc__", "}"),
        ("__cb__", "]"),
        ("__cr__", " "),
        ("__cn__", " "),
    )
    sparql_query = options.sparql_query
    for token, replacement in placeholders:
        sparql_query = sparql_query.replace(token, replacement)

    url = options.url
    output_file = options.output_file

    # Create SPARQL query URL; append with "&" when the endpoint URL already
    # carries a query string of its own.
    separator = "&" if "?" in url else "?"
    sparql_query_url = url + separator + "query=" + quote_plus(sparql_query)

    # Read from SPARQL Endpoint, always releasing the connection.
    try:
        sparql_endpoint = urlopen(sparql_query_url)
        try:
            results = sparql_endpoint.read()
        finally:
            sparql_endpoint.close()
    except IOError as err:
        sys.exit("Could not read from the SPARQL endpoint at " + url + ": " + str(err))

    # On Python 3 the response is bytes; the helpers below work on text.
    # Undecodable bytes are replaced so an error page can still be shown.
    if not isinstance(results, str):
        results = results.decode("utf-8", "replace")

    if "<sparql" not in results:
        sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*80 + "\n" + results)

    # Convert to tabular format
    tabular = xml_to_tabular(extract_xml(results))

    # io.open in text mode needs unicode text on Python 2, where an
    # ASCII-only table arrives as a byte string.
    if isinstance(tabular, bytes):
        tabular = tabular.decode("utf-8")

    # Print to file with an explicit encoding so non-ASCII values are
    # written the same way regardless of platform defaults.
    with io.open(output_file, "w", encoding="utf-8") as output:
        output.write(tabular)
|
|
75
|
|
76 # -----------------------
|
|
77 # Helper methods
|
|
78 # -----------------------
|
|
79
|
|
def extract_xml(content):
    """Return the part of ``content`` starting at the first ``<sparql`` tag.

    Everything before the tag (for example an ``<?xml ...?>`` declaration or
    other leading text) is dropped; everything from the tag to the end of
    ``content`` is returned unchanged, newlines included.

    Raises:
        ValueError: if ``content`` contains no ``<sparql`` tag.
    """
    start = content.find("<sparql")
    if start < 0:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("No <sparql> element found in the given content")
    return content[start:]
|
|
84
|
|
def xml_to_tabular(xmldata):
    """Convert SPARQL result set XML format to tabular text.

    Each ``<result>`` element becomes one line: the values of its bindings
    joined by tabs and terminated by a newline. When ``<head>`` lists
    variables, every line has one cell per variable, in that order, and a
    variable without a binding in a result yields an empty cell. Without
    listed variables the bindings are emitted in document order. Empty
    values are written as empty cells.

    Returns an empty string when the document has no ``<results>`` element
    or the element has no ``<result>`` children.
    """
    def local_name(element):
        # ElementTree prefixes tags with "{namespace}"; compare without it.
        return element.tag.rsplit("}", 1)[-1]

    def cell_text(binding):
        # The value is the text of the binding's first child element; an
        # empty element has text None, which must not reach str.join.
        children = list(binding)
        if not children or children[0].text is None:
            return ""
        return children[0].text

    root = et.fromstring(xmldata)

    # Locate <head> and <results> by name rather than by position.
    variables = []
    results = None
    for child in root:
        name = local_name(child)
        if name == "head":
            variables = [
                var.get("name")
                for var in child
                if local_name(var) == "variable"
            ]
        elif name == "results":
            results = child

    if results is None:
        return ""

    lines = []
    for result in results:
        if variables:
            # Align cells to the declared variables so that a missing
            # binding does not shift the remaining columns to the left.
            cells = dict(
                (binding.get("name"), cell_text(binding))
                for binding in result
            )
            row = [cells.get(name, "") for name in variables]
        else:
            row = [cell_text(binding) for binding in result]
        lines.append("\t".join(row) + "\n")
    return "".join(lines)
|
|
97
|
|
# Run the query only when this file is executed as a script.
if __name__ == "__main__":
    main()
|