2
|
1 #!/usr/bin/python
|
|
2 # --------------------------------------------------------
|
|
3 # A Galaxy plugin for querying external SPARQL Endpoints
|
|
4 # Samuel Lampa, samuel.lampa@gmail.com
|
|
5 # Created: 2012-11-16
|
|
6 # --------------------------------------------------------
|
|
7
|
|
8 from xml.etree import ElementTree as et
|
|
9 from optparse import OptionParser
|
|
10 import urllib, sys, re
|
|
11
|
|
12 # -----------------------
|
|
13 # The main code
|
|
14 # -----------------------
|
|
15
|
|
def main():
    '''Run a SPARQL query against a remote endpoint and save it as a table.

    The endpoint URL, the query and the output path are taken from the
    command line (see parse_options). The query is sent as the "query"
    URL parameter, and the XML result set in the response is written to
    the output file as tab-separated text.

    Exits via sys.exit() with an error message if the response does not
    contain "<sparql".

    NOTE: urllib.quote_plus and urllib.urlopen are the Python 2 locations
    of these functions; this function relies on the Python 2 urllib module.
    '''
    # Parse command line options
    (options, args) = parse_options()

    # Extract command line options. Placeholder tokens such as __oc__ are
    # turned back into the characters they stand for before URL-encoding.
    sparql_query = restore_escaped_chars(options.sparql_query)
    sparql_query = urllib.quote_plus(sparql_query)
    url = options.url
    output_file = options.output_file

    # Create SPARQL query URL. If the endpoint URL already carries a query
    # string, the query has to be appended with "&" rather than a second "?".
    if url.endswith(("?", "&")):
        separator = ""
    elif "?" in url:
        separator = "&"
    else:
        separator = "?"
    sparql_query_url = url + separator + "query=" + sparql_query

    # Read from SPARQL Endpoint; close the connection even if read() fails.
    sparql_endpoint = urllib.urlopen(sparql_query_url)
    try:
        results = sparql_endpoint.read()
    finally:
        sparql_endpoint.close()

    # Convert to tabular format
    if "<sparql" not in results:
        sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*10 + "\n" + results)
    xmldata = extract_xml(results)
    tabular = xml_to_tabular(xmldata)

    # Print to file; the with-block closes the file even if write() fails.
    with open(output_file, "w") as of:
        of.write(tabular)
|
|
46
|
|
47 # -----------------------
|
|
48 # Helper methods
|
|
49 # -----------------------
|
|
50
|
|
def extract_xml( content ):
    '''Return the part of content that starts at the first "<sparql".

    Everything before the first occurrence of "<sparql" (for example an
    XML declaration or other leading text) is dropped; everything from
    there to the end of content is returned unchanged.

    Raises ValueError if content does not contain "<sparql".
    '''
    start = content.find("<sparql")
    if start == -1:
        # Fail with an explicit message instead of an AttributeError on None.
        raise ValueError("No '<sparql' element found in content")
    return content[start:]
|
|
55
|
|
def xml_to_tabular( xmldata ):
    '''Convert SPARQL result set XML format to tabular text.

    xmldata is parsed as XML. The second child of the root element is
    treated as the list of results; each of its children becomes one
    output line. For every child of a result (a binding), the text of the
    binding's first child element is wrapped in "<" and ">", and the
    wrapped values of one result are joined with tab characters.

    Returns the lines as one string, each line terminated by a newline
    (an empty string when there are no results). A binding value without
    text is rendered as "<>".
    '''
    root = et.fromstring(xmldata)

    # Elements are indexed and iterated directly: Element.getchildren()
    # was removed in Python 3.9, while indexing works on old and new versions.
    results = root[1]

    lines = []
    for result in results:
        # .text is None for an empty element, so fall back to "".
        line_bits = ['<' + (binding[0].text or '') + '>' for binding in result]
        lines.append("\t".join(line_bits))
    return "".join(line + "\n" for line in lines)
|
|
68
|
3
|
def restore_escaped_chars( sparql_query ):
    '''Replace placeholder tokens in the query with the characters they stand for.

    The tokens are substituted one after another, in the order listed
    below, and the resulting string is returned.
    '''
    substitutions = (
        ("__oc__", "{"),
        ("__ob__", "["),
        ("__cc__", "}"),
        ("__cb__", "]"),
        ("__cr__", " "),
        ("__cn__", " "),
        ("__at__", "@"),
    )
    restored = sparql_query
    for token, character in substitutions:
        restored = restored.replace(token, character)
    return restored
|
|
78
|
|
def parse_options(argv=None):
    '''Parse and validate the command line options.

    argv is the list of arguments to parse; when it is None (the default)
    the arguments are taken from sys.argv[1:] by OptionParser.

    Returns the (options, args) pair produced by OptionParser.parse_args().
    options has the attributes url, sparql_query and output_file.

    Exits via sys.exit() with an explanatory message if any of the three
    options is missing, if the query is shorter than 9 characters, or if
    the URL does not start with "http://" or "https://".
    '''
    parser = OptionParser()
    parser.add_option("-u", "--url",
                      help = "The URL to the SPARQL endpoint")
    parser.add_option("-q", "--sparql_query",
                      help = "A SPARQL query to send to a SPARQL endpoint")
    parser.add_option("-o", "--output_file",
                      help = "An output file for storing the results")
    (options, args) = parser.parse_args(argv)

    if not options.url:
        sys.exit("You have to specify an URL! Use the -h flag to view command line options!")
    if not options.sparql_query:
        sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!")
    if not options.output_file:
        sys.exit("You have to specify an output file! Use the -h flag to view command line options!")

    if len(options.sparql_query) < 9:
        sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query)

    # Require a full scheme prefix: a bare "http" prefix check would also
    # let through strings such as "httpfoo" that are not HTTP(S) URLs.
    if not options.url.startswith(("http://", "https://")):
        sys.exit("The URL has to start with 'http://' or 'https://'! Please try again!")

    return options, args
|
|
103
|
2
|
# Run the plugin only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|