| 2 | 1 #!/usr/bin/python | 
|  | 2 # -------------------------------------------------------- | 
|  | 3 # A Galaxy plugin for querying external SPARQL Endpoints | 
|  | 4 # Samuel Lampa, samuel.lampa@gmail.com | 
|  | 5 # Created: 2012-11-16 | 
|  | 6 # -------------------------------------------------------- | 
|  | 7 | 
|  | 8 from xml.etree import ElementTree as et | 
|  | 9 from optparse import OptionParser | 
|  | 10 import urllib, sys, re | 
|  | 11 | 
|  | 12 # ----------------------- | 
|  | 13 # The main code | 
|  | 14 # ----------------------- | 
|  | 15 | 
def main():
    """Run a SPARQL query against an endpoint and save the result as a table.

    Reads the endpoint URL, the query and the output path from the command
    line, sends the query to the endpoint as a URL-encoded ``query``
    parameter, converts the returned SPARQL XML result set to tab-separated
    lines and writes them to the output file.

    Exits through ``sys.exit`` with the raw response attached when the
    response does not contain a ``<sparql`` element.
    """
    # Imported locally so the function runs on both Python 2 and Python 3,
    # where these helpers live in different modules.
    try:
        from urllib import quote_plus, urlopen  # Python 2
    except ImportError:
        from urllib.parse import quote_plus  # Python 3
        from urllib.request import urlopen
    from contextlib import closing
    import io

    # Parse command line options
    (options, args) = parse_options()

    # Extract command line options
    sparql_query = quote_plus(restore_escaped_chars(options.sparql_query))
    url = options.url
    output_file = options.output_file

    # Create SPARQL query URL; extend an existing query string instead of
    # starting a second one when the endpoint URL already contains "?".
    separator = "&" if "?" in url else "?"
    sparql_query_url = url + separator + "query=" + sparql_query

    # Read from SPARQL Endpoint; closing() releases the connection even if
    # read() raises.
    with closing(urlopen(sparql_query_url)) as sparql_endpoint:
        results = sparql_endpoint.read()

    # On Python 3 the response is bytes; the helpers below work on text.
    # NOTE(review): assumes the endpoint answers in UTF-8 -- confirm.
    if not isinstance(results, str):
        results = results.decode("utf-8", "replace")

    # Convert to tabular format
    if "<sparql" not in results:
        sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*10 + "\n" + results)
    tabular = xml_to_tabular(extract_xml(results))

    # Print to file; always write UTF-8 text so non-ASCII values do not
    # depend on the default encoding.
    if isinstance(tabular, bytes):
        tabular = tabular.decode("utf-8")
    with io.open(output_file, "w", encoding="utf-8") as of:
        of.write(tabular)
|  | 46 | 
|  | 47 # ----------------------- | 
|  | 48 # Helper methods | 
|  | 49 # ----------------------- | 
|  | 50 | 
def extract_xml( content ):
    """Return the part of the document starting at the first ``<sparql``.

    Everything before the first ``<sparql`` occurrence (for example an XML
    declaration or other leading output) is dropped; the rest of the
    document is returned unchanged.

    Raises:
        ValueError: if ``content`` contains no ``<sparql`` at all.
    """
    start = content.find("<sparql")
    if start == -1:
        raise ValueError("No '<sparql' element found in the content")
    return content[start:]
|  | 55 | 
def xml_to_tabular( xmldata ):
    """Convert SPARQL result set XML format to tabular text.

    Each ``<result>`` becomes one line; the text of the first child of each
    of its ``<binding>`` elements is wrapped in ``<...>`` and the cells are
    joined with tabs. Every line ends with a newline.

    Args:
        xmldata: The SPARQL results document, starting at its root element.

    Returns:
        The tabular text, or an empty string when the document has no
        ``<results>`` element (e.g. a boolean answer) or no rows.
    """
    root = et.fromstring(xmldata)

    # Find <results> by its local tag name (ignoring any XML namespace)
    # rather than by position among the root's children.
    results = None
    for child in root:
        if child.tag.rsplit("}", 1)[-1] == "results":
            results = child
            break
    if results is None:
        return ""

    # NOTE: cells are emitted in document order, one per binding present in
    # the row; rows that lack a binding for some variable are therefore
    # shorter and not aligned to the header variables.
    lines = []
    for result in results:
        cells = []
        for binding in result:
            values = list(binding)
            # Empty elements have text None; render them as an empty cell.
            text = values[0].text if values else None
            cells.append("<" + (text or "") + ">")
        lines.append("\t".join(cells) + "\n")
    return "".join(lines)
|  | 68 | 
def restore_escaped_chars( sparql_query ):
    """Replace ``__xx__`` placeholder tokens in the query with real characters.

    The tokens are substituted one after another in the order listed below
    (presumably they come from Galaxy's parameter escaping -- verify against
    the tool wrapper). Both ``__cr__`` and ``__cn__`` become a single space.
    """
    # Order is significant: tokens can overlap in the input, so the table is
    # applied strictly top to bottom.
    substitutions = (
        ("__oc__", "{"),
        ("__ob__", "["),
        ("__cc__", "}"),
        ("__cb__", "]"),
        ("__cr__", " "),
        ("__cn__", " "),
        ("__at__", "@"),
    )
    restored = sparql_query
    for token, char in substitutions:
        restored = restored.replace(token, char)
    return restored
|  | 78 | 
def parse_options():
    """Parse and validate the command line options.

    Recognised options are ``-u/--url``, ``-q/--sparql_query`` and
    ``-o/--output_file``; all three are required.

    Returns:
        The ``(options, args)`` pair produced by ``OptionParser.parse_args()``.

    Exits through ``sys.exit`` with an explanatory message when an option is
    missing, the query is shorter than 9 characters, or the URL does not
    start with ``http://`` or ``https://``.
    """
    parser = OptionParser()
    parser.add_option("-u", "--url",
        help = "The URL to the SPARQL endpoint")
    parser.add_option("-q", "--sparql_query",
        help = "A SPARQL query to send to a SPARQL endpoint")
    parser.add_option("-o", "--output_file",
        help = "An output file for storing the results")
    (options, args) = parser.parse_args()

    if not options.url:
        sys.exit("You have to specify an URL! Use the -h flag to view command line options!")
    if not options.sparql_query:
        sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!")
    if not options.output_file:
        sys.exit("You have to specify an output file! Use the -h flag to view command line options!")

    if len(options.sparql_query) < 9:
        sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query)

    # Require a full scheme prefix; a bare "http" prefix check would also
    # accept strings such as "httpx://..." or "httpfoo".
    if not options.url.startswith(("http://", "https://")):
        sys.exit("The URL has to start with 'http://' or 'https://'! Please try again!")

    return options, args
|  | 103 | 
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()