| 2 | 1 #!/usr/bin/python | 
|  | 2 # -------------------------------------------------------- | 
|  | 3 # A Galaxy plugin for querying external SPARQL Endpoints | 
|  | 4 # Samuel Lampa, samuel.lampa@gmail.com | 
|  | 5 # Created: 2012-11-16 | 
|  | 6 # -------------------------------------------------------- | 
|  | 7 | 
|  | 8 from xml.etree import ElementTree as et | 
|  | 9 from optparse import OptionParser | 
|  | 10 import urllib, sys, re | 
|  | 11 | 
|  | 12 # ----------------------- | 
|  | 13 # Option parsing | 
|  | 14 # ----------------------- | 
|  | 15 | 
# Declare the three command line options of the tool. parse_args() reads
# sys.argv and leaves the values in the module-level ``options`` object,
# which main() reads later.
parser = OptionParser()
parser.add_option("-u", "--url",
    help = "The URL to the SPARQL endpoint")
parser.add_option("-q", "--sparql_query",
    help = "A SPARQL query to send to a SPARQL endpoint")
parser.add_option("-o", "--output_file",
    help = "An output file for storing the results")
(options, args) = parser.parse_args()

# All three options are mandatory; stop with a hint when one is missing.
if not options.url:
    sys.exit("You have to specify an URL! Use the -h flag to view command line options!")
if not options.sparql_query:
    sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!")
if not options.output_file:
    sys.exit("You have to specify an output file! Use the -h flag to view command line options!")

# Reject queries shorter than 9 characters (presumably too short to be a
# meaningful SPARQL query - the threshold is not explained in this file).
if len(options.sparql_query) < 9:
    sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query)

# Require a complete HTTP(S) scheme. A bare "http" prefix test would also let
# through strings such as "httpfoo" that cannot be opened as an endpoint.
if not options.url.startswith(("http://", "https://")):
    sys.exit("The URL has to start with 'http://' or 'https://'! Please try again!")
|  | 37 | 
|  | 38 # ----------------------- | 
|  | 39 # The main code | 
|  | 40 # ----------------------- | 
|  | 41 | 
def main():
    '''Send the SPARQL query to the endpoint and store the result as a table.

    Reads the module-level ``options`` (url, sparql_query, output_file),
    requests ``<url>?query=<encoded query>``, converts the returned SPARQL
    result XML to tab-separated text and writes it to the output file.

    Exits through sys.exit() with the raw response in the message when the
    response contains no "<sparql" markup.
    '''
    # The query arrives with brackets and line breaks replaced by placeholder
    # tokens (presumably by Galaxy's parameter sanitizing - confirm against
    # the tool wrapper). Restore the brackets; the __cr__ and __cn__ tokens
    # become plain spaces.
    sparql_query = options.sparql_query
    for placeholder, replacement in (
        ("__oc__", "{"),
        ("__ob__", "["),
        ("__cc__", "}"),
        ("__cb__", "]"),
        ("__cr__", " "),
        ("__cn__", " "),
    ):
        sparql_query = sparql_query.replace(placeholder, replacement)

    # Create SPARQL query URL. When the endpoint URL already carries a query
    # string, the query has to be appended with "&" instead of a second "?".
    # NOTE: urllib.quote_plus / urllib.urlopen are the Python 2 locations of
    # these functions.
    url = options.url
    separator = "&" if "?" in url else "?"
    sparql_query_url = url + separator + "query=" + urllib.quote_plus(sparql_query)

    # Read from SPARQL endpoint; close the connection even if reading fails.
    sparql_endpoint = urllib.urlopen(sparql_query_url)
    try:
        results = sparql_endpoint.read()
    finally:
        sparql_endpoint.close()

    # Anything without SPARQL result markup is treated as an error and shown
    # to the user as-is.
    if "<sparql" not in results:
        sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*80 + "\n" + results)

    # Convert to tabular format
    tabular = xml_to_tabular(extract_xml(results))

    # On Python 2 the parsed text is a unicode object as soon as a value
    # contains non-ASCII characters; writing that to a plain file would raise
    # UnicodeEncodeError, so encode it explicitly. A native str is left as is.
    if not isinstance(tabular, str):
        tabular = tabular.encode("utf-8")

    # Print to file; the with-block closes the file even if writing fails.
    with open(options.output_file, "w") as of:
        of.write(tabular)
|  | 75 | 
|  | 76 # ----------------------- | 
|  | 77 # Helper methods | 
|  | 78 # ----------------------- | 
|  | 79 | 
def extract_xml(content):
    '''Return the part of content that starts at the first "<sparql".

    Everything before the first "<sparql" (for example an XML declaration or
    other leading text) is dropped; everything from there to the end of the
    string is returned unchanged.

    Raises ValueError when content contains no "<sparql".
    '''
    start = content.find("<sparql")
    if start == -1:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("No '<sparql' markup found in content")
    return content[start:]
|  | 84 | 
def xml_to_tabular(xmldata):
    '''Convert SPARQL result set XML format to tabular text.

    Each <result> element becomes one line; the text of the first child of
    every <binding> in it becomes one tab-separated cell, in document order.
    Every line ends with a newline.

    A binding whose value element is empty (or that has no value element)
    yields an empty cell. When the document has no <results> element (for
    example a boolean answer), an empty string is returned.
    '''
    root = et.fromstring(xmldata)

    # Locate <results> by name rather than by position. Parsed tags are
    # namespace-qualified ("{uri}results"), so compare only the local part.
    results = None
    for child in root:
        if child.tag.rsplit("}", 1)[-1] == "results":
            results = child
            break
    if results is None:
        return ""

    lines = []
    for result in results:
        cells = []
        for binding in result:
            # Iterate the element directly; getchildren() is deprecated and
            # no longer exists in recent Python versions.
            values = list(binding)
            text = values[0].text if values else None
            # .text is None for an empty element such as <literal/>.
            cells.append(text or "")
        lines.append("\t".join(cells) + "\n")
    return "".join(lines)
|  | 97 | 
# Call main() only when this file is executed as a script.
if __name__ == "__main__":
    main()