Mercurial > repos > saml > semweb_tools
comparison sparql_import.py @ 2:62cfd14e2520 draft
Uploaded
| author | saml |
|---|---|
| date | Wed, 21 Nov 2012 12:21:20 -0500 |
| parents | |
| children | 4b4bbcf5db31 |
comparison
equal
deleted
inserted
replaced
| 1:5972a5799e8f | 2:62cfd14e2520 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # -------------------------------------------------------- | |
| 3 # A Galaxy plugin for querying external SPARQL Endpoints | |
| 4 # Samuel Lampa, samuel.lampa@gmail.com | |
| 5 # Created: 2012-11-16 | |
| 6 # -------------------------------------------------------- | |
| 7 | |
| 8 from xml.etree import ElementTree as et | |
| 9 from optparse import OptionParser | |
| 10 import urllib, sys, re | |
| 11 | |
| 12 # ----------------------- | |
| 13 # Option parsing | |
| 14 # ----------------------- | |
| 15 | |
# Command line interface: all three options are required.  The parsed
# `options` object is read by main() further down in this file.
parser = OptionParser()
parser.add_option("-u", "--url",
                  help="The URL to the SPARQL endpoint")
parser.add_option("-q", "--sparql_query",
                  help="A SPARQL query to send to a SPARQL endpoint")
parser.add_option("-o", "--output_file",
                  help="An output file for storing the results")
(options, args) = parser.parse_args()

# Refuse to continue when any of the required options is missing
if not options.url:
    sys.exit("You have to specify an URL! Use the -h flag to view command line options!")
if not options.sparql_query:
    sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!")
if not options.output_file:
    sys.exit("You have to specify an output file! Use the -h flag to view command line options!")

# Cheap sanity check: reject queries shorter than 9 characters
if len(options.sparql_query) < 9:
    sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query)

# Require an explicit HTTP(S) scheme.  The previous "^http" pattern also let
# through strings such as "httpfoo", and its error message did not mention
# the https:// URLs it accepted.
if not options.url.startswith(("http://", "https://")):
    sys.exit("The URL has to start with 'http://' or 'https://'! Please try again!")
| 37 | |
| 38 # ----------------------- | |
| 39 # The main code | |
| 40 # ----------------------- | |
| 41 | |
def main():
    """Run the SPARQL query from the command line and store the result.

    Reads ``options.url``, ``options.sparql_query`` and
    ``options.output_file`` (parsed at module level), sends the query to the
    endpoint as an HTTP GET request, converts the returned SPARQL XML result
    set to tab-separated text and writes it, UTF-8 encoded, to the output
    file.

    Exits through ``sys.exit`` with an error message when the endpoint cannot
    be read or when the response contains no ``<sparql`` element.
    """
    # Function-scope imports so the tool runs on both Python 2 and Python 3
    try:
        from urllib import quote_plus, urlopen  # Python 2
    except ImportError:
        from urllib.parse import quote_plus  # Python 3
        from urllib.request import urlopen

    # Restore characters that arrive as placeholder tokens in the query
    # (presumably escaped by the calling framework -- TODO confirm).  The
    # "__cr__" and "__cn__" tokens are each turned into a single space.
    placeholders = (
        ("__oc__", "{"),
        ("__ob__", "["),
        ("__cc__", "}"),
        ("__cb__", "]"),
        ("__cr__", " "),
        ("__cn__", " "),
    )
    sparql_query = options.sparql_query
    for token, replacement in placeholders:
        sparql_query = sparql_query.replace(token, replacement)

    # Create SPARQL query URL; use "&" when the endpoint URL already carries
    # a query string so the result stays a well-formed URL
    url = options.url
    separator = "&" if "?" in url else "?"
    sparql_query_url = url + separator + "query=" + quote_plus(sparql_query)

    # Read from SPARQL Endpoint, closing the connection even if read() fails
    try:
        sparql_endpoint = urlopen(sparql_query_url)
        try:
            results = sparql_endpoint.read()
        finally:
            sparql_endpoint.close()
    except IOError as err:
        sys.exit("Could not read from the SPARQL endpoint: " + str(err))

    # On Python 3 the response is bytes; decode it so the string test and the
    # regular-expression based helper below work.  On Python 2 the response
    # is already a ``str`` and is left untouched.
    if not isinstance(results, str):
        results = results.decode("utf-8", "replace")

    # Convert to tabular format
    if "<sparql" not in results:
        sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*80 + "\n" + results)
    tabular = xml_to_tabular(extract_xml(results))

    # Write bytes explicitly: text values (``unicode`` on Python 2, ``str``
    # on Python 3) are UTF-8 encoded first, so non-ASCII results no longer
    # fail with an implicit ASCII encode on Python 2.
    if not isinstance(tabular, bytes):
        tabular = tabular.encode("utf-8")
    with open(options.output_file, "wb") as of:
        of.write(tabular)
| 75 | |
| 76 # ----------------------- | |
| 77 # Helper methods | |
| 78 # ----------------------- | |
| 79 | |
def extract_xml(content):
    """Return the part of *content* starting at the first ``<sparql`` tag.

    Everything before the tag (for example an ``<?xml ...?>`` declaration)
    is dropped; the text from the tag to the end of *content* is returned
    unchanged.

    Raises:
        ValueError: if *content* contains no ``<sparql`` tag.
    """
    start = content.find("<sparql")
    if start == -1:
        # Previously this surfaced as an AttributeError on None.group()
        raise ValueError("No '<sparql' tag found in the given content")
    return content[start:]
| 84 | |
def xml_to_tabular(xmldata):
    """Convert SPARQL result set XML format to tabular text.

    Each ``<result>`` element becomes one line.  The line holds the text of
    the first child element of every binding in that result, in document
    order, separated by tabs and terminated by a newline.  A binding whose
    value element has no text (or which has no value element at all)
    contributes an empty cell.

    Returns an empty string when the document has no ``<results>`` element
    or when that element is empty.
    """
    root = et.fromstring(xmldata)

    # Find <results> by its tag name instead of assuming it is the second
    # child of the root; the tag may carry a "{namespace}" prefix.
    results = None
    for child in root:
        if child.tag.rsplit("}", 1)[-1] == "results":
            results = child
            break
    if results is None:
        return ""

    lines = []
    for result in results:
        cells = []
        for binding in result:
            # Iterate the element directly: getchildren() is deprecated and
            # no longer exists in Python 3.9+
            values = list(binding)
            text = values[0].text if values else None
            # Empty elements have text None, which str.join cannot handle
            cells.append(text if text is not None else "")
        lines.append("\t".join(cells) + "\n")
    return "".join(lines)
| 97 | |
# Run the query only when this file is executed as a script
if __name__ == "__main__":
    main()
