Mercurial > repos > saml > semweb_tools
changeset 1:5972a5799e8f draft
Deleted selected files
author | saml |
---|---|
date | Wed, 21 Nov 2012 12:20:28 -0500 |
parents | bd5caa8e2290 |
children | 62cfd14e2520 |
files | semweb_tools/README.md semweb_tools/sparql_import.py semweb_tools/sparql_import.xml |
diffstat | 3 files changed, 0 insertions(+), 163 deletions(-) [+] |
line wrap: on
line diff
--- a/semweb_tools/README.md Wed Nov 21 12:19:18 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -# Semantic Web Tools for Galaxy # - -## SPARQL Import tool ## -This [Galaxy (bioinformatics web portal)](http://galaxyproject.org/) toolkit currently just contains a tool that lets you specify the URL of a SPARQL endpoint and a SPARQL query, and save the returned data in tabular format, for further processing in Galaxy's tools. - -## Installation ## - -1. Create a folder "semweb_tools" under [galaxy_root]/tools/ and add these files. -2. Add the following section right under the starting <toolbox> tag in [galaxy_root]/tool_conf.xml: - -```` - <section name="Semantic Web Tools" id="semweb_tools"> - <tool file="semweb_tools/sparql_import.xml" /> - </section> -```` - -## Change log ## - -* 2012-11-16: Now works properly (at least for my simple test case)! -* 2012-11-16: Created today, still under heavy development!
--- a/semweb_tools/sparql_import.py Wed Nov 21 12:19:18 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ -#!/usr/bin/python -# -------------------------------------------------------- -# A Galaxy plugin for querying external SPARQL Endpoints -# Samuel Lampa, samuel.lampa@gmail.com -# Created: 2012-11-16 -# -------------------------------------------------------- - -from xml.etree import ElementTree as et -from optparse import OptionParser -import urllib, sys, re - -# ----------------------- -# Option parsing -# ----------------------- - -parser = OptionParser() -parser.add_option("-u", "--url", - help = "The URL to the SPARQL endpoint") -parser.add_option("-q", "--sparql_query", - help = "A SPARQL query to send to a SPARQL endpoint") -parser.add_option("-o", "--output_file", - help = "An output file for storing the results") -(options, args) = parser.parse_args() - -if not options.url: - sys.exit("You have to specify an URL! Use the -h flag to view command line options!") -if not options.sparql_query: - sys.exit("You have to specify a SPARQL query! Use the -h flag to view command line options!") -if not options.output_file: - sys.exit("You have to specify an output file! Use the -h flag to view command line options!") - -if len(options.sparql_query) < 9: - sys.exit("Your SPARQL query is too short (printed below)!\n" + options.sparql_query) - -if not re.match("^http", options.url): - sys.exit("The URL has to start with 'http://'! 
Please try again!") - -# ----------------------- -# The main code -# ----------------------- - -def main(): - # Extract command line options - sparql_query = options.sparql_query - sparql_query = sparql_query.replace("__oc__","{") - sparql_query = sparql_query.replace("__ob__","[") - sparql_query = sparql_query.replace("__cc__","}") - sparql_query = sparql_query.replace("__cb__","]") - sparql_query = sparql_query.replace("__cr__"," ") - sparql_query = sparql_query.replace("__cn__"," ") - sparql_query = urllib.quote_plus(sparql_query) - url = options.url - - output_file = options.output_file - - # Create SPARQL query URL - sparql_query_url = url + "?query=" + sparql_query - - # Read from SPARQL Endpoint - sparql_endpoint = urllib.urlopen(sparql_query_url) - results = sparql_endpoint.read() - sparql_endpoint.close() - - # Convert to tabular format - if "<sparql" in results: - xmldata = extract_xml( results ) - tabular = xml_to_tabular( xmldata ) - else: - sys.exit("No SPARQL content found in returned data!\nReturned data:\n" + "-"*80 + "\n" + results) - - # Print to file - of = open(output_file, "w") - of.write(tabular) - of.close() - -# ----------------------- -# Helper methods -# ----------------------- - -def extract_xml( content ): - '''Extract the part of the document starting with <?xml ...''' - xmlcontent = re.search("<sparql.*", content, re.DOTALL).group(0) - return xmlcontent - -def xml_to_tabular( xmldata ): - '''Convert SPARQL result set XML format to tabular text''' - root = et.fromstring(xmldata) - tree = et.ElementTree(root) - tabular = "" - - results = root.getchildren()[1] - for result in results: - line_bits = [binding.getchildren()[0].text for binding in result.getchildren()] - line = "\t".join(line_bits) - tabular += line + "\n" - return tabular - -if __name__ == '__main__': - main()
--- a/semweb_tools/sparql_import.xml Wed Nov 21 12:19:18 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -<tool id="sparql_import" name="SPARQL Import" version="1.0.0"> - <description>Import data from a SPARQL Endpoint</description> - <command interpreter="python">sparql_import.py -u $url -q "$sparqlquery" -o $outfile</command> - <inputs> - <param name="url" type="text" format="text" size="80" label="SPARQL Endpoint URL"/> - <param name="sparqlquery" type="text" format="text" area="True" size="5x80" label="SPARQL Query"/> - </inputs> - <outputs> - <data name="outfile" format="tabular"/> - </outputs> - <help> -**What it does** - -The SPARQL import executes a SPARQL query against a SPARQL endpoint and returns the results in tabular format, with one column per variable in the SPARQL query. - -------- - -**Example** - -You could for example use this SPARQL Endpoint URL: - -http://sparql.wikipathways.org/ - -and the following SPARQL query, which will select 10 sets of "subject", "predicate" and "object" out of the database, with no filtering at all:: - - SELECT ?s ?p ?o - WHERE { ?s ?p ?o } - LIMIT 10 - -This will generate a 3-column tabular output like this:: - - http://identifiers.org/pubmed/12860264 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://vocabularies.wikipathways.org/wp#PublicationReference - http://identifiers.org/pubmed/15782111 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://vocabularies.wikipathways.org/wp#PublicationReference - http://identifiers.org/pubmed/11017945 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://vocabularies.wikipathways.org/wp#PublicationReference - http://identifiers.org/pubmed/10997684 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://vocabularies.wikipathways.org/wp#PublicationReference - http://identifiers.org/pubmed/19635799 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://vocabularies.wikipathways.org/wp#PublicationReference - http://identifiers.org/pubmed/16480962 
http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://vocabularies.wikipathways.org/wp#PublicationReference - http://identifiers.org/pubmed/15976321 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://vocabularies.wikipathways.org/wp#PublicationReference - http://identifiers.org/pubmed/17218259 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://vocabularies.wikipathways.org/wp#PublicationReference - http://identifiers.org/pubmed/17215298 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://vocabularies.wikipathways.org/wp#PublicationReference - http://identifiers.org/pubmed/11177571 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://vocabularies.wikipathways.org/wp#PublicationReference - -</help> -</tool>