Previous changeset 1:c76273c080a1 (2016-05-02) Next changeset 3:ba860aea7033 (2016-05-02) |
Commit message:
Uploaded |
added:
sparql_uniprot.py |
b |
diff -r c76273c080a1 -r 9bb153d42a1c sparql_uniprot.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sparql_uniprot.py Mon May 02 16:50:00 2016 -0400 |
[ |
b'@@ -0,0 +1,230 @@\n+from SPARQLWrapper import SPARQLWrapper, JSON\n+import sys\n+\n+# Constante con los prefijos comunes a usar en queries\n+COMMON_PREFIXES = """\n+\tPREFIX up:<http://purl.uniprot.org/core/>\n+\tPREFIX keywords:<http://purl.uniprot.org/keywords/>\n+\tPREFIX uniprotkb:<http://purl.uniprot.org/uniprot/>\n+\tPREFIX taxon:<http://purl.uniprot.org/taxonomy/>\n+\tPREFIX ec:<http://purl.uniprot.org/enzyme/>\n+\tPREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n+\tPREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>\n+\tPREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n+\tPREFIX owl:<http://www.w3.org/2002/07/owl#>\n+\tPREFIX bibo:<http://purl.org/ontology/bibo/>\n+\tPREFIX dc:<http://purl.org/dc/terms/>\n+\tPREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n+\tPREFIX faldo:<http://biohackathon.org/resource/faldo#>\n+"""\n+\n+# Lista con los nombres de las variables que obtenemos de la base de datos.\n+paramList = [\'protein\', \'proteinFullName\', \'geneName\', \'organismName\', \'diseaseAnnotationText\', \'domainFullName\', \'similarityAnnotationText\', \'locationAnnotationText\', \'functionAnnotationText\', \'pharmaceuticalAnnotationText\'];\n+\n+def buildQuery(proteinId, proteinName, geneName, organismName, diseaseAnnotation, domainName, similarityAnnotation, locationAnnotation, functionAnnotation, pharmaceuticalAnnotation):\n+\tquery = COMMON_PREFIXES\n+\tquery += "select distinct \\n"\n+\tquery += "\t?protein\\n"\n+\tquery += "\t?proteinFullName\\n"\n+\tquery += "\t?geneName\\n"\n+\tquery += "\t?organismName\\n"\n+\tquery += "\t?diseaseAnnotationText\\n"\n+\tquery += "\t?domainFullName\\n"\n+\tquery += "\t?similarityAnnotationText\\n"\n+\tquery += "\t?locationAnnotationText\\n"\n+\tquery += "\t?functionAnnotationText\\n"\n+\tquery += "\t?pharmaceuticalAnnotationText\\n"\n+\tquery += "where{\\n"\n+\n+\tquery += "\t?protein a up:Protein .\\n"\n+\n+\tif (proteinId != \'\'):\n+\t\tquery += "\tVALUES ?protein {uniprotkb:"+ proteinId + "}\\n"\n+\n+\tquery += "\\n"\n+\n+\tif (proteinName == \'\'):\n+\t\tquery += "\tOPTIONAL {\\n"\n+\tquery += "\t?protein up:recommendedName ?proteinName .\\n"\n+\tquery += "\t?proteinName up:fullName ?proteinFullName . \\n"\n+\tif (proteinName !=\'\'):\n+\t\tquery += "\tfilter( regex(str(?proteinFullName), " + \'"\' + proteinName + \'"\' + ",\\"i\\" )) .\\n"\n+\tif (proteinName == \'\'):\n+\t\tquery += "\t}\\n"\n+\tquery += "\\n"\n+\n+\tif (geneName == \'\'):\n+\t\tquery += "\tOPTIONAL {\\n"\n+\tquery += "\t?protein up:encodedBy ?gene .\\n"\n+\tquery += "\t?gene skos:prefLabel ?geneName .\\n"\n+\tif (geneName != \'\'):\n+\t\tquery += "\tfilter( regex(str(?geneName), " + \'"\' + geneName + \'"\' + ",\\"i\\" )) .\\n"\n+\tif (geneName == \'\'):\n+\t\tquery += "\t}\\n"\n+\n+\tquery += "\\n"\n+\n+\tif (organismName == \'\'):\n+\t\tquery += "\tOPTIONAL {\\n"\n+\tquery += "\t?protein up:organism ?organism .\\n"\n+\tquery += "\t?organism up:scientificName ?organismName .\\n"\n+\tif (organismName != \'\'):\n+\t\tquery += "\tfilter( regex(str(?organismName), " + \'"\' + organismName + \'"\' + ",\\"i\\" )) .\\n"\n+\tif (organismName == \'\'):\n+\t\tquery += "\t}\\n"\n+\n+\tquery += "\\n"\n+\n+\tif (diseaseAnnotation == \'\'):\n+\t\tquery += "\tOPTIONAL {\\n"\n+\tquery += "\t?protein up:annotation ?diseaseAnnotation .\\n"\n+\tquery += "\t?diseaseAnnotation a up:Disease_Annotation .\\n"\n+\tquery += "\t?diseaseAnnotation up:disease ?disease .\\n"\n+\tquery += "\t?disease rdfs:comment ?diseaseAnnotationText\\n"\n+\tif (diseaseAnnotation != \'\'):\n+\t\tquery += "\tfilter( regex(str(?diseaseAnnotationText), " + \'"\' + diseaseAnnotation + \'"\' + ",\\"i\\" )) .\\n"\n+\tif (diseaseAnnotation == \'\'):\n+\t\tquery += "\t}\\n"\n+\n+\tquery += "\\n"\n+\n+\tif (domainName == \'\'):\n+\t\tquery += "\tOPTIONAL {\\n"\n+\tquery += "\t?protein up:domain ?domain .\\n"\n+\tquery += "\t?domain up:recommendedName ?domainName .\\n"\n+\tquery += "\t?domainName up:fullName ?domainFullName .\\n"\n+\tif (domainName != \'\'):\n+\t\tquery += "\tfilter( regex(str(?domainFullName), " + \'"\' + domainName + \'"\' + ",\\"i\\" )) .\\n"\n+\tif (domainName == \'\'):\n+\t\tquery += "\t}\\n"\n+\n+\tquery += "\\n"\n+\n+\tif (similarityAnnotation == \'\'):\n+\t\tquery += "\tOPTIONAL {\\n"\n+\tquery += "\t?protein up:annotation ?similarityAnnotation .\\n"\n+\tquery += "\t?similarityAnnotation a up:Sim'..b'comment ?locationAnnotationText .\\n"\n+\tif (locationAnnotation != \'\'):\n+\t\tquery += "\tfilter( regex(str(?locationAnnotationText), " + \'"\' + locationAnnotation + \'"\' + ",\\"i\\" )) .\\n"\n+\tif (locationAnnotation == \'\'):\n+\t\tquery += "\t}\\n"\n+\n+\tquery += "\\n"\n+\n+\tif (functionAnnotation == \'\'):\n+\t\tquery += "\tOPTIONAL {\\n"\n+\tquery += "\t?protein up:annotation ?functionAnnotation .\\n"\n+\tquery += "\t?functionAnnotation a up:Function_Annotation .\\n"\n+\tquery += "\t?functionAnnotation rdfs:comment ?functionAnnotationText .\\n"\n+\tif (functionAnnotation != \'\'):\n+\t\tquery += "\tfilter( regex(str(?functionAnnotationText), " + \'"\' + functionAnnotation + \'"\' + ",\\"i\\" )) .\\n"\n+\tif(functionAnnotation == \'\'):\n+\t\tquery += "\t}\\n"\n+\n+\tquery += "\\n"\n+\n+\tif (pharmaceuticalAnnotation == \'\'):\n+\t\tquery += "\tOPTIONAL {\\n"\n+\tquery += "\t?protein up:annotation ?pharmaceuticalAnnotation .\\n"\n+\tquery += "\t?pharmaceuticalAnnotation a up:Pharmaceutical_Annotation .\\n"\n+\tquery += "\t?pharmaceuticalAnnotation rdfs:comment ?pharmaceuticalAnnotationText .\\n"\n+\tif (pharmaceuticalAnnotation != \'\'):\n+\t\tquery += "\tfilter( regex(str(?pharmaceuticalAnnotationText), " + \'"\' + pharmaceuticalAnnotation + \'"\' + ",\\"i\\" )) .\\n"\n+\tif (pharmaceuticalAnnotation == \'\'):\n+\t\tquery += "\t}\\n"\n+\tquery += "}\\n"\n+\t#query += "limit 30\\n"\n+\n+\treturn query\n+\n+def printResults(json, output):\n+\t# Abrir fichero para escritura\n+\tfileRes = open(output, \'w\')\n+\t# Imprimir cabecera\n+\tfor param in paramList:\n+\t\tfileRes.write(param + "\\t")\n+\tfileRes.write("\\n")\n+\n+\t# El formato json se puede recorrer de esta manera\n+\t# para ir obteniendo valores de la respuesta.\n+\tfor entrada in json["results"]["bindings"]:\n+\t\tfor param in paramList:\n+\t\t\tif (entrada.get(param)):\n+\t\t\t\tfileRes.write(entrada.get(param)["value"] + "\\t")\n+\t\t\telse:\n+\t\t\t\tfileRes.write("\\t")\n+\t\tfileRes.write("\\n")\n+\tfileRes.close()\n+\n+def sparqlwrap(proteinId, proteinName, geneName, organismName, diseaseAnnotation, domainName, similarityAnnotation, locationAnnotation, functionAnnotation, pharmaceuticalAnnotation, output):\n+\t\n+\tquery = buildQuery(proteinId, proteinName, geneName, organismName, diseaseAnnotation, domainName, similarityAnnotation, locationAnnotation, functionAnnotation, pharmaceuticalAnnotation)\n+\tprint query\n+\n+\t# Creamos un objeto del tipo SPARQLWrapper indicando en que\n+\t# direccion esta el servicio que recibe consultas en sparql\n+\t# y responde a estas.\n+\tsparql = SPARQLWrapper(\'http://sparql.uniprot.org/sparql\')\n+\n+\t# Especificamos la consulta que queremos hacer en sparql.\n+\tsparql.setQuery(query)\n+\n+\t# Indicamos en que formato queremos que nos devuelva\n+\t# los resultados de la consulta. Puede ser json, xml,\n+\t# rfd, turtle... Simplemente son distintos formatos\n+\t# para representar los datos en ficheros de texto.\n+\tsparql.setReturnFormat(JSON)\n+\n+\t# Esta es la instruccion que realiza la consulta a\n+\t# uniprot. Devuelve un objeto de python que hay que\n+\t# tratar.\n+\tprint "Ejecutando query"\n+\tresults = sparql.query()\n+\n+\t# Con esto, convertimos el objeto devuelto por\n+\t# el servicio al formato que especificamos antes.\n+\t# En este caso, json.\n+\tprint "Conviertiendo a json"\n+\tjson = results.convert()\n+\tprint "Fin conversion a json"\n+\n+\t# Dentro de la variable results tenemos informacion\n+\t# (metadatos) de lo que ha devuelto el servidor de\n+\t# uniprot.\n+\tprint results.info()\n+\n+\t# Imprimir resultados\n+\tprintResults(json, output)\n+\n+\n+# Obtener parametros de la linea de comandos.\n+proteinId = sys.argv[1]\n+proteinName = sys.argv[2]\n+geneName = sys.argv[3]\n+organismName = sys.argv[4]\n+diseaseAnnotation = sys.argv[5]\n+domainName =sys.argv[6]\n+similarityAnnotation = sys.argv[7]\n+locationAnnotation = sys.argv[8]\n+functionAnnotation = sys.argv[9]\n+pharmaceuticalAnnotation = sys.argv[10]\n+output = sys.argv[11]\n+\n+# Llamada a la funcion que realiza la consulta.\n+sparqlwrap(proteinId, proteinName, geneName, organismName, diseaseAnnotation, domainName, similarityAnnotation, locationAnnotation, functionAnnotation, pharmaceuticalAnnotation, output)\n+\n' |