import io
import sys

from SPARQLWrapper import SPARQLWrapper, JSON

# Prefix declarations shared by every query; the text is placed verbatim at
# the start of the generated SPARQL.
COMMON_PREFIXES = """
PREFIX up:<http://purl.uniprot.org/core/>
PREFIX keywords:<http://purl.uniprot.org/keywords/>
PREFIX uniprotkb:<http://purl.uniprot.org/uniprot/>
PREFIX taxon:<http://purl.uniprot.org/taxonomy/>
PREFIX ec:<http://purl.uniprot.org/enzyme/>
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
PREFIX owl:<http://www.w3.org/2002/07/owl#>
PREFIX bibo:<http://purl.org/ontology/bibo/>
PREFIX dc:<http://purl.org/dc/terms/>
PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
PREFIX faldo:<http://biohackathon.org/resource/faldo#>
"""

# Names of the variables retrieved from the database.  The order matters:
# it is also the column order of the tab-separated output file.
paramList = [
    'protein',
    'proteinFullName',
    'geneName',
    'organismName',
    'diseaseAnnotationText',
    'domainFullName',
    'similarityAnnotationText',
    'locationAnnotationText',
    'functionAnnotationText',
    'pharmaceuticalAnnotationText',
]


def _sparql_string_literal(text):
    """Return *text* as a double-quoted SPARQL string literal.

    Backslashes, double quotes and line breaks are escaped so that the
    text cannot terminate the literal early or form an invalid escape.
    """
    escaped = (
        text.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return '"' + escaped + '"'


def _pattern_block(search_text, triples, text_variable):
    """Return one section of the WHERE clause.

    With an empty *search_text* the triples are wrapped in OPTIONAL, so the
    column is reported when present without restricting the result set.
    Otherwise the triples are mandatory and a case-insensitive regex filter
    on ``?text_variable`` is appended.
    """
    body = "".join(triples)
    if search_text == '':
        return " OPTIONAL {\n" + body + " }\n"
    return (
        body
        + " filter( regex(str(?" + text_variable + "), "
        + _sparql_string_literal(search_text)
        + ",\"i\" )) .\n"
    )


def buildQuery(proteinId, proteinName, geneName, organismName, diseaseAnnotation, domainName, similarityAnnotation, locationAnnotation, functionAnnotation, pharmaceuticalAnnotation, prefixes=None):
    """Build the SPARQL query text for a protein search.

    Every argument is a string; an empty string means "do not filter on
    this field".  ``proteinId`` is inserted as a ``uniprotkb:`` prefixed
    name in a VALUES clause.  Each remaining argument is used as a
    case-insensitive regular expression against the matching text
    variable; it is escaped as a SPARQL string literal, so it should be
    given unescaped.

    ``prefixes`` is the prefix header placed at the start of the query and
    defaults to the module-level ``COMMON_PREFIXES``.

    Returns the complete query as a single string.
    """
    if prefixes is None:
        prefixes = COMMON_PREFIXES

    selected = (
        "protein",
        "proteinFullName",
        "geneName",
        "organismName",
        "diseaseAnnotationText",
        "domainFullName",
        "similarityAnnotationText",
        "locationAnnotationText",
        "functionAnnotationText",
        "pharmaceuticalAnnotationText",
    )

    # One entry per searchable field:
    # (user text, variable the regex is applied to, triple patterns).
    sections = (
        (proteinName, "proteinFullName", (
            " ?protein up:recommendedName ?proteinName .\n",
            " ?proteinName up:fullName ?proteinFullName . \n",
        )),
        (geneName, "geneName", (
            " ?protein up:encodedBy ?gene .\n",
            " ?gene skos:prefLabel ?geneName .\n",
        )),
        (organismName, "organismName", (
            " ?protein up:organism ?organism .\n",
            " ?organism up:scientificName ?organismName .\n",
        )),
        (diseaseAnnotation, "diseaseAnnotationText", (
            " ?protein up:annotation ?diseaseAnnotation .\n",
            " ?diseaseAnnotation a up:Disease_Annotation .\n",
            " ?diseaseAnnotation up:disease ?disease .\n",
            " ?disease rdfs:comment ?diseaseAnnotationText\n",
        )),
        (domainName, "domainFullName", (
            " ?protein up:domain ?domain .\n",
            " ?domain up:recommendedName ?domainName .\n",
            " ?domainName up:fullName ?domainFullName .\n",
        )),
        # The filter variable must match the one bound by the triples; a
        # misspelled (unbound) variable makes the filter reject every row.
        (similarityAnnotation, "similarityAnnotationText", (
            " ?protein up:annotation ?similarityAnnotation .\n",
            " ?similarityAnnotation a up:Similarity_Annotation .\n",
            " ?similarityAnnotation rdfs:comment ?similarityAnnotationText .\n",
        )),
        (locationAnnotation, "locationAnnotationText", (
            " ?protein up:annotation ?locationAnnotation .\n",
            " ?locationAnnotation a up:Subcellular_Location_Annotation .\n",
            " ?locationAnnotation up:locatedIn ?location .\n",
            " ?location up:cellularComponent ?cellComponent .\n",
            " ?cellComponent rdfs:comment ?locationAnnotationText .\n",
        )),
        (functionAnnotation, "functionAnnotationText", (
            " ?protein up:annotation ?functionAnnotation .\n",
            " ?functionAnnotation a up:Function_Annotation .\n",
            " ?functionAnnotation rdfs:comment ?functionAnnotationText .\n",
        )),
        (pharmaceuticalAnnotation, "pharmaceuticalAnnotationText", (
            " ?protein up:annotation ?pharmaceuticalAnnotation .\n",
            " ?pharmaceuticalAnnotation a up:Pharmaceutical_Annotation .\n",
            " ?pharmaceuticalAnnotation rdfs:comment ?pharmaceuticalAnnotationText .\n",
        )),
    )

    parts = [prefixes, "select distinct \n"]
    parts.extend(" ?" + name + "\n" for name in selected)
    parts.append("where{\n")
    parts.append(" ?protein a up:Protein .\n")
    if proteinId != '':
        # Restrict the search to a single UniProt accession.
        parts.append(" VALUES ?protein {uniprotkb:" + proteinId + "}\n")
    parts.append("\n")

    # Sections are separated by one blank line; the last one is followed
    # directly by the brace closing the WHERE clause.
    parts.append("\n".join(
        _pattern_block(text, triples, variable)
        for text, variable, triples in sections
    ))
    parts.append("}\n")

    return "".join(parts)


def printResults(json, output, columns=None):
    """Write SPARQL JSON results to a tab-separated text file.

    ``json`` is the decoded result document: rows are read from
    ``json["results"]["bindings"]`` and each cell from
    ``row[name]["value"]``.  (The parameter name shadows the standard
    ``json`` module inside this function; it is kept for compatibility.)
    ``output`` is the path of the file to create or overwrite.
    ``columns`` lists the variable names to export, in order; it defaults
    to the module-level ``paramList``.

    The first line holds the column names.  Every field, including the
    last one on a line, is followed by a tab; a variable that is unbound
    in a row yields an empty field.
    """
    if columns is None:
        columns = paramList

    # io.open with an explicit encoding accepts non-ASCII values on both
    # Python 2 and Python 3, and the with-block closes the file even when
    # a write fails part-way through.
    with io.open(output, 'w', encoding='utf-8') as fileRes:
        # Header line.
        fileRes.write(u"".join(name + u"\t" for name in columns) + u"\n")

        # One line per result row.
        for entrada in json["results"]["bindings"]:
            cells = []
            for name in columns:
                binding = entrada.get(name)
                cells.append(binding["value"] if binding else u"")
            fileRes.write(u"".join(cell + u"\t" for cell in cells) + u"\n")


def sparqlwrap(proteinId, proteinName, geneName, organismName, diseaseAnnotation, domainName, similarityAnnotation, locationAnnotation, functionAnnotation, pharmaceuticalAnnotation, output):
    """Run a protein search against the UniProt SPARQL service.

    The first ten arguments are passed unchanged to ``buildQuery``;
    ``output`` is the path handed to ``printResults`` for the result file.
    The query text, progress messages and the response metadata are
    printed to standard output.  Nothing is returned.
    """
    query = buildQuery(proteinId, proteinName, geneName, organismName, diseaseAnnotation, domainName, similarityAnnotation, locationAnnotation, functionAnnotation, pharmaceuticalAnnotation)
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(query)

    # Client object bound to the address of the service that accepts
    # SPARQL queries and answers them.
    endpoint = SPARQLWrapper('http://sparql.uniprot.org/sparql')

    # The query to send.
    endpoint.setQuery(query)

    # Format the results should come back in (JSON here; the service can
    # also produce XML, RDF, Turtle, ...).
    endpoint.setReturnFormat(JSON)

    # This call performs the request against UniProt and returns a
    # response object that still has to be decoded.
    print("Ejecutando query")
    response = endpoint.query()

    # Decode the response into the format requested above.
    print("Conviertiendo a json")
    decoded = response.convert()
    print("Fin conversion a json")

    # The response object also carries metadata about what the server
    # returned.
    print(response.info())

    # Write the results file.
    printResults(decoded, output)


# Command-line entry point.  Eleven positional arguments are expected: the
# ten search fields in the order taken by sparqlwrap (an empty string leaves
# a field unfiltered) followed by the path of the output file.
if __name__ == "__main__":
    # Report missing arguments with a usage line instead of an IndexError
    # traceback.  Extra trailing arguments are ignored.
    if len(sys.argv) < 12:
        sys.exit(
            "usage: %s proteinId proteinName geneName organismName "
            "diseaseAnnotation domainName similarityAnnotation "
            "locationAnnotation functionAnnotation "
            "pharmaceuticalAnnotation output" % sys.argv[0]
        )

    # Read the parameters from the command line.
    (
        proteinId,
        proteinName,
        geneName,
        organismName,
        diseaseAnnotation,
        domainName,
        similarityAnnotation,
        locationAnnotation,
        functionAnnotation,
        pharmaceuticalAnnotation,
        output,
    ) = sys.argv[1:12]

    # Call the function that runs the query and writes the results.
    sparqlwrap(proteinId, proteinName, geneName, organismName, diseaseAnnotation, domainName, similarityAnnotation, locationAnnotation, functionAnnotation, pharmaceuticalAnnotation, output)