0
|
1 __author__ = 'cbarnett'
|
|
2 __license__ = "MIT"
|
1
|
3 __version__ = "0.4"
|
0
|
4 # http://www.kegg.jp/kegg/rest/keggapi.html
|
|
5
|
|
6
|
|
def find_entries_in_KEGG(db, query):
    """
    Search a KEGG database through the KEGG REST "find" operation.

    The query is normalised before it is sent: new lines become '+'
    (treated as "AND"), 'ec: ' is collapsed to 'ec:', spaces around '+'
    are removed, and a query that still contains spaces is wrapped in
    double quotes with each space encoded as '%20'.

    :param db: name of the KEGG database to search, e.g. "glycan"
    :param query: text to search for
    :return: string of linked entries as returned by KEGG, or "" when the
             reply is empty
    :raises IOError: if db or query is "", [] or None
    :raises HTTPError: (urllib2 / urllib.error) if KEGG replies with an HTTP
                       error status; other network errors propagate unchanged
    """
    import re
    from contextlib import closing

    # Validate first so bad arguments are reported without touching the network.
    if db is None or db == "" or db == []:
        raise IOError("no db given")
    if query is None or query == "" or query == []:
        raise IOError("no query given")

    # urllib2 only exists on Python 2; fall back to its Python 3 equivalent.
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen

    # In case of new lines, assume this means the user wants to "AND".
    query = query.replace('\n', '+')
    query = query.replace('\r', '+')
    # "ec: 2.4.99.1" must become "ec:2.4.99.1". Other spaces are left alone
    # because they allow combination searching, e.g. "2.4.99.1 2.4.99.6".
    query = query.replace('ec: ', 'ec:')
    # Ensure no unnecessary space in an AND query, otherwise KEGG misbehaves.
    query = re.sub(r' *\+ *', '+', query)

    if ' ' in query:
        # A query with spaces must be quoted, otherwise KEGG misbehaves.
        query = '"' + query + '"'
        # Encode the spaces rather than letting the text after them be dropped.
        query = query.replace(' ', '%20')

    fulluri = 'http://rest.kegg.jp/find/' + db + "/" + query

    # HTTPError (and any other network error) propagates as raised by urlopen.
    # Rebuilding an HTTPError from an arbitrary exception would hide the real
    # cause behind an AttributeError whenever the failure is not an HTTP one.
    with closing(urlopen(fulluri)) as connection:
        response = connection.read()

    # Python 3 returns bytes; Python 2 already returns str and is left as is.
    # NOTE(review): assumes the KEGG reply is UTF-8 compatible text - confirm.
    if not isinstance(response, str):
        response = response.decode("utf-8")

    if response.strip() == "":
        return ""  # "" rather than None so the result can be written to a file
    return response
|
|
40
|
|
41
|
|
if __name__ == "__main__":
    # Command-line entry point: search one KEGG database for a query and
    # write the entries that were found to a text file.
    from optparse import OptionParser

    usage = "usage: python %prog [options]\n"
    parser = OptionParser(usage=usage)
    parser.add_option("-d", action="store", type="string", dest="d", default="glycan",
                      help="db name, options are: pathway | brite | module | ko | genome | <org> | compound | glycan | reaction | rpair | rclass | enzyme | disease | drug | dgroup | environ")
    parser.add_option("-q", action="store", type="string", dest="q", default="glucose",
                      help="query e.g. glucose")
    parser.add_option("-o", action="store", type="string", dest="o", default="found_entries.txt",
                      help="entries from search output in text format")
    (options, args) = parser.parse_args()

    # Open the output before querying so an unwritable path fails fast.
    # open() is used because the file() builtin does not exist on Python 3.
    try:
        outstream = open(options.o, 'w')
    except (IOError, OSError) as e:
        raise IOError(e, "the output file cannot be opened. Use -h flag for help")

    # The with-block closes the file even when the KEGG query itself fails.
    with outstream:
        results = find_entries_in_KEGG(db=options.d, query=options.q)
        try:
            outstream.write(results)
        except (IOError, OSError) as e:
            raise IOError(e, "cannot write to the output file. Use -h flag for help")
|
|
65
|
|
66
|