annotate get_data/kegg_glycan/findKEGG.py @ 1:0a5e0df17054 draft default tip

Uploaded
author chrisb
date Fri, 06 May 2016 08:05:48 -0400
parents 89592faa2875
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
89592faa2875 Uploaded
chrisb
parents:
diff changeset
1 __author__ = 'cbarnett'
89592faa2875 Uploaded
chrisb
parents:
diff changeset
2 __license__ = "MIT"
1
0a5e0df17054 Uploaded
chrisb
parents: 0
diff changeset
3 __version__ = "0.4"
0
89592faa2875 Uploaded
chrisb
parents:
diff changeset
4 # http://www.kegg.jp/kegg/rest/keggapi.html
89592faa2875 Uploaded
chrisb
parents:
diff changeset
5
89592faa2875 Uploaded
chrisb
parents:
diff changeset
6
89592faa2875 Uploaded
chrisb
parents:
diff changeset
def find_entries_in_KEGG(db, query):
    """
    Search a KEGG database using the KEGG REST "find" operation.

    :param db: name of the KEGG database to search, e.g. "glycan"
    :param query: search text; new lines are turned into "+" (an AND query)
    :return: the response body as a string, or "" if the response is blank
    :raises IOError: if db or query is an empty string, an empty list or None

    Errors raised by urlopen (HTTP errors, connection failures) are passed on
    to the caller unchanged.
    """
    import re

    # urllib2 only exists on Python 2; fall back to its Python 3 replacement.
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen

    if db is None or db == "" or db == []:
        raise IOError("no db given")
    if query is None or query == "" or query == []:
        raise IOError("no query given")

    # In case of new lines, assume this means the user wants to "AND".
    query = query.replace('\n', '+')
    query = query.replace('\r', '+')
    # Remove the space in e.g. "ec: 2.4.99.1". Spaces elsewhere are kept, as
    # they allow combination searching such as "2.4.99.1 2.4.99.6".
    query = query.replace('ec: ', 'ec:')

    # Ensure no unnecessary space around "+" in an AND query, otherwise KEGG
    # behaves incorrectly.
    query = re.sub(r' *\+ *', '+', query)

    if ' ' in query:
        # A query containing spaces must be placed in quotes, otherwise KEGG
        # behaves incorrectly.
        query = '"' + query + '"'
        # Escape the spaces instead of dropping the text that follows them.
        query = query.replace(' ', '%20')

    fulluri = 'http://rest.kegg.jp/find/' + db + "/" + query

    # No try/except here: the previous handler rebuilt an HTTPError from any
    # exception, which failed with AttributeError for errors that lack
    # url/code/msg/hdrs/fp and hid the real cause.
    response = urlopen(fulluri).read()

    # On Python 3 read() returns bytes; convert so callers always get text.
    # NOTE(review): assumes the service replies in UTF-8 - confirm.
    if not isinstance(response, str):
        response = response.decode("utf-8")

    if response.strip() == "":
        return ""  # "" rather than None, for easily writing to file
    return response
89592faa2875 Uploaded
chrisb
parents:
diff changeset
40
89592faa2875 Uploaded
chrisb
parents:
diff changeset
41
89592faa2875 Uploaded
chrisb
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: search KEGG and write the matching entries
    # to a text file.
    from optparse import OptionParser

    usage = "usage: python %prog [options]\n"
    parser = OptionParser(usage=usage)
    parser.add_option("-d", action="store", type="string", dest="d", default="glycan",
                      help="db name, options are: pathway | brite | module | ko | genome | <org> | compound | glycan | reaction | rpair | rclass | enzyme | disease | drug | dgroup | environ")
    parser.add_option("-q", action="store", type="string", dest="q", default="glucose",
                      help="query e.g. glucose")
    parser.add_option("-o", action="store", type="string", dest="o", default="found_entries.txt",
                      help="entries from search output in text format")
    (options, args) = parser.parse_args()

    # open() instead of the Python 2-only file() builtin. The file is still
    # opened before the search so an unwritable path fails early.
    try:
        outstream = open(options.o, 'w')
    except Exception as e:
        raise IOError(e, "the output file cannot be opened. Use -h flag for help")

    # The with-block closes the file even when the search itself fails.
    with outstream:
        results = find_entries_in_KEGG(db=options.d, query=options.q)
        try:
            outstream.write(results)
        except Exception as e:
            raise IOError(e, "cannot write to the output file. Use -h flag for help")
89592faa2875 Uploaded
chrisb
parents:
diff changeset
65
89592faa2875 Uploaded
chrisb
parents:
diff changeset
66