annotate get_data/kegg_glycan/getkcfKEGG.py @ 1:0a5e0df17054 draft default tip

Uploaded
author chrisb
date Fri, 06 May 2016 08:05:48 -0400
parents 89592faa2875
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
89592faa2875 Uploaded
chrisb
parents:
diff changeset
# Module metadata.
__author__ = 'cbarnett'
__license__ = "MIT"
__version__ = "0.3"
# Legacy spelling (missing trailing underscores), kept so existing references
# to the old attribute name keep working.
__version = __version__
89592faa2875 Uploaded
chrisb
parents:
diff changeset
4
89592faa2875 Uploaded
chrisb
parents:
diff changeset
5
89592faa2875 Uploaded
chrisb
parents:
diff changeset
def helper_get_G(lineentry):
    """
    Return the first whitespace-separated token of a line that contains "G".

    :param lineentry: one line of text (string) to scan
    :return: the first token containing an upper-case "G", or None when no
             token contains one
    """
    # The test is case-sensitive: a token is only returned when it holds an
    # upper-case "G". A preliminary case-insensitive check on the whole line
    # would never change the outcome, so the tokens are scanned directly.
    for item in lineentry.split():
        if "G" in item:
            return item
    return None
89592faa2875 Uploaded
chrisb
parents:
diff changeset
13
89592faa2875 Uploaded
chrisb
parents:
diff changeset
14
89592faa2875 Uploaded
chrisb
parents:
diff changeset
def get_kcf_from_kegg(inputstream):
    """
    Fetch the database entry and the KCF record for each GL entry in the input.

    Every line of the input is passed to helper_get_G; for each entry it
    returns, two requests are made against http://rest.kegg.jp/get/ : one for
    the entry itself and one for the entry with "/kcf" appended.

    :param inputstream: input stream containing gl entries
    :return: list of kcf output and list of db entry output or empty list
    :raises IOError: if inputstream is None or an empty list
    Errors raised by urlopen (for example HTTPError for a failed request)
    propagate to the caller unchanged.
    """
    try:  # Python 2
        from urllib2 import urlopen
    except ImportError:  # Python 3
        from urllib.request import urlopen
    from contextlib import closing

    def _fetch(url):
        # Read the whole response body and always release the connection.
        with closing(urlopen(url)) as response:
            data = response.read()
        # Where read() yields bytes that are not str (Python 3), decode them
        # so callers can still combine the results with "".join(...).
        if not isinstance(data, str):
            data = data.decode("utf-8")
        return data

    uri = 'http://rest.kegg.jp/get/'
    if inputstream is None or inputstream == []:
        raise IOError("empty input stream")
    dbresponses = []
    kcfresponses = []
    for line in inputstream:
        glentry = helper_get_G(line)
        if glentry is not None:
            # No blanket "except Exception" here: re-wrapping every failure as
            # an HTTPError raised AttributeError for non-HTTP errors, which
            # hid the real cause. Let the original exception surface instead.
            dbresponse = _fetch(uri + glentry)
            kcfresponse = _fetch(uri + glentry + "/kcf")
            dbresponses.append(dbresponse)
            kcfresponses.append(kcfresponse)
    return kcfresponses, dbresponses
89592faa2875 Uploaded
chrisb
parents:
diff changeset
38
89592faa2875 Uploaded
chrisb
parents:
diff changeset
39
89592faa2875 Uploaded
chrisb
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: read GL entries from the -i file, fetch them
    # with get_kcf_from_kegg, then write the KCF records to the -k file and
    # the database entries to the -d file.
    from optparse import OptionParser

    usage = "usage: python %prog [options]\n"
    parser = OptionParser(usage=usage)
    parser.add_option("-i", action="store", type="string", dest="i", default="input",
                      help="single or double column text file containing GL entries")
    parser.add_option("-k", action="store", type="string", dest="k", default="kcf.output",
                      help="kcf output file name")
    parser.add_option("-d", action="store", type="string", dest="d", default="db.output",
                      help="KEGG db entry in text format output file name")
    (options, args) = parser.parse_args()

    # open() replaces the Python 2-only file() builtin.
    try:
        instream = open(options.i, 'r')
    except (IOError, OSError) as e:
        raise IOError(e, "the input file specified does not exist. Use -h flag for help")
    # The input is consumed line by line while fetching; close it afterwards.
    with instream:
        kcf, db = get_kcf_from_kegg(instream)

    try:
        kcfout = open(options.k, 'w')
        try:
            dbout = open(options.d, 'w')
        except Exception:
            # Do not leak the first handle when the second open fails.
            kcfout.close()
            raise
    except (IOError, OSError) as e:
        raise IOError(e, "cannot open output files. -h flag for help")

    # Both files are closed even if one of the writes fails.
    with kcfout, dbout:
        kcfout.write("".join(kcf))
        dbout.write("".join(db))
89592faa2875 Uploaded
chrisb
parents:
diff changeset
67