diff get_data/kegg_glycan/getkcfKEGG.py @ 0:89592faa2875 draft

Uploaded
author chrisb
date Wed, 23 Mar 2016 14:35:56 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_data/kegg_glycan/getkcfKEGG.py	Wed Mar 23 14:35:56 2016 -0400
@@ -0,0 +1,67 @@
+__author__ = 'cbarnett'
+__license__ = "MIT"
+__version = "0.3"
+
+
+def helper_get_G(lineentry):
+    if "G" in lineentry.upper():
+        splitline = lineentry.split()
+        for item in splitline:
+            if "G" in item:
+                return item
+    return None
+
+
+def get_kcf_from_kegg(inputstream):
+    """
+    :param inputstream: input stream containing gl entries
+    :return: list of kcf output and list of db entry output or empty list
+    """
+    import urllib2
+
+    uri = 'http://rest.kegg.jp/get/'
+    if inputstream is None or inputstream == []:
+        raise IOError("empty input stream")
+    dbresponses = []
+    kcfresponses = []
+    for line in inputstream:
+        glentry = helper_get_G(line)
+        if glentry is not None:
+            try:
+                dbresponse = urllib2.urlopen(uri + glentry).read()
+                kcfresponse = urllib2.urlopen(uri + glentry + "/kcf").read()
+            except Exception as e:
+                raise urllib2.HTTPError(e.url, e.code, e.msg, e.hdrs, e.fp)
+            dbresponses.append(dbresponse)
+            kcfresponses.append(kcfresponse)
+    return kcfresponses, dbresponses
+
+
+if __name__ == "__main__":
+    from optparse import OptionParser
+
+    usage = "usage: python %prog [options]\n"
+    parser = OptionParser(usage=usage)
+    parser.add_option("-i", action="store", type="string", dest="i", default="input",
+                      help="single or double column text file containing GL entries")
+    parser.add_option("-k", action="store", type="string", dest="k", default="kcf.output",
+                      help="kcf output file name")
+    parser.add_option("-d", action="store", type="string", dest="d", default="db.output",
+                      help="KEGG db entry in text format output file name")
+    (options, args) = parser.parse_args()
+    try:
+        instream = file(options.i, 'r')
+    except Exception as e:
+        raise IOError(e, "the input file specified does not exist. Use -h flag for help")
+    kcf, db = get_kcf_from_kegg(instream)
+    try:
+        kcfout = file(options.k, 'w')
+        dbout = file(options.d, 'w')
+    except Exception as e:
+        raise IOError(e, "cannot open output files. -h flag for help")
+
+    kcfout.write("".join(kcf))
+    dbout.write("".join(db))
+    kcfout.close()
+    dbout.close()
+