Mercurial > repos > chrisb > gap_all_glycan_tools
diff get_data/kegg_glycan/getkcfKEGG.py @ 0:89592faa2875 draft
Uploaded
author | chrisb |
---|---|
date | Wed, 23 Mar 2016 14:35:56 -0400 |
parents | |
children |
line wrap: on
line diff
"""Download KEGG GLYCAN entries and their KCF records through the KEGG REST API.

Each line of the input is scanned for a whitespace-separated token that
contains an upper-case "G" (e.g. ``G00001``).  For every such token the
flat-file database entry and the KCF record are fetched and written to two
separate output files.
"""

from optparse import OptionParser

try:  # Python 2
    from urllib2 import urlopen
except ImportError:  # Python 3
    from urllib.request import urlopen

__author__ = 'cbarnett'
__license__ = "MIT"
__version__ = "0.3"
# Legacy, misspelled name kept so existing references keep working.
__version = __version__

KEGG_GET_URI = 'http://rest.kegg.jp/get/'


def helper_get_G(lineentry):
    """Return the first whitespace-separated token of *lineentry* containing "G".

    The match is case-sensitive: a token that only contains a lower-case "g"
    is not accepted.

    :param lineentry: one line of text from the input stream
    :return: the first matching token, or None when no token matches
    """
    for item in lineentry.split():
        if "G" in item:
            return item
    return None


def _fetch_text(url):
    """Return the body served at *url* as text and close the connection."""
    response = urlopen(url)
    try:
        payload = response.read()
    finally:
        response.close()
    # Python 3 hands back bytes; decode so callers can "".join() the results.
    if not isinstance(payload, str):
        payload = payload.decode("utf-8")
    return payload


def get_kcf_from_kegg(inputstream):
    """Fetch the KEGG db entry and the KCF record for every GL entry found.

    :param inputstream: iterable of text lines (e.g. an open file) containing
        gl entries
    :return: tuple ``(kcfresponses, dbresponses)``; both are lists of strings
        in input order and are empty when no line holds a matching token
    :raises IOError: if *inputstream* is None or an empty list
    :raises urllib HTTPError / URLError: propagated unchanged when a request
        fails
    """
    uri = KEGG_GET_URI
    if inputstream is None or inputstream == []:
        raise IOError("empty input stream")
    dbresponses = []
    kcfresponses = []
    for line in inputstream:
        glentry = helper_get_G(line)
        if glentry is None:
            continue
        # Fetch both records before storing either one so the two result
        # lists always stay paired.  Failures propagate as raised: re-wrapping
        # them as HTTPError broke on errors without url/code attributes
        # (e.g. URLError) and hid the real cause behind an AttributeError.
        dbresponse = _fetch_text(uri + glentry)
        kcfresponse = _fetch_text(uri + glentry + "/kcf")
        dbresponses.append(dbresponse)
        kcfresponses.append(kcfresponse)
    return kcfresponses, dbresponses


def main():
    """Command-line entry point: read GL entries, fetch them, write outputs."""
    usage = "usage: python %prog [options]\n"
    parser = OptionParser(usage=usage)
    parser.add_option("-i", action="store", type="string", dest="i", default="input",
                      help="single or double column text file containing GL entries")
    parser.add_option("-k", action="store", type="string", dest="k", default="kcf.output",
                      help="kcf output file name")
    parser.add_option("-d", action="store", type="string", dest="d", default="db.output",
                      help="KEGG db entry in text format output file name")
    (options, args) = parser.parse_args()

    try:
        instream = open(options.i, 'r')
    except (IOError, OSError) as e:
        raise IOError(e, "the input file specified does not exist. "
                         "Use -h flag for help")
    with instream:
        kcf, db = get_kcf_from_kegg(instream)

    # Output files are opened only after a successful download, so a failed
    # fetch never truncates existing output files.
    try:
        kcfout = open(options.k, 'w')
        try:
            dbout = open(options.d, 'w')
        except (IOError, OSError):
            # Do not leak the first handle when the second open fails.
            kcfout.close()
            raise
    except (IOError, OSError) as e:
        raise IOError(e, "cannot open output files. -h flag for help")

    with kcfout, dbout:
        kcfout.write("".join(kcf))
        dbout.write("".join(db))


if __name__ == "__main__":
    main()