diff get_data/kegg_glycan/findKEGG.py @ 1:0a5e0df17054 draft default tip

Uploaded
author chrisb
date Fri, 06 May 2016 08:05:48 -0400
parents 89592faa2875
children
line wrap: on
line diff
--- a/get_data/kegg_glycan/findKEGG.py	Wed Mar 23 14:35:56 2016 -0400
+++ b/get_data/kegg_glycan/findKEGG.py	Fri May 06 08:05:48 2016 -0400
@@ -1,6 +1,6 @@
 __author__ = 'cbarnett'
 __license__ = "MIT"
-__version__ = "0.3"
+__version__ = "0.4"
 # http://www.kegg.jp/kegg/rest/keggapi.html
 
 
@@ -16,8 +16,18 @@
         raise IOError("no db given")
     if query == [] or query == "" or query is None:
         raise IOError("no query given")
-    query = query.replace('\n', ' ')  # in case of new lines
-    query = query.replace('\r', ' ')  # in case of new lines
+    query = query.replace('\n', '+')  # in case of new lines, assume the user wants to "AND" the terms
+    query = query.replace('\r', '+')  # in case of carriage returns, assume the user wants to "AND" the terms
+    query = query.replace('ec: ', 'ec:')  # remove the space after "ec:" (e.g. "ec: 2.4.99.1"); spaces elsewhere are usually fine, as they allow combined searches such as "2.4.99.1 2.4.99.6"
+    
+    import re
+    p = re.compile(' *\+ *') # ensure no unnecessary spaces in an AND query, otherwise incorrect behaviour from KEGG
+    queryfix = p.subn('+', query)
+    query=queryfix[0]
+
+    if ' ' in query:
+        query='"'+query+'"' # if spaces, query must be placed in quotes, otherwise incorrect behaviour from KEGG
+    query = query.replace(' ', '%20')  # previously, text after a space was ignored; instead convert spaces to '%20' and pass them on to the KEGG REST service
     uri = 'http://rest.kegg.jp/find/'
     fulluri = uri + db + "/" + query
     try:
@@ -25,7 +35,7 @@
     except Exception as e:
         raise urllib2.HTTPError(e.url, e.code, e.msg, e.hdrs, e.fp)
     if str(response.strip()) == "":
-        return None
+        return "" # change return from None to "" for easily writing to file
     return response