diff scripts/safety.py @ 1:dddadbbac949 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/table_compute commit 6820ec9431a22576f3716c40feeb27f0b8cf5e83"
author iuc
date Fri, 30 Aug 2019 05:28:18 -0400
parents 1b0f96ed73f2
children 02c3e335a695
line wrap: on
line diff
--- a/scripts/safety.py	Sat Aug 17 16:25:37 2019 -0400
+++ b/scripts/safety.py	Fri Aug 30 05:28:18 2019 -0400
@@ -11,6 +11,7 @@
         '(', ')', 'if', 'else', 'or', 'and', 'not', 'in',
         '+', '-', '*', '/', '%', ',', '!=', '==', '>', '>=', '<', '<=',
         'min', 'max', 'sum',
+        'str', 'int', 'float'
     )
     __allowed_ref_types = {
         'pd.DataFrame': {
@@ -163,26 +164,25 @@
 
         safe = True
         # examples of user-expressions
-        # '-math.log(1 - elem/4096) * 4096 if elem != bn else elem - 0.5'
+        # '-math.log(1 - elem/4096) * 4096 if elem != 1 else elem - 0.5'
         # 'vec.median() +  vec.sum()'
 
         # 1. Break expressions into tokens
         # e.g.,
         # [
         #     '-', 'math.log', '(', '1', '-', 'elem', '/', '4096', ')', '*',
-        #     '4096', 'if', 'elem', '!=', 'bn', 'else', 'elem', '-', '0.5'
+        #     '4096', 'if', 'elem', '!=', '1', 'else', 'elem', '-', '0.5'
         # ]
         # or
         # ['vec.median', '(', ')', '+', 'vec.sum', '(', ')']
         tokens = [
             e for e in re.split(
-                r'([a-zA-Z0-9_.]+|[^a-zA-Z0-9_.() ]+|[()])', self.expr
+                r'("[a-zA-Z%0-9_.]+"|[a-zA-Z0-9_.]+|[^a-zA-Z0-9_.() ]+|[()])', self.expr
             ) if e.strip()
         ]
 
         # 2. Subtract allowed standard tokens
         rem = [e for e in tokens if e not in self.__allowed_tokens]
-
         # 3. Subtract allowed qualified objects from allowed modules
         #    and whitelisted references and their attributes
         rem2 = []
@@ -194,18 +194,32 @@
             if len(parts) == 2:
                 if parts[0] in self.these:
                     parts[0] = '_this'
+                elif parts[0] == "":
+                    # e.g. '.T' gives ['','.T']
+                    # Here we assume that the blank parts[0] refers to the
+                    # self.ref_type (e.g. "pd.DataFrame"), and that
+                    # the second part is a function of that type.
+                    if parts[1] in self.allowed_qualified['_this']:
+                        continue
+
                 if parts[0] in self.allowed_qualified:
                     if parts[1] in self.allowed_qualified[parts[0]]:
                         continue
+
             rem2.append(e)
 
-        # 4. Assert that rest are real numbers
+        # Debug
+        # for x in (tokens, rem, rem2):print(x)
+
+        # 4. Assert that rest are real numbers or strings
         e = ''
         for e in rem2:
             try:
                 _ = float(e)
             except ValueError:
-                safe = False
-                break
+                # e.g. '"TEXT"' is okay.
+                if not(e[0] == '"' and e[-1] == '"'):
+                    safe = False
+                    break
 
         return safe, e