comparison scripts/safety.py @ 2:02c3e335a695 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/table_compute commit d00a518202228b990aeeea2ec3f842501fd2ec09"
author iuc
date Fri, 13 Sep 2019 14:54:41 -0400
parents dddadbbac949
children
comparison
equal deleted inserted replaced
1:dddadbbac949 2:02c3e335a695
9 9
10 __allowed_tokens = ( 10 __allowed_tokens = (
11 '(', ')', 'if', 'else', 'or', 'and', 'not', 'in', 11 '(', ')', 'if', 'else', 'or', 'and', 'not', 'in',
12 '+', '-', '*', '/', '%', ',', '!=', '==', '>', '>=', '<', '<=', 12 '+', '-', '*', '/', '%', ',', '!=', '==', '>', '>=', '<', '<=',
13 'min', 'max', 'sum', 13 'min', 'max', 'sum',
14 'str', 'int', 'float'
15 ) 14 )
16 __allowed_ref_types = { 15 __allowed_ref_types = {
17 'pd.DataFrame': { 16 'pd.DataFrame': {
18 'abs', 'add', 'agg', 'aggregate', 'align', 'all', 'any', 'append', 17 'abs', 'add', 'agg', 'aggregate', 'align', 'all', 'any', 'append',
19 'apply', 'applymap', 'as_matrix', 'asfreq', 'at', 'axes', 'bool', 18 'apply', 'applymap', 'as_matrix', 'asfreq', 'at', 'axes', 'bool',
175 # ] 174 # ]
176 # or 175 # or
177 # ['vec.median', '(', ')', '+', 'vec.sum', '(', ')'] 176 # ['vec.median', '(', ')', '+', 'vec.sum', '(', ')']
178 tokens = [ 177 tokens = [
179 e for e in re.split( 178 e for e in re.split(
180 r'("[a-zA-Z%0-9_.]+"|[a-zA-Z0-9_.]+|[^a-zA-Z0-9_.() ]+|[()])', self.expr 179 r'([a-zA-Z0-9_.]+|[^a-zA-Z0-9_.() ]+|[()])', self.expr
181 ) if e.strip() 180 ) if e.strip()
182 ] 181 ]
183 182
184 # 2. Subtract allowed standard tokens 183 # 2. Subtract allowed standard tokens
185 rem = [e for e in tokens if e not in self.__allowed_tokens] 184 rem = [e for e in tokens if e not in self.__allowed_tokens]
185
186 # 3. Subtract allowed qualified objects from allowed modules 186 # 3. Subtract allowed qualified objects from allowed modules
187 # and whitelisted references and their attributes 187 # and whitelisted references and their attributes
188 rem2 = [] 188 rem2 = []
189 for e in rem: 189 for e in rem:
190 parts = e.split('.') 190 parts = e.split('.')
192 if parts[0] in self.these: 192 if parts[0] in self.these:
193 continue 193 continue
194 if len(parts) == 2: 194 if len(parts) == 2:
195 if parts[0] in self.these: 195 if parts[0] in self.these:
196 parts[0] = '_this' 196 parts[0] = '_this'
197 elif parts[0] == "":
198 # e.g. '.T'.split('.') gives ['', 'T']
199 # Here we assume that the blank parts[0] refers to
200 # self.ref_type (e.g. "pd.DataFrame"), and that
201 # the second part is an attribute or method of that type.
202 if parts[1] in self.allowed_qualified['_this']:
203 continue
204
205 if parts[0] in self.allowed_qualified: 197 if parts[0] in self.allowed_qualified:
206 if parts[1] in self.allowed_qualified[parts[0]]: 198 if parts[1] in self.allowed_qualified[parts[0]]:
207 continue 199 continue
208
209 rem2.append(e) 200 rem2.append(e)
210
211 # Debug
212 # for x in (tokens, rem, rem2):print(x)
213 201
214 # 4. Assert that rest are real numbers or strings 202 # 4. Assert that rest are real numbers or strings
215 e = '' 203 e = ''
216 for e in rem2: 204 for e in rem2:
217 try: 205 try:
218 _ = float(e) 206 _ = float(e)
219 except ValueError: 207 except ValueError:
220 # e.g. '"TEXT"' is okay. 208 safe = False
221 if not(e[0] == '"' and e[-1] == '"'): 209 break
222 safe = False
223 break
224 210
225 return safe, e 211 return safe, e