Mercurial > repos > iuc > table_compute
changeset 2:02c3e335a695 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/table_compute commit d00a518202228b990aeeea2ec3f842501fd2ec09"
author | iuc |
---|---|
date | Fri, 13 Sep 2019 14:54:41 -0400 |
parents | dddadbbac949 |
children | 60ff16842fcd |
files | scripts/safety.py scripts/table_compute.py table_compute.xml |
diffstat | 3 files changed, 185 insertions(+), 151 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/safety.py Fri Aug 30 05:28:18 2019 -0400 +++ b/scripts/safety.py Fri Sep 13 14:54:41 2019 -0400 @@ -11,7 +11,6 @@ '(', ')', 'if', 'else', 'or', 'and', 'not', 'in', '+', '-', '*', '/', '%', ',', '!=', '==', '>', '>=', '<', '<=', 'min', 'max', 'sum', - 'str', 'int', 'float' ) __allowed_ref_types = { 'pd.DataFrame': { @@ -177,12 +176,13 @@ # ['vec.median', '(', ')', '+', 'vec.sum', '(', ')'] tokens = [ e for e in re.split( - r'("[a-zA-Z%0-9_.]+"|[a-zA-Z0-9_.]+|[^a-zA-Z0-9_.() ]+|[()])', self.expr + r'([a-zA-Z0-9_.]+|[^a-zA-Z0-9_.() ]+|[()])', self.expr ) if e.strip() ] # 2. Subtract allowed standard tokens rem = [e for e in tokens if e not in self.__allowed_tokens] + # 3. Subtract allowed qualified objects from allowed modules # and whitelisted references and their attributes rem2 = [] @@ -194,32 +194,18 @@ if len(parts) == 2: if parts[0] in self.these: parts[0] = '_this' - elif parts[0] == "": - # e.g. '.T' gives ['','.T'] - # Here we assume that the blank part[0] refers to the - # self.ref_type (e.g. "pd.DataFrame"), and that - # the second part is a function of that type. - if parts[1] in self.allowed_qualified['_this']: - continue - if parts[0] in self.allowed_qualified: if parts[1] in self.allowed_qualified[parts[0]]: continue - rem2.append(e) - # Debug - # for x in (tokens, rem, rem2):print(x) - # 4. Assert that rest are real numbers or strings e = '' for e in rem2: try: _ = float(e) except ValueError: - # e.g. '"TEXT"' is okay. - if not(e[0] == '"' and e[-1] == '"'): - safe = False - break + safe = False + break return safe, e
--- a/scripts/table_compute.py Fri Aug 30 05:28:18 2019 -0400 +++ b/scripts/table_compute.py Fri Sep 13 14:54:41 2019 -0400 @@ -4,7 +4,7 @@ """ -__version__ = "0.9.1" +__version__ = "0.9.2" import csv import math @@ -265,7 +265,12 @@ mode = params["element_mode"] if mode == "replace": replacement_val = params["element_replace"] - out_table = data.mask(bool_mat, replacement_val) + out_table = data.mask( + bool_mat, + data.where(bool_mat).applymap( + lambda x: replacement_val.format(elem=x) + ) + ) elif mode == "modify": mod_op = Utils.getOneValueMathOp(params["element_modify_op"]) out_table = data.mask( @@ -300,7 +305,9 @@ elif user_mode_single == "fulltable": general_mode = params["mode"] - if general_mode == "melt": + if general_mode == "transpose": + out_table = data.T + elif general_mode == "melt": melt_ids = params["MELT"]["melt_ids"] melt_values = params["MELT"]["melt_values"]
--- a/table_compute.xml Fri Aug 30 05:28:18 2019 -0400 +++ b/table_compute.xml Fri Sep 13 14:54:41 2019 -0400 @@ -1,7 +1,7 @@ <tool id="table_compute" name="Table Compute" version="@VERSION@"> <description>computes operations on table data</description> <macros> - <token name="@VERSION@">0.9.1</token> + <token name="@VERSION@">0.9.2</token> <token name="@COPEN@"><![CDATA[<code>]]></token> <token name="@CCLOSE@"><![CDATA[</code>]]></token> <import>allowed_functions.xml</import> @@ -23,7 +23,7 @@ <sanitizer sanitize="false" /> </macro> <macro name="validator_functiondef"> - <validator type="regex" message="An expression is required and is allowed to contain only letters, numbers and the characters '_ !-+=/*%.<>()'">^['"\w !\-+=/*%,.<>()]+$</validator> + <validator type="regex" message="An expression is required and is allowed to contain only letters, numbers and the characters _ !-+=/*%.<>()">^[\w !\-+=/*%,.<>()]+$</validator> <sanitizer sanitize="false" /> </macro> <!-- macro for main input tests --> @@ -51,8 +51,9 @@ <yield /> <conditional name="user" > <param name="mode" value="element" /> - <param name="element_op" value="gt" /> - <param name="element_value" value="0" /> + <conditional name="elem_val"> + <param name="element_op" value="None" /> + </conditional> <conditional name="element" > <param name="mode" value="custom" /> <param name="custom_expr" value="-math.log(1 - elem/4096) * 4096 if elem != 4096 else elem - 0.5" /> @@ -133,6 +134,7 @@ } #if str($singtabop.use_type) == "single": + #set $op_mode = str($singtabop.user.mode) Data = { "tables": [ { @@ -159,12 +161,12 @@ } ], "params": { - "user_mode_single": '$singtabop.user.mode.value', - #if $singtabop.user.mode.value == 'precision': + "user_mode_single": '$op_mode', + #if $op_mode == 'precision': ## Literally do nothing, the user just sets the precision slider ## at the top default level } - #elif $singtabop.user.mode.value == 'select': + #elif $op_mode == 'select': "select_cols_unique": #echo 
'select_cols_keepdupe' in str($singtabop.user.select_keepdupe)#, "select_rows_unique": #echo 'select_rows_keepdupe' in str($singtabop.user.select_keepdupe)#, #if $singtabop.user.select_cols_wanted: @@ -178,24 +180,25 @@ "select_rows_wanted": None, #end if } - #elif $singtabop.user.mode.value == 'filtersumval': - "filtersumval_mode": '$singtabop.user.filtersumval_mode.use.value', - "filtersumval_axis": $singtabop.user.axis.value, - #if $singtabop.user.filtersumval_mode.use.value == 'operation': - "filtersumval_compare": '$singtabop.user.filtersumval_mode.compare_op.value', - "filtersumval_op": '$singtabop.user.filtersumval_mode.operation.value', + #elif $op_mode == 'filtersumval': + #set $filter_type = str($singtabop.user.filtersumval_mode.use) + "filtersumval_mode": '$filter_type', + "filtersumval_axis": $singtabop.user.axis, + #if $filter_type == 'operation': + "filtersumval_compare": '$singtabop.user.filtersumval_mode.compare_op', + "filtersumval_op": '$singtabop.user.filtersumval_mode.operation', "filtersumval_against": $singtabop.user.filtersumval_mode.against, "filtersumval_minmatch": None, - #elif $singtabop.user.filtersumval_mode.use.value == 'element': + #elif $filter_type == 'element': "filtersumval_compare": None, - "filtersumval_op": '$singtabop.user.filtersumval_mode.operation.value', + "filtersumval_op": '$singtabop.user.filtersumval_mode.operation', ## against could be string or float, so we parse this in the code "filtersumval_against": '$singtabop.user.filtersumval_mode.against', "filtersumval_minmatch": $singtabop.user.filtersumval_mode.minmatch, #end if } - #elif $singtabop.user.mode.value == 'matrixapply': - "matrixapply_dimension": $singtabop.user.dimension.value, + #elif $op_mode == 'matrixapply': + "matrixapply_dimension": $singtabop.user.dimension, #if str($singtabop.user.matrixapply_func.vector_op) == 'custom': #set $custom_func = str($singtabop.user.matrixapply_func.custom_func).strip() "matrixapply_custom": True, @@ -207,10 +210,12 @@ 
"matrixapply_op": '$singtabop.user.matrixapply_func.vector_op', #end if } - #elif $singtabop.user.mode.value == 'element': - #if str($singtabop.user.elem_val.element_op) == "None": + #elif $op_mode == 'element': + #set $filter_type = str($singtabop.user.elem_val.element_op) + #set $element_mode = str($singtabop.user.element.mode) + #if $filter_type == "None": "element_op": None, - #elif str($singtabop.user.elem_val.element_op) == "rowcol": + #elif $filter_type == "rowcol": "element_op": "rowcol", #if $singtabop.user.elem_val.select_rows_wanted "element_rows": '$singtabop.user.elem_val.select_rows_wanted', @@ -220,24 +225,24 @@ #end if "element_inclusive": $singtabop.user.elem_val.inclusive_selection, #else: - "element_op": '$singtabop.user.elem_val.element_op.value', + "element_op": '$filter_type', ## Value is string or float, parsed in code later "element_value" : '$singtabop.user.elem_val.element_value', #end if - "element_mode": '$singtabop.user.element.mode.value', - #if str($singtabop.user.element.mode) == "replace": - "element_replace": '$singtabop.user.element.replace_value.value', - #elif str($singtabop.user.element.mode) == "modify": - "element_modify_op": '$singtabop.user.element.modify_op.value', - #elif str($singtabop.user.element.mode) == "scale": - "element_scale_op": '$singtabop.user.element.scale_op.value', + "element_mode": '$element_mode', + #if $element_mode == "replace": + "element_replace": '$singtabop.user.element.replace_value', + #elif $element_mode == "modify": + "element_modify_op": '$singtabop.user.element.modify_op', + #elif $element_mode == "scale": + "element_scale_op": '$singtabop.user.element.scale_op', "element_scale_value": $singtabop.user.element.scale_value, - #elif str($singtabop.user.element.mode) == "custom": + #elif $element_mode == "custom": #set $custom_func = str($singtabop.user.element.custom_expr).strip() "element_customop": '$custom_func', #end if } - #elif $singtabop.user.mode.value == 'fulltable': + #elif $op_mode == 
'fulltable': "mode": '$singtabop.user.general.use', #if str($singtabop.user.general.use) == 'melt': #if str($singtabop.user.general.id_vars).strip(): @@ -273,7 +278,7 @@ "pivot_values": $pivot_values, }, #elif str($singtabop.user.general.use) == 'custom': - #set $custom_func = str($singtabop.user.general.fulltable_custom_expr.value).strip() + #set $custom_func = str($singtabop.user.general.fulltable_custom_expr).strip() "fulltable_customop": '$custom_func', #end if @@ -282,7 +287,7 @@ } #elif str($singtabop.use_type) == "multiple": -#set $custom_func = str($singtabop.fulltable_custom_expr).strip() + #set $custom_func = str($singtabop.fulltable_custom_expr).strip() Data = { "tables": [ #for $i, $s in enumerate($singtabop.tables) @@ -424,10 +429,12 @@ <when value="fulltable"> <conditional name="general" > <param name="use" type="select" label="Operation" help="See Examples 5, 7, and 8 for usage" > - <option value="melt" >Melt</option> - <option value="pivot" >Pivot</option> - <option value="custom" >Custom</option> + <option value="transpose">Transpose</option> + <option value="melt">Melt</option> + <option value="pivot">Pivot</option> + <option value="custom">Custom</option> </param> + <when value="transpose" /> <when value="melt" > <param name="id_vars" type="text" value="" label="Variable IDs" help="Comma-delimited list of column names to use as identifiers" > @@ -474,7 +481,14 @@ <option value="custom">Custom</option> </param> <when value="replace" > - <param name="replace_value" type="text" label="Replacement value" help="This value is converted to numeric if possible, otherwise it is treated as a string" /> + <param name="replace_value" type="text" label="Replacement value" help="If you want to reuse the current value of each element as part of the replacement value, you can refer to it using the special @COPEN@{elem}@CCLOSE@ placeholder (see Example #9 in the Help section)."> + <sanitizer> + <valid> + <add value="{" /> + <add value="}" /> + </valid> + 
</sanitizer> + </param> </when> <when value="modify" > <param name="modify_op" type="select" label="Transformation function" help="Example: to transform values to their square root, select @COPEN@Square Root@CCLOSE@ here."> @@ -820,7 +834,22 @@ </expand> </test> <test expect_num_outputs="1"> - <!-- Test 22: Melt --> + <!-- Test 22: Transpose --> + <expand macro="test_inputs_single" > + <param name="mode" value="fulltable" /> + <conditional name="general" > + <param name="use" value="transpose" /> + </conditional> + </expand> + <param name="out_opts" value="ignore_nas,output_headers_col" /> + <output name="table" > + <assert_contents> + <has_n_columns n="5" /> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <!-- Test 23: Melt --> <expand macro="test_inputs_single" > <param name="mode" value="fulltable" /> <conditional name="general" > @@ -839,7 +868,7 @@ </output> </test> <test expect_num_outputs="1"> - <!-- Test 23: Pivot --> + <!-- Test 24: Pivot --> <expand macro="test_inputs_single" > <param name="mode" value="fulltable" /> <conditional name="general" > @@ -860,7 +889,7 @@ </test> <!-- Add Example Text Tests --> <test expect_num_outputs="1" > - <!-- Test 24: Ex 1 --> + <!-- Test 25: Ex 1 --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="examples.1.tsv" /> @@ -881,7 +910,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 25: Ex 2 --> + <!-- Test 26: Ex 2 --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="examples.2.tsv" /> @@ -906,7 +935,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 26: Ex 3_P1 --> + <!-- Test 27: Ex 3_P1 --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="examples.3p1.tsv" /> @@ -931,7 +960,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 27: Ex 3_P2 --> + <!-- Test 28: Ex 3_P2 --> <conditional 
name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="examples.3p2.tsv" /> @@ -953,7 +982,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 28: Ex 4 --> + <!-- Test 29: Ex 4 --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="examples.4.tsv" /> @@ -978,7 +1007,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 29: Ex 5 --> + <!-- Test 30: Ex 5 --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="examples.5.tsv" /> @@ -1000,7 +1029,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 30: Ex 6 --> + <!-- Test 31: Ex 6 --> <conditional name="singtabop" > <param name="use_type" value="multiple" /> <repeat name="tables" > @@ -1026,7 +1055,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 31: Ex 7 --> + <!-- Test 32: Ex 7 --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="examples.7.tsv" /> @@ -1049,7 +1078,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 32: Ex 8 --> + <!-- Test 33: Ex 8 --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="examples.8.tsv" /> @@ -1073,74 +1102,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 33: Subtracting table means #1 --> - <conditional name="singtabop" > - <param name="use_type" value="single" /> - <param name="input" value="examples.5.tsv" /> - <param name="col_row_names" value="has_col_names,has_row_names" /> - <conditional name="user" > - <param name="mode" value="fulltable" /> - <conditional name="general" > - <param name="use" value="custom" /> - <param name="fulltable_custom_expr" value="table - table.mean(1)/table.std(1)" /> - </conditional> - </conditional> - </conditional> - <output name="table" > - <assert_contents> - <has_n_columns n="8" /> - <!-- should give nonsense empty output --> - 
<has_line_matching expression="^\sc1\sc2\sc3\sg1\sg2\sg3\sg4$" /> - <has_line_matching expression="^g4\s+$" /> - </assert_contents> - </output> - </test> - <test expect_num_outputs="1" > - <!-- Test 34: Subtracting table means #2 --> - <conditional name="singtabop" > - <param name="use_type" value="single" /> - <param name="input" value="examples.5.tsv" /> - <param name="col_row_names" value="has_col_names,has_row_names" /> - <conditional name="user" > - <param name="mode" value="fulltable" /> - <conditional name="general" > - <param name="use" value="custom" /> - <param name="fulltable_custom_expr" value="table.sub(table.mean(1), 0).div(table.std(1),0)" /> - </conditional> - </conditional> - </conditional> - <output name="table" > - <assert_contents> - <has_n_columns n="4" /> - <has_line_matching expression="^g1\s+-1\.\d+\s+0\.\d+\s+1\.\d+\s*$" /> - <has_line_matching expression="^g4\s+1\.1\d+\s+-0\.5\d+\s+-0\.5\d+\s*$" /> - </assert_contents> - </output> - </test> - <test expect_num_outputs="1" > - <!-- Test 35: Subtracting table means #3 --> - <conditional name="singtabop" > - <param name="use_type" value="single" /> - <param name="input" value="examples.5.tsv" /> - <param name="col_row_names" value="has_col_names,has_row_names" /> - <conditional name="user" > - <param name="mode" value="fulltable" /> - <conditional name="general" > - <param name="use" value="custom" /> - <param name="fulltable_custom_expr" value="(table.T - table.mean(1)).T" /> - </conditional> - </conditional> - </conditional> - <output name="table" > - <assert_contents> - <has_n_columns n="4" /> - <has_line_matching expression="^g1\s+-10\.\d+\s+0\.\d+\s+10\.\d+\s*$" /> - <has_line_matching expression="^g4\s+47\.3\d+\s+-23\.6\d+\s+-23\.6\d+\s*$" /> - </assert_contents> - </output> - </test> - <test expect_num_outputs="1" > - <!-- Test 36: Negative range test #1 --> + <!-- Test 34: Negative range test #1 --> <expand macro="test_inputs_ranges" > <param name="select_cols_wanted" value="-2:2,1,-1" 
/> <param name="select_rows_wanted" value="1,3:-3,1" /> @@ -1154,7 +1116,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 37: Negative range test #2 --> + <!-- Test 35: Negative range test #2 --> <expand macro="test_inputs_ranges" > <param name="select_cols_wanted" value="-1,-3,3:-3,2" /> <param name="select_rows_wanted" value="3:-1" /> @@ -1169,7 +1131,7 @@ </output> </test> <test expect_num_outputs="1"> - <!-- Test 38: Skip first 3 lines --> + <!-- Test 36: Skip first 3 lines --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="skiplines.tsv" /> @@ -1192,7 +1154,7 @@ </output> </test> <test expect_num_outputs="1"> - <!-- Test 39: Skip first 3 lines --> + <!-- Test 37: Skip first 3 lines --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="skiplines.tsv" /> @@ -1216,7 +1178,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 40: Row Col custom op #1 --> + <!-- Test 38: Row Col replace with format spec #1 --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="examples.1.tsv" /> @@ -1224,8 +1186,8 @@ <conditional name="user" > <param name="mode" value="element" /> <conditional name="element" > - <param name="mode" value="custom" /> - <param name="custom_expr" value=""chr%.f" % elem" /> + <param name="mode" value="replace" /> + <param name="replace_value" value="chr{elem:.0f}" /> </conditional> <conditional name="elem_val" > <param name="element_op" value="rowcol" /> @@ -1244,7 +1206,7 @@ </output> </test> <test expect_num_outputs="1" > - <!-- Test 41: Row Col custom op #2 --> + <!-- Test 39: Row Col replace with format spec #2 --> <conditional name="singtabop" > <param name="use_type" value="single" /> <param name="input" value="examples.1.tsv" /> @@ -1252,8 +1214,8 @@ <conditional name="user" > <param name="mode" value="element" /> <conditional name="element" > - <param name="mode" 
value="custom" /> - <param name="custom_expr" value=""chr%.f" % elem" /> + <param name="mode" value="replace" /> + <param name="replace_value" value="chr{elem:.0f}" /> </conditional> <conditional name="elem_val" > <param name="element_op" value="rowcol" /> @@ -1271,13 +1233,78 @@ </assert_contents> </output> </test> + <test expect_num_outputs="1"> + <!-- Test 40: Test safety of Replacement value free text + Tries to escape/reenter config file quoting. + If the test fails, this shows that Python has performed + string concatenation upon importing the config file. --> + <conditional name="singtabop" > + <param name="use_type" value="single" /> + <param name="input" value="examples.1.tsv" /> + <param name="col_row_names" value="has_col_names,has_row_names" /> + <conditional name="user" > + <param name="mode" value="element" /> + <conditional name="elem_val"> + <param name="element_op" value="None" /> + </conditional> + <conditional name="element" > + <param name="mode" value="replace" /> + <param name="scale_op" value="mod" /> + <param name="replace_value" value="a'+'b" /> + </conditional> + </conditional> + </conditional> + <output name="table" > + <assert_contents> + <has_n_columns n="4" /> + <not_has_text text="g1	ab" /> + <not_has_text text="g2	ab" /> + <not_has_text text="g3	ab" /> + <not_has_text text="g4	ab" /> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1" expect_failure="true"> + <!-- Test 41: Test safety of custom expression free text + Tries to escape/reenter config file quoting. 
+ + This test is expected to fail if either + - a validator disallows the use of single quotes or + - the single quotes get sanitized (which is unlikely to result in a + valid expression); note that in this situation, it cannot be + guaranteed that the single quote sanitization is safe with + *every* input + + If the test succeeds unexpectedly, this shows that Python + has performed string concatenation upon importing the config file + resulting in each element of the test table being retained. + --> + <conditional name="singtabop" > + <param name="use_type" value="single" /> + <param name="input" value="examples.4.tsv" /> + <param name="col_row_names" value="has_col_names,has_row_names" /> + <conditional name="user" > + <param name="mode" value="element" /> + <conditional name="element" > + <param name="mode" value="custom" /> + <param name="custom_expr" value="el'+'em" /> + </conditional> + <conditional name="elem_val" > + <param name="element_op" value="All" /> + </conditional> + </conditional> + </conditional> + </test> </tests> <help><![CDATA[ This tool computes table expressions on the element, row, and column basis. It can sub-select, duplicate, as well as perform general and custom expressions on rows, columns or elements. -Only a single operation can be performed on the data. Multiple operations can be performed by -chaining successive runs of this tool. This is to provide a more transparent workflow for complex operations. +.. class:: infomark + + Only a single operation can be performed on the data. Multiple operations + can be performed by chaining successive runs of this tool. This is to + provide a more transparent workflow for complex operations. @@ -1582,6 +1609,14 @@ table1 / min(np.max(np.max(table2)), np.max(np.max(table3))) +.. 
class:: infomark + + Complex operations (like ones that would benefit from specifying nested + attributes) can often be broken into subsequent runs of the tool, in + which the first run generates an intermediate table representing the result + of the "inner" operation that the second run can then use as input to + perform the "outer" operation. + Also note that, currently `min()`, `max()` and `sum()` are the only built-in Python functions that can be used inside expressions. If you want to use additional functions, these have to be qualified functions from the `math`, @@ -1702,11 +1737,17 @@ * *Type of table operation* → **Manipulate selected table elements** - * *Operation to perform* → **Custom** + * *Operation to perform* → **Replace values** + + * *Replacement value* → :: + + chr{elem:.0f} - * *Custom Expression* → :: - - "chr%.f" % elem + Here, the placeholder ``{elem}`` lets us refer to each element's + current value, while the ``:.0f`` part is a format specifier that makes + sure numbers are printed without decimals (for a complete description of + the available syntax see the + `Python Format Specification Mini-Language <https://docs.python.org/3/library/string.html#formatspec>`_). * *Operate on elements* → **Specific Rows and/or Columns** * *List of columns to select* → "2"