Mercurial > repos > bgruening > sklearn_regression_metrics
changeset 3:8437a2320171 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 97c4f22cdcfa6cddeeffc7b102c418a7ff12a888
author | bgruening |
---|---|
date | Tue, 05 Jun 2018 06:47:21 -0400 |
parents | 09a0357ce462 |
children | 64c87ceb6766 |
files | main_macros.xml regression_metrics.xml |
diffstat | 2 files changed, 50 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/main_macros.xml Wed May 30 08:26:25 2018 -0400
+++ b/main_macros.xml Tue Jun 05 06:47:21 2018 -0400
@@ -2,12 +2,11 @@
   <token name="@VERSION@">0.9</token>
 
   <token name="@COLUMNS_FUNCTION@">
-def read_columns(f, c, return_df=False, **args):
+def read_columns(f, c=None, c_option='by_index_number', return_df=False, **args):
   data = pandas.read_csv(f, **args)
-  cols = c.split (',')
-  cols = map(int, cols)
-  cols = list(map(lambda x: x - 1, cols))
-  data = data.iloc[:,cols]
+  if c_option == 'by_index_number':
+    cols = list(map(lambda x: x - 1, c))
+    data = data.iloc[:,cols]
   y = data.values
   if return_df:
     return y, data
@@ -17,7 +16,6 @@
   </token>
 
 ## generate an instance for one of sklearn.feature_selection classes
-## must call "@COLUMNS_FUNCTION@"
   <token name="@FEATURE_SELECTOR_FUNCTION@">
 def feature_selector(inputs):
   selector = inputs["selected_algorithm"]
@@ -428,16 +426,37 @@
     <yield/>
   </xml>
 
-  <xml name="samples_tabular" token_multiple1="False" token_multiple2="False">
+  <xml name="samples_tabular" token_multiple1="false" token_multiple2="false">
     <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/>
-    <param name="header1" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
-    <param name="col1" multiple="@MULTIPLE1@" type="data_column" data_ref="infile1" label="Select target column(s):"/>
+    <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
+    <conditional name="column_selector_options_1">
+      <expand macro="samples_column_selector_options" multiple="@MULTIPLE1@"/>
+    </conditional>
     <param name="infile2" type="data" format="tabular" label="Dataset containing class labels:"/>
-    <param name="header2" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
-    <param name="col2" multiple="@MULTIPLE2@" type="data_column" data_ref="infile2" label="Select target column(s):"/>
+    <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
+    <conditional name="column_selector_options_2">
+      <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="@MULTIPLE2@" infile="infile2"/>
+    </conditional>
     <yield/>
   </xml>
 
+  <xml name="samples_column_selector_options" token_column_option="selected_column_selector_option" token_col_name="col1" token_multiple="False" token_infile="infile1">
+    <param name="@COLUMN_OPTION@" type="select" label="Choose how to select data by column:">
+      <option value="by_index_number" selected="true">Select columns by column index number(s)</option>
+      <!--
+      <option value="by_header_name">Select columns by column header name(s)</option>
+      <option value="all_but_by_index_number">All columns but by column index number(s)</option>
+      <option value="all_but_by_header_name">All columns but by column header name(s)</option>
+      -->
+      <option value="all_columns">All columns</option>
+    </param>
+    <when value="by_index_number">
+      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
+    </when>
+    <when value="all_columns">
+    </when>
+  </xml>
+
   <xml name="clf_inputs_extended" token_label1=" " token_label2=" " token_multiple="False">
     <conditional name="true_columns">
       <param name="selected_input1" type="select" label="Select the input type of true labels dataset:">
@@ -470,10 +489,14 @@
   <xml name="clf_inputs" token_label1="Dataset containing true labels (tabular):" token_label2="Dataset containing predicted values (tabular):" token_multiple1="False" token_multiple="False">
     <param name="infile1" type="data" format="tabular" label="@LABEL1@"/>
     <param name="header1" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
-    <param name="col1" multiple="@MULTIPLE1@" type="data_column" data_ref="infile1" label="Select the target column:"/>
+    <conditional name="column_selector_options_1">
+      <expand macro="samples_column_selector_options" multiple="@MULTIPLE1@"/>
+    </conditional>
     <param name="infile2" type="data" format="tabular" label="@LABEL2@"/>
     <param name="header2" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
-    <param name="col2" multiple="@MULTIPLE@" type="data_column" data_ref="infile2" label="Select target column(s):"/>
+    <conditional name="column_selector_options_2">
+      <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="@MULTIPLE@" infile="infile2"/>
+    </conditional>
   </xml>
 
   <xml name="multiple_input" token_name="input_files" token_max_num="10" token_format="txt" token_label="Sparse matrix file (.mtx, .txt)" token_help_text="Specify a sparse matrix file in .txt format.">
--- a/regression_metrics.xml Wed May 30 08:26:25 2018 -0400
+++ b/regression_metrics.xml Tue Jun 05 06:47:21 2018 -0400
@@ -27,18 +27,30 @@
 params = json.load(open(input_json_path, "r"))
 
 header='infer' if params["regression_metrics"]["header1"] else None
+column_option = params["regression_metrics"]["column_selector_options_1"]["selected_column_selector_option"]
+if column_option == "by_index_number":
+    c = params["regression_metrics"]["column_selector_options_1"]["col1"]
+else:
+    c = None
 y_t = read_columns(
     "$regression_metrics.infile1",
-    "$regression_metrics.col1",
+    c = c,
+    c_option = column_option,
     sep='\t',
     header=header,
     parse_dates=True
 )
 
 header='infer' if params["regression_metrics"]["header2"] else None
+column_option = params["regression_metrics"]["column_selector_options_2"]["selected_column_selector_option2"]
+if column_option == "by_index_number":
+    c = params["regression_metrics"]["column_selector_options_2"]["col2"]
+else:
+    c = None
 y_p = read_columns(
     "$regression_metrics.infile2",
-    "$regression_metrics.col2",
+    c = c,
+    c_option = column_option,
     sep='\t',
     header=header,
     parse_dates=True