Mercurial > repos > bgruening > sklearn_model_validation
diff main_macros.xml @ 19:efbec977a47d draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author | bgruening |
---|---|
date | Fri, 09 Aug 2019 07:26:09 -0400 |
parents | 492d34a75de6 |
children | 5895fe0b8bde |
line wrap: on
line diff
--- a/main_macros.xml Tue Jul 09 19:39:58 2019 -0400 +++ b/main_macros.xml Fri Aug 09 07:26:09 2019 -0400 @@ -1,16 +1,12 @@ <macros> - <token name="@VERSION@">1.0.0.4</token> + <token name="@VERSION@">1.0.7.10</token> + + <token name="@ENSEMBLE_VERSION@">0.2.0</token> <xml name="python_requirements"> <requirements> <requirement type="package" version="3.6">python</requirement> - <requirement type="package" version="0.20.3">scikit-learn</requirement> - <requirement type="package" version="0.24.2">pandas</requirement> - <requirement type="package" version="0.80">xgboost</requirement> - <requirement type="package" version="0.9.13">asteval</requirement> - <requirement type="package" version="0.6">skrebate</requirement> - <requirement type="package" version="0.4.2">imbalanced-learn</requirement> - <requirement type="package" version="0.16.0">mlxtend</requirement> + <requirement type="package" version="0.7.10">Galaxy-ML</requirement> <yield/> </requirements> </xml> @@ -420,8 +416,7 @@ <xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2=""> <param name="infile1" type="data" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/> - <param name="infile2" type="data" format="@FORMAT2@" label="@LABEL2@" help="@HELP2@"/> - <param name="col2" multiple="@MULTIPLE@" type="data_column" data_ref="infile2" label="Select target column(s):"/> + <expand macro="input_tabular_target"/> </xml> <xml name="sl_mixed_input"> @@ -429,6 +424,8 @@ <param name="selected_input" type="select" label="Select input type:"> <option value="tabular" selected="true">tabular data</option> <option value="sparse">sparse matrix</option> + <option value="seq_fasta">sequences in a fasta file</option> + <option value="refseq_and_interval">reference genome and intervals</option> </param> <when value="tabular"> <expand macro="samples_tabular" 
multiple1="true" multiple2="false"/> @@ -436,6 +433,36 @@ <when value="sparse"> <expand macro="sparse_target"/> </when> + <when value="seq_fasta"> + <expand macro="inputs_seq_fasta"/> + </when> + <when value="refseq_and_interval"> + <expand macro="inputs_refseq_and_interval"/> + </when> + </conditional> + </xml> + + <xml name="input_tabular_target"> + <param name="infile2" type="data" format="tabular" label="Dataset containing class labels or target values:"/> + <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" /> + <conditional name="column_selector_options_2"> + <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="false" infile="infile2"/> + </conditional> + </xml> + + <xml name="inputs_seq_fasta"> + <param name="fasta_path" type="data" format="fasta" label="Dataset containing fasta genomic/protein sequences" help="Sequences will be one-hot encoded to arrays."/> + <expand macro="input_tabular_target"/> + </xml> + + <xml name="inputs_refseq_and_interval"> + <param name="ref_genome_file" type="data" format="fasta" label="Dataset containing reference genomic sequence"/> + <param name="interval_file" type="data" format="interval" label="Dataset containing sequence intervals for training" help="interval. Sequences will be retrieved from the reference genome and one-hot encoded to training arrays."/> + <param name="target_file" type="data" format="bed" label="Dataset containing positions and features for target values." help="bed. 
The file will be compressed with `bgzip` and then indexed using `tabix`."/> + <param name="infile2" type="data" format="tabular" label="Dataset containing the feature list for prediction"/> + <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" /> + <conditional name="column_selector_options_2"> + <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="true" infile="infile2"/> </conditional> </xml> @@ -705,7 +732,6 @@ <param name="selected_pre_processor" type="select" label="Select a preprocessor:"> <option value="StandardScaler" selected="true">Standard Scaler (Standardizes features by removing the mean and scaling to unit variance)</option> <option value="Binarizer">Binarizer (Binarizes data)</option> - <option value="Imputer">Imputer (Completes missing values)</option> <option value="MaxAbsScaler">Max Abs Scaler (Scales features by their maximum absolute value)</option> <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option> <yield/> @@ -731,25 +757,6 @@ help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. 
"/> </section> </when> - <when value="Imputer"> - <section name="options" title="Advanced Options" expanded="False"> - <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" - label="Use a copy of data for precomputing imputation" help=" "/> - <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" "> - <option value="mean" selected="true">Replace missing values using the mean along the axis</option> - <option value="median">Replace missing values using the median along the axis</option> - <option value="most_frequent">Replace missing using the most frequent value along the axis</option> - </param> - <param argument="missing_values" type="text" optional="true" value="NaN" - label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/> - <!--param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0" - label="Impute along axis = 1" help="If fasle, axis = 0 is selected for imputation. "/> --> - <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" "> - <option value="0" selected="true">Impute along columns</option> - <option value="1">Impute along rows</option> - </param--> - </section> - </when> <when value="StandardScaler"> <section name="options" title="Advanced Options" expanded="False"> <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" @@ -788,7 +795,7 @@ </when> <when value="MinMaxScaler"> <section name="options" title="Advanced Options" expanded="False"> - <!--feature_range--> + <param argument="feature_range" type="text" value="(0, 1)" optional="true" help="Desired range of transformed data. None or tuple (min, max). 
None equals to (0, 1)"/> <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing normalization" help=" "/> </section> @@ -922,9 +929,9 @@ </conditional> </xml> - <xml name="cv_reduced"> + <xml name="cv_reduced" token_label="Select the cv splitter"> <conditional name="cv_selector"> - <param name="selected_cv" type="select" label="Select the cv splitter:"> + <param name="selected_cv" type="select" label="@LABEL@"> <expand macro="cv_splitter"/> </param> <expand macro="cv_splitter_options"/> @@ -932,7 +939,7 @@ </xml> <xml name="cv_n_splits" token_value="3" token_help="Number of folds. Must be at least 2."> - <param argument="n_splits" type="integer" value="@VALUE@" min="2" label="n_splits" help="@HELP@"/> + <param argument="n_splits" type="integer" value="@VALUE@" min="1" label="n_splits" help="@HELP@"/> </xml> <xml name="cv_shuffle"> @@ -953,6 +960,40 @@ </section> </xml> + <xml name="train_test_split_params"> + <conditional name="split_algos"> + <param name="shuffle" type="select" label="Select the splitting method"> + <option value="None">No shuffle</option> + <option value="simple" selected="true">ShuffleSplit</option> + <option value="stratified">StratifiedShuffleSplit -- target values serve as class labels</option> + <option value="group">GroupShuffleSplit or split by group names</option> + </param> + <when value="None"> + <expand macro="train_test_split_test_size"/> + </when> + <when value="simple"> + <expand macro="train_test_split_test_size"/> + <expand macro="random_state"/> + </when> + <when value="stratified"> + <expand macro="train_test_split_test_size"/> + <expand macro="random_state"/> + </when> + <when value="group"> + <expand macro="train_test_split_test_size" optional="true"/> + <expand macro="random_state"/> + <param argument="group_names" type="text" value="" optional="true" label="Type in group names instead" + help="For example: chr6, chr7. 
This parameter is optional. If used, it will override the holdout size and random seed."/> + <yield/> + </when> + </conditional> + <!--param argument="train_size" type="float" optional="True" value="" label="Train size:"/>--> + </xml> + + <xml name="train_test_split_test_size" token_optional="false"> + <param name="test_size" type="float" value="0.2" optional="@OPTIONAL@" label="Holdout size" help="Less than 1, for proportion; greater than 1 (integer), for number of samples."/> + </xml> + <xml name="feature_selection_algorithms"> <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option> <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> @@ -1167,7 +1208,7 @@ <xml name="model_validation_common_options"> <expand macro="cv"/> - <!-- expand macro="verbose"/> --> + <expand macro="verbose"/> <yield/> </xml> @@ -1286,14 +1327,13 @@ <xml name="search_cv_estimator"> <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/> <section name="search_params_builder" title="Search parameters Builder" expanded="true"> - <param name="infile_params" type="data" format="tabular" label="Choose the dataset containing parameter names"/> + <param name="infile_params" type="data" format="tabular" optional="true" label="Choose the dataset containing parameter names" help="This dataset could be the output of `get_params` in the `Estimator Attributes` tool."/> <repeat name="param_set" min="1" max="30" title="Parameter settings for search:"> - <param name="sp_name" type="select" label="Choose a parameter name (with current value)"> + <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)"> <options from_dataset="infile_params" startswith="@"> <column name="name" index="2"/> <column name="value" index="1"/> <filter type="unique_value" 
name="unique_param" column="1"/> - <filter type="sort_by" name="sorted_param" column="2"/> </options> </param> <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples"> @@ -1310,6 +1350,30 @@ </section> </xml> + <xml name="estimator_and_hyperparameter"> + <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/> + <section name="hyperparams_swapping" title="Hyperparameter Swapping" expanded="false"> + <param name="infile_params" type="data" format="tabular" optional="true" label="Choose the dataset containing hyperparameters for the pipeline/estimator above" help="This dataset could be the output of `get_params` in the `Estimator Attributes` tool."/> + <repeat name="param_set" min="1" max="30" title="New hyperparameter setting"> + <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)"> + <options from_dataset="infile_params" startswith="@"> + <column name="name" index="2"/> + <column name="value" index="1"/> + <filter type="unique_value" name="unique_param" column="1"/> + </options> + </param> + <param name="sp_value" type="text" value="" optional="true" label="New value" help="Supports int, float, boolean, single quoted string, and selected object constructor. 
Similar to the `Parameter settings for search` section in `searchcv` tool except that only single value is expected here."> + <sanitizer> + <valid initial="default"> + <add value="'"/> + <add value="&quot;"/> + </valid> + </sanitizer> + </param> + </repeat> + </section> + </xml> + <xml name="search_cv_options"> <expand macro="scoring_selection"/> <expand macro="model_validation_common_options"/> @@ -1750,6 +1814,40 @@ </conditional> </xml> + <xml name="stacking_voting_weights"> + <section name="options" title="Advanced Options" expanded="false"> + <param argument="weights" type="text" value="[]" optional="true" help="Sequence of weights (float or int). Uses uniform weights if None (`[]`)."> + <sanitizer> + <valid initial="default"> + <add value="["/> + <add value="]"/> + </valid> + </sanitizer> + </param> + <yield/> + </section> + </xml> + + <xml name="preprocessors_sequence_encoders"> + <conditional name="encoder_selection"> + <param name="encoder_type" type="select" label="Choose the sequence encoder class"> + <option value="GenomeOneHotEncoder">GenomeOneHotEncoder</option> + <option value="ProteinOneHotEncoder">ProteinOneHotEncoder</option> + </param> + <when value="GenomeOneHotEncoder"> + <expand macro="preprocessors_sequence_encoder_arguments"/> + </when> + <when value="ProteinOneHotEncoder"> + <expand macro="preprocessors_sequence_encoder_arguments"/> + </when> + </conditional> + </xml> + + <xml name="preprocessors_sequence_encoder_arguments"> + <param argument="seq_length" type="integer" value="" min="0" optional="true" help="Integer. 
Sequence length"/> + <param argument="padding" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="Whether to pad or truncate sequence to meet the sequence length."/> + </xml> + <!-- Outputs --> <xml name="output"> @@ -1847,7 +1945,7 @@ </citation> </xml> - <xml name="imblearn_citation"> + <xml name="imblearn_citation"> <citation type="bibtex"> @article{JMLR:v18:16-365, author = {Guillaume Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas}, @@ -1862,4 +1960,19 @@ </citation> </xml> + <xml name="selene_citation"> + <citation type="bibtex"> + @article{chen2019selene, + title={Selene: a PyTorch-based deep learning library for sequence data}, + author={Chen, Kathleen M and Cofer, Evan M and Zhou, Jian and Troyanskaya, Olga G}, + journal={Nature methods}, + volume={16}, + number={4}, + pages={315}, + year={2019}, + publisher={Nature Publishing Group} + } + </citation> + </xml> + </macros>