Mercurial > repos > bgruening > sklearn_model_validation
diff main_macros.xml @ 1:02eadaaa4bf7 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 79fe42239dcf077b13f85cbcd6c6e30d7e1e4832
author | bgruening |
---|---|
date | Tue, 22 May 2018 19:33:14 -0400 |
parents | 333507faecab |
children | dd502cb0d567 |
line wrap: on
line diff
--- a/main_macros.xml	Sat Apr 28 18:10:26 2018 -0400
+++ b/main_macros.xml	Tue May 22 19:33:14 2018 -0400
@@ -2,12 +2,20 @@
   <token name="@VERSION@">0.9</token>
 
   <token name="@COLUMNS_FUNCTION@">
-def read_columns(f, c, **args):
+def read_columns(f, c, return_df=False, **args):
+  """Read the comma-separated, 1-based column numbers `c` from file `f`.
+  Extra keyword arguments are passed on to pandas.read_csv.
+  Returns the selected values as an array, or the tuple
+  (array, selected DataFrame) when return_df is true.
+  """
   data = pandas.read_csv(f, **args)
   cols = c.split (',')
   cols = map(int, cols)
   cols = list(map(lambda x: x - 1, cols))
-  y = data.iloc[:,cols].values
+  data = data.iloc[:,cols]
+  y = data.values
+  if return_df:
+    return y, data
   return y
   </token>
 
@@ -789,6 +797,130 @@
     </when>
     <yield/>
   </xml>
+  <!-- Feature selection: choose an algorithm in "selected_algorithm"; each <when>
+       holds that algorithm's parameters. SelectFromModel is the default. -->
+  <xml name="feature_selection_all">
+    <conditional name="feature_selection_algorithms">
+      <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
+        <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
+        <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
+        <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option>
+        <option value="SelectKBest">SelectKBest - Select features according to the k highest scores</option>
+        <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option>
+        <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>
+        <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>
+        <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
+        <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
+        <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>
+        <!--option value="chi2">Compute chi-squared stats between each non-negative feature and class</option-->
+        <!--option value="f_classif">Compute the ANOVA F-value for the provided sample</option-->
+        <!--option value="f_regression">Univariate linear regression tests</option-->
+        <!--option value="mutual_info_classif">Estimate mutual information for a discrete target variable</option-->
+        <!--option value="mutual_info_regression">Estimate mutual information for a continuous target variable</option-->
+      </param>
+      <when value="SelectFromModel">
+        <expand macro="feature_selection_estimator" />
+        <conditional name="extra_estimator">
+          <expand macro="feature_selection_extra_estimator" >
+            <option value="no_load">No, I will load a prefitted estimator</option>
+          </expand>
+          <expand macro="feature_selection_estimator_choices" >
+            <when value="no_load">
+              <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" />
+            </when>
+          </expand>
+        </conditional>
+        <section name="options" title="Other Options" expanded="True">
+          <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." />
+          <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />
+        </section>
+      </when>
+      <when value="GenericUnivariateSelect">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="True">
+          <param argument="mode" type="select" label="Feature selection mode">
+            <option value="percentile">percentile</option>
+            <option value="k_best">k_best</option>
+            <option value="fpr">fpr</option>
+            <option value="fdr">fdr</option>
+            <option value="fwe">fwe</option>
+          </param>
+          <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" />
+        </section>
+      </when>
+      <when value="SelectPercentile">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="True">
+          <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" />
+        </section>
+      </when>
+      <when value="SelectKBest">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="True">
+          <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." />
+        </section>
+      </when>
+      <when value="SelectFpr">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="True">
+          <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/>
+        </section>
+      </when>
+      <when value="SelectFdr">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="True">
+          <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
+        </section>
+      </when>
+      <when value="SelectFwe">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="True">
+          <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
+        </section>
+      </when>
+      <when value="RFE">
+        <expand macro="feature_selection_estimator" />
+        <conditional name="extra_estimator">
+          <expand macro="feature_selection_extra_estimator" />
+          <expand macro="feature_selection_estimator_choices" />
+        </conditional>
+        <section name="options" title="Other Options" expanded="True">
+          <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." />
+          <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
+          <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+        </section>
+      </when>
+      <when value="RFECV">
+        <expand macro="feature_selection_estimator" />
+        <conditional name="extra_estimator">
+          <expand macro="feature_selection_extra_estimator" />
+          <expand macro="feature_selection_estimator_choices" />
+        </conditional>
+        <section name="options" title="Other Options" expanded="True">
+          <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
+          <param argument="cv" type="integer" value="" optional="true" label="cv" help="Determines the cross-validation splitting strategy" />
+          <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A string (see model evaluation documentation) or a scorer callable object / function with signature scorer(estimator, X, y)."/>
+          <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+          <param argument="n_jobs" type="integer" value="1" label="n_jobs" help="Number of cores to run in parallel while fitting across folds. Defaults to 1 core."/>
+        </section>
+      </when>
+      <when value="VarianceThreshold">
+        <section name="options" title="Options" expanded="True">
+          <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
+        </section>
+      </when>
+      <!--when value="chi2">
+      </when>
+      <when value="f_classif">
+      </when>
+      <when value="f_regression">
+      </when>
+      <when value="mutual_info_classif">
+      </when>
+      <when value="mutual_info_regression">
+      </when-->
+    </conditional>
+  </xml>
   <xml name="feature_selection_score_function">
     <param argument="score_func" type="select" label="Select a score function">
       <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>