Mercurial > repos > bgruening > sklearn_searchcv
comparison search_model_validation.xml @ 10:82b6104d4682 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author | bgruening |
---|---|
date | Fri, 09 Aug 2019 07:12:16 -0400 |
parents | 1c4a241bef5c |
children | 68753d45815f |
comparison
equal
deleted
inserted
replaced
9:21d3e08b1a48 | 10:82b6104d4682 |
---|---|
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="python_requirements"/> | 6 <expand macro="python_requirements"/> |
7 <expand macro="macro_stdio"/> | 7 <expand macro="macro_stdio"/> |
8 <version_command>echo "@VERSION@"</version_command> | 8 <version_command>echo "@VERSION@"</version_command> |
9 <command> | 9 <command detect_errors="aggressive"> |
10 <![CDATA[ | 10 <![CDATA[ |
11 export HDF5_USE_FILE_LOCKING='FALSE'; | |
12 #if $input_options.selected_input == 'refseq_and_interval' | |
13 bgzip -c '$input_options.target_file' > '${target_file.element_identifier}.gz' && | |
14 tabix -p bed '${target_file.element_identifier}.gz' && | |
15 #end if | |
11 python '$__tool_directory__/search_model_validation.py' | 16 python '$__tool_directory__/search_model_validation.py' |
12 --inputs '$inputs' | 17 --inputs '$inputs' |
13 --estimator '$search_schemes.infile_estimator' | 18 --estimator '$search_schemes.infile_estimator' |
19 #if $input_options.selected_input == 'seq_fasta' | |
20 --fasta_path '$input_options.fasta_path' | |
21 #elif $input_options.selected_input == 'refseq_and_interval' | |
22 --ref_seq '$input_options.ref_genome_file' | |
23 --interval '$input_options.interval_file' | |
24 --targets "`pwd`/${target_file.element_identifier}.gz" | |
25 #else | |
14 --infile1 '$input_options.infile1' | 26 --infile1 '$input_options.infile1' |
27 #end if | |
15 --infile2 '$input_options.infile2' | 28 --infile2 '$input_options.infile2' |
16 --outfile_result '$outfile_result' | 29 --outfile_result "`pwd`/tmp_outfile_result" |
17 #if $save | 30 #if $save != 'nope' |
18 --outfile_object '$outfile_object' | 31 --outfile_object '$outfile_object' |
19 #end if | 32 #end if |
33 #if $save == 'save_weights' | |
34 --outfile_weights '$outfile_weights' | |
35 #end if | |
20 #if $search_schemes.options.cv_selector.selected_cv in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut'] | 36 #if $search_schemes.options.cv_selector.selected_cv in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut'] |
21 --groups '$inputs,$search_schemes.options.cv_selector.groups_selector.infile_g' | 37 --groups '$search_schemes.options.cv_selector.groups_selector.infile_g' |
22 #end if | 38 #end if |
39 >'$outfile_result' && cp tmp_outfile_result '$outfile_result'; | |
23 | 40 |
24 ]]> | 41 ]]> |
25 </command> | 42 </command> |
26 <configfiles> | 43 <configfiles> |
27 <inputs name="inputs" /> | 44 <inputs name="inputs" /> |
45 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/> | 62 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/> |
46 <expand macro="random_state"/> | 63 <expand macro="random_state"/> |
47 </section> | 64 </section> |
48 </when> | 65 </when> |
49 </conditional> | 66 </conditional> |
50 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Save the searchCV object"/> | |
51 <expand macro="sl_mixed_input"/> | 67 <expand macro="sl_mixed_input"/> |
52 <conditional name="train_test_split"> | 68 <conditional name="outer_split"> |
53 <param name="do_split" type="select" label="Whether to hold a portion of samples for test exclusively?" help="train_test_split"> | 69 <param name="split_mode" type="select" label="Whether to hold a portion of samples for test exclusively?" help="Nested CV or train_test_split"> |
54 <option value="no">Nope</option> | 70 <option value="no" selected="true">Nope</option> |
55 <option value="yes">Yes - I do</option> | 71 <option value="train_test_split">Yes - do a single train test split</option> |
72 <option value="nested_cv">Yes - do nested CV</option> | |
56 </param> | 73 </param> |
57 <when value='no'/> | 74 <when value='no'/> |
58 <when value='yes'> | 75 <when value='train_test_split'> |
59 <param argument="test_size" type="float" optional="True" value="0.25" label="Test size:"/> | 76 <param argument="test_size" type="float" optional="True" value="0.25" label="Test size:"/> |
60 <param argument="train_size" type="float" optional="True" value="" label="Train size:"/> | 77 <!--param argument="train_size" type="float" optional="True" value="" label="Train size:"/>--> |
61 <param argument="random_state" type="integer" optional="True" value="" label="Random seed number:"/> | 78 <param argument="random_state" type="integer" optional="True" value="" label="Random seed number:"/> |
62 <param argument="shuffle" type="select"> | 79 <param argument="shuffle" type="select"> |
63 <option value="None">None - No shuffle</option> | 80 <option value="None">None - No shuffle</option> |
64 <option value="simple">Shuffle -- for regression problems</option> | 81 <option value="simple">Shuffle -- for regression problems</option> |
65 <option value="stratified">StratifiedShuffle -- will use the target values as class labels</option> | 82 <option value="stratified">StratifiedShuffle -- will use the target values as class labels</option> |
66 <option value="group">GroupShuffle -- make sure group CV option is chosen</option> | 83 <option value="group">GroupShuffle -- make sure group CV option is chosen</option> |
67 </param> | 84 </param> |
68 </when> | 85 </when> |
86 <when value="nested_cv"> | |
87 <expand macro="cv_reduced" label="Select the outer cv splitter"/> | |
88 </when> | |
69 </conditional> | 89 </conditional> |
90 <param name="save" type="select" label="Save best estimator?" help="For security reasons, deep learning models are saved as two datasets: the model skeleton and the weights."> | |
91 <option value="nope" selected="true">Nope, save is unnecessary</option> | |
92 <option value="save_estimator">Fitted estimator (excluding deep learning)</option> | |
93 <option value="save_weights">Model skeleton and weights, for deep learning exclusively</option> | |
94 </param> | |
70 </inputs> | 95 </inputs> |
71 <outputs> | 96 <outputs> |
72 <data format="tabular" name="outfile_result"/> | 97 <data format="tabular" name="outfile_result"/> |
73 <data format="zip" name="outfile_object" label="${search_schemes.selected_search_scheme} on ${on_string}"> | 98 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}"> |
74 <filter>save</filter> | 99 <filter>save != 'nope'</filter> |
100 </data> | |
101 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}"> | |
102 <filter>save == 'save_weights'</filter> | |
75 </data> | 103 </data> |
76 </outputs> | 104 </outputs> |
77 <tests> | 105 <tests> |
78 <test> | 106 <test> |
79 <param name="selected_search_scheme" value="GridSearchCV"/> | 107 <param name="selected_search_scheme" value="GridSearchCV"/> |
225 <param name="header1" value="true" /> | 253 <param name="header1" value="true" /> |
226 <param name="selected_column_selector_option" value="all_columns"/> | 254 <param name="selected_column_selector_option" value="all_columns"/> |
227 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 255 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
228 <param name="header2" value="true" /> | 256 <param name="header2" value="true" /> |
229 <param name="selected_column_selector_option2" value="all_columns"/> | 257 <param name="selected_column_selector_option2" value="all_columns"/> |
258 <param name="save" value="save_estimator"/> | |
230 <output name="outfile_object" file="searchCV01" compare="sim_size" delta="10"/> | 259 <output name="outfile_object" file="searchCV01" compare="sim_size" delta="10"/> |
231 </test> | 260 </test> |
232 <test> | 261 <test> |
233 <param name="selected_search_scheme" value="GridSearchCV"/> | 262 <param name="selected_search_scheme" value="GridSearchCV"/> |
234 <param name="infile_estimator" value="pipeline06" ftype="zip"/> | 263 <param name="infile_estimator" value="pipeline06" ftype="zip"/> |
329 <param name="header1" value="true" /> | 358 <param name="header1" value="true" /> |
330 <param name="selected_column_selector_option" value="all_columns"/> | 359 <param name="selected_column_selector_option" value="all_columns"/> |
331 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 360 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
332 <param name="header2" value="true" /> | 361 <param name="header2" value="true" /> |
333 <param name="selected_column_selector_option2" value="all_columns"/> | 362 <param name="selected_column_selector_option2" value="all_columns"/> |
363 <param name="save" value="save_estimator"/> | |
334 <output name="outfile_object" file="searchCV02" compare="sim_size" delta="10"/> | 364 <output name="outfile_object" file="searchCV02" compare="sim_size" delta="10"/> |
335 </test> | 365 </test> |
336 <test> | 366 <test> |
337 <param name="selected_search_scheme" value="GridSearchCV"/> | 367 <param name="selected_search_scheme" value="GridSearchCV"/> |
338 <param name="infile_estimator" value="pipeline03" ftype="zip"/> | 368 <param name="infile_estimator" value="pipeline03" ftype="zip"/> |
507 <test> | 537 <test> |
508 <param name="selected_search_scheme" value="GridSearchCV"/> | 538 <param name="selected_search_scheme" value="GridSearchCV"/> |
509 <param name="infile_estimator" value="pipeline09" ftype="zip"/> | 539 <param name="infile_estimator" value="pipeline09" ftype="zip"/> |
510 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/> | 540 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/> |
511 <repeat name="param_set"> | 541 <repeat name="param_set"> |
512 <param name="sp_list" value=": [None,'sk_prep_all', 8, 14, skrebate_ReliefF(n_features_to_select=12)]"/> | 542 <param name="sp_list" value=": [None,'sk_prep_all', 7, 13, skrebate_ReliefF(n_features_to_select=12)]"/> |
513 <param name="sp_name" value="relieff"/> | 543 <param name="sp_name" value="relieff"/> |
514 </repeat> | 544 </repeat> |
515 <repeat name="param_set"> | 545 <repeat name="param_set"> |
516 <param name="sp_list" value="[10]"/> | 546 <param name="sp_list" value="[10]"/> |
517 <param name="sp_name" value="randomforestregressor__random_state"/> | 547 <param name="sp_name" value="randomforestregressor__random_state"/> |
591 <has_n_columns n="13"/> | 621 <has_n_columns n="13"/> |
592 <has_text text="0.8149439619875293"/> | 622 <has_text text="0.8149439619875293"/> |
593 </assert_contents> | 623 </assert_contents> |
594 </output> | 624 </output> |
595 </test> | 625 </test> |
596 <!--test> | 626 <test> |
597 <conditional name="search_schemes"> | 627 <conditional name="search_schemes"> |
598 <param name="selected_search_scheme" value="GridSearchCV"/> | 628 <param name="selected_search_scheme" value="GridSearchCV"/> |
599 <param name="infile_estimator" value="pipeline05" ftype="zip"/> | 629 <param name="infile_estimator" value="pipeline05" ftype="zip"/> |
600 <section name="search_params_builder"> | 630 <section name="search_params_builder"> |
601 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/> | 631 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/> |
609 <param name="header1" value="true" /> | 639 <param name="header1" value="true" /> |
610 <param name="selected_column_selector_option" value="all_columns"/> | 640 <param name="selected_column_selector_option" value="all_columns"/> |
611 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 641 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
612 <param name="header2" value="true" /> | 642 <param name="header2" value="true" /> |
613 <param name="selected_column_selector_option2" value="all_columns"/> | 643 <param name="selected_column_selector_option2" value="all_columns"/> |
644 <conditional name="outer_split"> | |
645 <param name="split_mode" value="train_test_split"/> | |
646 <param name="shuffle" value="simple"/> | |
647 <param name="random_state" value="123"/> | |
648 </conditional> | |
614 <output name="outfile_result"> | 649 <output name="outfile_result"> |
615 <assert_contents> | 650 <assert_contents> |
616 <has_n_columns n="1"/> | 651 <has_n_columns n="1"/> |
617 <has_text text="0.7986842219788204" /> | 652 <has_text text="0.8124083594523798"/> |
618 </assert_contents> | 653 </assert_contents> |
619 </output> | 654 </output> |
620 </test--> | 655 </test> |
656 <test> | |
657 <conditional name="search_schemes"> | |
658 <param name="selected_search_scheme" value="GridSearchCV"/> | |
659 <param name="infile_estimator" value="pipeline05" ftype="zip"/> | |
660 <section name="search_params_builder"> | |
661 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/> | |
662 <repeat name="param_set"> | |
663 <param name="sp_list" value="[10, 50, 100, 300]"/> | |
664 <param name="sp_name" value="randomforestregressor__n_estimators"/> | |
665 </repeat> | |
666 </section> | |
667 </conditional> | |
668 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
669 <param name="header1" value="true" /> | |
670 <param name="selected_column_selector_option" value="all_columns"/> | |
671 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
672 <param name="header2" value="true" /> | |
673 <param name="selected_column_selector_option2" value="all_columns"/> | |
674 <conditional name="outer_split"> | |
675 <param name="split_mode" value="nested_cv"/> | |
676 <conditional name="cv_selector"> | |
677 <param name='selected_cv' value="KFold"/> | |
678 <param name="n_splits" value="3"/> | |
679 <param name="shuffle" value="true" /> | |
680 <param name="random_state" value="123"/> | |
681 </conditional> | |
682 </conditional> | |
683 <output name="outfile_result"> | |
684 <assert_contents> | |
685 <has_n_columns n="4"/> | |
686 <has_text text="0.8044418936007722" /> | |
687 </assert_contents> | |
688 </output> | |
689 </test> | |
621 </tests> | 690 </tests> |
622 <help> | 691 <help> |
623 <![CDATA[ | 692 <![CDATA[ |
624 **What it does** | 693 **What it does** |
625 Searches for the optimal parameter settings of an estimator or pipeline using either an exhaustive grid search or a randomized search, both evaluated by cross-validation. | 694 Searches for the optimal parameter settings of an estimator or pipeline using either an exhaustive grid search or a randomized search, both evaluated by cross-validation. |
659 | 728 |
660 **Hot number/keyword for preprocessors**:: | 729 **Hot number/keyword for preprocessors**:: |
661 | 730 |
662 0 sklearn_preprocessing.StandardScaler(copy=True, with_mean=True, with_std=True) | 731 0 sklearn_preprocessing.StandardScaler(copy=True, with_mean=True, with_std=True) |
663 1 sklearn_preprocessing.Binarizer(copy=True, threshold=0.0) | 732 1 sklearn_preprocessing.Binarizer(copy=True, threshold=0.0) |
664 2 sklearn_preprocessing.Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0) | 733 2 sklearn_preprocessing.MaxAbsScaler(copy=True) |
665 3 sklearn_preprocessing.MaxAbsScaler(copy=True) | 734 3 sklearn_preprocessing.Normalizer(copy=True, norm='l2') |
666 4 sklearn_preprocessing.Normalizer(copy=True, norm='l2') | 735 4 sklearn_preprocessing.MinMaxScaler(copy=True, feature_range=(0, 1)) |
667 5 sklearn_preprocessing.MinMaxScaler(copy=True, feature_range=(0, 1)) | 736 5 sklearn_preprocessing.PolynomialFeatures(degree=2, include_bias=True, interaction_only=False) |
668 6 sklearn_preprocessing.PolynomialFeatures(degree=2, include_bias=True, interaction_only=False) | 737 6 sklearn_preprocessing.RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True, with_scaling=True) |
669 7 sklearn_preprocessing.RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True, with_scaling=True) | 738 7 sklearn_feature_selection.SelectKBest(k=10, score_func=<function f_classif at 0x113806d90>) |
670 8 sklearn_feature_selection.SelectKBest(k=10, score_func=<function f_classif at 0x113806d90>) | 739 8 sklearn_feature_selection.GenericUnivariateSelect(mode='percentile', param=1e-05, score_func=<function f_classif at 0x113806d90>) |
671 9 sklearn_feature_selection.GenericUnivariateSelect(mode='percentile', param=1e-05, score_func=<function f_classif at 0x113806d90>) | 740 9 sklearn_feature_selection.SelectPercentile(percentile=10, score_func=<function f_classif at 0x113806d90>) |
672 10 sklearn_feature_selection.SelectPercentile(percentile=10, score_func=<function f_classif at 0x113806d90>) | 741 10 sklearn_feature_selection.SelectFpr(alpha=0.05, score_func=<function f_classif at 0x113806d90>) |
673 11 sklearn_feature_selection.SelectFpr(alpha=0.05, score_func=<function f_classif at 0x113806d90>) | 742 11 sklearn_feature_selection.SelectFdr(alpha=0.05, score_func=<function f_classif at 0x113806d90>) |
674 12 sklearn_feature_selection.SelectFdr(alpha=0.05, score_func=<function f_classif at 0x113806d90>) | 743 12 sklearn_feature_selection.SelectFwe(alpha=0.05, score_func=<function f_classif at 0x113806d90>) |
675 13 sklearn_feature_selection.SelectFwe(alpha=0.05, score_func=<function f_classif at 0x113806d90>) | 744 13 sklearn_feature_selection.VarianceThreshold(threshold=0.0) |
676 14 sklearn_feature_selection.VarianceThreshold(threshold=0.0) | 745 14 sklearn_decomposition.FactorAnalysis(copy=True, iterated_power=3, max_iter=1000, n_components=None, |
677 15 sklearn_decomposition.FactorAnalysis(copy=True, iterated_power=3, max_iter=1000, n_components=None, | |
678 noise_variance_init=None, random_state=0, svd_method='randomized', tol=0.01) | 746 noise_variance_init=None, random_state=0, svd_method='randomized', tol=0.01) |
679 16 sklearn_decomposition.FastICA(algorithm='parallel', fun='logcosh', fun_args=None, | 747 15 sklearn_decomposition.FastICA(algorithm='parallel', fun='logcosh', fun_args=None, |
680 max_iter=200, n_components=None, random_state=0, tol=0.0001, w_init=None, whiten=True) | 748 max_iter=200, n_components=None, random_state=0, tol=0.0001, w_init=None, whiten=True) |
681 17 sklearn_decomposition.IncrementalPCA(batch_size=None, copy=True, n_components=None, whiten=False) | 749 16 sklearn_decomposition.IncrementalPCA(batch_size=None, copy=True, n_components=None, whiten=False) |
682 18 sklearn_decomposition.KernelPCA(alpha=1.0, coef0=1, copy_X=True, degree=3, eigen_solver='auto', | 750 17 sklearn_decomposition.KernelPCA(alpha=1.0, coef0=1, copy_X=True, degree=3, eigen_solver='auto', |
683 fit_inverse_transform=False, gamma=None, kernel='linear', kernel_params=None, max_iter=None, | 751 fit_inverse_transform=False, gamma=None, kernel='linear', kernel_params=None, max_iter=None, |
684 n_components=None, random_state=0, remove_zero_eig=False, tol=0) | 752 n_components=None, random_state=0, remove_zero_eig=False, tol=0) |
685 19 sklearn_decomposition.LatentDirichletAllocation(batch_size=128, doc_topic_prior=None, evaluate_every=-1, learning_decay=0.7, | 753 18 sklearn_decomposition.LatentDirichletAllocation(batch_size=128, doc_topic_prior=None, evaluate_every=-1, learning_decay=0.7, |
686 learning_method=None, learning_offset=10.0, max_doc_update_iter=100, max_iter=10, mean_change_tol=0.001, n_components=10, | 754 learning_method=None, learning_offset=10.0, max_doc_update_iter=100, max_iter=10, mean_change_tol=0.001, n_components=10, |
687 n_topics=None, perp_tol=0.1, random_state=0, topic_word_prior=None, total_samples=1000000.0, verbose=0) | 755 n_topics=None, perp_tol=0.1, random_state=0, topic_word_prior=None, total_samples=1000000.0, verbose=0) |
688 20 sklearn_decomposition.MiniBatchDictionaryLearning(alpha=1, batch_size=3, dict_init=None, fit_algorithm='lars', | 756 19 sklearn_decomposition.MiniBatchDictionaryLearning(alpha=1, batch_size=3, dict_init=None, fit_algorithm='lars', |
689 n_components=None, n_iter=1000, random_state=0, shuffle=True, split_sign=False, transform_algorithm='omp', | 757 n_components=None, n_iter=1000, random_state=0, shuffle=True, split_sign=False, transform_algorithm='omp', |
690 transform_alpha=None, transform_n_nonzero_coefs=None, verbose=False) | 758 transform_alpha=None, transform_n_nonzero_coefs=None, verbose=False) |
691 21 sklearn_decomposition.MiniBatchSparsePCA(alpha=1, batch_size=3, callback=None, method='lars', n_components=None, | 759 20 sklearn_decomposition.MiniBatchSparsePCA(alpha=1, batch_size=3, callback=None, method='lars', n_components=None, |
692 n_iter=100, random_state=0, ridge_alpha=0.01, shuffle=True, verbose=False) | 760 n_iter=100, random_state=0, ridge_alpha=0.01, shuffle=True, verbose=False) |
693 22 sklearn_decomposition.NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200, | 761 21 sklearn_decomposition.NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200, |
694 n_components=None, random_state=0, shuffle=False, solver='cd', tol=0.0001, verbose=0) | 762 n_components=None, random_state=0, shuffle=False, solver='cd', tol=0.0001, verbose=0) |
695 23 sklearn_decomposition.PCA(copy=True, iterated_power='auto', n_components=None, random_state=0, svd_solver='auto', tol=0.0, whiten=False) | 763 22 sklearn_decomposition.PCA(copy=True, iterated_power='auto', n_components=None, random_state=0, svd_solver='auto', tol=0.0, whiten=False) |
696 24 sklearn_decomposition.SparsePCA(U_init=None, V_init=None, alpha=1, max_iter=1000, method='lars', | 764 23 sklearn_decomposition.SparsePCA(U_init=None, V_init=None, alpha=1, max_iter=1000, method='lars', |
697 n_components=None, random_state=0, ridge_alpha=0.01, tol=1e-08, verbose=False) | 765 n_components=None, random_state=0, ridge_alpha=0.01, tol=1e-08, verbose=False) |
698 25 sklearn_decomposition.TruncatedSVD(algorithm='randomized', n_components=2, n_iter=5, random_state=0, tol=0.0) | 766 24 sklearn_decomposition.TruncatedSVD(algorithm='randomized', n_components=2, n_iter=5, random_state=0, tol=0.0) |
699 26 sklearn_kernel_approximation.Nystroem(coef0=None, degree=None, gamma=None, kernel='rbf', | 767 25 sklearn_kernel_approximation.Nystroem(coef0=None, degree=None, gamma=None, kernel='rbf', |
700 kernel_params=None, n_components=100, random_state=0) | 768 kernel_params=None, n_components=100, random_state=0) |
701 27 sklearn_kernel_approximation.RBFSampler(gamma=1.0, n_components=100, random_state=0) | 769 26 sklearn_kernel_approximation.RBFSampler(gamma=1.0, n_components=100, random_state=0) |
702 28 sklearn_kernel_approximation.AdditiveChi2Sampler(sample_interval=None, sample_steps=2) | 770 27 sklearn_kernel_approximation.AdditiveChi2Sampler(sample_interval=None, sample_steps=2) |
703 29 sklearn_kernel_approximation.SkewedChi2Sampler(n_components=100, random_state=0, skewedness=1.0) | 771 28 sklearn_kernel_approximation.SkewedChi2Sampler(n_components=100, random_state=0, skewedness=1.0) |
704 30 sklearn_cluster.FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto', connectivity=None, | 772 29 sklearn_cluster.FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto', connectivity=None, |
705 linkage='ward', memory=None, n_clusters=2, pooling_func=<function mean at 0x113078ae8>) | 773 linkage='ward', memory=None, n_clusters=2, pooling_func=<function mean at 0x113078ae8>) |
706 31 skrebate_ReliefF(discrete_threshold=10, n_features_to_select=10, n_neighbors=100, verbose=False) | 774 30 skrebate_ReliefF(discrete_threshold=10, n_features_to_select=10, n_neighbors=100, verbose=False) |
707 32 skrebate_SURF(discrete_threshold=10, n_features_to_select=10, verbose=False) | 775 31 skrebate_SURF(discrete_threshold=10, n_features_to_select=10, verbose=False) |
708 33 skrebate_SURFstar(discrete_threshold=10, n_features_to_select=10, verbose=False) | 776 32 skrebate_SURFstar(discrete_threshold=10, n_features_to_select=10, verbose=False) |
709 34 skrebate_MultiSURF(discrete_threshold=10, n_features_to_select=10, verbose=False) | 777 33 skrebate_MultiSURF(discrete_threshold=10, n_features_to_select=10, verbose=False) |
710 35 skrebate_MultiSURFstar(discrete_threshold=10, n_features_to_select=10, verbose=False) | 778 34 skrebate_MultiSURFstar(discrete_threshold=10, n_features_to_select=10, verbose=False) |
711 'sk_prep_all': All sklearn preprocessing estimators, i.e., 0-7 | 779 'sk_prep_all': All sklearn preprocessing estimators, i.e., 0-6 |
712 'fs_all': All feature_selection estimators, i.e., 8-14 | 780 'fs_all': All feature_selection estimators, i.e., 7-13 |
713 'decomp_all': All decomposition estimators, i.e., 15-25 | 781 'decomp_all': All decomposition estimators, i.e., 14-24 |
714 'k_appr_all': All kernel_approximation estimators, i.e., 26-29 | 782 'k_appr_all': All kernel_approximation estimators, i.e., 25-28 |
715 'reb_all': All skrebate estimators, i.e., 31-35 | 783 'reb_all': All skrebate estimators, i.e., 30-34 |
716 'all_0': All except the imbalanced-learn samplers, i.e., 0-35 | 784 'all_0': All except the imbalanced-learn samplers, i.e., 0-34 |
717 'imb_all': All imbalanced-learn sampling methods, i.e., 36-54. | 785 'imb_all': All imbalanced-learn sampling methods, i.e., 35-53. |
718 **CAUTION**: Mix of imblearn and other preprocessors may not work. | 786 **CAUTION**: Mix of imblearn and other preprocessors may not work. |
719 None: opt out of preprocessor | 787 None: opt out of preprocessor |
720 | 788 |
721 Mixing is supported (CAUTION: a mix of imblearn and other preprocessors may not work), e.g.:: | 789 Mixing is supported (CAUTION: a mix of imblearn and other preprocessors may not work), e.g.:: |
722 | 790 |
723 : [None, 'sk_prep_all', 22, 'k_appr_all', sklearn_feature_selection.SelectKBest(k=50)] | 791 : [None, 'sk_prep_all', 21, 'k_appr_all', sklearn_feature_selection.SelectKBest(k=50)] |
724 | 792 |
725 | 793 |
726 | 794 |
727 **Whether to do train_test_split?** | 795 **Whether to do train_test_split?** |
728 | 796 |