Mercurial > repos > bgruening > sklearn_model_validation
comparison main_macros.xml @ 8:fd7a054ffdbd draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48
author | bgruening |
---|---|
date | Fri, 13 Jul 2018 03:56:45 -0400 |
parents | 8518fdc23c7e |
children | c6b3efcba7bd |
comparison legend: equal | deleted | inserted | replaced
7:57a7471292df | 8:fd7a054ffdbd |
---|---|
33 | 33 |
34 if inputs['selected_algorithm'] == 'SelectFromModel': | 34 if inputs['selected_algorithm'] == 'SelectFromModel': |
35 if not options['threshold'] or options['threshold'] == 'None': | 35 if not options['threshold'] or options['threshold'] == 'None': |
36 options['threshold'] = None | 36 options['threshold'] = None |
37 if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load': | 37 if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load': |
38 fitted_estimator = pickle.load(open("inputs['extra_estimator']['fitted_estimator']", 'r')) | 38 with open("inputs['extra_estimator']['fitted_estimator']", 'rb') as model_handler: |
39 fitted_estimator = pickle.load(model_handler) | |
39 new_selector = selector(fitted_estimator, prefit=True, **options) | 40 new_selector = selector(fitted_estimator, prefit=True, **options) |
40 else: | 41 else: |
41 estimator=inputs["estimator"] | 42 estimator=inputs["estimator"] |
42 if inputs["extra_estimator"]["has_estimator"]=='no': | 43 if inputs["extra_estimator"]["has_estimator"]=='no': |
43 estimator=inputs["extra_estimator"]["new_estimator"] | 44 estimator=inputs["extra_estimator"]["new_estimator"] |
81 sep='\t', | 82 sep='\t', |
82 header=header, | 83 header=header, |
83 parse_dates=True | 84 parse_dates=True |
84 ) | 85 ) |
85 else: | 86 else: |
86 X = mmread(open(file1, 'r')) | 87 X = mmread(file1) |
87 | 88 |
88 header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None | 89 header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None |
89 column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] | 90 column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] |
90 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 91 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: |
91 c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["col2"] | 92 c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["col2"] |
430 <param argument="beta" type="float" value="1.0" label="The strength of recall versus precision in the F-score" help=" "/> | 431 <param argument="beta" type="float" value="1.0" label="The strength of recall versus precision in the F-score" help=" "/> |
431 </xml> | 432 </xml> |
432 | 433 |
433 | 434 |
434 <!--Data interface--> | 435 <!--Data interface--> |
435 <xml name="tabular_input"> | |
436 <param name="infile" type="data" format="tabular" label="Data file with numeric values"/> | |
437 <param name="start_column" type="data_column" data_ref="infile" optional="True" label="Select a subset of data. Start column:" /> | |
438 <param name="end_column" type="data_column" data_ref="infile" optional="True" label="End column:" /> | |
439 </xml> | |
440 | |
441 <xml name="sample_cols" token_label1="File containing true class labels:" token_label2="File containing predicted class labels:" token_multiple1="False" token_multiple2="False" token_format1="tabular" token_format2="tabular" token_help1="" token_help2=""> | |
442 <param name="infile1" type="data" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/> | |
443 <param name="col1" multiple="@MULTIPLE1@" type="data_column" data_ref="infile1" label="Select target column(s):"/> | |
444 <param name="infile2" type="data" format="@FORMAT2@" label="@LABEL2@" help="@HELP2@"/> | |
445 <param name="col2" multiple="@MULTIPLE2@" type="data_column" data_ref="infile2" label="Select target column(s):"/> | |
446 <yield/> | |
447 </xml> | |
448 | 436 |
449 <xml name="samples_tabular" token_multiple1="false" token_multiple2="false"> | 437 <xml name="samples_tabular" token_multiple1="false" token_multiple2="false"> |
450 <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/> | 438 <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/> |
451 <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> | 439 <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> |
452 <conditional name="column_selector_options_1"> | 440 <conditional name="column_selector_options_1"> |
470 </param> | 458 </param> |
471 <when value="by_index_number"> | 459 <when value="by_index_number"> |
472 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> | 460 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> |
473 </when> | 461 </when> |
474 <when value="by_header_name"> | 462 <when value="by_header_name"> |
475 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="String seperate by colon. For example: target1,target2"/> | 463 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/> |
476 </when> | 464 </when> |
477 <when value="all_but_by_index_number"> | 465 <when value="all_but_by_index_number"> |
478 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> | 466 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> |
479 </when> | 467 </when> |
480 <when value="all_but_by_header_name"> | 468 <when value="all_but_by_header_name"> |
481 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="String seperate by colon. For example: target1,target2"/> | 469 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/> |
482 </when> | 470 </when> |
483 <when value="all_columns"> | 471 <when value="all_columns"> |
484 </when> | 472 </when> |
485 </xml> | 473 </xml> |
486 | 474 |
550 <when value="sparse"> | 538 <when value="sparse"> |
551 <expand macro="sparse_target"/> | 539 <expand macro="sparse_target"/> |
552 </when> | 540 </when> |
553 </conditional> | 541 </conditional> |
554 </xml> | 542 </xml> |
555 | |
556 <xml name="multitype_input" token_format="tabular" token_help="All datasets with tabular format are supporetd."> | |
557 <param name="infile_transform" type="data" format="@FORMAT@" label="Select a dataset to transform:" help="@HELP@"/> | |
558 </xml> | |
559 | |
560 | 543 |
561 <!--Advanced options--> | 544 <!--Advanced options--> |
562 <xml name="nn_advanced_options"> | 545 <xml name="nn_advanced_options"> |
563 <section name="options" title="Advanced Options" expanded="False"> | 546 <section name="options" title="Advanced Options" expanded="False"> |
564 <yield/> | 547 <yield/> |
820 <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option> | 803 <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option> |
821 <yield/> | 804 <yield/> |
822 </param> | 805 </param> |
823 </xml> | 806 </xml> |
824 | 807 |
808 <xml name="sparse_preprocessors_ext"> | |
809 <expand macro="sparse_preprocessors"> | |
810 <option value="KernelCenterer">Kernel Centerer (Centers a kernel matrix)</option> | |
811 <option value="MinMaxScaler">Minmax Scaler (Scales features to a range)</option> | |
812 <option value="PolynomialFeatures">Polynomial Features (Generates polynomial and interaction features)</option> | |
813 <option value="RobustScaler">Robust Scaler (Scales features using outlier-invariance statistics)</option> | |
814 </expand> | |
815 </xml> | |
816 | |
825 <xml name="sparse_preprocessor_options"> | 817 <xml name="sparse_preprocessor_options"> |
826 <when value="Binarizer"> | 818 <when value="Binarizer"> |
827 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/> | |
828 <section name="options" title="Advanced Options" expanded="False"> | 819 <section name="options" title="Advanced Options" expanded="False"> |
829 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | 820 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" |
830 label="Use a copy of data for precomputing binarization" help=" "/> | 821 label="Use a copy of data for precomputing binarization" help=" "/> |
831 <param argument="threshold" type="float" optional="true" value="0.0" | 822 <param argument="threshold" type="float" optional="true" value="0.0" |
832 label="Threshold" | 823 label="Threshold" |
833 help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/> | 824 help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/> |
834 </section> | 825 </section> |
835 </when> | 826 </when> |
836 <when value="Imputer"> | 827 <when value="Imputer"> |
837 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/> | |
838 <section name="options" title="Advanced Options" expanded="False"> | 828 <section name="options" title="Advanced Options" expanded="False"> |
839 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | 829 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" |
840 label="Use a copy of data for precomputing imputation" help=" "/> | 830 label="Use a copy of data for precomputing imputation" help=" "/> |
841 <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" "> | 831 <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" "> |
842 <option value="mean" selected="true">Replace missing values using the mean along the axis</option> | 832 <option value="mean" selected="true">Replace missing values using the mean along the axis</option> |
852 <option value="1">Impute along rows</option> | 842 <option value="1">Impute along rows</option> |
853 </param--> | 843 </param--> |
854 </section> | 844 </section> |
855 </when> | 845 </when> |
856 <when value="StandardScaler"> | 846 <when value="StandardScaler"> |
857 <expand macro="multitype_input"/> | |
858 <section name="options" title="Advanced Options" expanded="False"> | 847 <section name="options" title="Advanced Options" expanded="False"> |
859 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | 848 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" |
860 label="Use a copy of data for performing inplace scaling" help=" "/> | 849 label="Use a copy of data for performing inplace scaling" help=" "/> |
861 <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | 850 <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" |
862 label="Center the data before scaling" help=" "/> | 851 label="Center the data before scaling" help=" "/> |
863 <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | 852 <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" |
864 label="Scale the data to unit variance (or unit standard deviation)" help=" "/> | 853 label="Scale the data to unit variance (or unit standard deviation)" help=" "/> |
865 </section> | 854 </section> |
866 </when> | 855 </when> |
867 <when value="MaxAbsScaler"> | 856 <when value="MaxAbsScaler"> |
868 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/> | |
869 <section name="options" title="Advanced Options" expanded="False"> | 857 <section name="options" title="Advanced Options" expanded="False"> |
870 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" | 858 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" |
871 label="Use a copy of data for precomputing scaling" help=" "/> | 859 label="Use a copy of data for precomputing scaling" help=" "/> |
872 </section> | 860 </section> |
873 </when> | 861 </when> |
874 <when value="Normalizer"> | 862 <when value="Normalizer"> |
875 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/> | |
876 <section name="options" title="Advanced Options" expanded="False"> | 863 <section name="options" title="Advanced Options" expanded="False"> |
877 <param argument="norm" type="select" optional="true" label="The norm to use to normalize non zero samples" help=" "> | 864 <param argument="norm" type="select" optional="true" label="The norm to use to normalize non zero samples" help=" "> |
878 <option value="l1" selected="true">l1</option> | 865 <option value="l1" selected="true">l1</option> |
879 <option value="l2">l2</option> | 866 <option value="l2">l2</option> |
880 <option value="max">max</option> | 867 <option value="max">max</option> |
883 label="Use a copy of data for precomputing row normalization" help=" "/> | 870 label="Use a copy of data for precomputing row normalization" help=" "/> |
884 </section> | 871 </section> |
885 </when> | 872 </when> |
886 <yield/> | 873 <yield/> |
887 </xml> | 874 </xml> |
875 | |
876 <xml name="sparse_preprocessor_options_ext"> | |
877 <expand macro="sparse_preprocessor_options"> | |
878 <when value="KernelCenterer"> | |
879 <section name="options" title="Advanced Options" expanded="False"> | |
880 </section> | |
881 </when> | |
882 <when value="MinMaxScaler"> | |
883 <section name="options" title="Advanced Options" expanded="False"> | |
884 <!--feature_range--> | |
885 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" | |
886 label="Use a copy of data for precomputing normalization" help=" "/> | |
887 </section> | |
888 </when> | |
889 <when value="PolynomialFeatures"> | |
890 <section name="options" title="Advanced Options" expanded="False"> | |
891 <param argument="degree" type="integer" optional="true" value="2" label="The degree of the polynomial features " help=""/> | |
892 <param argument="interaction_only" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Produce interaction features only" help="(Features that are products of at most degree distinct input features) "/> | |
893 <param argument="include_bias" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Include a bias column" help="Feature in which all polynomial powers are zero "/> | |
894 </section> | |
895 </when> | |
896 <when value="RobustScaler"> | |
897 <section name="options" title="Advanced Options" expanded="False"> | |
898 <!--=True, =True, copy=True--> | |
899 <param argument="with_centering" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" | |
900 label="Center the data before scaling" help=" "/> | |
901 <param argument="with_scaling" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" | |
902 label="Scale the data to interquartile range" help=" "/> | |
903 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" | |
904 label="Use a copy of data for inplace scaling" help=" "/> | |
905 </section> | |
906 </when> | |
907 </expand> | |
908 </xml> | |
909 | |
888 <xml name="estimator_input_no_fit"> | 910 <xml name="estimator_input_no_fit"> |
889 <expand macro="feature_selection_estimator" /> | 911 <expand macro="feature_selection_estimator" /> |
890 <conditional name="extra_estimator"> | 912 <conditional name="extra_estimator"> |
891 <expand macro="feature_selection_extra_estimator" /> | 913 <expand macro="feature_selection_extra_estimator" /> |
892 <expand macro="feature_selection_estimator_choices" /> | 914 <expand macro="feature_selection_estimator_choices" /> |
893 </conditional> | 915 </conditional> |
894 </xml> | 916 </xml> |
917 | |
895 <xml name="feature_selection_all"> | 918 <xml name="feature_selection_all"> |
896 <conditional name="feature_selection_algorithms"> | 919 <conditional name="feature_selection_algorithms"> |
897 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> | 920 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> |
898 <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> | 921 <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> |
899 <option value="GenericUnivariateSelect" selected="true">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> | 922 <option value="GenericUnivariateSelect" selected="true">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> |
1012 </when> | 1035 </when> |
1013 <when value="mutual_info_regression"> | 1036 <when value="mutual_info_regression"> |
1014 </when--> | 1037 </when--> |
1015 </conditional> | 1038 </conditional> |
1016 </xml> | 1039 </xml> |
1040 | |
1017 <xml name="feature_selection_score_function"> | 1041 <xml name="feature_selection_score_function"> |
1018 <param argument="score_func" type="select" label="Select a score function"> | 1042 <param argument="score_func" type="select" label="Select a score function"> |
1019 <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option> | 1043 <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option> |
1020 <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option> | 1044 <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option> |
1021 <option value="f_regression">f_regression - Univariate linear regression tests</option> | 1045 <option value="f_regression">f_regression - Univariate linear regression tests</option> |
1022 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option> | 1046 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option> |
1023 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option> | 1047 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option> |
1024 </param> | 1048 </param> |
1025 </xml> | 1049 </xml> |
1050 | |
1026 <xml name="feature_selection_estimator"> | 1051 <xml name="feature_selection_estimator"> |
1027 <param argument="estimator" type="select" label="Select an estimator" help="The base estimator from which the transformer is built."> | 1052 <param argument="estimator" type="select" label="Select an estimator" help="The base estimator from which the transformer is built."> |
1028 <option value="svm.SVR(kernel="linear")">svm.SVR(kernel="linear")</option> | 1053 <option value="svm.SVR(kernel="linear")">svm.SVR(kernel="linear")</option> |
1029 <option value="svm.SVC(kernel="linear")">svm.SVC(kernel="linear")</option> | 1054 <option value="svm.SVC(kernel="linear")">svm.SVC(kernel="linear")</option> |
1030 <option value="svm.LinearSVC(penalty="l1", dual=False, tol=1e-3)">svm.LinearSVC(penalty="l1", dual=False, tol=1e-3)</option> | 1055 <option value="svm.LinearSVC(penalty="l1", dual=False, tol=1e-3)">svm.LinearSVC(penalty="l1", dual=False, tol=1e-3)</option> |
1031 <option value="linear_model.LassoCV()">linear_model.LassoCV()</option> | 1056 <option value="linear_model.LassoCV()">linear_model.LassoCV()</option> |
1032 <option value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)">ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)</option> | 1057 <option value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)">ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)</option> |
1033 </param> | 1058 </param> |
1034 </xml> | 1059 </xml> |
1060 | |
1035 <xml name="feature_selection_extra_estimator"> | 1061 <xml name="feature_selection_extra_estimator"> |
1036 <param name="has_estimator" type="select" label="Does your estimator on the list above?"> | 1062 <param name="has_estimator" type="select" label="Does your estimator on the list above?"> |
1037 <option value="yes">Yes, my estimator is on the list</option> | 1063 <option value="yes">Yes, my estimator is on the list</option> |
1038 <option value="no">No, I need make a new estimator</option> | 1064 <option value="no">No, I need make a new estimator</option> |
1039 <yield/> | 1065 <yield/> |
1040 </param> | 1066 </param> |
1041 </xml> | 1067 </xml> |
1068 | |
1042 <xml name="feature_selection_estimator_choices"> | 1069 <xml name="feature_selection_estimator_choices"> |
1043 <when value="yes"> | 1070 <when value="yes"> |
1044 </when> | 1071 </when> |
1045 <when value="no"> | 1072 <when value="no"> |
1046 <param name="new_estimator" type="text" value="" label="Make a new estimator" /> | 1073 <param name="new_estimator" type="text" value="" label="Make a new estimator" /> |
1047 </when> | 1074 </when> |
1048 <yield/> | 1075 <yield/> |
1049 </xml> | 1076 </xml> |
1077 | |
1050 <xml name="feature_selection_methods"> | 1078 <xml name="feature_selection_methods"> |
1051 <conditional name="select_methods"> | 1079 <conditional name="select_methods"> |
1052 <param name="selected_method" type="select" label="Select an operation"> | 1080 <param name="selected_method" type="select" label="Select an operation"> |
1053 <option value="fit_transform">fit_transform - Fit to data, then transform it</option> | 1081 <option value="fit_transform">fit_transform - Fit to data, then transform it</option> |
1054 <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option> | 1082 <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option> |