comparison main_macros.xml @ 15:8da7dc3f4e66 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48
author bgruening
date Fri, 13 Jul 2018 03:55:56 -0400
parents af7c23a25c55
children b5d22365febb
comparison
equal deleted inserted replaced
14:830b48bb1617 15:8da7dc3f4e66
33 33
34 if inputs['selected_algorithm'] == 'SelectFromModel': 34 if inputs['selected_algorithm'] == 'SelectFromModel':
35 if not options['threshold'] or options['threshold'] == 'None': 35 if not options['threshold'] or options['threshold'] == 'None':
36 options['threshold'] = None 36 options['threshold'] = None
37 if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load': 37 if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load':
38 fitted_estimator = pickle.load(open("inputs['extra_estimator']['fitted_estimator']", 'r')) 38 with open("inputs['extra_estimator']['fitted_estimator']", 'rb') as model_handler:
39 fitted_estimator = pickle.load(model_handler)
39 new_selector = selector(fitted_estimator, prefit=True, **options) 40 new_selector = selector(fitted_estimator, prefit=True, **options)
40 else: 41 else:
41 estimator=inputs["estimator"] 42 estimator=inputs["estimator"]
42 if inputs["extra_estimator"]["has_estimator"]=='no': 43 if inputs["extra_estimator"]["has_estimator"]=='no':
43 estimator=inputs["extra_estimator"]["new_estimator"] 44 estimator=inputs["extra_estimator"]["new_estimator"]
81 sep='\t', 82 sep='\t',
82 header=header, 83 header=header,
83 parse_dates=True 84 parse_dates=True
84 ) 85 )
85 else: 86 else:
86 X = mmread(open(file1, 'r')) 87 X = mmread(file1)
87 88
88 header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None 89 header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None
89 column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] 90 column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
90 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: 91 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
91 c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["col2"] 92 c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["col2"]
430 <param argument="beta" type="float" value="1.0" label="The strength of recall versus precision in the F-score" help=" "/> 431 <param argument="beta" type="float" value="1.0" label="The strength of recall versus precision in the F-score" help=" "/>
431 </xml> 432 </xml>
432 433
433 434
434 <!--Data interface--> 435 <!--Data interface-->
435 <xml name="tabular_input">
436 <param name="infile" type="data" format="tabular" label="Data file with numeric values"/>
437 <param name="start_column" type="data_column" data_ref="infile" optional="True" label="Select a subset of data. Start column:" />
438 <param name="end_column" type="data_column" data_ref="infile" optional="True" label="End column:" />
439 </xml>
440
441 <xml name="sample_cols" token_label1="File containing true class labels:" token_label2="File containing predicted class labels:" token_multiple1="False" token_multiple2="False" token_format1="tabular" token_format2="tabular" token_help1="" token_help2="">
442 <param name="infile1" type="data" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/>
443 <param name="col1" multiple="@MULTIPLE1@" type="data_column" data_ref="infile1" label="Select target column(s):"/>
444 <param name="infile2" type="data" format="@FORMAT2@" label="@LABEL2@" help="@HELP2@"/>
445 <param name="col2" multiple="@MULTIPLE2@" type="data_column" data_ref="infile2" label="Select target column(s):"/>
446 <yield/>
447 </xml>
448 436
449 <xml name="samples_tabular" token_multiple1="false" token_multiple2="false"> 437 <xml name="samples_tabular" token_multiple1="false" token_multiple2="false">
450 <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/> 438 <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/>
451 <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> 439 <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
452 <conditional name="column_selector_options_1"> 440 <conditional name="column_selector_options_1">
470 </param> 458 </param>
471 <when value="by_index_number"> 459 <when value="by_index_number">
472 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> 460 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
473 </when> 461 </when>
474 <when value="by_header_name"> 462 <when value="by_header_name">
475 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="String seperate by colon. For example: target1,target2"/> 463 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/>
476 </when> 464 </when>
477 <when value="all_but_by_index_number"> 465 <when value="all_but_by_index_number">
478 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> 466 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
479 </when> 467 </when>
480 <when value="all_but_by_header_name"> 468 <when value="all_but_by_header_name">
481 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="String seperate by colon. For example: target1,target2"/> 469 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/>
482 </when> 470 </when>
483 <when value="all_columns"> 471 <when value="all_columns">
484 </when> 472 </when>
485 </xml> 473 </xml>
486 474
550 <when value="sparse"> 538 <when value="sparse">
551 <expand macro="sparse_target"/> 539 <expand macro="sparse_target"/>
552 </when> 540 </when>
553 </conditional> 541 </conditional>
554 </xml> 542 </xml>
555
556 <xml name="multitype_input" token_format="tabular" token_help="All datasets with tabular format are supporetd.">
557 <param name="infile_transform" type="data" format="@FORMAT@" label="Select a dataset to transform:" help="@HELP@"/>
558 </xml>
559
560 543
561 <!--Advanced options--> 544 <!--Advanced options-->
562 <xml name="nn_advanced_options"> 545 <xml name="nn_advanced_options">
563 <section name="options" title="Advanced Options" expanded="False"> 546 <section name="options" title="Advanced Options" expanded="False">
564 <yield/> 547 <yield/>
820 <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option> 803 <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option>
821 <yield/> 804 <yield/>
822 </param> 805 </param>
823 </xml> 806 </xml>
824 807
<!--Extended preprocessor list: expands the "sparse_preprocessors" macro and passes four extra options (KernelCenterer, MinMaxScaler, PolynomialFeatures, RobustScaler) in the expand body; presumably they are injected at that macro's yield point (confirm against its definition).-->
808 <xml name="sparse_preprocessors_ext">
809 <expand macro="sparse_preprocessors">
810 <option value="KernelCenterer">Kernel Centerer (Centers a kernel matrix)</option>
811 <option value="MinMaxScaler">Minmax Scaler (Scales features to a range)</option>
812 <option value="PolynomialFeatures">Polynomial Features (Generates polynomial and interaction features)</option>
813 <option value="RobustScaler">Robust Scaler (Scales features using outlier-invariance statistics)</option>
814 </expand>
815 </xml>
816
825 <xml name="sparse_preprocessor_options"> 817 <xml name="sparse_preprocessor_options">
826 <when value="Binarizer"> 818 <when value="Binarizer">
827 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/>
828 <section name="options" title="Advanced Options" expanded="False"> 819 <section name="options" title="Advanced Options" expanded="False">
829 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" 820 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
830 label="Use a copy of data for precomputing binarization" help=" "/> 821 label="Use a copy of data for precomputing binarization" help=" "/>
831 <param argument="threshold" type="float" optional="true" value="0.0" 822 <param argument="threshold" type="float" optional="true" value="0.0"
832 label="Threshold" 823 label="Threshold"
833 help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/> 824 help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/>
834 </section> 825 </section>
835 </when> 826 </when>
836 <when value="Imputer"> 827 <when value="Imputer">
837 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/>
838 <section name="options" title="Advanced Options" expanded="False"> 828 <section name="options" title="Advanced Options" expanded="False">
839 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" 829 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
840 label="Use a copy of data for precomputing imputation" help=" "/> 830 label="Use a copy of data for precomputing imputation" help=" "/>
841 <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" "> 831 <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" ">
842 <option value="mean" selected="true">Replace missing values using the mean along the axis</option> 832 <option value="mean" selected="true">Replace missing values using the mean along the axis</option>
852 <option value="1">Impute along rows</option> 842 <option value="1">Impute along rows</option>
853 </param--> 843 </param-->
854 </section> 844 </section>
855 </when> 845 </when>
856 <when value="StandardScaler"> 846 <when value="StandardScaler">
857 <expand macro="multitype_input"/>
858 <section name="options" title="Advanced Options" expanded="False"> 847 <section name="options" title="Advanced Options" expanded="False">
859 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" 848 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
860 label="Use a copy of data for performing inplace scaling" help=" "/> 849 label="Use a copy of data for performing inplace scaling" help=" "/>
861 <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" 850 <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
862 label="Center the data before scaling" help=" "/> 851 label="Center the data before scaling" help=" "/>
863 <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" 852 <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
864 label="Scale the data to unit variance (or unit standard deviation)" help=" "/> 853 label="Scale the data to unit variance (or unit standard deviation)" help=" "/>
865 </section> 854 </section>
866 </when> 855 </when>
867 <when value="MaxAbsScaler"> 856 <when value="MaxAbsScaler">
868 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/>
869 <section name="options" title="Advanced Options" expanded="False"> 857 <section name="options" title="Advanced Options" expanded="False">
870 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" 858 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
871 label="Use a copy of data for precomputing scaling" help=" "/> 859 label="Use a copy of data for precomputing scaling" help=" "/>
872 </section> 860 </section>
873 </when> 861 </when>
874 <when value="Normalizer"> 862 <when value="Normalizer">
875 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/>
876 <section name="options" title="Advanced Options" expanded="False"> 863 <section name="options" title="Advanced Options" expanded="False">
877 <param argument="norm" type="select" optional="true" label="The norm to use to normalize non zero samples" help=" "> 864 <param argument="norm" type="select" optional="true" label="The norm to use to normalize non zero samples" help=" ">
878 <option value="l1" selected="true">l1</option> 865 <option value="l1" selected="true">l1</option>
879 <option value="l2">l2</option> 866 <option value="l2">l2</option>
880 <option value="max">max</option> 867 <option value="max">max</option>
883 label="Use a copy of data for precomputing row normalization" help=" "/> 870 label="Use a copy of data for precomputing row normalization" help=" "/>
884 </section> 871 </section>
885 </when> 872 </when>
886 <yield/> 873 <yield/>
887 </xml> 874 </xml>
875
<!--Extended preprocessor options: expands the "sparse_preprocessor_options" macro and supplies additional "when" branches (KernelCenterer, MinMaxScaler, PolynomialFeatures, RobustScaler) that are inserted at that macro's yield point. Boolean params use the same booltrue/boolfalse value pair as the other boolean params in this file.-->
876 <xml name="sparse_preprocessor_options_ext">
877 <expand macro="sparse_preprocessor_options">
878 <when value="KernelCenterer">
879 <section name="options" title="Advanced Options" expanded="False">
880 </section>
881 </when>
882 <when value="MinMaxScaler">
883 <section name="options" title="Advanced Options" expanded="False">
884 <!--feature_range is not exposed as a parameter here-->
885 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
886 label="Use a copy of data for precomputing normalization" help=" "/>
887 </section>
888 </when>
889 <when value="PolynomialFeatures">
890 <section name="options" title="Advanced Options" expanded="False">
891 <param argument="degree" type="integer" optional="true" value="2" label="The degree of the polynomial features " help=""/>
892 <param argument="interaction_only" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Produce interaction features only" help="(Features that are products of at most degree distinct input features) "/>
893 <param argument="include_bias" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Include a bias column" help="Feature in which all polynomial powers are zero "/>
894 </section>
895 </when>
896 <when value="RobustScaler">
897 <section name="options" title="Advanced Options" expanded="False">
898 <!--defaults below: with_centering=True, with_scaling=True, copy=True-->
899 <param argument="with_centering" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
900 label="Center the data before scaling" help=" "/>
901 <param argument="with_scaling" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
902 label="Scale the data to interquartile range" help=" "/>
903 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
904 label="Use a copy of data for inplace scaling" help=" "/>
905 </section>
906 </when>
907 </expand>
908 </xml>
909
888 <xml name="estimator_input_no_fit"> 910 <xml name="estimator_input_no_fit">
889 <expand macro="feature_selection_estimator" /> 911 <expand macro="feature_selection_estimator" />
890 <conditional name="extra_estimator"> 912 <conditional name="extra_estimator">
891 <expand macro="feature_selection_extra_estimator" /> 913 <expand macro="feature_selection_extra_estimator" />
892 <expand macro="feature_selection_estimator_choices" /> 914 <expand macro="feature_selection_estimator_choices" />
893 </conditional> 915 </conditional>
894 </xml> 916 </xml>
917
895 <xml name="feature_selection_all"> 918 <xml name="feature_selection_all">
896 <conditional name="feature_selection_algorithms"> 919 <conditional name="feature_selection_algorithms">
897 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> 920 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
898 <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> 921 <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
899 <option value="GenericUnivariateSelect" selected="true">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> 922 <option value="GenericUnivariateSelect" selected="true">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
1012 </when> 1035 </when>
1013 <when value="mutual_info_regression"> 1036 <when value="mutual_info_regression">
1014 </when--> 1037 </when-->
1015 </conditional> 1038 </conditional>
1016 </xml> 1039 </xml>
1040
1017 <xml name="feature_selection_score_function"> 1041 <xml name="feature_selection_score_function">
1018 <param argument="score_func" type="select" label="Select a score function"> 1042 <param argument="score_func" type="select" label="Select a score function">
1019 <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option> 1043 <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>
1020 <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option> 1044 <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option>
1021 <option value="f_regression">f_regression - Univariate linear regression tests</option> 1045 <option value="f_regression">f_regression - Univariate linear regression tests</option>
1022 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option> 1046 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option>
1023 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option> 1047 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option>
1024 </param> 1048 </param>
1025 </xml> 1049 </xml>
1050
1026 <xml name="feature_selection_estimator"> 1051 <xml name="feature_selection_estimator">
1027 <param argument="estimator" type="select" label="Select an estimator" help="The base estimator from which the transformer is built."> 1052 <param argument="estimator" type="select" label="Select an estimator" help="The base estimator from which the transformer is built.">
1028 <option value="svm.SVR(kernel=&quot;linear&quot;)">svm.SVR(kernel=&quot;linear&quot;)</option> 1053 <option value="svm.SVR(kernel=&quot;linear&quot;)">svm.SVR(kernel=&quot;linear&quot;)</option>
1029 <option value="svm.SVC(kernel=&quot;linear&quot;)">svm.SVC(kernel=&quot;linear&quot;)</option> 1054 <option value="svm.SVC(kernel=&quot;linear&quot;)">svm.SVC(kernel=&quot;linear&quot;)</option>
1030 <option value="svm.LinearSVC(penalty=&quot;l1&quot;, dual=False, tol=1e-3)">svm.LinearSVC(penalty=&quot;l1&quot;, dual=False, tol=1e-3)</option> 1055 <option value="svm.LinearSVC(penalty=&quot;l1&quot;, dual=False, tol=1e-3)">svm.LinearSVC(penalty=&quot;l1&quot;, dual=False, tol=1e-3)</option>
1031 <option value="linear_model.LassoCV()">linear_model.LassoCV()</option> 1056 <option value="linear_model.LassoCV()">linear_model.LassoCV()</option>
1032 <option value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)">ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)</option> 1057 <option value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)">ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)</option>
1033 </param> 1058 </param>
1034 </xml> 1059 </xml>
1060
1035 <xml name="feature_selection_extra_estimator"> 1061 <xml name="feature_selection_extra_estimator">
1036 <param name="has_estimator" type="select" label="Does your estimator on the list above?"> 1062 <param name="has_estimator" type="select" label="Does your estimator on the list above?">
1037 <option value="yes">Yes, my estimator is on the list</option> 1063 <option value="yes">Yes, my estimator is on the list</option>
1038 <option value="no">No, I need make a new estimator</option> 1064 <option value="no">No, I need make a new estimator</option>
1039 <yield/> 1065 <yield/>
1040 </param> 1066 </param>
1041 </xml> 1067 </xml>
1068
1042 <xml name="feature_selection_estimator_choices"> 1069 <xml name="feature_selection_estimator_choices">
1043 <when value="yes"> 1070 <when value="yes">
1044 </when> 1071 </when>
1045 <when value="no"> 1072 <when value="no">
1046 <param name="new_estimator" type="text" value="" label="Make a new estimator" /> 1073 <param name="new_estimator" type="text" value="" label="Make a new estimator" />
1047 </when> 1074 </when>
1048 <yield/> 1075 <yield/>
1049 </xml> 1076 </xml>
1077
1050 <xml name="feature_selection_methods"> 1078 <xml name="feature_selection_methods">
1051 <conditional name="select_methods"> 1079 <conditional name="select_methods">
1052 <param name="selected_method" type="select" label="Select an operation"> 1080 <param name="selected_method" type="select" label="Select an operation">
1053 <option value="fit_transform">fit_transform - Fit to data, then transform it</option> 1081 <option value="fit_transform">fit_transform - Fit to data, then transform it</option>
1054 <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option> 1082 <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option>