comparison main_macros.xml @ 20:0b88494bdcac draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 07:25:16 -0400
parents 231e9a9849e8
children fe47a06943fb
comparison
equal deleted inserted replaced
19:231e9a9849e8 20:0b88494bdcac
1 <macros> 1 <macros>
2 <token name="@VERSION@">1.0.0.4</token> 2 <token name="@VERSION@">1.0.7.10</token>
3
4 <token name="@ENSEMBLE_VERSION@">0.2.0</token>
3 5
4 <xml name="python_requirements"> 6 <xml name="python_requirements">
5 <requirements> 7 <requirements>
6 <requirement type="package" version="3.6">python</requirement> 8 <requirement type="package" version="3.6">python</requirement>
7 <requirement type="package" version="0.20.3">scikit-learn</requirement> 9 <requirement type="package" version="0.7.10">Galaxy-ML</requirement>
8 <requirement type="package" version="0.24.2">pandas</requirement>
9 <requirement type="package" version="0.80">xgboost</requirement>
10 <requirement type="package" version="0.9.13">asteval</requirement>
11 <requirement type="package" version="0.6">skrebate</requirement>
12 <requirement type="package" version="0.4.2">imbalanced-learn</requirement>
13 <requirement type="package" version="0.16.0">mlxtend</requirement>
14 <yield/> 10 <yield/>
15 </requirements> 11 </requirements>
16 </xml> 12 </xml>
17 13
18 <xml name="macro_stdio"> 14 <xml name="macro_stdio">
418 </repeat> 414 </repeat>
419 </xml> 415 </xml>
420 416
421 <xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2=""> 417 <xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2="">
422 <param name="infile1" type="data" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/> 418 <param name="infile1" type="data" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/>
423 <param name="infile2" type="data" format="@FORMAT2@" label="@LABEL2@" help="@HELP2@"/> 419 <expand macro="input_tabular_target"/>
424 <param name="col2" multiple="@MULTIPLE@" type="data_column" data_ref="infile2" label="Select target column(s):"/>
425 </xml> 420 </xml>
426 421
427 <xml name="sl_mixed_input"> 422 <xml name="sl_mixed_input">
428 <conditional name="input_options"> 423 <conditional name="input_options">
429 <param name="selected_input" type="select" label="Select input type:"> 424 <param name="selected_input" type="select" label="Select input type:">
430 <option value="tabular" selected="true">tabular data</option> 425 <option value="tabular" selected="true">tabular data</option>
431 <option value="sparse">sparse matrix</option> 426 <option value="sparse">sparse matrix</option>
427 <option value="seq_fasta">sequences in a fasta file</option>
428 <option value="refseq_and_interval">reference genome and intervals</option>
432 </param> 429 </param>
433 <when value="tabular"> 430 <when value="tabular">
434 <expand macro="samples_tabular" multiple1="true" multiple2="false"/> 431 <expand macro="samples_tabular" multiple1="true" multiple2="false"/>
435 </when> 432 </when>
436 <when value="sparse"> 433 <when value="sparse">
437 <expand macro="sparse_target"/> 434 <expand macro="sparse_target"/>
438 </when> 435 </when>
436 <when value="seq_fasta">
437 <expand macro="inputs_seq_fasta"/>
438 </when>
439 <when value="refseq_and_interval">
440 <expand macro="inputs_refseq_and_interval"/>
441 </when>
442 </conditional>
443 </xml>
444
445 <xml name="input_tabular_target">
446 <param name="infile2" type="data" format="tabular" label="Dataset containing class labels or target values:"/>
447 <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
448 <conditional name="column_selector_options_2">
449 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="false" infile="infile2"/>
450 </conditional>
451 </xml>
452
453 <xml name="inputs_seq_fasta">
454 <param name="fasta_path" type="data" format="fasta" label="Dataset containing fasta genomic/protein sequences" help="Sequences will be one-hot encoded to arrays."/>
455 <expand macro="input_tabular_target"/>
456 </xml>
457
458 <xml name="inputs_refseq_and_interval">
459 <param name="ref_genome_file" type="data" format="fasta" label="Dataset containing reference genomic sequence"/>
460 <param name="interval_file" type="data" format="interval" label="Dataset containing sequence intervals for training" help="interval. Sequences will be retrieved from the reference genome and one-hot encoded to training arrays."/>
461 <param name="target_file" type="data" format="bed" label="Dataset containing positions and features for target values." help="bed. The file will be compressed with `bgzip` and then indexed using `tabix`."/>
462 <param name="infile2" type="data" format="tabular" label="Dataset containing the feature list for prediction"/>
463 <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
464 <conditional name="column_selector_options_2">
465 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="true" infile="infile2"/>
439 </conditional> 466 </conditional>
440 </xml> 467 </xml>
441 468
442 <!--Advanced options--> 469 <!--Advanced options-->
443 <xml name="nn_advanced_options"> 470 <xml name="nn_advanced_options">
703 730
704 <xml name="sparse_preprocessors"> 731 <xml name="sparse_preprocessors">
705 <param name="selected_pre_processor" type="select" label="Select a preprocessor:"> 732 <param name="selected_pre_processor" type="select" label="Select a preprocessor:">
706 <option value="StandardScaler" selected="true">Standard Scaler (Standardizes features by removing the mean and scaling to unit variance)</option> 733 <option value="StandardScaler" selected="true">Standard Scaler (Standardizes features by removing the mean and scaling to unit variance)</option>
707 <option value="Binarizer">Binarizer (Binarizes data)</option> 734 <option value="Binarizer">Binarizer (Binarizes data)</option>
708 <option value="Imputer">Imputer (Completes missing values)</option>
709 <option value="MaxAbsScaler">Max Abs Scaler (Scales features by their maximum absolute value)</option> 735 <option value="MaxAbsScaler">Max Abs Scaler (Scales features by their maximum absolute value)</option>
710 <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option> 736 <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option>
711 <yield/> 737 <yield/>
712 </param> 738 </param>
713 </xml> 739 </xml>
728 label="Use a copy of data for precomputing binarization" help=" "/> 754 label="Use a copy of data for precomputing binarization" help=" "/>
729 <param argument="threshold" type="float" optional="true" value="0.0" 755 <param argument="threshold" type="float" optional="true" value="0.0"
730 label="Threshold" 756 label="Threshold"
731 help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/> 757 help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/>
732 </section> 758 </section>
733 </when>
734 <when value="Imputer">
735 <section name="options" title="Advanced Options" expanded="False">
736 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
737 label="Use a copy of data for precomputing imputation" help=" "/>
738 <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" ">
739 <option value="mean" selected="true">Replace missing values using the mean along the axis</option>
740 <option value="median">Replace missing values using the median along the axis</option>
741 <option value="most_frequent">Replace missing using the most frequent value along the axis</option>
742 </param>
743 <param argument="missing_values" type="text" optional="true" value="NaN"
744 label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/>
745 <!--param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0"
746 label="Impute along axis = 1" help="If false, axis = 0 is selected for imputation. "/> -->
747 <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" ">
748 <option value="0" selected="true">Impute along columns</option>
749 <option value="1">Impute along rows</option>
750 </param-->
751 </section>
752 </when> 759 </when>
753 <when value="StandardScaler"> 760 <when value="StandardScaler">
754 <section name="options" title="Advanced Options" expanded="False"> 761 <section name="options" title="Advanced Options" expanded="False">
755 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" 762 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
756 label="Use a copy of data for performing inplace scaling" help=" "/> 763 label="Use a copy of data for performing inplace scaling" help=" "/>
786 <section name="options" title="Advanced Options" expanded="False"> 793 <section name="options" title="Advanced Options" expanded="False">
787 </section> 794 </section>
788 </when> 795 </when>
789 <when value="MinMaxScaler"> 796 <when value="MinMaxScaler">
790 <section name="options" title="Advanced Options" expanded="False"> 797 <section name="options" title="Advanced Options" expanded="False">
791 <!--feature_range--> 798 <param argument="feature_range" type="text" value="(0, 1)" optional="true" help="Desired range of transformed data. None or tuple (min, max). None equals to (0, 1)"/>
792 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" 799 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true"
793 label="Use a copy of data for precomputing normalization" help=" "/> 800 label="Use a copy of data for precomputing normalization" help=" "/>
794 </section> 801 </section>
795 </when> 802 </when>
796 <when value="PolynomialFeatures"> 803 <when value="PolynomialFeatures">
920 </when> 927 </when>
921 </expand> 928 </expand>
922 </conditional> 929 </conditional>
923 </xml> 930 </xml>
924 931
925 <xml name="cv_reduced"> 932 <xml name="cv_reduced" token_label="Select the cv splitter">
926 <conditional name="cv_selector"> 933 <conditional name="cv_selector">
927 <param name="selected_cv" type="select" label="Select the cv splitter:"> 934 <param name="selected_cv" type="select" label="@LABEL@">
928 <expand macro="cv_splitter"/> 935 <expand macro="cv_splitter"/>
929 </param> 936 </param>
930 <expand macro="cv_splitter_options"/> 937 <expand macro="cv_splitter_options"/>
931 </conditional> 938 </conditional>
932 </xml> 939 </xml>
933 940
934 <xml name="cv_n_splits" token_value="3" token_help="Number of folds. Must be at least 2."> 941 <xml name="cv_n_splits" token_value="3" token_help="Number of folds. Must be at least 2.">
935 <param argument="n_splits" type="integer" value="@VALUE@" min="2" label="n_splits" help="@HELP@"/> 942 <param argument="n_splits" type="integer" value="@VALUE@" min="1" label="n_splits" help="@HELP@"/>
936 </xml> 943 </xml>
937 944
938 <xml name="cv_shuffle"> 945 <xml name="cv_shuffle">
939 <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to shuffle data before splitting" /> 946 <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to shuffle data before splitting" />
940 </xml> 947 </xml>
949 <param name="header_g" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> 956 <param name="header_g" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
950 <conditional name="column_selector_options_g"> 957 <conditional name="column_selector_options_g">
951 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option_g" col_name="col_g" multiple="False" infile="infile_g"/> 958 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option_g" col_name="col_g" multiple="False" infile="infile_g"/>
952 </conditional> 959 </conditional>
953 </section> 960 </section>
961 </xml>
962
963 <xml name="train_test_split_params">
964 <conditional name="split_algos">
965 <param name="shuffle" type="select" label="Select the splitting method">
966 <option value="None">No shuffle</option>
967 <option value="simple" selected="true">ShuffleSplit</option>
968 <option value="stratified">StratifiedShuffleSplit -- target values serve as class labels</option>
969 <option value="group">GroupShuffleSplit or split by group names</option>
970 </param>
971 <when value="None">
972 <expand macro="train_test_split_test_size"/>
973 </when>
974 <when value="simple">
975 <expand macro="train_test_split_test_size"/>
976 <expand macro="random_state"/>
977 </when>
978 <when value="stratified">
979 <expand macro="train_test_split_test_size"/>
980 <expand macro="random_state"/>
981 </when>
982 <when value="group">
983 <expand macro="train_test_split_test_size" optional="true"/>
984 <expand macro="random_state"/>
985 <param argument="group_names" type="text" value="" optional="true" label="Type in group names instead"
986 help="For example: chr6, chr7. This parameter is optional. If used, it will override the holdout size and random seed."/>
987 <yield/>
988 </when>
989 </conditional>
990 <!--param argument="train_size" type="float" optional="True" value="" label="Train size:"/>-->
991 </xml>
992
993 <xml name="train_test_split_test_size" token_optional="false">
994 <param name="test_size" type="float" value="0.2" optional="@OPTIONAL@" label="Holdout size" help="Less than 1, for proportion; greater than 1 (integer), for number of samples."/>
954 </xml> 995 </xml>
955 996
956 <xml name="feature_selection_algorithms"> 997 <xml name="feature_selection_algorithms">
957 <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option> 998 <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option>
958 <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> 999 <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
1165 </param> 1206 </param>
1166 </xml> 1207 </xml>
1167 1208
1168 <xml name="model_validation_common_options"> 1209 <xml name="model_validation_common_options">
1169 <expand macro="cv"/> 1210 <expand macro="cv"/>
1170 <!-- expand macro="verbose"/> --> 1211 <expand macro="verbose"/>
1171 <yield/> 1212 <yield/>
1172 </xml> 1213 </xml>
1173 1214
1174 <xml name="scoring_selection"> 1215 <xml name="scoring_selection">
1175 <conditional name="scoring"> 1216 <conditional name="scoring">
1284 </xml> 1325 </xml>
1285 1326
1286 <xml name="search_cv_estimator"> 1327 <xml name="search_cv_estimator">
1287 <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/> 1328 <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/>
1288 <section name="search_params_builder" title="Search parameters Builder" expanded="true"> 1329 <section name="search_params_builder" title="Search parameters Builder" expanded="true">
1289 <param name="infile_params" type="data" format="tabular" label="Choose the dataset containing parameter names"/> 1330 <param name="infile_params" type="data" format="tabular" optional="true" label="Choose the dataset containing parameter names" help="This dataset could be the output of `get_params` in the `Estimator Attributes` tool."/>
1290 <repeat name="param_set" min="1" max="30" title="Parameter settings for search:"> 1331 <repeat name="param_set" min="1" max="30" title="Parameter settings for search:">
1291 <param name="sp_name" type="select" label="Choose a parameter name (with current value)"> 1332 <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)">
1292 <options from_dataset="infile_params" startswith="@"> 1333 <options from_dataset="infile_params" startswith="@">
1293 <column name="name" index="2"/> 1334 <column name="name" index="2"/>
1294 <column name="value" index="1"/> 1335 <column name="value" index="1"/>
1295 <filter type="unique_value" name="unique_param" column="1"/> 1336 <filter type="unique_value" name="unique_param" column="1"/>
1296 <filter type="sort_by" name="sorted_param" column="2"/>
1297 </options> 1337 </options>
1298 </param> 1338 </param>
1299 <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples"> 1339 <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples">
1300 <sanitizer> 1340 <sanitizer>
1301 <valid initial="default"> 1341 <valid initial="default">
1302 <add value="&apos;"/> 1342 <add value="&apos;"/>
1303 <add value="&quot;"/> 1343 <add value="&quot;"/>
1304 <add value="["/> 1344 <add value="["/>
1305 <add value="]"/> 1345 <add value="]"/>
1346 </valid>
1347 </sanitizer>
1348 </param>
1349 </repeat>
1350 </section>
1351 </xml>
1352
1353 <xml name="estimator_and_hyperparameter">
1354 <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/>
1355 <section name="hyperparams_swapping" title="Hyperparameter Swapping" expanded="false">
1356 <param name="infile_params" type="data" format="tabular" optional="true" label="Choose the dataset containing hyperparameters for the pipeline/estimator above" help="This dataset could be the output of `get_params` in the `Estimator Attributes` tool."/>
1357 <repeat name="param_set" min="1" max="30" title="New hyperparameter setting">
1358 <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)">
1359 <options from_dataset="infile_params" startswith="@">
1360 <column name="name" index="2"/>
1361 <column name="value" index="1"/>
1362 <filter type="unique_value" name="unique_param" column="1"/>
1363 </options>
1364 </param>
1365 <param name="sp_value" type="text" value="" optional="true" label="New value" help="Supports int, float, boolean, single quoted string, and selected object constructor. Similar to the `Parameter settings for search` section in `searchcv` tool except that only single value is expected here.">
1366 <sanitizer>
1367 <valid initial="default">
1368 <add value="&apos;"/>
1369 <add value="&quot;"/>
1306 </valid> 1370 </valid>
1307 </sanitizer> 1371 </sanitizer>
1308 </param> 1372 </param>
1309 </repeat> 1373 </repeat>
1310 </section> 1374 </section>
1748 </when> 1812 </when>
1749 </expand> 1813 </expand>
1750 </conditional> 1814 </conditional>
1751 </xml> 1815 </xml>
1752 1816
1817 <xml name="stacking_voting_weights">
1818 <section name="options" title="Advanced Options" expanded="false">
1819 <param argument="weights" type="text" value="[]" optional="true" help="Sequence of weights (float or int). Uses uniform weights if None (`[]`).">
1820 <sanitizer>
1821 <valid initial="default">
1822 <add value="["/>
1823 <add value="]"/>
1824 </valid>
1825 </sanitizer>
1826 </param>
1827 <yield/>
1828 </section>
1829 </xml>
1830
1831 <xml name="preprocessors_sequence_encoders">
1832 <conditional name="encoder_selection">
1833 <param name="encoder_type" type="select" label="Choose the sequence encoder class">
1834 <option value="GenomeOneHotEncoder">GenomeOneHotEncoder</option>
1835 <option value="ProteinOneHotEncoder">ProteinOneHotEncoder</option>
1836 </param>
1837 <when value="GenomeOneHotEncoder">
1838 <expand macro="preprocessors_sequence_encoder_arguments"/>
1839 </when>
1840 <when value="ProteinOneHotEncoder">
1841 <expand macro="preprocessors_sequence_encoder_arguments"/>
1842 </when>
1843 </conditional>
1844 </xml>
1845
1846 <xml name="preprocessors_sequence_encoder_arguments">
1847 <param argument="seq_length" type="integer" value="" min="0" optional="true" help="Integer. Sequence length"/>
1848 <param argument="padding" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="Whether to pad or truncate sequence to meet the sequence length."/>
1849 </xml>
1850
1753 <!-- Outputs --> 1851 <!-- Outputs -->
1754 1852
1755 <xml name="output"> 1853 <xml name="output">
1756 <outputs> 1854 <outputs>
1757 <data format="tabular" name="outfile_predict"> 1855 <data format="tabular" name="outfile_predict">
1845 keywords = {large-scale machine learning}, 1943 keywords = {large-scale machine learning},
1846 } 1944 }
1847 </citation> 1945 </citation>
1848 </xml> 1946 </xml>
1849 1947
1850 <xml name="imblearn_citation"> 1948 <xml name="imblearn_citation">
1851 <citation type="bibtex"> 1949 <citation type="bibtex">
1852 @article{JMLR:v18:16-365, 1950 @article{JMLR:v18:16-365,
1853 author = {Guillaume Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas}, 1951 author = {Guillaume Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas},
1854 title = {Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning}, 1952 title = {Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning},
1855 journal = {Journal of Machine Learning Research}, 1953 journal = {Journal of Machine Learning Research},
1860 url = {http://jmlr.org/papers/v18/16-365.html} 1958 url = {http://jmlr.org/papers/v18/16-365.html}
1861 } 1959 }
1862 </citation> 1960 </citation>
1863 </xml> 1961 </xml>
1864 1962
1963 <xml name="selene_citation">
1964 <citation type="bibtex">
1965 @article{chen2019selene,
1966 title={Selene: a PyTorch-based deep learning library for sequence data},
1967 author={Chen, Kathleen M and Cofer, Evan M and Zhou, Jian and Troyanskaya, Olga G},
1968 journal={Nature methods},
1969 volume={16},
1970 number={4},
1971 pages={315},
1972 year={2019},
1973 publisher={Nature Publishing Group}
1974 }
1975 </citation>
1976 </xml>
1977
1865 </macros> 1978 </macros>