Mercurial > repos > florianbegusch > qiime2_suite
comparison qiime2/qiime_sample-classifier_regress-samples-ncv.xml @ 14:a0a8d77a991c draft
Uploaded
author | florianbegusch |
---|---|
date | Thu, 03 Sep 2020 09:51:29 +0000 |
parents | f190567fe3f6 |
children |
comparison
equal
deleted
inserted
replaced
13:887cd4ad8e16 | 14:a0a8d77a991c |
---|---|
1 <?xml version="1.0" ?> | 1 <?xml version="1.0" ?> |
2 <tool id="qiime_sample-classifier_regress-samples-ncv" name="qiime sample-classifier regress-samples-ncv" version="2019.7"> | 2 <tool id="qiime_sample-classifier_regress-samples-ncv" name="qiime sample-classifier regress-samples-ncv" |
3 <description> - Nested cross-validated supervised learning regressor.</description> | 3 version="2020.8"> |
4 <requirements> | 4 <description>Nested cross-validated supervised learning regressor.</description> |
5 <requirement type="package" version="2019.7">qiime2</requirement> | 5 <requirements> |
6 </requirements> | 6 <requirement type="package" version="2020.8">qiime2</requirement> |
7 <command><![CDATA[ | 7 </requirements> |
8 <command><![CDATA[ | |
8 qiime sample-classifier regress-samples-ncv | 9 qiime sample-classifier regress-samples-ncv |
9 | 10 |
10 --i-table=$itable | 11 --i-table=$itable |
11 --m-metadata-column="$mmetadatacolumn" | 12 # if $input_files_mmetadatafile: |
13 # def list_dict_to_string(list_dict): | |
14 # set $file_list = list_dict[0]['additional_input'].__getattr__('file_name') | |
15 # for d in list_dict[1:]: | |
16 # set $file_list = $file_list + ' --m-metadata-file=' + d['additional_input'].__getattr__('file_name') | |
17 # end for | |
18 # return $file_list | |
19 # end def | |
20 --m-metadata-file=$list_dict_to_string($input_files_mmetadatafile) | |
21 # end if | |
12 | 22 |
13 #if str($pcv): | 23 #if '__ob__' in str($mmetadatacolumn): |
14 --p-cv=$pcv | 24 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__ob__', '[') |
25 #set $mmetadatacolumn = $mmetadatacolumn_temp | |
26 #end if | |
27 #if '__cb__' in str($mmetadatacolumn): | |
28 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__cb__', ']') | |
29 #set $mmetadatacolumn = $mmetadatacolumn_temp | |
30 #end if | |
31 #if 'X' in str($mmetadatacolumn): | |
32 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('X', '\\') | |
33 #set $mmetadatacolumn = $mmetadatacolumn_temp | |
34 #end if | |
35 #if '__sq__' in str($mmetadatacolumn): | |
36 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__sq__', "'") | |
37 #set $mmetadatacolumn = $mmetadatacolumn_temp | |
38 #end if | |
39 #if '__db__' in str($mmetadatacolumn): | |
40 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__db__', '"') | |
41 #set $mmetadatacolumn = $mmetadatacolumn_temp | |
15 #end if | 42 #end if |
16 | 43 |
17 #if str($prandomstate): | 44 --m-metadata-column=$mmetadatacolumn |
18 --p-random-state="$prandomstate" | |
19 #end if | |
20 | |
21 #set $pnjobs = '${GALAXY_SLOTS:-4}' | |
22 | |
23 #if str($pnjobs): | |
24 --p-n-jobs="$pnjobs" | |
25 #end if | |
26 | 45 |
27 | 46 |
28 #if str($pnestimators): | 47 --p-cv=$pcv |
29 --p-n-estimators=$pnestimators | 48 |
49 #if str($prandomstate): | |
50 --p-random-state=$prandomstate | |
30 #end if | 51 #end if |
52 --p-n-jobs=$pnjobs | |
53 | |
54 --p-n-estimators=$pnestimators | |
31 | 55 |
32 #if str($pestimator) != 'None': | 56 #if str($pestimator) != 'None': |
33 --p-estimator=$pestimator | 57 --p-estimator=$pestimator |
34 #end if | 58 #end if |
35 | 59 |
36 #if $pstratify: | 60 #if $pstratify: |
37 --p-stratify | 61 --p-stratify |
38 #end if | 62 #end if |
40 #if $pparametertuning: | 64 #if $pparametertuning: |
41 --p-parameter-tuning | 65 --p-parameter-tuning |
42 #end if | 66 #end if |
43 | 67 |
44 #if str($pmissingsamples) != 'None': | 68 #if str($pmissingsamples) != 'None': |
45 --p-missing-samples=$pmissingsamples | 69 --p-missing-samples=$pmissingsamples |
46 #end if | 70 #end if |
47 | 71 |
72 --o-predictions=opredictions | |
48 | 73 |
74 --o-feature-importance=ofeatureimportance | |
49 | 75 |
50 #if $metadatafile: | 76 #if str($examples) != 'None': |
51 --m-metadata-file=$metadatafile | 77 --examples=$examples |
52 #end if | 78 #end if |
53 | 79 |
80 ; | |
81 cp ofeatureimportance.qza $ofeatureimportance | |
54 | 82 |
83 ]]></command> | |
84 <inputs> | |
85 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data" /> | |
86 <repeat name="input_files_mmetadatafile" optional="True" title="--m-metadata-file"> | |
87 <param format="tabular,qza,no_unzip.zip" label="--m-metadata-file: METADATA" name="additional_input" optional="True" type="data" /> | |
88 </repeat> | |
89 <param label="--m-metadata-column: COLUMN MetadataColumn[Numeric] Numeric metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text" /> | |
90 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" min="1" name="pcv" optional="True" type="integer" value="5" /> | |
91 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="False" type="text" /> | |
92 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" min="1" name="pnestimators" optional="True" type="integer" value="100" /> | |
93 <param label="--p-estimator: " name="pestimator" optional="True" type="select"> | |
94 <option selected="True" value="None">Selection is Optional</option> | |
95 <option value="RandomForestRegressor">RandomForestRegressor</option> | |
96 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option> | |
97 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option> | |
98 <option value="AdaBoostRegressor">AdaBoostRegressor</option> | |
99 <option value="ElasticNet">ElasticNet</option> | |
100 <option value="Ridge">Ridge</option> | |
101 <option value="Lasso">Lasso</option> | |
102 <option value="KNeighborsRegressor">KNeighborsRegressor</option> | |
103 <option value="LinearSVR">LinearSVR</option> | |
104 <option value="SVR">SVR</option> | |
105 </param> | |
106 <param label="--p-stratify / --p-no-stratify: Evenly stratify training and test data among metadata categories. If True, all values in column must match at least two samples. [default: False]" name="pstratify" selected="False" type="boolean" /> | |
107 <param label="--p-parameter-tuning / --p-no-parameter-tuning: Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" selected="False" type="boolean" /> | |
108 <param label="--p-missing-samples: " name="pmissingsamples" optional="True" type="select"> | |
109 <option selected="True" value="None">Selection is Optional</option> | |
110 <option value="error">error</option> | |
111 <option value="ignore">ignore</option> | |
112 </param> | |
113 <param label="--examples: Show usage examples and exit." name="examples" optional="False" type="data" /> | |
114 | |
115 </inputs> | |
55 | 116 |
56 --o-predictions=opredictions | 117 <outputs> |
57 --o-feature-importance=ofeatureimportance | 118 <data format="qza" label="${tool.name} on ${on_string}: predictions.qza" name="opredictions" /> |
58 ; | 119 <data format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" name="ofeatureimportance" /> |
59 cp opredictions.qza $opredictions; | 120 |
60 cp ofeatureimportance.qza $ofeatureimportance | 121 </outputs> |
61 ]]></command> | |
62 <inputs> | |
63 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data"/> | |
64 <param label="--m-metadata-column: COLUMN MetadataColumn[Numeric] Numeric metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text"/> | |
65 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" name="pcv" optional="True" type="integer" value="5" min="1"/> | |
66 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer"/> | |
67 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" name="pnestimators" optional="True" type="integer" value="100" min="1"/> | |
68 <param label="--p-estimator: " name="pestimator" optional="True" type="select"> | |
69 <option selected="True" value="None">Selection is Optional</option> | |
70 <option value="RandomForestRegressor">RandomForestRegressor</option> | |
71 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option> | |
72 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option> | |
73 <option value="AdaBoostRegressor">AdaBoostRegressor</option> | |
74 <option value="ElasticNet">ElasticNet</option> | |
75 <option value="Ridge">Ridge</option> | |
76 <option value="Lasso">Lasso</option> | |
77 <option value="KNeighborsRegressor">KNeighborsRegressor</option> | |
78 <option value="LinearSVR">LinearSVR</option> | |
79 <option value="SVR">SVR</option> | |
80 </param> | |
81 <param label="--p-stratify: --p-no-stratify Evenly stratify training and test data among metadata categories. If True, all values in column must match at least two samples. [default: False]" name="pstratify" selected="False" type="boolean"/> | |
82 <param label="--p-parameter-tuning: --p-no-parameter-tuning Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" selected="False" type="boolean"/> | |
83 <param label="--p-missing-samples: " name="pmissingsamples" optional="True" type="select"> | |
84 <option selected="True" value="None">Selection is Optional</option> | |
85 <option value="error">error</option> | |
86 <option value="ignore">ignore</option> | |
87 </param> | |
88 | 122 |
123 <help><![CDATA[ | |
124 Nested cross-validated supervised learning regressor. | |
125 ############################################################### | |
89 | 126 |
90 <param label="--m-metadata-file METADATA" name="metadatafile" type="data" format="tabular,qza,no_unzip.zip" /> | 127 Predicts a continuous sample metadata column using a supervised learning |
91 | 128 regressor. Uses nested stratified k-fold cross validation for automated |
92 </inputs> | |
93 <outputs> | |
94 <data format="qza" label="${tool.name} on ${on_string}: predictions.qza" name="opredictions"/> | |
95 <data format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" name="ofeatureimportance"/> | |
96 </outputs> | |
97 <help><![CDATA[ | |
98 Nested cross-validated supervised learning classifier. | |
99 ###################################################### | |
100 | |
101 Predicts a categorical sample metadata column using a supervised learning | |
102 classifier. Uses nested stratified k-fold cross validation for automated | |
103 hyperparameter optimization and sample prediction. Outputs predicted values | 129 hyperparameter optimization and sample prediction. Outputs predicted values |
104 for each input sample, and relative importance of each feature for model | 130 for each input sample, and relative importance of each feature for model |
105 accuracy. | 131 accuracy. |
106 | 132 |
107 Parameters | 133 Parameters |
108 ---------- | 134 ---------- |
109 table : FeatureTable[Frequency] | 135 table : FeatureTable[Frequency] |
110 Feature table containing all features that should be used for target | 136 Feature table containing all features that should be used for target |
111 prediction. | 137 prediction. |
112 metadata : MetadataColumn[Categorical] | 138 metadata : MetadataColumn[Numeric] |
113 Categorical metadata column to use as prediction target. | 139 Numeric metadata column to use as prediction target. |
114 cv : Int % Range(1, None), optional | 140 cv : Int % Range(1, None), optional |
115 Number of k-fold cross-validations to perform. | 141 Number of k-fold cross-validations to perform. |
116 random_state : Int, optional | 142 random_state : Int, optional |
117 Seed used by random number generator. | 143 Seed used by random number generator. |
144 n_jobs : Int, optional | |
145 Number of jobs to run in parallel. | |
118 n_estimators : Int % Range(1, None), optional | 146 n_estimators : Int % Range(1, None), optional |
119 Number of trees to grow for estimation. More trees will improve | 147 Number of trees to grow for estimation. More trees will improve |
120 predictive accuracy up to a threshold level, but will also increase | 148 predictive accuracy up to a threshold level, but will also increase |
121 time and memory requirements. This parameter only affects ensemble | 149 time and memory requirements. This parameter only affects ensemble |
122 estimators, such as Random Forest, AdaBoost, ExtraTrees, and | 150 estimators, such as Random Forest, AdaBoost, ExtraTrees, and |
123 GradientBoosting. | 151 GradientBoosting. |
124 estimator : Str % Choices('RandomForestClassifier', 'ExtraTreesClassifier', 'GradientBoostingClassifier', 'AdaBoostClassifier', 'KNeighborsClassifier', 'LinearSVC', 'SVC'), optional | 152 estimator : Str % Choices('RandomForestRegressor', 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', 'ElasticNet', 'Ridge', 'Lasso', 'KNeighborsRegressor', 'LinearSVR', 'SVR'), optional |
125 Estimator method to use for sample prediction. | 153 Estimator method to use for sample prediction. |
154 stratify : Bool, optional | |
155 Evenly stratify training and test data among metadata categories. If | |
156 True, all values in column must match at least two samples. | |
126 parameter_tuning : Bool, optional | 157 parameter_tuning : Bool, optional |
127 Automatically tune hyperparameters using random grid search. | 158 Automatically tune hyperparameters using random grid search. |
128 missing_samples : Str % Choices('error', 'ignore'), optional | 159 missing_samples : Str % Choices('error', 'ignore'), optional |
129 How to handle missing samples in metadata. "error" will fail if missing | 160 How to handle missing samples in metadata. "error" will fail if missing |
130 samples are detected. "ignore" will cause the feature table and | 161 samples are detected. "ignore" will cause the feature table and |
131 metadata to be filtered, so that only samples found in both files are | 162 metadata to be filtered, so that only samples found in both files are |
132 retained. | 163 retained. |
133 | 164 |
134 Returns | 165 Returns |
135 ------- | 166 ------- |
136 predictions : SampleData[ClassifierPredictions] | 167 predictions : SampleData[RegressorPredictions] |
137 Predicted target values for each input sample. | 168 Predicted target values for each input sample. |
138 feature_importance : FeatureData[Importance] | 169 feature_importance : FeatureData[Importance] |
139 Importance of each input feature to model accuracy. | 170 Importance of each input feature to model accuracy. |
140 ]]></help> | 171 ]]></help> |
141 <macros> | 172 <macros> |
142 <import>qiime_citation.xml</import> | 173 <import>qiime_citation.xml</import> |
143 </macros> | 174 </macros> |
144 <expand macro="qiime_citation"/> | 175 <expand macro="qiime_citation"/> |
145 </tool> | 176 </tool> |