comparison qiime2/qiime_sample-classifier_regress-samples-ncv.xml @ 14:a0a8d77a991c draft

Uploaded
author florianbegusch
date Thu, 03 Sep 2020 09:51:29 +0000
parents f190567fe3f6
children
comparison
equal deleted inserted replaced
13:887cd4ad8e16 14:a0a8d77a991c
1 <?xml version="1.0" ?> 1 <?xml version="1.0" ?>
2 <tool id="qiime_sample-classifier_regress-samples-ncv" name="qiime sample-classifier regress-samples-ncv" version="2019.7"> 2 <tool id="qiime_sample-classifier_regress-samples-ncv" name="qiime sample-classifier regress-samples-ncv"
3 <description> - Nested cross-validated supervised learning regressor.</description> 3 version="2020.8">
4 <requirements> 4 <description>Nested cross-validated supervised learning regressor.</description>
5 <requirement type="package" version="2019.7">qiime2</requirement> 5 <requirements>
6 </requirements> 6 <requirement type="package" version="2020.8">qiime2</requirement>
7 <command><![CDATA[ 7 </requirements>
8 <command><![CDATA[
8 qiime sample-classifier regress-samples-ncv 9 qiime sample-classifier regress-samples-ncv
9 10
10 --i-table=$itable 11 --i-table=$itable
11 --m-metadata-column="$mmetadatacolumn" 12 # if $input_files_mmetadatafile:
13 # def list_dict_to_string(list_dict):
14 # set $file_list = list_dict[0]['additional_input'].__getattr__('file_name')
15 # for d in list_dict[1:]:
16 # set $file_list = $file_list + ' --m-metadata-file=' + d['additional_input'].__getattr__('file_name')
17 # end for
18 # return $file_list
19 # end def
20 --m-metadata-file=$list_dict_to_string($input_files_mmetadatafile)
21 # end if
12 22
13 #if str($pcv): 23 #if '__ob__' in str($mmetadatacolumn):
14 --p-cv=$pcv 24 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__ob__', '[')
25 #set $mmetadatacolumn = $mmetadatacolumn_temp
26 #end if
27 #if '__cb__' in str($mmetadatacolumn):
28 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__cb__', ']')
29 #set $mmetadatacolumn = $mmetadatacolumn_temp
30 #end if
31 #if 'X' in str($mmetadatacolumn):
32 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('X', '\\')
33 #set $mmetadatacolumn = $mmetadatacolumn_temp
34 #end if
35 #if '__sq__' in str($mmetadatacolumn):
36 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__sq__', "'")
37 #set $mmetadatacolumn = $mmetadatacolumn_temp
38 #end if
39 #if '__db__' in str($mmetadatacolumn):
40 #set $mmetadatacolumn_temp = $mmetadatacolumn.replace('__db__', '"')
41 #set $mmetadatacolumn = $mmetadatacolumn_temp
15 #end if 42 #end if
16 43
17 #if str($prandomstate): 44 --m-metadata-column=$mmetadatacolumn
18 --p-random-state="$prandomstate"
19 #end if
20
21 #set $pnjobs = '${GALAXY_SLOTS:-4}'
22
23 #if str($pnjobs):
24 --p-n-jobs="$pnjobs"
25 #end if
26 45
27 46
28 #if str($pnestimators): 47 --p-cv=$pcv
29 --p-n-estimators=$pnestimators 48
49 #if str($prandomstate):
50 --p-random-state=$prandomstate
30 #end if 51 #end if
52 --p-n-jobs=$pnjobs
53
54 --p-n-estimators=$pnestimators
31 55
32 #if str($pestimator) != 'None': 56 #if str($pestimator) != 'None':
33 --p-estimator=$pestimator 57 --p-estimator=$pestimator
34 #end if 58 #end if
35 59
36 #if $pstratify: 60 #if $pstratify:
37 --p-stratify 61 --p-stratify
38 #end if 62 #end if
40 #if $pparametertuning: 64 #if $pparametertuning:
41 --p-parameter-tuning 65 --p-parameter-tuning
42 #end if 66 #end if
43 67
44 #if str($pmissingsamples) != 'None': 68 #if str($pmissingsamples) != 'None':
45 --p-missing-samples=$pmissingsamples 69 --p-missing-samples=$pmissingsamples
46 #end if 70 #end if
47 71
72 --o-predictions=opredictions
48 73
74 --o-feature-importance=ofeatureimportance
49 75
50 #if $metadatafile: 76 #if str($examples) != 'None':
51 --m-metadata-file=$metadatafile 77 --examples=$examples
52 #end if 78 #end if
53 79
80 ;
81 cp ofeatureimportance.qza $ofeatureimportance
54 82
83 ]]></command>
84 <inputs>
85 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data" />
86 <repeat name="input_files_mmetadatafile" optional="True" title="--m-metadata-file">
87 <param format="tabular,qza,no_unzip.zip" label="--m-metadata-file: METADATA" name="additional_input" optional="True" type="data" />
88 </repeat>
89 <param label="--m-metadata-column: COLUMN MetadataColumn[Numeric] Numeric metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text" />
90 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" min="1" name="pcv" optional="True" type="integer" value="5" />
91 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="False" type="text" />
92 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" min="1" name="pnestimators" optional="True" type="integer" value="100" />
93 <param label="--p-estimator: " name="pestimator" optional="True" type="select">
94 <option selected="True" value="None">Selection is Optional</option>
95 <option value="RandomForestRegressor">RandomForestRegressor</option>
96 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
97 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
98 <option value="AdaBoostRegressor">AdaBoostRegressor</option>
99 <option value="ElasticNet">ElasticNet</option>
100 <option value="Ridge">Ridge</option>
101 <option value="Lasso">Lasso</option>
102 <option value="KNeighborsRegressor">KNeighborsRegressor</option>
103 <option value="LinearSVR">LinearSVR</option>
104 <option value="SVR">SVR</option>
105 </param>
106 <param label="--p-stratify: --p-stratify: / --p-no-stratify Evenly stratify training and test data among metadata categories. If True, all values in column must match at least two samples. [default: False]" name="pstratify" selected="False" type="boolean" />
107 <param label="--p-parameter-tuning: --p-parameter-tuning: / --p-no-parameter-tuning Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" selected="False" type="boolean" />
108 <param label="--p-missing-samples: " name="pmissingsamples" optional="True" type="select">
109 <option selected="True" value="None">Selection is Optional</option>
110 <option value="error">error</option>
111 <option value="ignore">ignore</option>
112 </param>
113 <param label="--examples: Show usage examples and exit." name="examples" optional="False" type="data" />
114
115 </inputs>
55 116
56 --o-predictions=opredictions 117 <outputs>
57 --o-feature-importance=ofeatureimportance 118 <data format="qza" label="${tool.name} on ${on_string}: predictions.qza" name="opredictions" />
58 ; 119 <data format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" name="ofeatureimportance" />
59 cp opredictions.qza $opredictions; 120
60 cp ofeatureimportance.qza $ofeatureimportance 121 </outputs>
61 ]]></command>
62 <inputs>
63 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data"/>
64 <param label="--m-metadata-column: COLUMN MetadataColumn[Numeric] Numeric metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text"/>
65 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" name="pcv" optional="True" type="integer" value="5" min="1"/>
66 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer"/>
67 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" name="pnestimators" optional="True" type="integer" value="100" min="1"/>
68 <param label="--p-estimator: " name="pestimator" optional="True" type="select">
69 <option selected="True" value="None">Selection is Optional</option>
70 <option value="RandomForestRegressor">RandomForestRegressor</option>
71 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
72 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
73 <option value="AdaBoostRegressor">AdaBoostRegressor</option>
74 <option value="ElasticNet">ElasticNet</option>
75 <option value="Ridge">Ridge</option>
76 <option value="Lasso">Lasso</option>
77 <option value="KNeighborsRegressor">KNeighborsRegressor</option>
78 <option value="LinearSVR">LinearSVR</option>
79 <option value="SVR">SVR</option>
80 </param>
81 <param label="--p-stratify: --p-no-stratify Evenly stratify training and test data among metadata categories. If True, all values in column must match at least two samples. [default: False]" name="pstratify" selected="False" type="boolean"/>
82 <param label="--p-parameter-tuning: --p-no-parameter-tuning Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" selected="False" type="boolean"/>
83 <param label="--p-missing-samples: " name="pmissingsamples" optional="True" type="select">
84 <option selected="True" value="None">Selection is Optional</option>
85 <option value="error">error</option>
86 <option value="ignore">ignore</option>
87 </param>
88 122
123 <help><![CDATA[
124 Nested cross-validated supervised learning regressor.
125 ###############################################################
89 126
90 <param label="--m-metadata-file METADATA" name="metadatafile" type="data" format="tabular,qza,no_unzip.zip" /> 127 Predicts a continuous sample metadata column using a supervised learning
91 128 regressor. Uses nested stratified k-fold cross validation for automated
92 </inputs>
93 <outputs>
94 <data format="qza" label="${tool.name} on ${on_string}: predictions.qza" name="opredictions"/>
95 <data format="qza" label="${tool.name} on ${on_string}: featureimportance.qza" name="ofeatureimportance"/>
96 </outputs>
97 <help><![CDATA[
98 Nested cross-validated supervised learning classifier.
99 ######################################################
100
101 Predicts a categorical sample metadata column using a supervised learning
102 classifier. Uses nested stratified k-fold cross validation for automated
103 hyperparameter optimization and sample prediction. Outputs predicted values 129 hyperparameter optimization and sample prediction. Outputs predicted values
104 for each input sample, and relative importance of each feature for model 130 for each input sample, and relative importance of each feature for model
105 accuracy. 131 accuracy.
106 132
107 Parameters 133 Parameters
108 ---------- 134 ----------
109 table : FeatureTable[Frequency] 135 table : FeatureTable[Frequency]
110 Feature table containing all features that should be used for target 136 Feature table containing all features that should be used for target
111 prediction. 137 prediction.
112 metadata : MetadataColumn[Categorical] 138 metadata : MetadataColumn[Numeric]
113 Categorical metadata column to use as prediction target. 139 Numeric metadata column to use as prediction target.
114 cv : Int % Range(1, None), optional 140 cv : Int % Range(1, None), optional
115 Number of k-fold cross-validations to perform. 141 Number of k-fold cross-validations to perform.
116 random_state : Int, optional 142 random_state : Int, optional
117 Seed used by random number generator. 143 Seed used by random number generator.
144 n_jobs : Int, optional
145 Number of jobs to run in parallel.
118 n_estimators : Int % Range(1, None), optional 146 n_estimators : Int % Range(1, None), optional
119 Number of trees to grow for estimation. More trees will improve 147 Number of trees to grow for estimation. More trees will improve
120 predictive accuracy up to a threshold level, but will also increase 148 predictive accuracy up to a threshold level, but will also increase
121 time and memory requirements. This parameter only affects ensemble 149 time and memory requirements. This parameter only affects ensemble
122 estimators, such as Random Forest, AdaBoost, ExtraTrees, and 150 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
123 GradientBoosting. 151 GradientBoosting.
124 estimator : Str % Choices('RandomForestClassifier', 'ExtraTreesClassifier', 'GradientBoostingClassifier', 'AdaBoostClassifier', 'KNeighborsClassifier', 'LinearSVC', 'SVC'), optional 152 estimator : Str % Choices('RandomForestRegressor', 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', 'ElasticNet', 'Ridge', 'Lasso', 'KNeighborsRegressor', 'LinearSVR', 'SVR'), optional
125 Estimator method to use for sample prediction. 153 Estimator method to use for sample prediction.
154 stratify : Bool, optional
155 Evenly stratify training and test data among metadata categories. If
156 True, all values in column must match at least two samples.
126 parameter_tuning : Bool, optional 157 parameter_tuning : Bool, optional
127 Automatically tune hyperparameters using random grid search. 158 Automatically tune hyperparameters using random grid search.
128 missing_samples : Str % Choices('error', 'ignore'), optional 159 missing_samples : Str % Choices('error', 'ignore'), optional
129 How to handle missing samples in metadata. "error" will fail if missing 160 How to handle missing samples in metadata. "error" will fail if missing
130 samples are detected. "ignore" will cause the feature table and 161 samples are detected. "ignore" will cause the feature table and
131 metadata to be filtered, so that only samples found in both files are 162 metadata to be filtered, so that only samples found in both files are
132 retained. 163 retained.
133 164
134 Returns 165 Returns
135 ------- 166 -------
136 predictions : SampleData[ClassifierPredictions] 167 predictions : SampleData[RegressorPredictions]
137 Predicted target values for each input sample. 168 Predicted target values for each input sample.
138 feature_importance : FeatureData[Importance] 169 feature_importance : FeatureData[Importance]
139 Importance of each input feature to model accuracy. 170 Importance of each input feature to model accuracy.
140 ]]></help> 171 ]]></help>
141 <macros> 172 <macros>
142 <import>qiime_citation.xml</import> 173 <import>qiime_citation.xml</import>
143 </macros> 174 </macros>
144 <expand macro="qiime_citation"/> 175 <expand macro="qiime_citation"/>
145 </tool> 176 </tool>