comparison qiime2/qiime_sample-classifier_classify-samples.xml @ 0:370e0b6e9826 draft

Uploaded
author florianbegusch
date Wed, 17 Jul 2019 03:05:17 -0400
parents
children a025a4a89e07
comparison
equal deleted inserted replaced
-1:000000000000 0:370e0b6e9826
1 <?xml version="1.0" ?>
2 <tool id="qiime_sample-classifier_classify-samples" name="qiime sample-classifier classify-samples" version="2019.4">
3 <description> - Train and test a cross-validated supervised learning classifier.</description>
4 <requirements> <!-- qiime2 is pinned to 2019.4, the same value as the version attribute on the tool element -->
5 <requirement type="package" version="2019.4">qiime2</requirement>
6 </requirements>
7 <command><![CDATA[
8 qiime sample-classifier classify-samples
9 ## Required inputs: the feature table artifact and the metadata column to predict.
10 --i-table=$itable
11 --m-metadata-column="$mmetadatacolumn"
12 ## Optional numeric parameters are only passed when a value is set.
13 #if $ptestsize:
14 --p-test-size=$ptestsize
15 #end if
16
17 #if $pstep:
18 --p-step=$pstep
19 #end if
20
21 #if $pcv:
22 --p-cv=$pcv
23 #end if
24 ## The seed is compared as a string so that a value of 0 is still passed through.
25 #if str($prandomstate):
26 --p-random-state="$prandomstate"
27 #end if
28 ## The job count is resolved by the shell at run time from GALAXY_SLOTS, falling back to 4.
29 #set $pnjobs = '${GALAXY_SLOTS:-4}'
30
31 #if str($pnjobs):
32 --p-n-jobs="$pnjobs"
33 #end if
34
35
36 #if $pnestimators:
37 --p-n-estimators=$pnestimators
38 #end if
39 ## Select parameters use the literal value None to mean that nothing was chosen.
40 #if str($pestimator) != 'None':
41 --p-estimator=$pestimator
42 #end if
43
44 #if $poptimizefeatureselection:
45 --p-optimize-feature-selection
46 #end if
47
48 #if $pparametertuning:
49 --p-parameter-tuning
50 #end if
51
52 #if str($ppalette) != 'None':
53 --p-palette=$ppalette
54 #end if
55
56 #if str($pmissingsamples) != 'None':
57 --p-missing-samples=$pmissingsamples
58 #end if
59
60 ## Every entry of the metadata repeat contributes one m-metadata-file option.
61 #if $input_files_mmetadatafile:
62 #def list_dict_to_string(list_dict):
63 #set $file_list = list_dict[0]['additional_input'].__getattr__('file_name')
64 #for d in list_dict[1:]:
65 #set $file_list = $file_list + ' --m-metadata-file=' + d['additional_input'].__getattr__('file_name')
66 #end for
67 #return $file_list
68 #end def
69 --m-metadata-file=$list_dict_to_string($input_files_mmetadatafile)
70 #end if
71 ## Results are written to the working directory first and then copied to the Galaxy datasets; steps are chained so a failure stops the job.
72 ## Each visualization is exported into its own directory so files of one export never end up in the other dataset.
73 --o-sample-estimator=osampleestimator
74 --o-feature-importance=ofeatureimportance
75 --o-predictions=opredictions
76 --o-model-summary=omodelsummary
77 --o-accuracy-results=oaccuracyresults
78 &&
79 cp osampleestimator.qza '$osampleestimator' &&
80 cp ofeatureimportance.qza '$ofeatureimportance' &&
81 cp opredictions.qza '$opredictions' &&
82 qiime tools export --input-path omodelsummary.qzv --output-path out_modelsummary && mkdir -p '$omodelsummary.files_path'
83 && cp -r out_modelsummary/* '$omodelsummary.files_path'
84 && mv '$omodelsummary.files_path/index.html' '$omodelsummary' &&
85 qiime tools export --input-path oaccuracyresults.qzv --output-path out_accuracyresults && mkdir -p '$oaccuracyresults.files_path'
86 && cp -r out_accuracyresults/* '$oaccuracyresults.files_path'
87 && mv '$oaccuracyresults.files_path/index.html' '$oaccuracyresults'
88 ]]></command>
89 <inputs>
90 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data"/>
91 <param label="--m-metadata-column: COLUMN MetadataColumn[Categorical] Categorical metadata column to use as prediction target. [required]" name="mmetadatacolumn" optional="False" type="text"/>
92 <param label="--p-test-size: PROPORTION Range(0.0, 1.0, inclusive_start=False) Fraction of input samples to exclude from training set and use for classifier testing. [default: 0.2]" name="ptestsize" optional="True" type="float" value="0.2" min="0" max="1" exclusive_start="True"/>
93 <param label="--p-step: PROPORTION Range(0.0, 1.0, inclusive_start=False) If optimize-feature-selection is True, step is the percentage of features to remove at each iteration. [default: 0.05]" name="pstep" optional="True" type="float" value="0.05" min="0" max="1" exclusive_start="True"/>
94 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" name="pcv" optional="True" type="integer" value="5" min="1"/>
95 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer"/>
96 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" name="pnestimators" optional="True" type="integer" value="100" min="1"/>
97 <param label="--p-estimator: Estimator method to use for sample prediction." name="pestimator" optional="True" type="select">
98 <option selected="True" value="None">Selection is Optional</option>
99 <option value="RandomForestClassifier">RandomForestClassifier</option>
100 <option value="ExtraTreesClassifier">ExtraTreesClassifier</option>
101 <option value="GradientBoostingClassifier">GradientBoostingClassifier</option>
102 <option value="AdaBoostClassifier">AdaBoostClassifier</option>
103 <option value="KNeighborsClassifier">KNeighborsClassifier</option>
104 <option value="LinearSVC">LinearSVC</option>
105 <option value="SVC">SVC</option>
106 </param>
107 <param label="--p-optimize-feature-selection: --p-no-optimize-feature-selection Automatically optimize input feature selection using recursive feature elimination. [default: False]" name="poptimizefeatureselection" checked="False" type="boolean"/>
108 <param label="--p-parameter-tuning: --p-no-parameter-tuning Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" checked="False" type="boolean"/>
109 <param label="--p-palette: The color palette to use for plotting." name="ppalette" optional="True" type="select">
110 <option selected="True" value="None">Selection is Optional</option>
111 <option value="YellowOrangeBrown">YellowOrangeBrown</option>
112 <option value="YellowOrangeRed">YellowOrangeRed</option>
113 <option value="OrangeRed">OrangeRed</option>
114 <option value="PurpleRed">PurpleRed</option>
115 <option value="RedPurple">RedPurple</option>
116 <option value="BluePurple">BluePurple</option>
117 <option value="GreenBlue">GreenBlue</option>
118 <option value="PurpleBlue">PurpleBlue</option>
119 <option value="YellowGreen">YellowGreen</option>
120 <option value="summer">summer</option>
121 <option value="copper">copper</option>
122 <option value="viridis">viridis</option>
123 <option value="plasma">plasma</option>
124 <option value="inferno">inferno</option>
125 <option value="magma">magma</option>
126 <option value="sirocco">sirocco</option>
127 <option value="drifting">drifting</option>
128 <option value="melancholy">melancholy</option>
129 <option value="enigma">enigma</option>
130 <option value="eros">eros</option>
131 <option value="spectre">spectre</option>
132 <option value="ambition">ambition</option>
133 <option value="mysteriousstains">mysteriousstains</option>
134 <option value="daydream">daydream</option>
135 <option value="solano">solano</option>
136 <option value="navarro">navarro</option>
137 <option value="dandelions">dandelions</option>
138 <option value="deepblue">deepblue</option>
139 <option value="verve">verve</option>
140 <option value="greyscale">greyscale</option>
141 </param>
142 <param label="--p-missing-samples: How to handle missing samples in metadata: 'error' fails if missing samples are detected; 'ignore' filters the feature table and metadata so that only samples found in both files are retained." name="pmissingsamples" optional="True" type="select">
143 <option selected="True" value="None">Selection is Optional</option>
144 <option value="error">error</option>
145 <option value="ignore">ignore</option>
146 </param>
147 <!-- Repeatable metadata input: the command template emits one m-metadata-file option per entry added here. -->
148 <repeat name="input_files_mmetadatafile" optional="True" title="--m-metadata-file">
149 <param label="--m-metadata-file: Metadata file or artifact viewable as metadata. This option may be supplied multiple times to merge metadata. [optional]" name="additional_input" type="data" format="tabular,qza,no_unzip.zip" />
150 </repeat>
151 <!-- Select params above use the literal option value None as their unset state; the command template tests for that string. -->
152 </inputs>
153 <outputs> <!-- three qza datasets followed by two html datasets; names match the variables used in the command template -->
154 <data name="osampleestimator" format="qza" label="${tool.name} on ${on_string}: sampleestimator.qza"/>
155 <data name="ofeatureimportance" format="qza" label="${tool.name} on ${on_string}: featureimportance.qza"/>
156 <data name="opredictions" format="qza" label="${tool.name} on ${on_string}: predictions.qza"/>
157 <data name="omodelsummary" format="html" label="${tool.name} on ${on_string}: modelsummary.qzv"/>
158 <data name="oaccuracyresults" format="html" label="${tool.name} on ${on_string}: accuracyresults.qzv"/>
159 </outputs>
160 <help><![CDATA[
161 Train and test a cross-validated supervised learning classifier.
162 ################################################################
163
164 Predicts a categorical sample metadata column using a supervised learning
165 classifier. Splits input data into training and test sets. The training set
166 is used to train and test the estimator using a stratified k-fold cross-
167 validation scheme. This includes optional steps for automated feature
168 extraction and hyperparameter optimization. The test set validates
169 classification accuracy of the optimized estimator. Outputs classification
170 results for the test set. For more details on the learning algorithm, see
171 http://scikit-learn.org/stable/supervised_learning.html
172
173 Parameters
174 ----------
175 table : FeatureTable[Frequency]
176 Feature table containing all features that should be used for target
177 prediction.
178 metadata : MetadataColumn[Categorical]
179 Categorical metadata column to use as prediction target.
180 test_size : Float % Range(0.0, 1.0, inclusive_start=False), optional
181 Fraction of input samples to exclude from training set and use for
182 classifier testing.
183 step : Float % Range(0.0, 1.0, inclusive_start=False), optional
184 If optimize_feature_selection is True, step is the percentage of
185 features to remove at each iteration.
186 cv : Int % Range(1, None), optional
187 Number of k-fold cross-validations to perform.
188 random_state : Int, optional
189 Seed used by random number generator.
190 n_estimators : Int % Range(1, None), optional
191 Number of trees to grow for estimation. More trees will improve
192 predictive accuracy up to a threshold level, but will also increase
193 time and memory requirements. This parameter only affects ensemble
194 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
195 GradientBoosting.
196 estimator : Str % Choices('RandomForestClassifier', 'ExtraTreesClassifier', 'GradientBoostingClassifier', 'AdaBoostClassifier', 'KNeighborsClassifier', 'LinearSVC', 'SVC'), optional
197 Estimator method to use for sample prediction.
198 optimize_feature_selection : Bool, optional
199 Automatically optimize input feature selection using recursive feature
200 elimination.
201 parameter_tuning : Bool, optional
202 Automatically tune hyperparameters using random grid search.
203 palette : Str % Choices('YellowOrangeBrown', 'YellowOrangeRed', 'OrangeRed', 'PurpleRed', 'RedPurple', 'BluePurple', 'GreenBlue', 'PurpleBlue', 'YellowGreen', 'summer', 'copper', 'viridis', 'plasma', 'inferno', 'magma', 'sirocco', 'drifting', 'melancholy', 'enigma', 'eros', 'spectre', 'ambition', 'mysteriousstains', 'daydream', 'solano', 'navarro', 'dandelions', 'deepblue', 'verve', 'greyscale'), optional
204 The color palette to use for plotting.
205 missing_samples : Str % Choices('error', 'ignore'), optional
206 How to handle missing samples in metadata. "error" will fail if missing
207 samples are detected. "ignore" will cause the feature table and
208 metadata to be filtered, so that only samples found in both files are
209 retained.
210
211 Returns
212 -------
213 sample_estimator : SampleEstimator[Classifier]
214 Trained sample estimator.
215 feature_importance : FeatureData[Importance]
216 Importance of each input feature to model accuracy.
217 predictions : SampleData[ClassifierPredictions]
218 Predicted target values for each input sample.
219 model_summary : Visualization
220 Summarized parameter and (if enabled) feature selection information for
221 the trained estimator.
222 accuracy_results : Visualization
223 Accuracy results visualization.
224 ]]></help>
225 <macros> <!-- imports macro definitions from the external file qiime_citation.xml -->
226 <import>qiime_citation.xml</import>
227 </macros>
228 <expand macro="qiime_citation"/> <!-- expands the imported macro here; presumably it supplies the citations element, but its body is not visible in this file, so confirm in qiime_citation.xml -->
229 </tool>