comparison qiime2/qiime_longitudinal_maturity-index.xml @ 0:370e0b6e9826 draft

Uploaded
author florianbegusch
date Wed, 17 Jul 2019 03:05:17 -0400
parents
children 914fa4daf16a
comparison
equal deleted inserted replaced
-1:000000000000 0:370e0b6e9826
1 <?xml version="1.0" ?>
2 <tool id="qiime_longitudinal_maturity-index" name="qiime longitudinal maturity-index" version="2019.4">
3 <description> - Microbial maturity index prediction.</description>
4 <requirements>
5 <requirement type="package" version="2019.4">qiime2</requirement>
6 </requirements>
7 <command><![CDATA[
8 qiime longitudinal maturity-index
9 ## Required arguments: the feature table plus the state, group-by and control settings.
10 --i-table=$itable
11 --p-state-column="$pstatecolumn"
12 --p-group-by="$pgroupby"
13 --p-control="$pcontrol"
14 ## Optional arguments below are emitted only when the matching Galaxy parameter is set.
15 #if str($pindividualidcolumn):
16 --p-individual-id-column="$pindividualidcolumn"
17 #end if
18
19 #if str($pestimator) != 'None':
20 --p-estimator=$pestimator
21 #end if
22
23 #if $pnestimators:
24 --p-n-estimators=$pnestimators
25 #end if
26
27 #if $ptestsize:
28 --p-test-size=$ptestsize
29 #end if
30
31 #if $pstep:
32 --p-step=$pstep
33 #end if
34
35 #if $pcv:
36 --p-cv=$pcv
37 #end if
38
39 #if str($prandomstate):
40 --p-random-state="$prandomstate"
41 #end if
42 ## pnjobs is not a tool input; it is assigned right here from GALAXY_SLOTS with a fallback of 4.
43 #set $pnjobs = '${GALAXY_SLOTS:-4}'
44 #if str($pnjobs):
45 --p-n-jobs="$pnjobs"
46 #end if
47
48
49 #if $pparametertuning:
50 --p-parameter-tuning
51 #end if
52
53 #if $poptimizefeatureselection:
54 --p-optimize-feature-selection
55 #end if
56
57 #if $pstratify:
58 --p-stratify
59 #end if
60
61 #if str($pmissingsamples) != 'None':
62 --p-missing-samples=$pmissingsamples
63 #end if
64 ## Test the string form, not truthiness: 0 is a documented value (include all features) and must still be passed.
65 #if str($pfeaturecount):
66 --p-feature-count=$pfeaturecount
67 #end if
68
69 ## Every entry of the metadata repeat is emitted as its own metadata-file argument.
70 #if $input_files_mmetadatafile:
71 #def list_dict_to_string(list_dict):
72 #set $file_list = list_dict[0]['additional_input'].__getattr__('file_name')
73 #for d in list_dict[1:]:
74 #set $file_list = $file_list + ' --m-metadata-file=' + d['additional_input'].__getattr__('file_name')
75 #end for
76 #return $file_list
77 #end def
78 --m-metadata-file=$list_dict_to_string($input_files_mmetadatafile)
79 #end if
80 ## Results are written under fixed names in the working directory, then copied or exported to the Galaxy outputs.
81 ## Each visualization is exported to its own directory so its files cannot leak into another output's files_path.
82 --o-sample-estimator=osampleestimator
83 --o-feature-importance=ofeatureimportance
84 --o-predictions=opredictions
85 --o-model-summary=omodelsummary
86 --o-accuracy-results=oaccuracyresults
87 --o-maz-scores=omazscores
88 --o-clustermap=oclustermap
89 --o-volatility-plots=ovolatilityplots
90 ;
91 cp osampleestimator.qza '$osampleestimator';
92 cp ofeatureimportance.qza '$ofeatureimportance';
93 cp opredictions.qza '$opredictions';
94 qiime tools export --input-path omodelsummary.qzv --output-path out_modelsummary && mkdir -p '$omodelsummary.files_path'
95 && cp -r out_modelsummary/* '$omodelsummary.files_path'
96 && mv '$omodelsummary.files_path/index.html' '$omodelsummary';
97 qiime tools export --input-path oaccuracyresults.qzv --output-path out_accuracyresults && mkdir -p '$oaccuracyresults.files_path'
98 && cp -r out_accuracyresults/* '$oaccuracyresults.files_path'
99 && mv '$oaccuracyresults.files_path/index.html' '$oaccuracyresults';
100 cp omazscores.qza '$omazscores';
101 qiime tools export --input-path oclustermap.qzv --output-path out_clustermap && mkdir -p '$oclustermap.files_path'
102 && cp -r out_clustermap/* '$oclustermap.files_path'
103 && mv '$oclustermap.files_path/index.html' '$oclustermap';
104 qiime tools export --input-path ovolatilityplots.qzv --output-path out_volatilityplots && mkdir -p '$ovolatilityplots.files_path'
105 && cp -r out_volatilityplots/* '$ovolatilityplots.files_path'
106 && mv '$ovolatilityplots.files_path/index.html' '$ovolatilityplots'
107 ]]></command>
108 <inputs>
109 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data"/>
110 <param label="--p-state-column: TEXT Numeric metadata column containing sampling time (state) data to use as prediction target. [required]" name="pstatecolumn" optional="False" type="text"/>
111 <param label="--p-group-by: TEXT Categorical metadata column to use for plotting and significance testing between main treatment groups. [required]" name="pgroupby" optional="False" type="text"/>
112 <param label="--p-control: TEXT Value of group-by to use as control group. The regression model will be trained using only control group data, and the maturity scores of other groups consequently will be assessed relative to this group. [required]" name="pcontrol" optional="False" type="text"/>
113 <param label="--p-individual-id-column: TEXT Optional metadata column containing IDs for individual subjects. Adds individual subject (spaghetti) vectors to volatility charts if a column name is provided. [optional]" name="pindividualidcolumn" optional="True" type="text"/>
114 <param label="--p-estimator: Regression model to use for prediction. [optional]" name="pestimator" optional="True" type="select">
115 <option selected="True" value="None">Selection is Optional</option>
116 <option value="RandomForestRegressor">RandomForestRegressor</option>
117 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
118 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
119 <option value="AdaBoostRegressor">AdaBoostRegressor</option>
120 <option value="ElasticNet">ElasticNet</option>
121 <option value="Ridge">Ridge</option>
122 <option value="Lasso">Lasso</option>
123 <option value="KNeighborsRegressor">KNeighborsRegressor</option>
124 <option value="LinearSVR">LinearSVR</option>
125 <option value="SVR">SVR</option>
126 </param>
127 <param label="--p-n-estimators: INTEGER Range(1, None) Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" name="pnestimators" optional="True" type="integer" min="1" value="100"/>
128 <param label="--p-test-size: PROPORTION Range(0.0, 1.0, inclusive_start=False) Fraction of input samples to exclude from training set and use for classifier testing. [default: 0.5]" name="ptestsize" optional="True" type="float" exclusive_start="True" min="0" max="1" value="0.5"/>
129 <param label="--p-step: PROPORTION Range(0.0, 1.0, inclusive_start=False) If optimize-feature-selection is True, step is the percentage of features to remove at each iteration. [default: 0.05]" name="pstep" optional="True" type="float" exclusive_start="True" min="0" max="1" value="0.05"/>
130 <param label="--p-cv: INTEGER Number of k-fold cross-validations to perform. Range(1, None) [default: 5]" name="pcv" optional="True" type="integer" min="1" value="5"/>
131 <param label="--p-random-state: INTEGER Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="integer"/>
132 <param label="--p-parameter-tuning: --p-no-parameter-tuning Automatically tune hyperparameters using random grid search. [default: False]" name="pparametertuning" checked="False" type="boolean"/>
133 <param label="--p-optimize-feature-selection: --p-no-optimize-feature-selection Automatically optimize input feature selection using recursive feature elimination. [default: False]" name="poptimizefeatureselection" checked="False" type="boolean"/>
134 <param label="--p-stratify: --p-no-stratify Evenly stratify training and test data among metadata categories. If True, all values in column must match at least two samples. [default: False]" name="pstratify" checked="False" type="boolean"/>
135 <param label="--p-missing-samples: How to handle missing samples in metadata. 'error' will fail if missing samples are detected; 'ignore' will filter the feature table and metadata so that only samples found in both files are retained. [optional]" name="pmissingsamples" optional="True" type="select">
136 <option selected="True" value="None">Selection is Optional</option>
137 <option value="error">error</option>
138 <option value="ignore">ignore</option>
139 </param>
140 <param label="--p-feature-count: INTEGER Range(0, None) Filter feature table to include top N most important features. Set to zero to include all features. [default: 50]" name="pfeaturecount" optional="True" type="integer" min="0" value="50"/>
141 <!-- Repeatable metadata input: every entry added here is passed to the command as a separate metadata-file argument. -->
142 <repeat name="input_files_mmetadatafile" optional="True" title="--m-metadata-file">
143 <param label="--m-metadata-file: Metadata file or artifact viewable as metadata. This option may be supplied multiple times to merge metadata. [optional]" name="additional_input" type="data" format="tabular,qza,no_unzip.zip" />
144 </repeat> </inputs>
145 <!-- Four qza artifacts plus four visualizations; the visualizations are delivered as html datasets. -->
146 <outputs>
147 <data name="osampleestimator" format="qza" label="${tool.name} on ${on_string}: sampleestimator.qza"/>
148 <data name="ofeatureimportance" format="qza" label="${tool.name} on ${on_string}: featureimportance.qza"/>
149 <data name="opredictions" format="qza" label="${tool.name} on ${on_string}: predictions.qza"/>
150 <data name="omodelsummary" format="html" label="${tool.name} on ${on_string}: modelsummary.qzv"/>
151 <data name="oaccuracyresults" format="html" label="${tool.name} on ${on_string}: accuracyresults.qzv"/>
152 <data name="omazscores" format="qza" label="${tool.name} on ${on_string}: mazscores.qza"/>
153 <data name="oclustermap" format="html" label="${tool.name} on ${on_string}: clustermap.qzv"/>
154 <data name="ovolatilityplots" format="html" label="${tool.name} on ${on_string}: volatilityplots.qzv"/>
155 </outputs>
156 <help><![CDATA[
157 Microbial maturity index prediction.
158 ####################################
159
160 Calculates a "microbial maturity" index from a regression model trained on
161 feature data to predict a given continuous metadata column, e.g., to
162 predict age as a function of microbiota composition. The model is trained
163 on a subset of control group samples, then predicts the column value for
164 all samples. This visualization computes maturity index z-scores to compare
165 relative "maturity" between each group, as described in
166 doi:10.1038/nature13421. This method can be used to predict between-group
167 differences in relative trajectory across any type of continuous metadata
168 gradient, e.g., intestinal microbiome development by age, microbial
169 succession during wine fermentation, or microbial community differences
170 along environmental gradients, as a function of two or more different
171 "treatment" groups.
172
173 Parameters
174 ----------
175 table : FeatureTable[Frequency]
176 Feature table containing all features that should be used for target
177 prediction.
178 metadata : Metadata
179 Sample metadata containing the state, group-by and (optionally) individual ID columns named below.
180 state_column : Str
181 Numeric metadata column containing sampling time (state) data to use as
182 prediction target.
183 group_by : Str
184 Categorical metadata column to use for plotting and significance
185 testing between main treatment groups.
186 control : Str
187 Value of group_by to use as control group. The regression model will be
188 trained using only control group data, and the maturity scores of other
189 groups consequently will be assessed relative to this group.
190 individual_id_column : Str, optional
191 Optional metadata column containing IDs for individual subjects. Adds
192 individual subject (spaghetti) vectors to volatility charts if a column
193 name is provided.
194 estimator : Str % Choices('RandomForestRegressor', 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', 'ElasticNet', 'Ridge', 'Lasso', 'KNeighborsRegressor', 'LinearSVR', 'SVR'), optional
195 Regression model to use for prediction.
196 n_estimators : Int % Range(1, None), optional
197 Number of trees to grow for estimation. More trees will improve
198 predictive accuracy up to a threshold level, but will also increase
199 time and memory requirements. This parameter only affects ensemble
200 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
201 GradientBoosting.
202 test_size : Float % Range(0.0, 1.0, inclusive_start=False), optional
203 Fraction of input samples to exclude from training set and use for
204 classifier testing.
205 step : Float % Range(0.0, 1.0, inclusive_start=False), optional
206 If optimize_feature_selection is True, step is the percentage of
207 features to remove at each iteration.
208 cv : Int % Range(1, None), optional
209 Number of k-fold cross-validations to perform.
210 random_state : Int, optional
211 Seed used by random number generator.
212 parameter_tuning : Bool, optional
213 Automatically tune hyperparameters using random grid search.
214 optimize_feature_selection : Bool, optional
215 Automatically optimize input feature selection using recursive feature
216 elimination.
217 stratify : Bool, optional
218 Evenly stratify training and test data among metadata categories. If
219 True, all values in column must match at least two samples.
220 missing_samples : Str % Choices('error', 'ignore'), optional
221 How to handle missing samples in metadata. "error" will fail if missing
222 samples are detected. "ignore" will cause the feature table and
223 metadata to be filtered, so that only samples found in both files are
224 retained.
225 feature_count : Int % Range(0, None), optional
226 Filter feature table to include top N most important features. Set to
227 zero to include all features.
228
229 Returns
230 -------
231 sample_estimator : SampleEstimator[Regressor]
232 Trained sample estimator.
233 feature_importance : FeatureData[Importance]
234 Importance of each input feature to model accuracy.
235 predictions : SampleData[RegressorPredictions]
236 Predicted target values for each input sample.
237 model_summary : Visualization
238 Summarized parameter and (if enabled) feature selection information for
239 the trained estimator.
240 accuracy_results : Visualization
241 Accuracy results visualization.
242 maz_scores : SampleData[RegressorPredictions]
243 Microbiota-for-age z-score predictions.
244 clustermap : Visualization
245 Heatmap of important feature abundance at each time point in each
246 group.
247 volatility_plots : Visualization
248 Interactive volatility plots of MAZ and maturity scores, target
249 (column) predictions, and the sample metadata.
250 ]]></help>
251 <macros> <!-- macro definitions are imported from an external file rather than defined inline -->
252 <import>qiime_citation.xml</import>
253 </macros>
254 <expand macro="qiime_citation"/> <!-- expands the macro named qiime_citation; presumably the citation block defined in qiime_citation.xml (verify there) -->
255 </tool>