comparison qiime2/qiime_sample-classifier_maturity-index.xml @ 0:370e0b6e9826 draft

Uploaded
author florianbegusch
date Wed, 17 Jul 2019 03:05:17 -0400
parents
children 255f48db74f8
comparison
equal deleted inserted replaced
-1:000000000000 0:370e0b6e9826
1 <?xml version="1.0" ?>
2 <tool id="qiime_sample-classifier_maturity-index" name="qiime sample-classifier maturity-index" version="2019.4">
3 <description> - Microbial maturity index prediction.</description>
4 <requirements>
5 <requirement type="package" version="2019.4">qiime2</requirement>
6 </requirements>
7 <command>
8 <![CDATA[
9 qiime sample-classifier maturity-index --i-table=$itable
10
11 #def list_dict_to_string(list_dict):
12 #set $joined_paths = list_dict[0]['additional_input'].__getattr__('file_name')
13 #for entry in list_dict[1:]:
14 #set $joined_paths = $joined_paths + ' --m-metadata-file=' + entry['additional_input'].__getattr__('file_name')
15 #end for
16 #return $joined_paths
17 #end def
18
19 --m-metadata-file=$list_dict_to_string($input_files_mmetadatafile) --p-group-by="$pgroupby" --p-column="$pcolumn" --p-control="$pcontrol"
20
21 #set $pnjobs = '${GALAXY_SLOTS:-4}'
22
23 #if str($pnjobs):
24 --p-n-jobs="$pnjobs"
25 #end if
26
27
28 #if $pparametertuning:
29 --p-parameter-tuning
30 #else
31 --p-no-parameter-tuning
32 #end if
33
34 #if $pstep:
35 --p-step=$pstep
36 #end if
37
38 #if $pstratify:
39 --p-stratify
40 #else
41 --p-no-stratify
42 #end if
43
44 #if $poptimizefeatureselection:
45 --p-optimize-feature-selection
46 #else
47 --p-no-optimize-feature-selection
48 #end if
49
50 #if $ptestsize:
51 --p-test-size=$ptestsize
52 #end if
53 --o-visualization=ovisualization
54 #if str($pestimator) != 'None':
55 --p-estimator=$pestimator
56 #end if
57
58 #if $pmazstats:
59 --p-maz-stats
60 #else
61 --p-no-maz-stats
62 #end if
63
64 #if str($cmdconfig) != 'None':
65 --cmd-config=$cmdconfig
66 #end if
67
68 #if $pcv:
69 --p-cv=$pcv
70 #end if
71
72 #if $pnestimators:
73 --p-n-estimators=$pnestimators
74 #end if
75
76 #if str($prandomstate):
77 --p-random-state="$prandomstate"
78 #end if
79 && ## only export when the maturity-index run succeeded
80 qiime tools export --input-path ovisualization.qzv --output-path out && mkdir -p '$ovisualization.files_path'
81 && cp -r out/* '$ovisualization.files_path'
82 && mv '$ovisualization.files_path/index.html' '$ovisualization'
83 ]]>
84 </command>
85 <inputs>
86 <param format="qza,no_unzip.zip" label="--i-table: FeatureTable[Frequency] Feature table containing all features that should be used for target prediction. [required]" name="itable" optional="False" type="data"/>
87
88 <repeat name="input_files_mmetadatafile" min="1" title="--m-metadata-file"> <!-- min="1": the command template reads element [0] of this repeat, so at least one metadata file is required -->
89 <param label="--m-metadata-file: Metadata file or artifact viewable as metadata. This option may be supplied multiple times to merge metadata. [required]" name="additional_input" type="data" format="tabular,qza,no_unzip.zip" />
90 </repeat>
91
92 <param label="--p-column: Numeric metadata column to use as prediction target. [required]" name="pcolumn" optional="False" type="text"/>
93
94 <param label="--p-group-by: Categorical metadata column to use for plotting and significance testing between main treatment groups. [required]" name="pgroupby" optional="False" type="text"/>
95 <param label="--p-control: Value of group_by to use as control group. The regression model will be trained using only control group data, and the maturity scores of other groups consequently will be assessed relative to this group. [required]" name="pcontrol" optional="False" type="text"/>
96
97 <param label="--p-estimator: Regression model to use for prediction.
98 [default: RandomForestRegressor]" name="pestimator" optional="True" type="select">
99 <option selected="True" value="None">Selection is Optional</option>
100 <option value="Ridge">Ridge</option>
101 <option value="RandomForestRegressor">RandomForestRegressor</option>
102 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
103 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
104 <option value="SVR">SVR</option>
105 <option value="ElasticNet">ElasticNet</option>
106 <option value="Lasso">Lasso</option>
107 </param>
108 <param label="--p-n-estimators: Number of trees to grow for estimation. More trees will improve predictive accuracy up to a threshold level, but will also increase time and memory requirements. This parameter only affects ensemble estimators, such as Random Forest, AdaBoost, ExtraTrees, and GradientBoosting. [default: 100]" name="pnestimators" optional="True" type="integer" value="100"/>
109
110 <param label="--p-test-size: Fraction of input samples to exclude from training set and use for classifier testing. [default: 0.2]" name="ptestsize" optional="True" type="float" value="0.2"/>
111
112 <param label="--p-step: If optimize_feature_selection is True, step is the percentage of features to remove at each iteration. [default: 0.05]" name="pstep" optional="True" type="float" value="0.05"/>
113
114 <param label="--p-cv: Number of k-fold cross-validations to perform. [default: 5]" name="pcv" optional="True" type="integer" value="5"/>
115
116 <param label="--p-random-state: Seed used by random number generator. [optional]" name="prandomstate" optional="True" type="text"/>
117
118 <param label="--p-parameter-tuning: --p-no-parameter-tuning Automatically tune hyperparameters using random grid search. [default: True]" name="pparametertuning" checked="True" type="boolean"/>
119 <param label="--p-optimize-feature-selection: --p-no-optimize-feature-selection Automatically optimize input feature selection using recursive feature elimination. [default: True]" name="poptimizefeatureselection" checked="True" type="boolean"/>
120
121 <param label="--p-stratify: --p-no-stratify Evenly stratify training and test data among metadata categories. If True, all values in column must match at least two samples. [default: False]" name="pstratify" checked="False" type="boolean"/>
122
123 <param label="--p-maz-stats: --p-no-maz-stats Calculate anova and pairwise tests on MAZ scores. [default: True]" name="pmazstats" checked="True" type="boolean"/>
124
125 <param label="--cmd-config: Use config file for command options" name="cmdconfig" optional="True" type="data"/>
126 </inputs>
127 <outputs>
128 <data format="html" label="${tool.name} on ${on_string}: visualization.qzv" name="ovisualization"/>
129 </outputs>
130 <help>
131 <![CDATA[
132 Microbial maturity index prediction.
133 -------------------------------------
134
135 Calculates a "microbial maturity" index from a regression model trained on
136 feature data to predict a given continuous metadata column, e.g., to
137 predict age as a function of microbiota composition. The model is trained
138 on a subset of control group samples, then predicts the column value for
139 all samples. This visualization computes maturity index z-scores to compare
140 relative "maturity" between each group, as described in
141 doi:10.1038/nature13421. This method can be used to predict between-group
142 differences in relative trajectory across any type of continuous metadata
143 gradient, e.g., intestinal microbiome development by age, microbial
144 succession during wine fermentation, or microbial community differences
145 along environmental gradients, as a function of two or more different
146 "treatment" groups.
147
148 Parameters
149 ----------
150 table : FeatureTable[Frequency]
151 Feature table containing all features that should be used for target
152 prediction.
153 metadata : Metadata
154 Metadata file or artifact viewable as metadata. May be supplied multiple times to merge metadata.
155 column : Str
156 Numeric metadata column to use as prediction target.
157 group_by : Str
158 Categorical metadata column to use for plotting and significance
159 testing between main treatment groups.
160 control : Str
161 Value of group_by to use as control group. The regression model will be
162 trained using only control group data, and the maturity scores of other
163 groups consequently will be assessed relative to this group.
164 estimator : Str % Choices({'ElasticNet', 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'Lasso', 'RandomForestRegressor', 'Ridge', 'SVR'}), optional
165 Regression model to use for prediction.
166 n_estimators : Int % Range(1, None), optional
167 Number of trees to grow for estimation. More trees will improve
168 predictive accuracy up to a threshold level, but will also increase
169 time and memory requirements. This parameter only affects ensemble
170 estimators, such as Random Forest, AdaBoost, ExtraTrees, and
171 GradientBoosting.
172 test_size : Float % Range(0.0, 1.0, inclusive_start=False), optional
173 Fraction of input samples to exclude from training set and use for
174 classifier testing.
175 step : Float % Range(0.0, 1.0, inclusive_start=False), optional
176 If optimize_feature_selection is True, step is the percentage of
177 features to remove at each iteration.
178 cv : Int % Range(1, None), optional
179 Number of k-fold cross-validations to perform.
180 random_state : Int, optional
181 Seed used by random number generator.
182 parameter_tuning : Bool, optional
183 Automatically tune hyperparameters using random grid search.
184 optimize_feature_selection : Bool, optional
185 Automatically optimize input feature selection using recursive feature
186 elimination.
187 stratify : Bool, optional
188 Evenly stratify training and test data among metadata categories. If
189 True, all values in column must match at least two samples.
190 maz_stats : Bool, optional
191 Calculate anova and pairwise tests on MAZ scores.
192
193 Returns
194 -------
195 visualization : Visualization
196 \
197 ]]>
198 </help>
199 <macros>
200 <import>qiime_citation.xml</import>
201 </macros>
202 <expand macro="qiime_citation" />
203 </tool>