comparison micropita.xml @ 0:0de566f21448 draft default tip

v2
author sagun98
date Thu, 03 Jun 2021 18:13:32 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0de566f21448
1 <tool id="micropita" name="Run" version="1.0.1">
2 <code file="micropita_format_input_selector.py"/>
3 <description>micropita</description>
<command interpreter="python">micropita_prepare.py
    ## Arguments supplied for every selection method.
    --lastmeta $cls_x
    -m $cond.method_sel
    -n $selected_samples
    --input $inp_data
    --output $out_file1
    --stratify_value $cls_s

    ## Targeted-feature selection: ranking mode plus the chosen feature(s).
    #if str($cond.method_sel) == "features"
        --feature_method $cond.feature_method
        --targets $cond.cls_f
    #end if

    ## Both supervised methods take the metadata row holding the class label.
    #if str($cond.method_sel) in ("distinct", "discriminant")
        --label_value $cond.cls_L
    #end if
</command>
<inputs>
  <!-- Dataset of the "micropita" datatype; the dynamic selects below are all computed from it. -->
  <param name="inp_data" type="data" format="micropita" label="Input file"/>

  <!-- Option lists come from the dynamic_options callables, presumably defined in
       micropita_format_input_selector.py (the code file declared at the top of the tool). -->
  <param name="cls_x" type="select" multiple="false" size="70"
         label="Last metadata row (Use 'Label' for demo data)"
         dynamic_options="get_cols(inp_data,'0')"/>

  <!-- Method-specific options. The name "cond" has to be unique among the inputs because
       the command template reads cond.method_sel, cond.feature_method, cond.cls_f and
       cond.cls_L from this conditional. -->
  <conditional name="cond">
    <param name="method_sel" type="select" label="Select method">
      <option value="representative" selected="true">Representative</option>
      <option value="diverse">Diverse</option>
      <option value="extreme">Extreme</option>
      <option value="features">Features</option>
      <option value="distinct">Distinct (Supervised)</option>
      <option value="discriminant">Discriminant (Supervised)</option>
    </param>

    <!-- The unsupervised methods below need no extra parameters. -->
    <when value="representative"/>
    <when value="diverse"/>
    <when value="extreme"/>

    <when value="features">
      <param name="cls_f" type="select" multiple="true" size="70"
             label="Targeted feature(s)"
             dynamic_options="get_cols_features(inp_data,'0',cls_x)"/>
      <param name="feature_method" type="select" label="Selection type">
        <option value="rank">Rank</option>
        <option value="abundance">Abundance</option>
      </param>
    </when>

    <!-- Both supervised methods expose the same label selector under the same name,
         so the command template can read cond.cls_L for either of them. -->
    <when value="distinct">
      <param name="cls_L" type="select" multiple="false" size="70"
             label="Label (Use 'Group' for demo data)"
             dynamic_options="get_cols_add_line(inp_data,'0',cls_x)"/>
    </when>
    <when value="discriminant">
      <param name="cls_L" type="select" multiple="false" size="70"
             label="Label (Use 'Group' for demo data)"
             dynamic_options="get_cols_add_line(inp_data,'0',cls_x)"/>
    </when>
  </conditional>

  <param name="cls_s" type="select" multiple="false" size="70"
         label="Stratify by (optional) (Use 'StratifyLabel' for demo data)"
         dynamic_options="get_cols_add_line(inp_data,'0',cls_x)"/>

  <param name="selected_samples" type="integer" size="4" value="10"
         label="Number of samples to select"/>
</inputs>
69
<outputs>
  <!-- The single result dataset; its path is handed to the script through the output option in the command template. -->
  <!-- NOTE(review): stock Galaxy registers plain text under the extension "txt"; confirm that "text" is a registered datatype on the target server. -->
  <data name="out_file1" format="text"/>
</outputs>
<requirements>
  <!-- Environment-variable style dependency: Galaxy is asked to provide micropita_SCRIPT_PATH to the job. -->
  <requirement type="set_environment">micropita_SCRIPT_PATH</requirement>
</requirements>
<tests>
  <!-- Representative selection of 10 samples from the bundled demo input. -->
  <test>
    <param name="inp_data" value="micropita_input" ftype="micropita"/>
    <param name="cls_x" value="5"/>
    <param name="cls_s" value="5"/>
    <param name="selected_samples" value="10"/>
    <!-- Parameters that live inside a conditional are addressed by nesting, not with a dotted name. -->
    <conditional name="cond">
      <param name="method_sel" value="representative"/>
    </conditional>
    <output name="out_file1" file="micropita_output">
      <!-- Content assertions are only evaluated as children of the output they apply to.
           The pattern accepts any whitespace between fields because the tool's help
           describes the result line as tab-delimited. -->
      <assert_contents>
        <has_text_matching expression="representative\s+Sample_22_R\s+Sample_20_R"/>
      </assert_contents>
    </output>
  </test>
</tests>
89 <help>
90
91
92 microbiome: Picking Interesting Taxonomic Abundance
93 ---------------------------------------------------
94
95
96
97 microPITA is a computational tool enabling sample selection in tiered studies. Using tiered-study designs can more efficiently allocate resources, reducing study costs, and maximizing the use of samples. From a survey study, selection of samples can be performed to target various microbial communities including:
98
99 1. Samples with the most diverse community (maximum diversity);
100 2. Samples dominated by specific microbes (targeted feature);
101 3. Samples with microbial communities representative of the survey (representative dissimilarity);
102 4. Samples with the most extreme microbial communities in the survey (most dissimilar);
103 5. Given a phenotype (like disease state), samples at the border of phenotypes (discriminant) or samples typical of each phenotype (distinct).
104
105 Additionally, methods can leverage clinical metadata by stratifying samples into groups in which samples are subsequently selected. This enables the use of microPITA in cohort studies.
106
107
.. image:: https://bytebucket.org/biobakery/galaxy_micropita/wiki/HMPStool10PCoA.png
    :height: 500
    :width: 600
111
MicroPITA unsupervised method selection in the HMP 16S Gut Microbiome. Selection of 10 samples using targeted feature targeting *Bacteroides* (blue), maximum diversity (orange), representative dissimilarity (purple), and most dissimilar (pink), using Principal Coordinates Analysis (PCoA) for ordination. Targeted feature selects samples dominated by *Bacteroides* (upper left), while maximum diversity selects more diverse samples away from *Bacteroides*-dominant samples. Representative selection selects samples covering the range of samples in the PCoA plot, focusing on the higher-density central region, while maximum dissimilarity selects samples at the periphery of the plot.
113
114
Instructions to run:
--------------------
117
Before running microPITA, you must upload your data using Galaxy's **Get Data - Upload File**.
Please make sure that you choose **File Format Micropita**.
120 An example can be found at https://bytebucket.org/biobakery/micropita/wiki/micropita_sample_PCL.txt
121
122 Required inputs
123 ---------------
124
microPITA requires an input PCL file of metadata and microbial community measurements. Although some defaults can be changed, microPITA expects a PCL file as its input file. A PCL file is a delimited text file, similar to an Excel spreadsheet, with the following characteristics.
126
127 1. **Rows** represent metadata and features (bugs), **columns** represent samples.
128 2. The **first row** by default should be the sample ids.
129 3. Metadata rows should be next.
130 4. Lastly, rows containing features (bugs) measurements (like abundance) should be after metadata rows.
131 5. The **first column** should contain the ID describing the column. For metadata this may be, for example, "Age" for a row containing the age of the patients donating the samples. For measurements, this should be the feature name (bug name).
132 6. The file is expected to be TAB delimited.
133 7. If a consensus lineage or hierarchy of taxonomy is contained in the feature name, the default delimiter between clades is the pipe ("|").
134
**Note:** Mac users, please save the file as Windows-formatted text.
136
.. image:: https://bytebucket.org/biobakery/galaxy_micropita/wiki/pcl_diagram.png
    :height: 500
    :width: 600
140
141 Outputs
142 -------
143
The Run MicroPITA module will create one output text file. The output will consist of one line starting with a keyword for the selection method, followed by the selected samples delimited by tabs. An example of 6 samples selected by the representative method:
145
146 representative sample_1 sample_2 sample_3 sample_4 sample_5 sample_6
147
148
149
150
151 Run microPITA
152 -------------
153
154 A brief description of the Run micropita module.
155
156 **Input file:**
157 This should be populated by the Load microPITA module.
158
159 **Last metadata row:**
The row in the input PCL file that is the last metadata row. All microbial measurements should follow this row.
161
162 **Select method:**
163 Select which method to use for sample selection. Selection methods include:
164
165 1. Representative. Samples with microbial communities representative of the survey (representative dissimilarity);
166 2. Diverse. Samples with the most diverse community (maximum diversity);
167 3. Extreme. Samples with the most extreme microbial communities in the survey (most dissimilar);
168 4. Features. Samples dominated by specific microbes (targeted feature);
169 5. Distinct. Given a phenotype (like disease state), samples typical of each phenotype (Distinct).
170 6. Discriminant. Given a phenotype (like disease state), samples at the border of phenotypes (Discriminant).
171
172 **Targeted feature(s):** (visible with Features method selection only)
173 Select 1 or more features to target in sample selection.
174
175 **Selection type:** (visible with Features method selection only)
176 Rank or Abundance.
177
1. Rank indicates selecting samples that have the highest rank of the Targeted feature(s); this tends to select samples in which these features dominate the sample.
2. Abundance indicates selecting samples that have the highest average abundance of the Targeted feature(s); this selects samples where the features are most abundant but not necessarily dominant in the community.
180
181 **Label:** (visible with supervised method selection only)
The row which contains the label used to classify the samples for supervised methods.
183
184 **Stratify by (optional):**
185 The row which contains the groupings the samples will first be placed in before running the selection method on each group. If no grouping is selected, selection methods will be performed on the data set as a whole.
186
187 **Number of samples to select:**
188 The number of samples to select. If samples are stratified, this is per stratification (or group). If supervised methods are used, this is the number of samples selected per classification group (as defined by the label).
189
190 For more information please visit http://huttenhower.sph.harvard.edu/micropita
191
192
193 Acknowledgments
194 ---------------
195 Special thanks to Eric Franzosa for developing the above PCL figure!
196
197 Citation and Contacts
198 ---------------------
199
200 For more information please visit http://huttenhower.sph.harvard.edu/micropita
201 When using MicroPITA please cite:
202 Tickle T, Segata N, Waldron L, Weingart G, Huttenhower C. Two-stage microbial community experimental design. (Under review)
203
204 Please feel free to contact us at ttickle@hsph.harvard.edu for any questions or comments!
205
206
207 </help>
208 </tool>