annotate micropita.xml @ 0:0de566f21448 draft default tip

v2
author sagun98
date Thu, 03 Jun 2021 18:13:32 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
sagun98
parents:
diff changeset
1 <tool id="micropita" name="Run" version="1.0.1">
sagun98
parents:
diff changeset
<!-- Helper module loaded by Galaxy for this tool. The dynamic_options
     expressions in the inputs section call get_cols, get_cols_features and
     get_cols_add_line, which are presumably defined in this file
     (TODO confirm against the helper). -->
<code file="micropita_format_input_selector.py"></code>
<!-- Short description shown alongside the tool name. -->
<description>micropita</description>
sagun98
parents:
diff changeset
<!--
  Command line for micropita_prepare.py, run through the Python interpreter.
  Always passed: the last metadata row, the selection method, the number of
  samples, the input and output datasets and the stratification row.
  The "features" method adds the selection type and the targeted features;
  the supervised methods ("distinct" and "discriminant") add the label row.
  Dataset paths and values coming from the dynamically built selects are
  single-quoted so that spaces or shell metacharacters (for example the "|"
  clade delimiter that feature names may contain) reach the script as one
  argument instead of being interpreted by the shell.
-->
<command interpreter="python">micropita_prepare.py
  --lastmeta '$cls_x'
  -m $cond.method_sel
  -n $selected_samples
  --input '$inp_data'
  --output '$out_file1'
  --stratify_value '$cls_s'

  #if $cond.method_sel == "features":
    --feature_method $cond.feature_method
    --targets '$cond.cls_f'
  #end if
  #if $cond.method_sel == "distinct" or $cond.method_sel == "discriminant" :
    --label_value '$cond.cls_L'
  #end if

</command>
sagun98
parents:
diff changeset
<!--
  Tool form. The dynamic_options expressions call helper functions with the
  chosen input dataset (and, for some of them, the selected last metadata
  row), so the row and feature choices are built from the uploaded file.
-->
<inputs>
  <param name="inp_data" type="data" format="micropita" label="Input file"/>

  <param name="cls_x"
         type="select"
         label="Last metadata row (Use 'Label' for demo data)"
         multiple="False"
         size="70"
         dynamic_options="get_cols(inp_data,'0')"/>

  <!-- NOTE(review): this unlabeled data_column parameter shares the name
       "cond" with the conditional below. It looks like a workaround, possibly
       to make the form refresh when inp_data changes. Confirm before removing
       it or the extra attributes on the conditional. -->
  <param name="cond" type="data_column" data_ref="inp_data" accept_default="true"/>

  <!-- Method selector; each "when" holds the extra inputs shown for that method. -->
  <conditional name="cond" type="data_column" data_ref="inp_data" accept_default="true">
    <param name="method_sel" type="select" data_ref="inp_data" label="Select method">
      <option value="representative" selected="True">Representative</option>
      <option value="diverse">Diverse</option>
      <option value="extreme">Extreme</option>
      <option value="features">Features</option>
      <option value="distinct">Distinct (Supervised)</option>
      <option value="discriminant">Discriminant (Supervised)</option>
    </param>

    <!-- The three unsupervised methods below need no additional inputs. -->
    <when value="representative"/>
    <when value="diverse"/>
    <when value="extreme"/>

    <!-- Targeted-feature selection: which features, and how they are scored. -->
    <when value="features">
      <param name="cls_f"
             type="select"
             label="Targeted feature(s)"
             multiple="True"
             size="70"
             dynamic_options="get_cols_features(inp_data,'0',cls_x)"/>
      <param name="feature_method" type="select" format="text">
        <label>Selection type</label>
        <option value="rank">Rank</option>
        <option value="abundance">Abundance</option>
      </param>
    </when>

    <!-- Both supervised methods ask for the same label row. -->
    <when value="distinct">
      <param name="cls_L"
             type="select"
             label="Label (Use 'Group' for demo data)"
             multiple="False"
             size="70"
             dynamic_options="get_cols_add_line(inp_data,'0',cls_x)"/>
    </when>

    <when value="discriminant">
      <param name="cls_L"
             type="select"
             label="Label (Use 'Group' for demo data)"
             multiple="False"
             size="70"
             dynamic_options="get_cols_add_line(inp_data,'0',cls_x)"/>
    </when>

  </conditional>

  <param name="cls_s"
         type="select"
         label="Stratify by (optional) (Use 'StratifyLabel' for demo data)"
         multiple="False"
         size="70"
         dynamic_options="get_cols_add_line(inp_data,'0',cls_x)"/>

  <param name="selected_samples"
         type="integer"
         size="4"
         value="10"
         label="Number of samples to select"/>

</inputs>
sagun98
parents:
diff changeset
69
sagun98
parents:
diff changeset
<!-- Single result dataset; the command template passes its path to the script
     as the output file. -->
<!-- NOTE(review): Galaxy's stock plain-text datatype extension is "txt";
     confirm that a "text" datatype is registered on the target server. -->
<outputs>
  <data name="out_file1" format="text"></data>
</outputs>
sagun98
parents:
diff changeset
<!-- Declares a set_environment requirement, so the micropita_SCRIPT_PATH
     environment variable is expected to be provided to the job. -->
<requirements>
  <requirement type="set_environment">micropita_SCRIPT_PATH</requirement>
</requirements>
sagun98
parents:
diff changeset
<!--
  Functional test: runs the "representative" method on the micropita_input
  test file, selecting 10 samples, and checks the result both against the
  micropita_output reference file and for the expected first sample names.
-->
<tests>
  <test>
    <param name="inp_data" value="micropita_input" ftype="micropita" />
    <param name="cls_x" value="5" />
    <param name="cls_s" value="5" />
    <param name="selected_samples" value="10" />
    <!-- Parameters inside a conditional are addressed as "conditional|param". -->
    <param name="cond|method_sel" value="representative" />
    <!-- assert_contents has to be a child of the output it checks; as a
         sibling of a self-closed output element it was attached to nothing. -->
    <output name="out_file1" file="micropita_output">
      <assert_contents>
        <!-- The output line is delimited by tabs, so match any whitespace
             between the keyword and the sample names. -->
        <has_text_matching expression="representative\s+Sample_22_R\s+Sample_20_R" />
      </assert_contents>
    </output>
  </test>
</tests>
sagun98
parents:
diff changeset
89 <help>
sagun98
parents:
diff changeset
90
sagun98
parents:
diff changeset
91
sagun98
parents:
diff changeset
92 microbiome: Picking Interesting Taxonomic Abundance
sagun98
parents:
diff changeset
93 ---------------------------------------------------
sagun98
parents:
diff changeset
94
sagun98
parents:
diff changeset
95
sagun98
parents:
diff changeset
96
sagun98
parents:
diff changeset
97 microPITA is a computational tool enabling sample selection in tiered studies. Using tiered-study designs can more efficiently allocate resources, reducing study costs, and maximizing the use of samples. From a survey study, selection of samples can be performed to target various microbial communities including:
sagun98
parents:
diff changeset
98
sagun98
parents:
diff changeset
99 1. Samples with the most diverse community (maximum diversity);
sagun98
parents:
diff changeset
100 2. Samples dominated by specific microbes (targeted feature);
sagun98
parents:
diff changeset
101 3. Samples with microbial communities representative of the survey (representative dissimilarity);
sagun98
parents:
diff changeset
102 4. Samples with the most extreme microbial communities in the survey (most dissimilar);
sagun98
parents:
diff changeset
103 5. Given a phenotype (like disease state), samples at the border of phenotypes (discriminant) or samples typical of each phenotype (distinct).
sagun98
parents:
diff changeset
104
sagun98
parents:
diff changeset
105 Additionally, methods can leverage clinical metadata by stratifying samples into groups in which samples are subsequently selected. This enables the use of microPITA in cohort studies.
sagun98
parents:
diff changeset
106
sagun98
parents:
diff changeset
107
sagun98
parents:
diff changeset
108 .. image:: https://bytebucket.org/biobakery/galaxy_micropita/wiki/HMPStool10PCoA.png
sagun98
parents:
diff changeset
109 :height: 500
sagun98
parents:
diff changeset
110 :width: 600
sagun98
parents:
diff changeset
111
sagun98
parents:
diff changeset
112 MicroPITA unsupervised method selection in the HMP 16S Gut Microbiome. Selection of 10 samples using targeted feature targeting *Bacteroides* (blue), maximum diversity (orange), representative dissimilarity (purple), and most dissimilar (pink) using Principal Coordinates Analysis (PCoA) for ordination. Targeted feature selects samples dominated by *Bacteroides* (upper left) while maximum diversity selects more diverse samples away from *Bacteroides* dominant samples. Representative selection selects samples covering the range of samples in the PCoA plot focusing on the higher density central region while maximum dissimilarity selects samples at the periphery of the plot.
sagun98
parents:
diff changeset
113
sagun98
parents:
diff changeset
114
sagun98
parents:
diff changeset
115 Instructions to run:
sagun98
parents:
diff changeset
116 --------------------
sagun98
parents:
diff changeset
117
sagun98
parents:
diff changeset
118 Before running microPITA, you must upload your data using Galaxy's **Get Data - Upload File**.
sagun98
parents:
diff changeset
119 Please make sure that you choose **File Format Micropita**.
sagun98
parents:
diff changeset
120 An example can be found at https://bytebucket.org/biobakery/micropita/wiki/micropita_sample_PCL.txt
sagun98
parents:
diff changeset
121
sagun98
parents:
diff changeset
122 Required inputs
sagun98
parents:
diff changeset
123 ---------------
sagun98
parents:
diff changeset
124
sagun98
parents:
diff changeset
125 microPITA requires an input PCL file of metadata and microbial community measurements. Although some defaults can be changed, microPITA expects a PCL file as an input file. A PCL file is a delimited text file, similar to an Excel spreadsheet, with the following characteristics.
sagun98
parents:
diff changeset
126
sagun98
parents:
diff changeset
127 1. **Rows** represent metadata and features (bugs), **columns** represent samples.
sagun98
parents:
diff changeset
128 2. The **first row** by default should be the sample ids.
sagun98
parents:
diff changeset
129 3. Metadata rows should be next.
sagun98
parents:
diff changeset
130 4. Lastly, rows containing feature (bug) measurements (like abundance) should come after the metadata rows.
sagun98
parents:
diff changeset
131 5. The **first column** should contain the ID describing the column. For metadata this may be, for example, "Age" for a row containing the age of the patients donating the samples. For measurements, this should be the feature name (bug name).
sagun98
parents:
diff changeset
132 6. The file is expected to be TAB delimited.
sagun98
parents:
diff changeset
133 7. If a consensus lineage or hierarchy of taxonomy is contained in the feature name, the default delimiter between clades is the pipe ("|").
sagun98
parents:
diff changeset
134
sagun98
parents:
diff changeset
135 **Note:** Mac users, please save the file as Windows-formatted text.
sagun98
parents:
diff changeset
136
sagun98
parents:
diff changeset
137 .. image:: https://bytebucket.org/biobakery/galaxy_micropita/wiki/pcl_diagram.png
sagun98
parents:
diff changeset
138 :height: 500
sagun98
parents:
diff changeset
139 :width: 600
sagun98
parents:
diff changeset
140
sagun98
parents:
diff changeset
141 Outputs
sagun98
parents:
diff changeset
142 -------
sagun98
parents:
diff changeset
143
sagun98
parents:
diff changeset
144 The Run MicroPITA module will create one output text file. The output will consist of one line starting with a keyword for the selection method, followed by the selected samples delimited by tabs. An example of 6 samples selected by the representative method:
sagun98
parents:
diff changeset
145
sagun98
parents:
diff changeset
146 representative sample_1 sample_2 sample_3 sample_4 sample_5 sample_6
sagun98
parents:
diff changeset
147
sagun98
parents:
diff changeset
148
sagun98
parents:
diff changeset
149
sagun98
parents:
diff changeset
150
sagun98
parents:
diff changeset
151 Run microPITA
sagun98
parents:
diff changeset
152 -------------
sagun98
parents:
diff changeset
153
sagun98
parents:
diff changeset
154 A brief description of the Run micropita module.
sagun98
parents:
diff changeset
155
sagun98
parents:
diff changeset
156 **Input file:**
sagun98
parents:
diff changeset
157 This should be populated by the Load microPITA module.
sagun98
parents:
diff changeset
158
sagun98
parents:
diff changeset
159 **Last metadata row:**
sagun98
parents:
diff changeset
160 The row on the input pcl file that is the last metadata. All microbial measurements should follow this row.
sagun98
parents:
diff changeset
161
sagun98
parents:
diff changeset
162 **Select method:**
sagun98
parents:
diff changeset
163 Select which method to use for sample selection. Selection methods include:
sagun98
parents:
diff changeset
164
sagun98
parents:
diff changeset
165 1. Representative. Samples with microbial communities representative of the survey (representative dissimilarity);
sagun98
parents:
diff changeset
166 2. Diverse. Samples with the most diverse community (maximum diversity);
sagun98
parents:
diff changeset
167 3. Extreme. Samples with the most extreme microbial communities in the survey (most dissimilar);
sagun98
parents:
diff changeset
168 4. Features. Samples dominated by specific microbes (targeted feature);
sagun98
parents:
diff changeset
169 5. Distinct. Given a phenotype (like disease state), samples typical of each phenotype (Distinct).
sagun98
parents:
diff changeset
170 6. Discriminant. Given a phenotype (like disease state), samples at the border of phenotypes (Discriminant).
sagun98
parents:
diff changeset
171
sagun98
parents:
diff changeset
172 **Targeted feature(s):** (visible with Features method selection only)
sagun98
parents:
diff changeset
173 Select 1 or more features to target in sample selection.
sagun98
parents:
diff changeset
174
sagun98
parents:
diff changeset
175 **Selection type:** (visible with Features method selection only)
sagun98
parents:
diff changeset
176 Rank or Abundance.
sagun98
parents:
diff changeset
177
sagun98
parents:
diff changeset
178 1. Rank indicates selecting samples that have the highest rank of the Targeted feature(s); this tends to select samples in which these features dominate the sample.
sagun98
parents:
diff changeset
179 2. Abundance indicates selecting samples that have the highest average abundance of the Targeted feature(s); this selects samples where the features are most abundant but not necessarily dominant in the community.
sagun98
parents:
diff changeset
180
sagun98
parents:
diff changeset
181 **Label:** (visible with supervised method selection only)
sagun98
parents:
diff changeset
182 The row which contains the label used to classify the samples from supervised methods.
sagun98
parents:
diff changeset
183
sagun98
parents:
diff changeset
184 **Stratify by (optional):**
sagun98
parents:
diff changeset
185 The row which contains the groupings the samples will first be placed in before running the selection method on each group. If no grouping is selected, selection methods will be performed on the data set as a whole.
sagun98
parents:
diff changeset
186
sagun98
parents:
diff changeset
187 **Number of samples to select:**
sagun98
parents:
diff changeset
188 The number of samples to select. If samples are stratified, this is per stratification (or group). If supervised methods are used, this is the number of samples selected per classification group (as defined by the label).
sagun98
parents:
diff changeset
189
sagun98
parents:
diff changeset
190 For more information please visit http://huttenhower.sph.harvard.edu/micropita
sagun98
parents:
diff changeset
191
sagun98
parents:
diff changeset
192
sagun98
parents:
diff changeset
193 Acknowledgments
sagun98
parents:
diff changeset
194 ---------------
sagun98
parents:
diff changeset
195 Special thanks to Eric Franzosa for developing the above PCL figure!
sagun98
parents:
diff changeset
196
sagun98
parents:
diff changeset
197 Citation and Contacts
sagun98
parents:
diff changeset
198 ---------------------
sagun98
parents:
diff changeset
199
sagun98
parents:
diff changeset
200 For more information please visit http://huttenhower.sph.harvard.edu/micropita
sagun98
parents:
diff changeset
201 When using MicroPITA please cite:
sagun98
parents:
diff changeset
202 Tickle T, Segata N, Waldron L, Weingart G, Huttenhower C. Two-stage microbial community experimental design. (Under review)
sagun98
parents:
diff changeset
203
sagun98
parents:
diff changeset
204 Please feel free to contact us at ttickle@hsph.harvard.edu for any questions or comments!
sagun98
parents:
diff changeset
205
sagun98
parents:
diff changeset
206
sagun98
parents:
diff changeset
207 </help>
sagun98
parents:
diff changeset
208 </tool>