comparison collectorcurve/collectorcurve.xml @ 0:f4d86766f766 draft

Uploaded
author qfab
date Fri, 30 May 2014 03:16:03 -0400
parents
children 03ab673e45cb
comparison
equal deleted inserted replaced
-1:000000000000 0:f4d86766f766
1 <tool id="mothur_collect_single" name="Collector Curve" version="1.0" force_history_refresh="True">
2 <description>Calculate and plot collector's curves for OTUs</description>
<!-- Dependencies declared for this tool: a "binary" requirement whose value is the literal
     placeholder @BINARY@ (presumably substituted at packaging time; TODO confirm), and the
     "mothur" package with the version string 1.30.2-1.33.3. -->
3 <requirements>
4 <requirement type="binary">@BINARY@</requirement>
5 <requirement type="package" version="1.30.2-1.33.3">mothur</requirement>
6 </requirements>
<!-- Command template, run with interpreter="bash": invokes collectorcurve.sh with positional
     arguments in this order:
       * input format keyword (shared, rabund, sabund or list), chosen from the datatype of $otu
       * $otu, $label, $output, $output.id, $__new_file_path__
       * $calc, emitted only when its string form is neither "None" nor empty
       * $advancedoption.advanced
       * only when advanced is true: a flag followed by a value for each of abund, size and
         freq, where the value is '' if the corresponding flag is false.
     NOTE(review): because $calc is omitted when empty, every later argument shifts one
     position to the left; collectorcurve.sh is not visible here, so confirm it copes. -->
7 <command interpreter="bash">
8 collectorcurve.sh
9 #if isinstance($otu.datatype, $__app__.datatypes_registry.get_datatype_by_extension('shared').__class__):
10 shared
11 #elif isinstance($otu.datatype, $__app__.datatypes_registry.get_datatype_by_extension('rabund').__class__):
12 rabund
13 #elif isinstance($otu.datatype, $__app__.datatypes_registry.get_datatype_by_extension('sabund').__class__):
14 sabund
15 #elif isinstance($otu.datatype, $__app__.datatypes_registry.get_datatype_by_extension('list').__class__):
16 list
17 #end if
18 $otu
19 $label
20 $output
21 $output.id
22 $__new_file_path__
23 #if $calc.__str__ != "None" and len($calc.__str__) > 0:
24 $calc
25 #end if
26 $advancedoption.advanced
27 #if $advancedoption.advanced:
28 $advancedoption.abundoption.cabund
29 #if $advancedoption.abundoption.cabund:
30 $advancedoption.abundoption.abund
31 #else
32 ''
33 #end if
34 $advancedoption.sizeoption.csize
35 #if $advancedoption.sizeoption.csize:
36 $advancedoption.sizeoption.size
37 #else
38 ''
39 #end if
40 $advancedoption.freqoption.cfreq
41 #if $advancedoption.freqoption.cfreq:
42 $advancedoption.freqoption.freq
43 #else
44 ''
45 #end if
46 #end if
47 </command>
<!-- Tool inputs: the OTU dataset ("otu"), the labels to process ("label", options read from
     the "labels" metadata of the selected dataset), the calculators to run ("calc"), and an
     optional group of advanced settings (abund, size, freq), each gated by its own boolean
     switch inside the "advancedoption" conditional. -->
48 <inputs>
49 <param name="otu" type="data" format="rabund,sabund,list,shared" label="OTU list (rabund, sabund, list or shared format)" help="The rabund output from the 'Map Reads to OTU' tool"/>
50 <param name="label" type="select" label="Labels - OTU labels" multiple="true" help="Select one or more labels to calculate the collector's curve for. By default all labels are selected" >
51 <options>
52 <filter type="data_meta" ref="otu" key="labels" />
53 </options>
54 </param>
55 <param name="calc" type="select" label="Calculators" multiple="true" help="Select one or more calculators for collector's curve generation. By default chao, invsimpson and npshannon are selected. Please see the description for information on the calculators.">
56 <option value="ace">ace</option>
57 <option value="bootstrap">bootstrap</option>
58 <option value="chao" selected="true">chao</option>
59 <option value="jack">jack</option>
60 <option value="sobs">sobs</option>
61 <option value="simpsoneven">simpsoneven</option>
62 <option value="shannoneven">shannoneven</option>
63 <option value="heip">heip</option>
64 <option value="smithwilson">smithwilson</option>
65 <option value="bergerparker">bergerparker</option>
66 <option value="coverage">coverage</option>
67 <option value="goodscoverage">goodscoverage</option>
68 <option value="simpson">simpson</option>
69 <option value="invsimpson" selected="true">invsimpson</option>
70 <option value="qstat">qstat</option>
71 <option value="shannon">shannon</option>
72 <option value="npshannon" selected="true">npshannon</option>
73 <option value="boneh">boneh</option>
74 <option value="efron">efron</option>
75 <option value="shen">shen</option>
76 <option value="solow">solow</option>
77 <option value="logseries">logseries</option>
78 <option value="geometric">geometric</option>
79 <option value="bstick">bstick</option>
80 <option value="nseqs">nseqs</option>
81 </param>
82 <conditional name="advancedoption">
83 <param name="advanced" type="boolean" value="true" label="Please select to show and specify advanced option" help="Advanced options including specification of ACE Estimator threshold for abundant versus rare OTUs, sample size for OTU prediction and frequency for output."/>
84 <when value="true">
85 <conditional name="abundoption">
86 <param name="cabund" type="boolean" value="true" label="Select, if you would like to specify the ACE Estimator threshold for abundant versus rare OTUs" help=""/>
87 <when value="true">
88 <param name="abund" type="integer" value="10" label="Abund - ACE estimator threshold for abundant versus rare OTUs (default 10)"/>
89 </when>
90 </conditional>
91 <conditional name="sizeoption">
92 <param name="csize" type="boolean" value="true" label="Select, if you would like to specify the sample size for OTU prediction" help=""/>
93 <when value="true">
94 <param name="size" type="integer" value="1" label="Size - sample size for OTU prediction (ignored if &lt; 1)"
95 help="By default these calculators will base the prediction on a sample that is the same size as the initial sampling"/>
96 </when>
97 </conditional>
98 <conditional name="freqoption">
99 <param name="cfreq" type="boolean" value="true" label="Select, if you would like to specify the frequency for output" help=""/>
100 <when value="true">
101 <param name="freq" type="float" value="0.1" label="Frequency - frequency for output (the default setting is to output data every 100 sequences)"
102 help="Use a decimal between 0 and 1 to set the frequency as a percentage of the number of sequences"/>
103 </when>
104 </conditional>
105 </when>
106 </conditional>
107 </inputs>
<!-- Single declared output: a tabular dataset named "output", labelled with the tool name
     and the description of the input it ran on.
     NOTE(review): the command also passes $output.id and $__new_file_path__ to the script and
     the help text mentions one extra output per calculator; presumably the script creates
     those outside this declaration; TODO confirm. -->
108 <outputs>
109 <data format="tabular" name="output" label="${tool.name} on ${on_string}: tab" />
110 </outputs>
<!-- Functional test: runs the tool on reads2otu.rabund for label 0.03 with the three default
     calculators and compares the result with collectorsummary.tabular, tolerating up to 10
     differing lines. Values for a multiple select are given comma-separated so that each one
     matches a declared option of the "calc" parameter. -->
111 <tests>
112 <test>
113 <param name="otu" value="reads2otu.rabund" />
114 <param name="label" value="0.03" />
115 <param name="calc" value="chao,invsimpson,npshannon" />
116 <output name="output" file="collectorsummary.tabular" ftype="tabular" lines_diff="10" />
117 </test>
118 </tests>
119 <help>
120 ===========
121 Description
122 ===========
123
124 Collector's curve calculation based on mothur's collect.single command.
125
126 Collector's curves can be calculated using calculators, that describe the richness, diversity, and other features of individual samples. Collector's curves describe how richness or diversity change as you sample additional individuals. If a collector's curve becomes parallel to the x-axis, you can be reasonably confident that you have done a good job of sampling and can trust the last value in the curve. Otherwise, you need to keep sampling.
127 For calculator parameter choices see: mothur_wiki_
128
129 .. _mothur_wiki: http://www.mothur.org/wiki/Calculators
130
131 -----
132
133 ----------
134 Input
135 ----------
136
137 OTU list
138 rabund,
139 sabund,
140 list or
141 shared format
142
143 ----------
144 Parameters
145 ----------
146
147 Labels - OTU labels
148 Select labels you want the collector's curve calculated for (e.g. lines labelled 0.03)
149 by default collector's curves will be calculated for all labels listed
150
151 Calculators
152 The list of available calculators follows (see mothur_wiki_ for a more detailed description);
153 select the calculators to use for calculating collector's curves.
154 Default selection: chao - Community richness, npshannon (non-parametric) and invsimpson - Community diversity
155
156 +---------------+-----------------------------------------------------------------------------------------------+
157 | chao | Community richness the Chao1 estimator |
158 +---------------+-----------------------------------------------------------------------------------------------+
159 | invsimpson | Community diversity the inverse Simpson index |
160 +---------------+-----------------------------------------------------------------------------------------------+
161 | npshannon | Community diversity the non-parametric Shannon index |
162 +---------------+-----------------------------------------------------------------------------------------------+
163 | ace | Community richness the ACE estimator |
164 +---------------+-----------------------------------------------------------------------------------------------+
165 | bootstrap | Community richness the bootstrap estimator |
166 +---------------+-----------------------------------------------------------------------------------------------+
167 | jack | Community richness the jackknife estimator |
168 +---------------+-----------------------------------------------------------------------------------------------+
169 | sobs | Community richness the observed richness |
170 +---------------+-----------------------------------------------------------------------------------------------+
171 | simpsoneven | Community evenness a Simpson index-based measure of evenness |
172 +---------------+-----------------------------------------------------------------------------------------------+
173 | shannoneven | Community evenness a Shannon index-based measure of evenness |
174 +---------------+-----------------------------------------------------------------------------------------------+
175 | heip | Community evenness Heip's metric of community evenness |
176 +---------------+-----------------------------------------------------------------------------------------------+
177 | smithwilson | Community evenness Smith and Wilson's metric of community evenness |
178 +---------------+-----------------------------------------------------------------------------------------------+
179 | bergerparker | Community diversity the Berger-Parker index |
180 +---------------+-----------------------------------------------------------------------------------------------+
181 | coverage | Community diversity the sampling coverage |
182 +---------------+-----------------------------------------------------------------------------------------------+
183 | goodscoverage | Community diversity the Good's estimate of sampling coverage |
184 +---------------+-----------------------------------------------------------------------------------------------+
185 | simpson | Community diversity the Simpson index |
186 +---------------+-----------------------------------------------------------------------------------------------+
187 | qstat | Community diversity the Q statistic |
188 +---------------+-----------------------------------------------------------------------------------------------+
189 | shannon | Community diversity the Shannon index |
190 +---------------+-----------------------------------------------------------------------------------------------+
191 | boneh | Estimator Boneh's estimator |
192 +---------------+-----------------------------------------------------------------------------------------------+
193 | efron | Estimator Efron's estimator |
194 +---------------+-----------------------------------------------------------------------------------------------+
195 | shen | Estimator Shen's estimator |
196 +---------------+-----------------------------------------------------------------------------------------------+
197 | solow | Estimator Solow's estimator |
198 +---------------+-----------------------------------------------------------------------------------------------+
199 | logseries | Statistical distribution tests whether observed data follow the log series distribution |
200 +---------------+-----------------------------------------------------------------------------------------------+
201 | geometric | Statistical distribution tests whether observed data follow the geometric series distribution |
202 +---------------+-----------------------------------------------------------------------------------------------+
203 | bstick | Statistical distribution tests whether observed data follow the broken stick distribution |
204 +---------------+-----------------------------------------------------------------------------------------------+
205 | nseqs | Utility the number of sequences in a sample |
206 +---------------+-----------------------------------------------------------------------------------------------+
207
208
209 ----------------------------
210 Optional advanced parameters
211 ----------------------------
212
213 ACE estimator threshold
214 By default the ACE estimator uses 10 as the cutoff between OTUs that are rare and abundant. So if an OTU has more than 10 individuals in it, then it is considered abundant. This is really just an empirical decision and we are merely following the lead of Anne Chao and others who implement 10 in their software. If you would like to use a different cutoff, you can use the abund option.
215
216 Size - Sample Size
217 Within the suite of calculators available in mothur are a set that will predict the number of additional OTUs that will be observed for a given sample size. By default these calculators will base the prediction on a sample that is the same size as the initial sampling. If you would like to use a different sample size, use the size option.
218 The value of size should be between 1 and the size of the initial sampling.
219
220 Frequency
221 For larger datasets you might not be interested in obtaining all of the data for the number of sequences sampled. For instance, if you have 100,000 sequences, you may only want to output the data every 100 sequences. Alternatively, if you only have 100 sequences, you may want to output all of the data.
222 The default setting is to output data every 100 sequences.
223
224 ------
225 Output
226 ------
227
228 .. class:: warningmark
229
230 Please note that the number of outputs depends on the number of selected calculators. Each selected calculator will result in an extra output, which is indicated by the calculator name in brackets at the end of the output's filename. In case the outputs for the selected calculators are **not** showing in the History panel, refresh your history by clicking on the refresh icon.
231
232
233 (A) A summary file in table format containing the following fields, number of sequences, the sample coverage, the number of observed OTUs, the chao richness estimate, the invsimpson diversity estimate, and the npshannon non-parametric diversity estimate. The summary gives results for each of the listed fields when all available data is used.
234
235 (B) Followed by a file for each calculator selected (indicated by the calculator's name in brackets at the end of the output's filename) which can be plotted as collector's curve and used to evaluate how the results of the calculator change with sampling effort.
236
237 .. class:: infomark
238
239
240 Use Galaxy's integrated visualization tool to plot the collector's curve. The visualization tool is accessible via the 'Visualize' icon in the extended dataset information area. After launching the integrated visualization tool, select the 'Data Controls' tab. In the 'Data Controls' tab, select column 1 (number sampled) as 'Data column for X' and use column 2 (minimum identity, according to the selected labels) as 'Data column for Y'.
241
242 -----
243
244 =========
245 Resources
246 =========
247
248 mothur_
249
250 .. _mothur: http://www.mothur.org
251
252 **Author**
253
254 Patrick D. Schloss (pschloss@umich.edu)
255
256 **Wrapper Author**
257
258 QFAB Bioinformatics (support@qfab.org)
259 based on jjohnson mothur_toolsuite wrapper
260
261 </help>
262 </tool>