comparison cloudmap.xml @ 6:85214e4428fd

upgrade to mimodd version 0.1.7.0
author Wolfgang Maier
date Fri, 11 Sep 2015 16:25:49 +0200
parents bdd1995c9e66
children 3619e85a5477
comparison
equal deleted inserted replaced
5:bdd1995c9e66 6:85214e4428fd
1 <tool id="cloudmap_prepare" name="Prepare variant data for mapping" version="0.1.6.1"> 1 <tool id="nacreousmap" name="NacreousMap" version="0.1.7.0">
2 <description>with the CloudMap series of tools.</description> 2 <description>Map causative mutations by multi-variant linkage analysis.</description>
3 <macros> 3 <macros>
4 <import>toolshed_macros.xml</import> 4 <import>toolshed_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"/> 6 <expand macro="requirements"/>
7 <version_command>mimodd version -q</version_command> 7 <version_command>mimodd version -q</version_command>
8 <command> 8 <command>
9 mimodd cloudmap "$ifile" ${run.mode} "$sample" 9 mimodd map "${opt.source.ifile}" ${opt.mode}
10 10 #if $str($opt.source.sample):
11 #if $str($run.related_parent_sample): 11 -m "${opt.source.sample}"
12 -r "${run.related_parent_sample}" 12 #end if
13 #end if 13 #if $str($opt.source.related_parent_sample):
14 #if $str($run.unrelated_parent_sample): 14 -r "${opt.source.related_parent_sample}"
15 -u "${run.unrelated_parent_sample}" 15 #end if
16 #end if 16 #if $str($opt.source.unrelated_parent_sample):
17 17 -u "${opt.source.unrelated_parent_sample}"
18 $run.infer_missing 18 #end if
19 $opt.source.infer_missing
19 -o "$ofile" 20 -o "$ofile"
20 21 #if $str($opt.source.seqdict_required.required) == "yes":
21 #if $seqdict: 22 -s "${opt.source.seqdict_required.seqdict}"
22 -s "$dictfile" 23 #end if
23 #end if 24 $opt.source.norm
25 #if $len($opt.source.bin_sizes):
26 --bin-sizes
27 #for $size in $opt.source.bin_sizes:
28 "${size.bin_size}"
29 #end for
30 #end if
31 #if $str($opt.source.tabfile):
32 $str($opt.source.tabfile) $tfile
33 #end if
34 #if $str($opt.source.plotopts.plots):
35 $str($opt.source.plotopts.plots) "$pfile"
36 $str($opt.source.plotopts.xlim)
37 #if $str($opt.source.plotopts.hylim):
38 --ylim-hist $str($opt.source.plotopts.hylim)
39 #end if
40 #if $str($opt.source.plotopts.hcols) and $len($opt.source.plotopts.hcols):
41 --hist-colors
42 #for $color in $opt.source.plotopts.hcols:
43 "${color.hcolor}"
44 #end for
45 #end if
46 #if $str($opt.source.plotopts.sylim):
47 --ylim-scatter $str($opt.source.plotopts.sylim)
48 #end if
49 #if $str($opt.source.plotopts.pcol):
50 --points-color "$str($opt.source.plotopts.pcol)"
51 #end if
52 #if $str($opt.source.plotopts.lcol):
53 --loess-color "$str($opt.source.plotopts.lcol)"
54 #end if
55 #if $str($opt.source.plotopts.span):
56 --loess-span "$str($opt.source.plotopts.span)"
57 #end if
58 #end if
59
24 </command> 60 </command>
25 61
62 <macros>
63 <macro name="svd_unconditional">
64 <expand macro="hidden_vaf_algo_params" />
65 <expand macro="seqdict_param" />
66 <expand macro="bins" />
67 <param name="norm" type="boolean" label="normalize variant counts to bin-width" truevalue="" falsevalue="--no-normalize" checked="true" help="without normalization the tool will just report the number of nucleotides per bin; with normalization the results for different bin-widths will be comparable." />
68 <conditional name="plotopts">
69 <param name="plots" type="select" label="graphical output settings">
70 <option value="">Do not generate graphs.</option>
71 <option value="-p">Give me graphics.</option>
72 </param>
73 <when value="-p">
74 <expand macro="scatter_default" />
75 <param name="hylim" type="text" label="upper limit for the histogram y-axis (leave blank for automatic scaling)" />
76 <param name="xlim" type="select" label="x-axis scaling">
77 <option value="">preserve relative contig sizes</option>
78 <option value="--fit-width">scale each contig to fit the plot width</option>
79 </param>
80 <expand macro="hist_colors" />
81 </when>
82 </conditional>
83 </macro>
84 <macro name="vaf_unconditional">
85 <expand macro="bins" />
86 <param name="norm" type="boolean" label="normalize variant counts to bin-width" truevalue="" falsevalue="--no-normalize" checked="true" />
87 <conditional name="plotopts">
88 <param name="plots" type="select" label="graphical output settings">
89 <option value="">Do not generate graphs.</option>
90 <option value="--no-scatter -p">Generate only histograms</option>
91 <option value="--no-hist -p">Generate only scatter plots</option>
92 <option value="-p">Give me everything (scatter plots and histograms)</option>
93 </param>
94 <when value="--no-scatter -p">
95 <expand macro="scatter_default" />
96 <param name="hylim" type="text" label="upper limit for the histogram y-axis (leave blank for automatic scaling)" />
97 <param name="xlim" type="select" label="x-axis scaling">
98 <option value="">preserve relative contig sizes</option>
99 <option value="--fit-width">scale each contig to fit the plot width</option>
100 </param>
101 <expand macro="hist_colors" />
102 </when>
103 <when value="--no-hist -p">
104 <expand macro="hist_default" />
105 <param name="sylim" type="text" label="upper limit for the scatter plot y-axis (default: 1)" />
106 <param name="xlim" type="select" label="x-axis scaling">
107 <option value="">preserve relative contig sizes</option>
108 <option value="--fit-width">scale each contig to fit the plot width</option>
109 </param>
110 <param name="span" type="text" label="span value to be used in calculating the Loess regression line through the scatter data (default=0.1)" help="smaller values give a more responsive curve that often picks up local evidence for tight linkage better, but too small values lead to plotting failures (in that case just rerun the tool with a larger value)." />
111 <expand macro="scatter_colors" />
112 </when>
113 <when value="-p">
114 <expand macro="plot_all" />
115 </when>
116 </conditional>
117 </macro>
118 <macro name="hidden_vaf_algo_params">
119 <param name="sample" type="hidden" value="" />
120 <param name="related_parent_sample" type="hidden" value="" />
121 <param name="unrelated_parent_sample" type="hidden" value="" />
122 <param name="infer_missing" type="hidden" value="" />
123 </macro>
124 <macro name="bins">
125 <repeat name="bin_sizes" title="bin sizes to analyze variants in (defaults to: 1Mb and 500Kb)" default="0" min="0" help="Values can be entered in bases (e.g., 1000000), kilobases (e.g., 500Kb) or megabases (e.g., 1Mb), but must be integral, i.e. no decimal numbers are allowed.">
126 <param name="bin_size" type="text" />
127 </repeat>
128 </macro>
129 <macro name="scatter_default">
130 <param name="sylim" type="hidden" value="" />
131 <param name="span" type="hidden" value="" />
132 <param name="pcol" type="hidden" value="" />
133 <param name="lcol" type="hidden" value="" />
134 </macro>
135 <macro name="hist_default">
136 <param name="hylim" type="hidden" value="" />
137 <param name="hcols" type="hidden" value="" />
138 </macro>
139 <macro name="hist_colors">
140 <repeat name="hcols" title="histogram colors" default="0" min="0" help="For each bin size chosen above a histogram will be generated with its color selected from the list provided here (defaults to alternating darkgrey, red).">
141 <param name="hcolor" type="color" value="darkgrey">
142 <sanitizer><valid><add value="#" /></valid></sanitizer>
143 </param>
144 </repeat>
145 </macro>
146 <macro name="scatter_colors">
147 <param name="pcol" type="color" value="#454545" label="color to be used for the scatter plot data points (default: gray27)">
148 <sanitizer><valid><add value="#" /></valid></sanitizer>
149 </param>
150 <param name="lcol" type="color" value="red" label="color to be used for the regression line (default: red)">
151 <sanitizer><valid><add value="#" /></valid></sanitizer>
152 </param>
153 </macro>
154 <macro name="plot_all">
155 <param name="hylim" type="text" label="upper limit for the histogram y-axis (leave blank for automatic scaling)" />
156 <param name="sylim" type="text" label="upper limit for the scatter plot y-axis (default: 1)" />
157 <param name="xlim" type="select" label="x-axis scaling">
158 <option value="">preserve relative contig sizes</option>
159 <option value="--fit-width">scale each contig to fit the plot width</option>
160 </param>
161 <param name="span" type="text" label="span value to be used in calculating the Loess regression line through the scatter data (default=0.1)" help="smaller values give a more responsive curve that often picks up local evidence for tight linkage better, but too small values lead to plotting failures (in that case just rerun the tool with a larger value)." />
162 <expand macro="hist_colors" />
163 <expand macro="scatter_colors" />
164 </macro>
165 <macro name="seqdict_param">
166 <conditional name="seqdict_required">
167 <param name="required" type="select" label="does this input file require a CloudMap-style sequence dictionary?" help="A sequence dictionary file is required ONLY if the input file does not provide information about the sizes of the chromosomes defined in it. It is NEVER needed for MiModD-generated input files.">
168 <option value="no">No</option>
169 <option value="yes">Yes</option>
170 </param>
171 <when value="yes">
172 <param name="seqdict" type="data" format="tabular" label="CloudMap-style sequence dictionary file" />
173 </when>
174 </conditional>
175 </macro>
176 </macros>
177
26 <inputs> 178 <inputs>
27 <param name="ifile" type="data" format="vcf" label="vcf input file" /> 179 <conditional name="opt">
28 <conditional name="run"> 180 <param name="mode" type="select" label="type of mapping analysis to perform" help="Select Simple Variant Density (SVD) Mapping to map mutations based on linked inheritance in near isogenic populations, Variant Allele Frequency (VAF) Mapping for bulk segregant analysis. Select Reprocess for rapidly replotting the result of a previous VAF analysis.">
29 <param name="mode" type="select" label="Type of CloudMap analysis to prepare data for" help="select Simple Variant Density (SVD) Mapping to generate output for the CloudMap EMS Variant Density Mapping tool or Variant Allele Frequency (VAF) Mapping for output for the CloudMap Variant Discovery or Hawaiian Variant Mapping tools.">
30 <option value="SVD">Simple Variant Density Mapping</option> 181 <option value="SVD">Simple Variant Density Mapping</option>
31 <option value="VAF">Variant Allele Frequency Mapping</option> 182 <option value="VAF">Variant Allele Frequency Mapping</option>
32 </param> 183 </param>
33 <when value="SVD"> 184 <when value="SVD">
34 <param name="related_parent_sample" type="hidden" value="" /> 185 <conditional name="source">
35 <param name="unrelated_parent_sample" type="hidden" value="" /> 186 <param name="inputtype" type="select" label="data source to use">
36 <param name="infer_missing" type="hidden" value="" /> 187 <option value="vcf">VCF file of variants (for de-novo mapping)</option>
188 <option value="rep">per-variant report file (for remapping a previous analysis)</option>
189 </param>
190 <when value="vcf">
191 <param name="ifile" type="data" format="vcf" label="input file with variants to analyze" />
192 <expand macro="svd_unconditional" />
193 <param name="tabfile" type="select" label="additional per-variant output file" help="You can either choose to produce a tabular per-variant report, which is useful for fast replotting with different plot settings or a vcf-like CloudMap-compatibility file that can be used as input for the CloudMap EMS Variant Density Mapping tool as an alternative plotting tool.">
194 <option value="">Do not generate per-variant output</option>
195 <option value="-t">Tabular per-variant report</option>
196 <option value="--cloudmap -t">CloudMap compatibility file</option>
197 </param>
198 </when>
199 <when value="rep">
200 <param name="ifile" type="data" format="tabular" label="input file with variants to analyze" />
201 <param name="tabfile" type="hidden" value="" />
202 <expand macro="svd_unconditional" />
203 </when>
204 </conditional>
37 </when> 205 </when>
38 <when value="VAF"> 206 <when value="VAF">
39 <param name="related_parent_sample" type="text" label="name of the related parent sample" help="the sample that provides variants present in your original mutant strain or in an ancestor (like the pre-mutagenesis strain); leave blank if not available" /> 207 <conditional name="source">
40 <param name="unrelated_parent_sample" type="text" label="name of the unrelated parent sample" help="the sample that provides variants present in the unrelated mapping strain (or in an ancestor of it) used in the mapping cross; leave blank if not available" /> 208 <param name="inputtype" type="select" label="data source to use">
41 <param name="infer_missing" type="boolean" checked="false" truevalue="--infer-missing" falsevalue="" label="Infer alleles for missing parent" help="if variant data for either the related or the unrelated parent strain is not available, the tool can try to infer the alleles present in that parent from the allele spectrum found in the mapping sample. This is an experimental option that will give a benefit only in certain situations. Enable at your own risk." /> 209 <option value="vcf">VCF file of variants (for de-novo mapping)</option>
210 <option value="rep">per-variant report file (for remapping a previous analysis)</option>
211 </param>
212 <when value="vcf">
213 <param name="ifile" type="data" format="vcf" label="input file with variants to analyze" />
214 <expand macro="seqdict_param" />
215 <param name="sample" type="text" label="mapping sample name" help="the sample to perform mutation mapping for" />
216 <param name="related_parent_sample" type="text" label="name of the related parent sample" help="the sample that provides variants present in your original mutant strain or in an ancestor (like the pre-mutagenesis strain); leave blank if not available" />
217 <param name="unrelated_parent_sample" type="text" label="name of the unrelated parent sample" help="the sample that provides variants present in the unrelated mapping strain (or in an ancestor of it) used in the mapping cross; leave blank if not available" />
218 <param name="infer_missing" type="boolean" checked="false" truevalue="--infer-missing" falsevalue="" label="Infer alleles for missing parent" help="if variant data for either the related or the unrelated parent strain is not available, the tool can try to infer the alleles present in that parent from the allele spectrum found in the mapping sample. This is an EXPERIMENTAL option that will give a benefit only in certain situations. Enable at your own risk." />
219 <expand macro="vaf_unconditional" />
220 <param name="tabfile" type="select" label="additional per-variant output file" help="You can either choose to produce a tabular per-variant report, which is useful for fast replotting with different plot settings or a vcf-like CloudMap-compatibility file that can be used as input for the CloudMap Hawaiian Variant Mapping tool as an alternative plotting tool.">
221 <option value="">Do not generate per-variant output</option>
222 <option value="-t">Tabular per-variant report</option>
223 <option value="--cloudmap -t">CloudMap compatibility file</option>
224 </param>
225 </when>
226 <when value="rep">
227 <param name="ifile" type="data" format="tabular" label="input file with variants to analyze" />
228 <expand macro="seqdict_param" />
229 <param name="tabfile" type="hidden" value="" />
230 <expand macro="hidden_vaf_algo_params" />
231 <expand macro="vaf_unconditional" />
232 </when>
233 </conditional>
42 </when> 234 </when>
43 </conditional> 235 </conditional>
44 <param name="sample" type="text" label="mapping sample name" help="the sample to perform mutation mapping for" />
45 <param name="seqdict" type="boolean" checked="true" label="Generate species configuration file for CloudMap" />
46 </inputs> 236 </inputs>
47 237
48 <outputs> 238 <outputs>
49 <data name="ofile" format="vcf" label="CloudMap-ready ${run.mode} File from ${on_string}" /> 239 <data name="ofile" format="tabular" label="MiModD ${opt.mode} Mapping - binned variant counts for ${on_string}" />
50 <data name="dictfile" format="tabular" label="Species Configuration File for CloudMap from ${on_string}"> 240 <data name="tfile" format="tabular" label="MiModD ${opt.mode} Mapping - per-variant report for ${on_string}">
51 <filter>seqdict</filter> 241 <filter>(opt['source']['tabfile'])</filter>
242 </data>
243 <data name="pfile" format="pdf" label="MiModD ${opt.mode} Mapping - linkage plots for ${on_string}">
244 <filter>(opt['source']['plotopts']['plots'])</filter>
52 </data> 245 </data>
53 </outputs> 246 </outputs>
54 247
55 <help> 248 <help>
56 .. class:: infomark 249 .. class:: infomark
57 250
58 **What it does** 251 **What it does**
59 252
60 The purpose of this tool is to provide compatibility of the MiModD analysis workflow with the external `CloudMap`_ *EMS Variant Density Mapping*, *Variant Discovery Mapping* and *Hawaiian Variant Mapping* tools. These tools complement MiModD by providing easily interpreted visualizations of mapping-by-sequencing analysis workflows. 253 This tool is a complete rewrite of, and an improvement on, the EMS Variant Density and Hawaiian Variant Mapping tools of `CloudMap`_. It is the most downstream tool in `mapping-by-sequencing analysis workflows in MiModD`_.
61 254
62 The tool converts a VCF file as generated by the *Extract Variant Sites* or *VCF Filter* tools to the format expected by the *CloudMap* series of tools. 255 It can be used to analyze and visualize the inheritance pattern of variants detected and selected by other MiModD tools or as an alternative (and more versatile) plotting engine for data generated with `CloudMap`_.
63 256
64 Optionally, it also extracts the chromosome names and sizes and reports them in the *CloudMap* *species configuration file* format. 257 -------------
65 Such a file is required as input to the current versions of the *CloudMap* *Hawaiian* and *Variant Density* mapping tools, if you are working with a species other than the natively supported ones (i.e., other than *C. elegans*, *A. thaliana* or *Brachypodium distachyon*). 258
66 259 **Usage Modes:**
67 To use the output datasets of the tool with *CloudMap*, you only have to upload them to any public Galaxy server that hosts *CloudMap* like, e.g., the main Galaxy server at https://usegalaxy.org . 260
68 261 This tool can be run in one of two different modes depending on the type of mapping analysis that should be performed:
69 **Notes:** 262
70 263 1) *Simple Variant Density (SVD) Mapping* mode analyzes the density of variants along the reference genome by dividing each chromosome into regions of user-defined size (bins) and counting the variants found in each bin.
71 1) Simple Variant Density (SVD) Mapping mode generates output for use with the CloudMap EMS Variant Density Mapping tool. The aim of SVD analysis is to identify clusters of variants that appear linked to a mutant phenotype selected for during several rounds of outcrossing or backcrossing to a non-mutagenized strain. The "mapping sample" is the out-/backcrossed strain and only its variants are taken into account for the analysis. 264
72 265 All variants listed in the input file are analyzed in this mode, which means that as input you will typically want to use filtered lists of variants (as produced by the VCF Filter tool).
73 .. class:: warningmark 266
74 267 The aim of SVD analysis is to identify clusters of variants in an outcrossed strain carrying a selectable unknown mutation. Such a cluster is interpreted as linkage between the corresponding genomic region and the unknown mutation.
75 EMS Variant Density Mapping is currently limited to *C. elegans* and other species with six chromosomes on the *CloudMap* side. 268
76 269 This mode corresponds roughly to EMS Variant Density Mapping in CloudMap.
77 2) Variant Allele Frequency (VAF) Mapping mode generates output for use with the CloudMap Variant Discovery or Hawaiian Variant Mapping tools. The aim of VAF analysis is to identify clusters of variants with (near) homozygous inheritance in a F2 population obtained from a cross between a mutant strain of interest and an unrelated mapping strain. Here, the "mapping sample" is the pooled F2 population. To analyze inheritance patterns this mode **requires either** a list of variants that could have been inherited through the mapping strain, i.e. the "unrelated parent strain", or through the mutant parent, i.e. through the "related parent strain". If variants are available for both parents, they can be analyzed together for higher mapping accuracy. 270
78 271 2) *Variant Allele Frequency (VAF) Mapping* mode analyzes the inheritance pattern in cross-progeny at sites at which the parents are homozygous for different alleles.
79 3) More information on combining MiModD and CloudMap in mapping-by-sequencing analyses can be found in the `corresponding section of the MiModD User Guide`_. 272
273 The aim of VAF analysis is to identify clusters of variants with (near) homozygous inheritance in an F2 (or later generation) population obtained from a cross between a strain carrying a selectable unknown mutation and an unrelated mapping strain. Such a cluster is interpreted as linkage between the corresponding genomic region and the unknown mutation selected for in the F2 generation.
274
275 This mode corresponds roughly to Hawaiian Variant Mapping in CloudMap, but can simultaneously take into account non-reference alleles found in either parent strain (CloudMap users may think of this as a combined Hawaiian Variant and Variant Discovery Mapping analysis).
276
277 -------------
278
279 **Input:**
280
281 Valid inputs for this tool are VCF files (any VCF file in SVD mode, a MiModD-generated multi-sample VCF file in VAF mode) or a CloudMap tabular report file as generated by the Hawaiian Variant Mapping tool. Alternatively, the tool can generate (in both modes) its own tabular report file, which can be used as input instead of the original VCF file when rerunning the tool with different plotting parameters to reduce analysis time.
282
283 .. class:: infomark
284
285 CloudMap-generated tabular input files require, as additional input, a CloudMap-style sequence dictionary (even if the original CloudMap analysis was possible without one) as described in the original CloudMap paper. This file has a simple two-column tab-delimited format, in which each line lists the chromosome name (as it appears in the input VCF file) and the length of the chromosome in megabases, rounded up.
286
287 -------------
288
289 **Output:**
290
291 The tool produces up to three output files:
292
293 1) a default tabular file of binned variant counts that can be used to plot the data with external software such as Excel,
294
295
296 2) an optional pdf containing linkage plots, which should look just like the plots produced by CloudMap, but are optimized for file size and display speed and offer more user-configurable parameters and
297
298
299 3) an optional tabular per-variant report file, which can be configured to be either a valid input file for the corresponding original CloudMap tool (for users who really, really want to continue using CloudMap for plotting) or to be reusable in fast reruns of the tool (which can be useful to experiment with different plotting parameters).
300
301 -------------
302
303 **Settings:**
304
305 1) Analysis settings
306
307 *bin size to analyze variants in* - determines the width of the regions along each chromosome, in which variants are counted and analyzed together.
308
309 Several bin sizes can be specified and for each size you will get a corresponding report section in the binned variant counts file and a histogram plot in the linkage plots file.
310
311 *normalize variant counts to bin-width* - if selected (the default), the variant counts for different bin sizes are not absolute, but normalized to the bin width, which makes the results for different bin sizes comparable.
312
313 *sample names (in VAF mode only)* - to analyze inheritance patterns, VAF mode needs information about the relationship between the samples defined in the input VCF file:
314
315 The *mapping sample name* should be set to the name of the sample for which the inheritance pattern is to be analyzed (the pooled progeny population).
316
317 The *name of the related parent sample* should be that of the parent sample that carried and brought in the unknown mutation to be mapped (or, alternatively, that of a closely related ancestor).
318
319 Finally, the *name of the unrelated parent sample* should be that of the other parent strain used in the cross.
320
321 At least one of the parent samples MUST be specified, but if the input file contains variant information for both parents, they can be analyzed together for higher mapping accuracy. If you are reanalyzing a tabular report file from a previous tool run or from CloudMap, the association between variants and samples is already incorporated into the input file and cannot be specified again.
322
323 2) Graphical output settings
324
325 .. class:: warningmark
326
327 To be able to generate plots the system running MiModD needs to have the statistical programming environment R and its Python interface rpy2 installed.
328
329
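330 *y-axes scaling* - set upper limits for the histogram and/or scatter plot y-axes if you want to override the defaults (automatic scaling for histograms, 1 for scatter plots).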
331
332 *x-axis scaling* - choose *preserve relative contig sizes* if you want the largest chromosome to fit the page width and smaller chromosomes to appear according to their relative size or choose *scale each contig to fit the plot width* if all chromosomes should exploit the available space
333
334 *span value to be used in calculating the Loess regression line* - this value determines the degree of smoothing of the regression line through the scatterplot data. Information on loess regression and the loess span parameter can be found at http://en.wikipedia.org/wiki/Local_regression. The default is 0.1 as in CloudMap.
335
336 *colors used for plotting* - can be selected freely from the offered palette. For histogram colors, the list of selected colors will be used to provide the colors for the different histograms plotted. If fewer colors than histograms (determined by the number of bin sizes selected) are specified, colors from the list will be recycled.
337
80 338
81 .. _CloudMap: https://usegalaxy.org/u/gm2123/p/cloudmap 339 .. _CloudMap: https://usegalaxy.org/u/gm2123/p/cloudmap
82 .. _corresponding section of the MiModD User Guide: http://mimodd.readthedocs.org/en/latest/cloudmap.html 340 .. _mapping-by-sequencing analysis workflows in MiModD: http://mimodd.readthedocs.org/en/latest/cloudmap.html
83
84 </help> 341 </help>
85 </tool> 342 </tool>