comparison joint_snv_mix.xml @ 0:a1034918ab9b draft

Uploaded
author fcaramia
date Thu, 20 Jun 2013 00:03:08 -0400
parents
children 26953f1c8af2
comparison
equal deleted inserted replaced
-1:000000000000 0:a1034918ab9b
1 <tool id="joint_snv_mix" name="Joint SNV Mix" version="0.7.5">
2 <description>classify germline and somatic mutations</description>
3 <requirements> <!-- Packages this tool declares as dependencies; each entry is pinned to a specific version. -->
4 <requirement type="package" version="2.7">python</requirement>
5 <requirement type="package" version="0.19.1">cython</requirement>
6 <requirement type="package" version="0.5">pysam</requirement>
7 <requirement type="package" version="0.1.18">samtools</requirement>
8 <requirement type="package" version="0.7.5">jointsnvmix</requirement> <!-- same version string as the tool's own version attribute -->
9 </requirements>
10 <command interpreter="perl">
11 ## Runs the joint_snv_mix.pl wrapper; every argument is passed as a quoted "KEY::value" token.
12 joint_snv_mix.pl
13
14 "ACTION::${option.option}"
15 "REFGENOME::$refFile.fields.path"
16 "BAMNORMAL::$normal_file"
17 "BAMTUMOR::$tumor_file"
18
19 ## Classify: optional parameters file, the result file and the somatic probability cut-off.
20 #if str($option.option) == "classify":
21   #if str($option.parameters) != "None":
22     "OPTION::--parameters_file $option.parameters"
23   #end if
24   "OPTION::--out_file $output"
25   "OPTION::--somatic_threshold $option.somatic_threshold"
26 #end if
27
28 ## Train: optional priors and initial parameters, then the EM settings.
29 #if str($option.option) == "train":
30   #if str($option.priors) != "None":
31     "OPTION::--priors_file $option.priors"
32   #end if
33   #if str($option.initial_parameters) != "None":
34     "OPTION::--initial_parameters_file $option.initial_parameters"
35   #end if
36   "OUTPUT::$output"
37   "OPTION::--convergence_threshold $option.convergence_threshold"
38   "OPTION::--max_iters $option.max_iters"
39 #end if
40
41 ## Options shared by both actions. Unset optional datasets render as "None" and are skipped.
42 #if str($positions_file) != "None":
43   "OPTION::--positions_file $positions_file"
44 #end if
45 "OPTION::--min_base_qual $min_base_quality"
46 "OPTION::--min_map_qual $min_map_quality"
47 "OPTION::--model $model"
48 #if str($chromosome).strip() != "":
49   "OPTION::--chromosome $chromosome"
50 #end if
51
52 </command>
53 <inputs>
54   <param name="refFile" type="select" label="Select a reference genome" optional="false">
55     <options from_data_table="all_fasta">
56       <filter type="sort_by" column="2" />
57       <validator type="no_options" message="No indexes are available" />
58     </options>
59   </param>
60   <param name="normal_file" type="data" format="bam" label="Normal Sample" help="Bam" />
61   <param name="tumor_file" type="data" format="bam" label="Tumor Sample" help="Bam" />
62   <param name="model" type="select" label="Model" help="">
63     <option value="binomial">binomial</option>
64     <option value="snvmix2" selected="true">snvmix2</option>
65     <option value="beta_binomial">beta binomial</option>
66   </param>
67   <param name="positions_file" type="data" format="txt" label="Positions file" help="Filter positions" optional="true"/>
68   <param name="min_map_quality" type="integer" min="0" label="Min map quality" help="Filter reads" value="0"/>
69   <param name="min_base_quality" type="integer" min="0" label="Min base quality" help="Filter reads" value="0"/>
70   <param name="chromosome" type="text" label="Chromosome" help="a chromosome to analyse, leave blank for all"/>
71   <!-- "model" and the action select below are required: their values are always written to the command line. -->
72   <!-- The selected action decides which of the extra inputs below are offered. -->
73   <conditional name="option">
74     <param name="option" type="select" label="Action" help="">
75       <option value="train" selected="true">Train</option>
76       <option value="classify">Classify</option>
77     </param>
78     <!-- Inputs only offered when training. -->
79     <when value="train">
80
81       <param name="priors" type="data" format="txt" label="Prior Probabilities" optional="true"/>
82       <param name="initial_parameters" type="data" format="txt" label="Initial Parameters" optional="true"/>
83       <param name="convergence_threshold" type="float" label="Convergence Threshold" value="1e-6"/>
84       <param name="max_iters" type="integer" min="1" label="Max number of training iterations" value="1000"/>
85
86     </when>
87     <when value="classify">
88       <!-- Inputs only offered when classifying; the threshold is a probability, hence the 0-1 bounds. -->
89       <param name="parameters" type="data" format="txt" label="Classify Parameters" help="" optional="true" />
90       <param name="somatic_threshold" type="float" min="0.0" max="1.0" label="Somatic Threshold" help="filter by probability" value="0.0"/>
91     </when>
92
93   </conditional>
94
95
96 </inputs>
97 <outputs> <!-- One text dataset; the command template writes to $output for both the train and classify actions. -->
98   <data format="txt" name="output" label="${tool.name} result on ${on_string}"/>
99 </outputs>
100
101 <help>
102
103 .. class:: infomark
104
105 **What it does**
106
107 ::
108
109 JointSNVMix implements a probabilistic graphical model to analyse sequence data
110 from tumour/normal pairs. The model draws statistical strength by analysing both
111 genomes jointly to more accurately classify germline and somatic mutations.
112
113
114 Train
115
116 The SnvMix family of models are complete generative models of the data.
117 As such the model parameters can be learned using the Expectation Maximisation
118 (EM) algorithm. The train command allows this to be done.
119
120 All methods require that a file with the parameters for the prior densities,
121 and an initial set of parameters be passed in. Templates for these files can
122 be found in the config/ directory which ships with the package. If you are
123 unsure about setting the priors or parameter values these files should suffice.
124
125 The train command will produce a parameters file suitable for use with the
126 classification command. Training is highly recommended to achieve optimal
127 performance when using a SnvMix-based model.
128
129 To reduce memory consumption all subcommands of train take an optional --skip-size flag.
130 This is the number of positions to skip over before sampling a position for the training set.
131 Smaller values will lead to larger training sets which will require more memory,
132 but should yield better parameter estimates.
133
134 All subcommands of train also take optional parameters for minimum depth a
135 position has in the tumour and normal to be used for training. Higher depth
136 sites should give more robust estimates of the parameters. The default values
137 of these are likely fine.
138
139
140 Classify
141
142 The classify command is used for analysing tumour/normal paired data and
143 computing the posterior probability for each of the nine joint genotypes for
144 a pair of diploid genomes.
145
146
147
148 **Models**
149
150 ::
151
152 There are currently three models supported by both the train and classify commands.
153 All models use the JointSNVMix mixture model which jointly analyses the normal and tumour genomes.
154 By default snvmix2 is used but other models can be specified.
155
156 binomial
157
158 Uses binomial densities in the mixture model; this was previously referred to as the JointSnvMix1 model.
159
160 snvmix2
161
162 Uses snvmix2 densities in the mixture model, as described in the original SNVMix paper; this was previously referred to as JointSnvMix2.
163
164 beta_binomial
165
166 Uses beta-binomial densities in the mixture model (new in version 0.8). The beta-binomial is a robust (in the statistical sense)
167 alternative to the binomial model. It can be beneficial when dealing with over-dispersed data. This is useful in cancer genomes
168 since allelic frequencies at somatic mutation sites may deviate significantly from those expected under a diploid model.
169
170
171 **Input**
172
173 Bam files containing normal and tumor reads.
174
175
176 **Parameters**
177
178
179 Classify
180
181 chromosome CHROMOSOME
182 Chromosome to analyse. If not set all chromosomes will
183 be analysed.
184
185 min_base_qual MIN_BASE_QUAL
186 Remove bases with base quality lower than this.
187 Default is 0.
188
189 min_map_qual MIN_MAP_QUAL
190 Remove bases with mapping quality lower than this.
191 Default is 0.
192
193 positions_file POSITIONS_FILE
194 Path to a file containing a list of positions to
195 use for analysis. Should be space separated
196 chrom pos. Additionally for each chromosome the
197 positions should be sorted. The same format as
198 samtools.
199
200 parameters_file PARAMETERS_FILE
201 Path to a file with custom parameters values for the
202 model.
203
204 somatic_threshold SOMATIC_THRESHOLD
205 Only sites with P(Somatic) = p_AA_AB + p_AA_BB greater
206 than or equal to this value will be printed. Default is 0.
207
208
209 Train
210
211 chromosome CHROMOSOME
212 Chromosome to analyse. If not set all chromosomes will
213 be analysed.
214
215 min_base_qual MIN_BASE_QUAL
216 Remove bases with base quality lower than this.
217 Default is 0.
218
219 min_map_qual MIN_MAP_QUAL
220 Remove bases with mapping quality lower than this.
221 Default is 0.
222
223 positions_file POSITIONS_FILE
224 Path to a file containing a list of positions to
225 use for analysis. Should be space separated
226 chrom pos. Additionally for each chromosome the
227 positions should be sorted. The same format as
228 samtools.
229
230 priors_file PRIORS_FILE
231 Path to a file with priors for the model parameters.
232
233 initial_parameters_file INITIAL_PARAMETERS_FILE
234 Path to a file with initial parameter values for the
235 model.
236
237 min_normal_depth MIN_NORMAL_DEPTH
238 Minimum depth of coverage in normal sample for a site
239 to be eligible for use in training set. Default 10
240
241 min_tumour_depth MIN_TUMOUR_DEPTH
242 Minimum depth of coverage in tumour sample for a site
243 to be eligible for use in training set. Default 10
244
245 max_normal_depth MAX_NORMAL_DEPTH
246 Maximum depth of coverage in normal sample for a site
247 to be eligible for use in training set. Default 100
248
249 max_tumour_depth MAX_TUMOUR_DEPTH
250 Maximum depth of coverage in tumour sample for a site
251 to be eligible for use in training set. Default 100
252
253 max_iters MAX_ITERS
254 Maximum number of iterations to use for training
255 model. Default 1000
256
257 skip_size SKIP_SIZE
258 When subsampling will skip over this number of
259 positions before adding a site to the subsample. Larger
260 values lead to smaller subsample data sets with faster
261 training and less memory. Smaller values should lead
262 to better parameter estimates. Default 1.
263
264 convergence_threshold CONVERGENCE_THRESHOLD
265 Convergence threshold for EM training. Once the change
266 in objective function is below this value training
267 will end. Default 1e-6
268
269
270
271
272 </help>
273 </tool>
274
275
276
277