0
|
1 <tool id="joint_snv_mix" name="Joint SNV Mix" version="0.7.5">
|
|
2 <description>classify germline and somatic mutations</description>
|
|
<!-- Packages Galaxy must resolve before the wrapper script can run.
     Each entry pins the exact package version this wrapper declares. -->
<requirements>
    <requirement type="package" version="0.19.1">cython</requirement>
    <requirement type="package" version="0.5">pysam</requirement>
    <requirement type="package" version="0.1.18">samtools</requirement>
    <requirement type="package" version="0.7.5">jointsnvmix</requirement>
</requirements>
|
|
<!-- Cheetah template that builds the argument list for joint_snv_mix.pl.
     Every argument is a quoted "KEY::value" token: ACTION, REFGENOME,
     BAMNORMAL and BAMTUMOR are always sent; OPTION tokens carry command-line
     flags, and OUTPUT names the result file in train mode. -->
<command interpreter="perl">
    joint_snv_mix.pl

    "ACTION::${option.option}"

    "REFGENOME::$refFile.fields.path"
    "BAMNORMAL::$normal_file"
    "BAMTUMOR::$tumor_file"

    #if str($option.option) == "classify":
        #if ($option.parameters):
            "OPTION::--parameters_file $option.parameters"
        #end if
        "OPTION::--out_file $output"
        "OPTION::--somatic_threshold $option.somatic_threshold"
    #end if

    #if str($option.option) == "train":
        #if ($option.priors):
            "OPTION::--priors_file $option.priors"
        #end if
        ## The initial parameters dataset is declared in the train inputs but was
        ## never forwarded, so a user-supplied file was silently ignored.
        #if ($option.initial_parameters):
            "OPTION::--initial_parameters_file $option.initial_parameters"
        #end if
        "OUTPUT::$output"
        "OPTION::--convergence_threshold $option.convergence_threshold"
        "OPTION::--max_iters $option.max_iters"
    #end if

    #if ($positions_file):
        "OPTION::--positions_file $positions_file"
    #end if

    "OPTION::--min_base_qual $min_base_quality"
    "OPTION::--min_map_qual $min_map_quality"
    "OPTION::--model $model"

    ## Compare the stripped string so a blank or whitespace-only entry
    ## never produces a chromosome flag with an empty value.
    #set $chrom = str($chromosome).strip()
    #if $chrom != "":
        "OPTION::--chromosome $chrom"
    #end if
</command>
|
|
<inputs>
    <!-- Reference genome, chosen from the all_fasta data table; the command
         reads the selected entry's path field. -->
    <param name="refFile" type="select" label="Select a reference genome" optional="false">
        <options from_data_table="all_fasta">
            <filter type="sort_by" column="2" />
            <validator type="no_options" message="No indexes are available" />
        </options>
    </param>

    <!-- Paired BAM inputs. -->
    <param name="normal_file" type="data" format="bam" label="Normal Sample " help="Bam" />
    <param name="tumor_file" type="data" format="bam" label="Tumor Sample" help="Bam" />

    <!-- Not optional: the command always passes the model value, so an
         unset selection would be rendered as the literal text "None". -->
    <param name="model" type="select" label="Model" help="">
        <option value="binomial">binomial</option>
        <option value="snvmix2" selected="true">snvmix2</option>
        <option value="beta_binomial">beta binomial</option>
    </param>

    <param name="positions_file" type="data" format="txt" label="Positions file" help="Filter positions" optional="true"/>

    <!-- Numeric settings are typed so Galaxy rejects non-numeric input
         before the job is submitted. -->
    <param name="min_map_quality" type="integer" min="0" label="Min map quality" help="Filter reads" value="0"/>
    <param name="min_base_quality" type="integer" min="0" label="Min base quality" help="Filter reads" value="0"/>
    <param name="chromosome" type="text" label="Chromosome" help="a chromosome to analyse, leave blank for all"/>

    <conditional name="option">
        <!-- Test parameter of the conditional: it must always carry a value
             so that exactly one of the branches below is active. -->
        <param name="option" type="select" label="Action" help="">
            <option value="train" selected="true">Train</option>
            <option value="classify">Classify</option>
        </param>

        <when value="train">
            <param name="priors" type="data" format="txt" label="Prior Probabilities" optional="true"/>
            <param name="initial_parameters" type="data" format="txt" label="Initial Parameters" optional="true"/>
            <param name="convergence_threshold" type="float" label="Convergence Threshold" value="1e-6"/>
            <param name="max_iters" type="integer" min="1" label="Max number of training iterations" value="1000"/>
        </when>

        <when value="classify">
            <param name="parameters" type="data" format="txt" label="Classify Parameters" help="" optional="true" />
            <!-- A probability threshold, hence bounded to [0, 1]. -->
            <param name="somatic_threshold" type="float" min="0.0" max="1.0" label="Somatic Threshold" help="filter by probability" value="0.0"/>
        </when>
    </conditional>
</inputs>
|
|
<outputs>
    <!-- Single result dataset: the command passes it as the out_file option
         in classify mode and as the OUTPUT token in train mode.
         The former type="data" attribute was dropped because the output
         data element does not define a "type" attribute. -->
    <data format="txt" name="output" label="${tool.name} result on ${on_string}"/>
</outputs>
|
|
99
|
|
100 <help>
|
|
101
|
|
102 .. class:: infomark
|
|
103
|
|
104 **What it does**
|
|
105
|
|
106 ::
|
|
107
|
|
108 JointSNVMix implements a probabilistic graphical model to analyse sequence data
|
|
109 from tumour/normal pairs. The model draws statistical strength by analysing both
|
|
110 genomes jointly to more accurately classify germline and somatic mutations.
|
|
111
|
|
112
|
|
113 Train
|
|
114
|
|
115 The SnvMix family of models are complete generative models of the data.
|
|
116 As such the model parameters can be learned using the Expectation Maximisation
|
|
117 (EM) algorithm. The train command allows this to be done.
|
|
118
|
|
119 All methods require that a file with the parameters for the prior densities,
|
|
120 and an initial set of parameters be passed in. Templates for these files can
|
|
121 be found in the config/ directory which ships with the package. If you are
|
|
122 unsure about setting the priors or parameter values these files should suffice.
|
|
123
|
|
124 The train command will produce a parameters file suitable for use with the
|
|
125 classification command. Training is highly recommended to achieve optimal
|
|
126 performance when using SnvMix-based models.
|
|
127
|
|
128 To reduce memory consumption all subcommands of train take an optional --skip-size flag.
|
|
129 This is the number of positions to skip over before sampling a position for the training set.
|
|
130 Smaller values will lead to larger training sets which will require more memory,
|
|
131 but should yield better parameter estimates.
|
|
132
|
|
133 All subcommands of train also take optional parameters for minimum depth a
|
|
134 position has in the tumour and normal to be used for training. Higher depth
|
|
135 sites should give more robust estimates of the parameters. The default values
|
|
136 of these are likely fine.
|
|
137
|
|
138
|
|
139 Classify
|
|
140
|
|
141 The classify command is used for analysing tumour/normal paired data and
|
|
142 computing the posterior probability for each of the nine joint genotypes for
|
|
143 a pair of diploid genomes.
|
|
144
|
|
145
|
|
146
|
|
147 **Models**
|
|
148
|
|
149 ::
|
|
150
|
|
151 There are currently three models supported by both the train and classify commands.
|
|
152 All models use the JointSNVMix mixture model which jointly analyses the normal and tumour genomes.
|
|
153 By default snvmix2 is used but other models can be specified.
|
|
154
|
|
155 binomial
|
|
156
|
|
157 Uses binomial densities in the mixture model; this was previously referred to as the JointSnvMix1 model.
|
|
158
|
|
159 snvmix2
|
|
160
|
|
161 Uses snvmix2 densities in the mixture model, as described in the original SNVMix paper; previously referred to as JointSnvMix2.
|
|
162
|
|
163 beta_binomial
|
|
164
|
|
165 Uses beta-binomial densities in the mixture model (new in version 0.8). The beta-binomial is a robust (in the statistical sense)
|
|
166 alternative to the binomial model. It can be beneficial when dealing with over-dispersed data. This is useful in cancer genomes
|
|
167 since allelic frequencies at somatic mutation sites may deviate significantly from those expected under a diploid model.
|
|
168
|
|
169
|
|
170 **Input**
|
|
171
|
|
172 Bam files containing normal and tumor reads.
|
|
173
|
|
174
|
|
175 **Parameters**
|
|
176
|
|
177
|
|
178 Classify
|
|
179
|
|
180 chromosome CHROMOSOME
|
|
181 Chromosome to analyse. If not set all chromosomes will
|
|
182 be analysed.
|
|
183
|
|
184 min_base_qual MIN_BASE_QUAL
|
|
185 Remove bases with base quality lower than this.
|
|
186 Default is 0.
|
|
187
|
|
188 min_map_qual MIN_MAP_QUAL
|
|
189 Remove bases with mapping quality lower than this.
|
|
190 Default is 0.
|
|
191
|
|
192 positions_file POSITIONS_FILE
|
|
193 Path to a file containing a list of positions to
|
|
194 use for analysis. Should be space separated
|
|
195 chrom pos. Additionally for each chromosome the
|
|
196 positions should be sorted. The same format as
|
|
197 samtools.
|
|
198
|
|
199 parameters_file PARAMETERS_FILE
|
|
200 Path to a file with custom parameters values for the
|
|
201 model.
|
|
202
|
|
203 somatic_threshold SOMATIC_THRESHOLD
|
|
204 Only sites with P(Somatic) = p_AA_AB + p_AA_BB greater
|
|
205 than or equal to this value will be printed. Default is 0.
|
|
206
|
|
207
|
|
208 Train
|
|
209
|
|
210 chromosome CHROMOSOME
|
|
211 Chromosome to analyse. If not set all chromosomes will
|
|
212 be analysed.
|
|
213
|
|
214 min_base_qual MIN_BASE_QUAL
|
|
215 Remove bases with base quality lower than this.
|
|
216 Default is 0.
|
|
217
|
|
218 min_map_qual MIN_MAP_QUAL
|
|
219 Remove bases with mapping quality lower than this.
|
|
220 Default is 0.
|
|
221
|
|
222 positions_file POSITIONS_FILE
|
|
223 Path to a file containing a list of positions to
|
|
224 use for analysis. Should be space separated
|
|
225 chrom pos. Additionally for each chromosome the
|
|
226 positions should be sorted. The same format as
|
|
227 samtools.
|
|
228
|
|
229 priors_file PRIORS_FILE
|
|
230 Path to a file with priors for the model parameters.
|
|
231
|
|
232 initial_parameters_file INITIAL_PARAMETERS_FILE
|
|
233 Path to a file with initial parameter values for the
|
|
234 model.
|
|
235
|
|
236 min_normal_depth MIN_NORMAL_DEPTH
|
|
237 Minimum depth of coverage in normal sample for a site
|
|
238 to be eligible for use in training set. Default 10
|
|
239
|
|
240 min_tumour_depth MIN_TUMOUR_DEPTH
|
|
241 Minimum depth of coverage in tumour sample for a site
|
|
242 to be eligible for use in training set. Default 10
|
|
243
|
|
244 max_normal_depth MAX_NORMAL_DEPTH
|
|
245 Maximum depth of coverage in normal sample for a site
|
|
246 to be eligible for use in training set. Default 100
|
|
247
|
|
248 max_tumour_depth MAX_TUMOUR_DEPTH
|
|
249 Maximum depth of coverage in tumour sample for a site
|
|
250 to be eligible for use in training set. Default 100
|
|
251
|
|
252 max_iters MAX_ITERS
|
|
253 Maximum number of iterations to use for training
|
|
254 model. Default 1000
|
|
255
|
|
256 skip_size SKIP_SIZE
|
|
257 When subsampling will skip over this number of
|
|
258 positions before adding a site to the subsample. Larger
|
|
259 values lead to smaller subsample data sets with faster
|
|
260 training and less memory. Smaller values should lead
|
|
261 to better parameter estimates. Default 1.
|
|
262
|
|
263 convergence_threshold CONVERGENCE_THRESHOLD
|
|
264 Convergence threshold for EM training. Once the change
|
|
265 in objective function is below this value training
|
|
266 will end. Default 1e-6
|
|
267
|
|
268
|
|
269
|
|
270
|
|
271 </help>
|
|
272 </tool>
|
|
273
|
|
274
|
|
275
|
|
276
|