Mercurial > repos > fcaramia > jointsnvmix
comparison joint_snv_mix.xml @ 0:a1034918ab9b draft
Uploaded
author | fcaramia |
---|---|
date | Thu, 20 Jun 2013 00:03:08 -0400 |
parents | |
children | 26953f1c8af2 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a1034918ab9b |
---|---|
1 <tool id="joint_snv_mix" name="Joint SNV Mix" version="0.7.5"> | |
2 <description>classify germline and somatic mutations</description> | |
3 <requirements> <!-- Package dependencies declared for this tool. --> | |
4 <requirement type="package" version="2.7">python</requirement> | |
5 <requirement type="package" version="0.19.1">cython</requirement> | |
6 <requirement type="package" version="0.5">pysam</requirement> | |
7 <requirement type="package" version="0.1.18">samtools</requirement> | |
8 <requirement type="package" version="0.7.5">jointsnvmix</requirement> <!-- Same version string as the tool element's version attribute. --> | |
9 </requirements> | |
10 <command interpreter="perl"> | |
11 ## Argument list for the joint_snv_mix.pl wrapper; every quoted token has the form "KEY::value". | |
12 joint_snv_mix.pl | |
13 ## ACTION carries the selected action: train or classify. | |
14 "ACTION::${option.option}" | |
15 ## Reference path from the all_fasta data table entry, then the normal and tumor BAM datasets. | |
16 "REFGENOME::$refFile.fields.path" | |
17 "BAMNORMAL::$normal_file" | |
18 "BAMTUMOR::$tumor_file" | |
19 | |
20 ## Classify: optional parameters file, output file and somatic threshold. | |
21 #if str($option.option) == "classify": | |
22 #if ($option.parameters): | |
23 "OPTION::--parameters_file $option.parameters" | |
24 #end if | |
25 "OPTION::--out_file $output" | |
26 "OPTION::--somatic_threshold $option.somatic_threshold" | |
27 #end if | |
28 | |
29 ## Train: optional priors and initial parameters, output file and EM stopping criteria. | |
30 #if str($option.option) == "train": | |
31 #if ($option.priors): | |
32 "OPTION::--priors_file $option.priors" | |
33 #end if | |
34 ## Initial parameters are optional; the flag is only emitted when a dataset was supplied. | |
35 #if ($option.initial_parameters): | |
36 "OPTION::--initial_parameters_file $option.initial_parameters" | |
37 #end if | |
38 "OUTPUT::$output" | |
39 "OPTION::--convergence_threshold $option.convergence_threshold" | |
40 "OPTION::--max_iters $option.max_iters" | |
41 #end if | |
42 ## Options shared by both actions; positions file and chromosome are only passed when set. | |
43 #if ($positions_file): | |
44 "OPTION::--positions_file $positions_file" | |
45 #end if | |
46 "OPTION::--min_base_qual $min_base_quality" | |
47 "OPTION::--min_map_qual $min_map_quality" | |
48 "OPTION::--model $model" | |
49 #if ($chromosome): | |
50 "OPTION::--chromosome $chromosome" | |
51 #end if | |
52 </command> | |
53 <inputs> | |
54 <!-- Reference genome, chosen from the all_fasta data table. --> | |
55 <param name="refFile" type="select" label="Select a reference genome" optional="false"> | |
56 <options from_data_table="all_fasta"> | |
57 <filter type="sort_by" column="2" /> | |
58 <validator type="no_options" message="No indexes are available" /> | |
59 </options> | |
60 </param> | |
61 <param name="normal_file" type="data" format="bam" label="Normal Sample" help="Bam" /> | |
62 <param name="tumor_file" type="data" format="bam" label="Tumor Sample" help="Bam" /> | |
63 <!-- The command always passes the model option, so a model must be selected. --> | |
64 <param name="model" type="select" label="Model" help=""> | |
65 <option value="binomial">binomial</option> | |
66 <option value="snvmix2" selected="true">snvmix2</option> | |
67 <option value="beta_binomial">beta binomial</option> | |
68 </param> | |
69 <param name="positions_file" type="data" format="txt" label="Positions file" help="Filter positions" optional="true"/> | |
70 <!-- Numeric parameters are typed so that the form rejects non-numeric input. --> | |
71 <param name="min_map_quality" type="integer" min="0" label="Min map quality" help="Filter reads" value="0"/> | |
72 <param name="min_base_quality" type="integer" min="0" label="Min base quality" help="Filter reads" value="0"/> | |
73 <param name="chromosome" type="text" label="Chromosome" help="a chromosome to analyse, leave blank for all"/> | |
74 | |
75 <!-- Action-specific parameters; the command branches on this value, so an action is required. --> | |
76 <conditional name="option"> | |
77 <param name="option" type="select" label="Action" help=""> | |
78 <option value="train" selected="true">Train</option> | |
79 <option value="classify">Classify</option> | |
80 </param> | |
81 | |
82 <when value="train"> | |
83 <param name="priors" type="data" format="txt" label="Prior Probabilities" optional="true"/> | |
84 <param name="initial_parameters" type="data" format="txt" label="Initial Parameters" optional="true"/> | |
85 <param name="convergence_threshold" type="float" min="0" label="Convergence Threshold" value="1e-6"/> | |
86 <param name="max_iters" type="integer" min="1" label="Max number of training iterations" value="1000"/> | |
87 </when> | |
88 | |
89 <when value="classify"> | |
90 <param name="parameters" type="data" format="txt" label="Classify Parameters" help="" optional="true" /> | |
91 <param name="somatic_threshold" type="float" min="0.0" max="1.0" label="Somatic Threshold" help="filter by probability" value="0.0"/> | |
92 </when> | |
93 </conditional> | |
94 | |
95 | |
96 </inputs> | |
97 <outputs> <!-- Single output dataset: written via OUTPUT when training and via the out_file option when classifying. --> | |
98 <data format="txt" name="output" label="${tool.name} result on ${on_string}"/> | |
99 </outputs> | |
100 | |
101 <help> | |
102 | |
103 .. class:: infomark | |
104 | |
105 **What it does** | |
106 | |
107 :: | |
108 | |
109 JointSNVMix implements a probabilistic graphical model to analyse sequence data | |
110 from tumour/normal pairs. The model draws statistical strength by analysing both | |
111 genomes jointly to more accurately classify germline and somatic mutations. | |
112 | |
113 | |
114 Train | |
115 | |
116 The SnvMix family of models are complete generative models of the data. | |
117 As such the model parameters can be learned using the Expectation Maximisation | |
118 (EM) algorithm. The train command allows this to be done. | |
119 | |
120 All methods require that a file with the parameters for the prior densities, | |
121 and an initial set of parameters be passed in. Templates for these files can | |
122 be found in the config/ directory which ships with the package. If you are | |
123 unsure about setting the priors or parameter values these files should suffice. | |
124 | |
125 The train command will produce a parameters file suitable for use with the | |
126 classification command. Training is highly recommended to achieve optimal | |
127 performance when using an SnvMix-based model. | |
128 | |
129 To reduce memory consumption all subcommands of train take an optional --skip-size flag. | |
130 This is the number of positions to skip over before sampling a position for the training set. | |
131 Smaller values will lead to larger training sets which will require more memory, | |
132 but should yield better parameter estimates. | |
133 | |
134 All subcommands of train also take optional parameters for the minimum depth a | |
135 position must have in the tumour and normal samples to be used for training. Higher depth | |
136 sites should give more robust estimates of the parameters. The default values | |
137 of these are likely fine. | |
138 | |
139 | |
140 Classify | |
141 | |
142 The classify command is used for analysing tumour/normal paired data and | |
143 computing the posterior probability for each of the nine joint genotypes for | |
144 a pair of diploid genomes. | |
145 | |
146 | |
147 | |
148 **Models** | |
149 | |
150 :: | |
151 | |
152 There are currently three models supported by both the train and classify commands. | |
153 All models use the JointSNVMix mixture model which jointly analyses the normal and tumour genomes. | |
154 By default snvmix2 is used but other models can be specified. | |
155 | |
156 binomial | |
157 | |
158 Uses binomial densities in the mixture model; this was previously referred to as the JointSnvMix1 model. | |
159 | |
160 snvmix2 | |
161 | |
162 Uses snvmix2 densities in the mixture model, as described in the original SNVMix paper; this was previously referred to as JointSnvMix2. | |
163 | |
164 beta_binomial | |
165 | |
166 Uses beta-binomial densities in the mixture model (new in version 0.8). The beta-binomial is a robust (in the statistical sense) | |
167 alternative to the binomial model. It can be beneficial when dealing with over-dispersed data. This is useful in cancer genomes | |
168 since allelic frequencies at somatic mutation sites may deviate significantly from those expected under a diploid model. | |
169 | |
170 | |
171 **Input** | |
172 | |
173 Bam files containing normal and tumor reads. | |
174 | |
175 | |
176 **Parameters** | |
177 | |
178 | |
179 Classify | |
180 | |
181 chromosome CHROMOSOME | |
182 Chromosome to analyse. If not set all chromosomes will | |
183 be analysed. | |
184 | |
185 min_base_qual MIN_BASE_QUAL | |
186 Remove bases with base quality lower than this. | |
187 Default is 0. | |
188 | |
189 min_map_qual MIN_MAP_QUAL | |
190 Remove bases with mapping quality lower than this. | |
191 Default is 0. | |
192 | |
193 positions_file POSITIONS_FILE | |
194 Path to a file containing a list of positions to | |
195 use for analysis. Should be space separated | |
196 chrom pos. Additionally for each chromosome the | |
197 positions should be sorted. The same format as | |
198 samtools. | |
199 | |
200 parameters_file PARAMETERS_FILE | |
201 Path to a file with custom parameter values for the | |
202 model. | |
203 | |
204 somatic_threshold SOMATIC_THRESHOLD | |
205 Only sites with P(Somatic) = p_AA_AB + p_AA_BB greater | |
206 than or equal to this value will be printed. Default is 0. | |
207 | |
208 | |
209 Train | |
210 | |
211 chromosome CHROMOSOME | |
212 Chromosome to analyse. If not set all chromosomes will | |
213 be analysed. | |
214 | |
215 min_base_qual MIN_BASE_QUAL | |
216 Remove bases with base quality lower than this. | |
217 Default is 0. | |
218 | |
219 min_map_qual MIN_MAP_QUAL | |
220 Remove bases with mapping quality lower than this. | |
221 Default is 0. | |
222 | |
223 positions_file POSITIONS_FILE | |
224 Path to a file containing a list of positions to | |
225 use for analysis. Should be space separated | |
226 chrom pos. Additionally for each chromosome the | |
227 positions should be sorted. The same format as | |
228 samtools. | |
229 | |
230 priors_file PRIORS_FILE | |
231 Path to a file with priors for the model parameters. | |
232 | |
233 initial_parameters_file INITIAL_PARAMETERS_FILE | |
234 Path to a file with initial parameter values for the | |
235 model. | |
236 | |
237 min_normal_depth MIN_NORMAL_DEPTH | |
238 Minimum depth of coverage in normal sample for a site | |
239 to be eligible for use in training set. Default 10 | |
240 | |
241 min_tumour_depth MIN_TUMOUR_DEPTH | |
242 Minimum depth of coverage in tumour sample for a site | |
243 to be eligible for use in training set. Default 10 | |
244 | |
245 max_normal_depth MAX_NORMAL_DEPTH | |
246 Maximum depth of coverage in normal sample for a site | |
247 to be eligible for use in training set. Default 100 | |
248 | |
249 max_tumour_depth MAX_TUMOUR_DEPTH | |
250 Maximum depth of coverage in tumour sample for a site | |
251 to be eligible for use in training set. Default 100 | |
252 | |
253 max_iters MAX_ITERS | |
254 Maximum number of iterations to use for training the | |
255 model. Default 1000 | |
256 | |
257 skip_size SKIP_SIZE | |
258 When subsampling, skip over this number of | |
259 positions before adding a site to the subsample. Larger | |
260 values lead to smaller subsample data sets with faster | |
261 training and less memory. Smaller values should lead | |
262 to better parameter estimates. Default 1. | |
263 | |
264 convergence_threshold CONVERGENCE_THRESHOLD | |
265 Convergence threshold for EM training. Once the change | |
266 in objective function is below this value training | |
267 will end. Default 1e-6 | |
268 | |
269 | |
270 | |
271 | |
272 </help> | |
273 </tool> | |
274 | |
275 | |
276 | |
277 |