comparison gemini_inheritance.xml @ 0:3123ce7acd0e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59
author iuc
date Fri, 11 Jan 2019 17:50:55 -0500
parents
children 2c68e29c3527
comparison
equal deleted inserted replaced
-1:000000000000 0:3123ce7acd0e
1 <tool id="gemini_inheritance" name="GEMINI inheritance pattern" version="@VERSION@">
2 <description>based identification of candidate genes</description>
3 <macros>
4 <import>gemini_macros.xml</import>
5 <xml name="name_X"> <!-- Text param for the X chromosome alias; expanded in the three x_linked_* branches and passed via -X by the command template when non-empty. -->
6 <param name="X" type="text" value=""
7 label="Alias to use for X chromosome"
8 help="The tool expects the X chromosome to be named 'X' or 'chrX'. If the reference genome used for variant calling had a different name for it, you will have to specify it here." />
9 </xml>
10 </macros>
11 <expand macro="requirements" />
12 <expand macro="stdio" />
13 <expand macro="version_command" />
14 <command>
15 <![CDATA[
16 gemini ${inheritance.pattern_type}
17 ## One option per non-empty user constraint; for comp_hets a constraint flagged overwrite_default_filter is passed as --gene-where instead of --filter.
18 #for $cond in $inheritance.constraint:
19 #set $filter = str($cond.filter).strip()
20 #if $filter:
21 #if str($inheritance.pattern_type) == "comp_hets" and $cond.overwrite_default_filter:
22 --gene-where '$filter'
23 #else:
24 --filter '$filter'
25 #end if
26 #end if
27 #end for
28 ## by_pattern_only is only declared in the comp_hets and mendel_errors branches, so it must not be referenced for other patterns.
29 #if str($inheritance.pattern_type) in ("comp_hets", "mendel_errors"):
30 ${inheritance.by_pattern_only}
31 #end if
32 ## Referenced unconditionally: branches without these options declare them as hidden empty params.
33 ${inheritance.lenient}
34 ${inheritance.allow_unaffected}
35
36 #if str($inheritance.pattern_type).startswith('x_linked_') and str($inheritance.X).strip():
37 -X "${inheritance.X}"
38 #end if
39
40 #if int($family_wise.min_kindreds) > 0:
41 --min-kindreds ${family_wise.min_kindreds}
42 #end if
43 ## Normalize the comma-separated family list: trim whitespace and drop empty entries (e.g. from a trailing comma) so no empty family name reaches gemini.
44 #set $families = ','.join([f.strip() for f in str($family_wise.families).split(',') if f.strip()])
45 #if $families:
46 --families "$families"
47 #end if
48 ## 0 / 0 / -1 are the "filter not requested" values (also the hidden defaults), so the option is omitted for them.
49 #if int($family_wise.per_variant_selection.min_dp) > 0:
50 -d ${family_wise.per_variant_selection.min_dp}
51 #end if
52
53 #if int($family_wise.per_variant_selection.min_gq) > 0:
54 --min-gq ${family_wise.per_variant_selection.min_gq}
55 #end if
56
57 #if int($family_wise.per_variant_selection.max_pl) > -1:
58 --gt-pl-max ${family_wise.per_variant_selection.max_pl}
59 #end if
60 ## $report is set for the @COLUMN_SELECT@ token that follows (token defined outside this file; presumably in gemini_macros.xml - confirm).
61 #set $report = $oformat.report
62 @COLUMN_SELECT@
63
64 "${ infile }"
65 > "${ outfile }"
66 ]]>
67 </command>
68 <inputs>
69 <expand macro="infile" />
70 <conditional name="inheritance">
71 <param name="pattern_type" type="select"
72 label="Your assumption about the inheritance pattern of the phenotype of interest">
73 <option value="autosomal_recessive">Autosomal recessive</option>
74 <option value="autosomal_dominant">Autosomal dominant</option>
75 <option value="x_linked_recessive">X-linked recessive</option>
76 <option value="x_linked_dominant">X-linked dominant</option>
77 <option value="de_novo">Autosomal de-novo</option>
78 <option value="x_linked_de_novo">X-linked de-novo</option>
79 <option value="comp_hets">Compound heterozygous</option>
80 <option value="mendel_errors">Violation of mendelian laws (LOH, plausible and implausible de-novo, uniparental disomy)</option>
81 </param>
82 <when value="comp_hets">
83 <expand macro="insert_constraint">
84 <expand macro="overwritable_where_default" default_where="exonic and high-impact variants (SQL clause: is_exonic = 1 or impact_severity != 'LOW')" />
85 </expand>
86 <param argument="--pattern-only" name="by_pattern_only" type="boolean" truevalue="--pattern-only" falsevalue="" checked="false"
87 label="Ignore sample phenotypes during variant identification"
88 help="When turned on, the identification of compound heterozygous variant pairs gets based on the family tree only, i.e., the tool looks for heterozygous allele pairs in any kid that weren't occurring together in the parents (see the tool help below for the exact criteria used to detect compound heterozygosity)." />
89 <expand macro="lenient" argument="--max-priority" truevalue="--max-priority 3"
90 help="When turned on, runs the tool with --max-priority 3 instead of the default value 1. This leads to inclusion of more ambiguous cases of compound heterozygosity." />
91 <expand macro="unaffected" />
92 </when>
93 <when value="mendel_errors"> <!-- The boolean below is inverted on purpose: switching it on emits nothing, switching it off emits the only-affected flag. allow_unaffected is a hidden empty value in this branch. -->
94 <expand macro="insert_constraint" />
95 <param argument="--only-affected" name="by_pattern_only" type="boolean" truevalue="" falsevalue="--only-affected" checked="false"
96 label="Ignore sample phenotypes during variant identification"
97 help="When turned on, the identification of candidate variants gets based on the observed inheritance pattern only. The default is to report candidates only if there is evidence for them being phenotypically relevant, i.e., if they are observed in an affected sample." />
98 <expand macro="lenient" />
99 <param name="allow_unaffected" type="hidden" value="" />
100 </when>
101 <when value="autosomal_recessive">
102 <expand macro="insert_constraint" />
103 <expand macro="lenient" />
104 <expand macro="unaffected" />
105 </when>
106 <when value="autosomal_dominant">
107 <expand macro="insert_constraint" />
108 <expand macro="lenient" />
109 <expand macro="unaffected" />
110 </when>
111 <when value="x_linked_recessive"> <!-- No lenient option is offered here; the hidden empty value keeps the unconditional lenient reference in the command template expanding to nothing. -->
112 <expand macro="insert_constraint" />
113 <param name="lenient" type="hidden" value="" />
114 <expand macro="unaffected" />
115 <expand macro="name_X" />
116 </when>
117 <when value="x_linked_dominant"> <!-- No lenient option is offered here; the hidden empty value keeps the unconditional lenient reference in the command template expanding to nothing. -->
118 <expand macro="insert_constraint" />
119 <param name="lenient" type="hidden" value="" />
120 <expand macro="unaffected" />
121 <expand macro="name_X" />
122 </when>
123 <when value="de_novo">
124 <expand macro="insert_constraint" />
125 <expand macro="lenient" />
126 <expand macro="unaffected" />
127 </when>
128 <when value="x_linked_de_novo"> <!-- No lenient option is offered here; the hidden empty value keeps the unconditional lenient reference in the command template expanding to nothing. -->
129 <expand macro="insert_constraint" />
130 <param name="lenient" type="hidden" value="" />
131 <expand macro="unaffected" />
132 <expand macro="name_X" />
133 </when>
134 </conditional>
135 <section name="family_wise" title="Family-wise criteria for variant selection" expanded="true">
136 <expand macro="min_kindreds" />
137 <param argument="--families" name="families" type="text" value=""
138 label="List of families to restrict the analysis to (comma-separated)"
139 help="Leave empty for an analysis including all families"/>
140 <conditional name="per_variant_selection">
141 <param name="enabled" type="select"
142 label="Specify additional criteria to exclude families on a per-variant basis">
143 <option value="no">No, analyze all variants from all included families</option>
144 <option value="yes">Yes, filter variants within families</option>
145 </param>
146 <when value="no"> <!-- Hidden sentinels: the command template emits the DP and GQ options only for values above 0 and the PL option only for values above -1, so this branch adds nothing to the command line. -->
147 <param name="min_dp" type="hidden" value="0" />
148 <param name="min_gq" type="hidden" value="0" />
149 <param name="max_pl" type="hidden" value="-1" />
150 </when>
151 <when value="yes">
152 <param argument="-d" name="min_dp" type="integer" value="0" min="0"
153 label="Per-variant DP threshold for including a family"
154 help="All samples from a family must have a sequencing depth of at least this value at a given variant site in order for the family to be included in the analysis of this particular variant. default: 0 (do not apply this filter)" />
155 <param argument="--min-gq" name="min_gq" type="integer" value="0" min="0"
156 label="per-variant GQ threshold for including a family"
157 help="The genotypes of all samples from a family must be called with at least this quality at a given variant site in order for the family to be included in the analysis of this particular variant. default: 0 (do not apply this filter)">
158 </param>
159 <param argument="--gt-pl-max" name="max_pl" type="integer" value="-1" min="-1"
160 label="per-variant PL threshold for including a family"
161 help="The genotypes at a given variant site of all samples from a family must not have a higher (phred-scaled) likelihood to be wrong than this value in order for the family to be included in the analysis of this particular variant. default: -1 (do not apply this filter); if used the GEMINI documentation suggests 10 as a reasonable value" />
162 </when>
163 </conditional>
164 </section>
165 <section name="oformat" title="Output - included information" expanded="true">
166 <expand macro="column_filter" help="The tool reports key information about the inheritance pattern detection for each candidate variant found. It can precede each such row with additional columns, listing information about the variant taken from the variants table of the GEMINI database. Here, you can control which subset of the variants table columns should be added to the output."/>
167 </section>
168 </inputs>
169 <outputs>
170 <data name="outfile" format="tabular"
171 label="GEMINI ${inheritance.pattern_type} pattern on ${on_string}" />
172 </outputs>
173 <tests>
174 <test> <!-- Autosomal dominant, lenient, minimal report; the report conditional is nested in the oformat section like in the inputs and the other tests. -->
175 <param name="infile" value="gemini_auto_dom_input.db" ftype="gemini.sqlite" />
176 <conditional name="inheritance">
177 <param name="pattern_type" value="autosomal_dominant" />
178 <param name="lenient" value="true" />
179 </conditional>
180 <section name="oformat"><conditional name="report">
181 <param name="report_selector" value="minimal" />
182 </conditional></section>
183 <output name="outfile">
184 <assert_contents>
185 <has_line_matching expression="variant_id&#009;gene&#009;.*" />
186 </assert_contents>
187 </output>
188 </test>
189 <test>
190 <param name="infile" value="gemini_auto_dom_input.db" ftype="gemini.sqlite" />
191 <conditional name="inheritance">
192 <param name="pattern_type" value="autosomal_dominant" />
193 <param name="lenient" value="true" />
194 </conditional>
195 <section name="oformat">
196 <conditional name="report">
197 <param name="report_selector" value="custom" />
198 <param name="columns" value="gene,chrom,impact" />
199 </conditional>
200 </section>
201 <output name="outfile">
202 <assert_contents>
203 <has_line_matching expression="gene&#009;chrom&#009;impact.*" />
204 </assert_contents>
205 </output>
206 </test>
207 <test>
208 <param name="infile" value="gemini_auto_dom_input.db" ftype="gemini.sqlite" />
209 <conditional name="inheritance">
210 <param name="pattern_type" value="autosomal_dominant" />
211 <param name="lenient" value="true" />
212 </conditional>
213 <section name="oformat">
214 <conditional name="report">
215 <param name="report_selector" value="custom" />
216 <!-- test with empty multiselect list and columns specified
217 via text field instead -->
218 <param name="extra_cols" value="gene,chrom,impact" />
219 </conditional>
220 </section>
221 <output name="outfile">
222 <assert_contents>
223 <has_line_matching expression="gene&#009;chrom&#009;impact.*" />
224 </assert_contents>
225 </output>
226 </test>
227 <test>
228 <param name="infile" value="gemini_auto_rec_input.db" ftype="gemini.sqlite" />
229 <conditional name="inheritance">
230 <param name="pattern_type" value="autosomal_recessive" />
231 <param name="lenient" value="true" />
232 </conditional>
233 <section name="oformat">
234 <conditional name="report">
235 <param name="report_selector" value="custom" />
236 <param name="columns" value="gene,chrom,impact" />
237 </conditional>
238 </section>
239 <output name="outfile">
240 <assert_contents>
241 <has_line_matching expression="gene&#009;chrom&#009;impact.*" />
242 </assert_contents>
243 </output>
244 </test>
245 <test>
246 <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
247 <conditional name="inheritance">
248 <param name="pattern_type" value="de_novo" />
249 </conditional>
250 <section name="oformat">
251 <conditional name="report">
252 <param name="report_selector" value="custom" />
253 <param name="columns" value="gene,ref,alt,impact" />
254 </conditional>
255 </section>
256 <output name="outfile">
257 <assert_contents>
258 <has_line_matching expression="gene&#009;ref&#009;alt&#009;impact.*" />
259 </assert_contents>
260 </output>
261 </test>
262 <test>
263 <param name="infile" value="gemini_comphets_input.db" ftype="gemini.sqlite" />
264 <conditional name="inheritance">
265 <param name="pattern_type" value="comp_hets" />
266 <param name="lenient" value="true" />
267 <param name="allow_unaffected" value="true" />
268 </conditional>
269 <section name="oformat">
270 <conditional name="report">
271 <param name="report_selector" value="custom" />
272 <param name="columns" value="chrom,start,end,ref,alt,gene,impact" />
273 </conditional>
274 </section>
275 <output name="outfile">
276 <assert_contents>
277 <has_line_matching expression="chrom&#009;start&#009;end&#009;.*gene.*" />
278 </assert_contents>
279 </output>
280 </test>
281 <test>
282 <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
283 <conditional name="inheritance">
284 <param name="pattern_type" value="mendel_errors" />
285 </conditional>
286 <section name="oformat">
287 <conditional name="report">
288 <param name="report_selector" value="custom" />
289 <param name="columns" value="gene,ref,alt,impact" />
290 </conditional>
291 </section>
292 <output name="outfile">
293 <assert_contents>
294 <has_line_matching expression="gene&#009;ref&#009;alt&#009;impact&#009;.*violation.*" />
295 </assert_contents>
296 </output>
297 </test>
298 </tests>
299 <help>
300 <![CDATA[
301
302 **What it does**
303
304 Assuming you have defined the familial relationships between samples when
305 loading your VCF into GEMINI, you can use this tool to identify candidate genes
306 and variants that explain the inheritance pattern of a phenotype of interest.
307
308 **Inheritance pattern detection rules**
309
310 *Autosomal recessive*
311
312 Criteria:
313
314 - all affected must be hom_alt
315 - [affected] no unaffected can be hom_alt (can be unknown)
316 - [default] if parents exist they must be unaffected and het for all affected kids
317 - [default] if there are no affecteds that have a parent, a warning is issued.
318
319 If ``--lenient`` is specified, the 2 criteria prefixed with “[default]” are not
320 applied.
321
322 If ``--allow-unaffected`` is specified, the criterion prefixed with
323 “[affected]” is not enforced.
324
325 ----
326
327 *Autosomal dominant*
328
329 Criteria:
330
331 - All affecteds must be het
332 - [affected] No unaffected can be het or homalt (can be unknown)
333 - de_novo mutations are not auto_dom (at least not in the first generation)
334 - At least 1 affected must have 1 affected parent (or have no parents).
335 - If no affected has a parent, a warning is issued.
336 - [default] All affecteds must have parents with known phenotype.
337 - [default] All affected kids must have at least 1 affected parent
338
339 If ``--lenient`` is specified, the criteria prefixed with “[default]” are not
340 enforced.
341
342 If ``--allow-unaffected`` is specified, the criterion prefixed with
343 “[affected]” is not enforced.
344
345 Note that, for autosomal dominant, ``--lenient`` allows singleton affecteds to
346 be used to meet the ``--min-kindreds`` requirement if they are HET.
347
348 If there is incomplete penetrance in the kindred (unaffected obligate carriers),
349 these individuals currently must be coded as having unknown phenotype or as
350 being affected.
351
352 ----
353
354 *X-linked recessive*
355
356 Criteria:
357
358 - Affected females must be HOM_ALT
359 - Unaffected females are HET or HOM_REF
360 - Affected males are not HOM_REF
361 - Unaffected males are HOM_REF
362
363 Note: Pseudo-autosomal regions are not accounted for by the tool.
364
365 ----
366
367 *X-linked dominant*
368
369 Criteria:
370
371 - Affected males are HET or HOM_ALT
372 - Affected females must be HET
373 - Unaffecteds must be HOM_REF
374 - girls of affected dad must be affected
375 - boys of affected dad must be unaffected
376 - mothers of affected males must be het (and affected)
377 - at least 1 parent of affected females must be het (and affected).
378
379 Note: Pseudo-autosomal regions are not accounted for by the tool.
380
381 ----
382
383 *De-novo mutations*
384
385 Criteria:
386
387 - all affected must be het
388 - [affected] all unaffected must be homref or homalt
389 - at least 1 affected kid must have unaffected parents
390 - [default] if an affected has affected parents, it’s not de_novo
391 - [default] all affected kids must have unaffected (or no) parents
392 - [default] warning if none of the affected samples have parents.
393
394 The last 3 items, prefixed with [default], can be turned off with ``--lenient``.
395
396 If ``--allow-unaffected`` is specified, then the criterion prefixed [affected]
397 is not enforced.
398
399 ----
400
401 *X-linked de-novo mutations*
402
403 Criteria:
404
405 - affected female child must be het
406 - affected male child must be hom_alt (or het)
407 - parents should be unaffected and hom_ref
408
409 Note: Pseudo-autosomal regions are not accounted for by the tool.
410
411 ----
412
413 *Compound heterozygosity*
414
415 Unlike canonical recessive sites where the same recessive allele is inherited
416 from both parents at the *same* site in the gene, compound heterozygosity
417 occurs when the individual’s phenotype is caused by two heterozygous recessive
418 alleles at *different* sites in a particular gene.
419
420 To detect compound heterozygosity, the tool looks for two heterozygous variants
421 impacting the same gene at different loci. The complicating factor is that this
422 is a case of *recessive* inheritance and as such, we must also require that the
423 consequential alleles at each heterozygous site were inherited on different
424 chromosomes (one from each parent). Hence, where possible, the tool will phase
425 by transmission.
426
427 Criteria (default):
428
429 - All affected individuals must be heterozygous at both sites.
430 - No unaffected can be homozygous alternate at either site.
431 - Neither parent of an affected sample can be homozygous reference at both
432 sites.
433 - If any unphased-unaffected is het at both sites, the site will be given lower
434 priority.
435 - No phased-unaffected can be heterozygous at both sites.
436
437 a. ``--allow-unaffected`` keeps sites where a phased unaffected shares the
438 het-pair
439 b. unphased, unaffected that share the het pair are counted and reported for
440 each candidate pair.
441 - Candidates where an affected from the same family does NOT share the same het
442 pair are removed.
443 - Sites are automatically phased by transmission when parents are present in
444 order to remove false positive candidates.
445
446 If data from one or both parents are unavailable and the child’s data was not
447 phased prior to loading into GEMINI, all comp_het variant pairs will
448 automatically be given at most priority == 2. If there’s only a single parent
449 and both the parent and the affected are HET at both sites, the candidate
450 will have priority 3.
451
452 Criteria (``--pattern-only``):
453
454 - Kid must be HET at both sites.
455 - Kid must have alts on different chromosomes.
456 - Neither parent can be HOM_ALT at either site.
457 - If either parent is phased at both sites and matches the kid, it’s excluded.
458 - When the above criteria are met, and both parents and kid are phased or
459 parents are HET at different sites, the priority is 1.
460 - If either parent is HET at both sites, priority is reduced.
461 - If both parents are not phased, the priority is 2.
462 - For every parent that’s a het at both sites, the priority is incremented by 1.
463 - The priority in a family is the minimum found among all kids.
464
465 ----
466
467 *Violation of Mendelian laws*
468
469 The tool can be used to detect the following kinds of non-Mendelian patterns:
470
471 - loss of heterozygosity (LOH) events
472 - de-novo mutations
473 - implausible de-novo mutations
474 - potential cases of uniparental disomy
475
476 Criteria:
477
478 - LOH: child and one parent are opposite homozygotes; other parent is HET
479 - plausible de novo: kid is het. parents are same homozygotes
480 - implausible de novo: kid is homozygote. parents are same homozygotes and opposite to kid.
481 - uniparental disomy: parents are opposite homozygotes; kid is homozygote
482
483 ]]>
484 </help>
485 <expand macro="citations"/>
486 </tool>