comparison gemini_gene_wise.xml @ 5:4b26f6c99227 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59
author iuc
date Fri, 11 Jan 2019 17:48:39 -0500
parents a26f0a30df65
children e57a1b0ac6be
comparison
equal deleted inserted replaced
4:a26f0a30df65 5:4b26f6c99227
1 <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.1"> 1 <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@">
2 <description>Custom genotype filtering by gene</description> 2 <description>Discover per-gene variant patterns across families</description>
3 <macros> 3 <macros>
4 <import>gemini_macros.xml</import> 4 <import>gemini_macros.xml</import>
5 <token name="@BINARY@">gene_wise</token> 5 <token name="@BINARY@">gene_wise</token>
6 </macros> 6 </macros>
7 <expand macro="requirements" /> 7 <expand macro="requirements" />
9 <expand macro="version_command" /> 9 <expand macro="version_command" />
10 <command> 10 <command>
11 <![CDATA[ 11 <![CDATA[
12 gemini @BINARY@ 12 gemini @BINARY@
13 13
14
15 #if int($min_filters) > 0: 14 #if int($min_filters) > 0:
16 --min_filters $min_filters 15 --min-filters $min_filters
17 #end if 16 #end if
18 17
19 #set $multiline_sql_expr = $gt_filter 18 #for $filter in $filter_by_genotype:
20 #set $cmdln_param = "--gt-filter" 19 #set $multiline_sql_expr = str($filter.gt_filter)
21 @MULTILN_SQL_EXPR_TO_CMDLN@ 20 #if $filter.is_required:
21 #set $cmdln_param = "--gt-filter-required"
22 #else:
23 #set $cmdln_param = "--gt-filter"
24 #end if
25 @MULTILN_SQL_EXPR_TO_CMDLN@
26 #end for
22 27
28 #set $report = $oformat.report
23 @COLUMN_SELECT@ 29 @COLUMN_SELECT@
24 30
25 @CMDLN_SQL_FILTER_FILTER_OPTION@ 31 #set $where_clause_elements = []
32 #set $filter_cmdln_param = '--filter'
33 #for $cond in $constraint:
34 #if str($cond.filter).strip():
35 #silent $where_clause_elements.append(str($cond.filter).strip())
36 #if $cond.overwrite_default_filter:
37 #set $filter_cmdln_param = '--where'
38 #end if
39 #end if
40 #end for
26 41
27 "${ infile }" 42 @PARSE_REGION_ELEMENTS@
28 > "${ outfile }" 43 #if $region_elements:
44 #silent $where_clause_elements.append(" OR ".join($region_elements))
45 #end if
46 #set $filter = " AND ".join($where_clause_elements)
47 #if str($filter):
48 $filter_cmdln_param '$filter'
49 #end if
50
51 '$infile'
52 > '$outfile'
29 ]]> 53 ]]>
30 </command> 54 </command>
31 <inputs> 55 <inputs>
32 <expand macro="infile" /> 56 <expand macro="infile" />
33 57 <expand macro="gt_filter" default_repeat="1" min_repeat="1" max_repeat="999">
34 <param name="gt_filter" type="text" area="True" size="5x50" label="Restrictions to apply to genotype values" help="(--gt-filter)"> 58 <param name="is_required" type="boolean" checked="False"
35 <expand macro="sanitize_query" /> 59 label="Make this an obligate filter that a variant has to pass to be considered"
36 </param> 60 help="By default, a variant has to pass a minimum number of genotype filters (set below) to get reported. By making a filter required, you ensure that variants that fail this one filter are always excluded. Required filters that a variant passes do not count towards its number of passed (regular) filters" />
37 <param name="min_filters" type="integer" value="0" min="0" label="Minimum number of filters" help="(--min-filters)" /> 61 </expand>
38 <expand macro="column_filter" /> 62 <param name="min_filters" type="integer" value="1" min="1" label="Minimum number of filters" help="(--min-filters)" />
39 <expand macro="filter" /> 63 <expand macro="region_filter" />
40 64 <expand macro="insert_constraint">
41 65 <expand macro="overwritable_where_default" default_where="exonic, high impact variants (SQL clause: is_exonic = 1 and impact_severity != 'LOW')" />
66 </expand>
67 <section name="oformat" title="Output - included information" expanded="true">
68 <expand macro="column_filter" />
69 </section>
42 </inputs> 70 </inputs>
43 <outputs> 71 <outputs>
44 <data name="outfile" format="tabular" /> 72 <data name="outfile" format="tabular" />
45 </outputs> 73 </outputs>
46 <tests> 74 <tests>
47 <test> 75 <test>
48 <param name="infile" value="gemini_amend_input.db" ftype="gemini.sqlite" /> 76 <param name="infile" value="gemini_amend_input.db" ftype="gemini.sqlite" />
49 <param name="gt_filter" value="((gt_depths).(*).(>=1).(all))" /> 77 <repeat name="filter_by_genotype">
78 <param name="gt_filter" value="((gt_depths).(*).(>=1).(all))" />
79 </repeat>
50 <output name="outfile"> 80 <output name="outfile">
51 <assert_contents> 81 <assert_contents>
52 <has_line_matching expression="chrom&#009;start&#009;end&#009;gene.*" /> 82 <has_line_matching expression="variant_id&#009;gene.*" />
53 </assert_contents> 83 </assert_contents>
54 </output> 84 </output>
55 </test> 85 </test>
56 </tests> 86 </tests>
57 <help> 87 <help>
58 <![CDATA[ 88 <![CDATA[
59 **What it does** 89 **What it does**
60 90
61 The gemini query tool allows querying by variant and the inheritance tools described above enable querying by gene for fixed inheritance patterns. The gene_wise tool allows querying by gene with custom genotype filters to bridge the gap between these tools. 91 This tool extends the *GEMINI inheritance pattern* tool in that it lets you
92 search for custom gene-wise inheritance patterns of variants, instead of fixed
93 ones.
94
95 See also: the `command line tool documentation
96 <https://gemini.readthedocs.io/en/latest/content/tools.html#gene-wise-custom-genotype-filtering-by-gene>`__
97
98 -----
99
100 *Genotype filters*
101
102 The syntax for specifying a genotype filter (``--gt-filter`` command line
103 option) is the same as for the *GEMINI query* tool and is described `here
104 <https://gemini.readthedocs.io/en/latest/content/querying.html#gt-filter-filtering-on-genotypes>`__.
105
106 The difference with the *gene_wise* tool is that it lets you specify multiple
107 such filters and, if you do, every filter can be met by a **different variant**
108 as long as all of them are in the **same gene**.
109
110 This is useful if your analysis includes several families that you suspect
111 (based on a shared phenotype) to have the same gene affected, but not
112 necessarily through the same variant. In this case, you can formulate one filter
113 per family like, for example::
114
115 gt_types.fam1_kid == HET and gt_types.fam1_mom == HOM_REF and gt_types.fam1_dad == HOM_REF
116
117 gt_types.fam2_kid == HET
118
119 gt_types.fam3_kid == HET
120
121 , which would allow you to find a causal gene that's affected by different
122 (dominant) variants in children from three different families. Note that the
123 first filter combines three conditions applied to family 1, which, thus, must
124 be met by the same variant site.
125
126 *Regular and required filters* (``--gt-filter`` *vs* ``--gt-filter-required``)
127 and the *Minimum number of filters*
128
129 For every single genotype filter you define you can specify whether it should
130 be applied as a regular or as a required filter. The difference is that, if a
131 variant doesn't pass a required filter, it is excluded from further analysis.
132 Of the regular filters, a gene and its variants only have to pass a threshold
133 number defined by *Minimum number of filters* (``--min-filters``). Imagine,
134 with the above filters you had specified ``--min-filters`` as ``2``, then a
135 gene for which the child in family 2 carries one copy of a variant allele and
136 the child in family 3 carries a copy of a different allele would be reported
137 no matter if any other allele in that gene passes the first filter, *etc.*
138
139 -----
140
141 *Region filters*
142
143 They let you restrict your analysis to parts of the genome, which can be useful
144 if you have prior knowledge of the approximate location of the causative gene.
145
146 If you specify more than one region filter, they get combined with a logical
147 *OR*, meaning variants and genes falling in *any* of the regions are reported.
148
149 -----
150
151 *Additional constraints on variants*
152
153 These get translated directly into the WHERE clause of an SQL query and, thus,
154 have to be expressed in valid SQL syntax. Of particular interest, here, is the
155 fact that, by default, the *gene-wise* tool applies the WHERE clause:
156 ``is_exonic = 1 and impact_severity != 'LOW'``, which means the tool only
157 considers variants in exons that are not of *LOW* impact severity (*i.e.*, not
158 silent mutations). While this can be a good and biologically justifiable
159 setting, you can overwrite it if you need to.
160
161 Note that, in SQL syntax, tests for equality use a single ``=``, while genotype
162 filters (discussed above) follow Python syntax and use ``==`` for the
163 same purpose. Also note that non-numerical values need to be enclosed in
164 single-quotes, *e.g.* ``'LOW'``, but numerical values must *NOT* be.
165
62 ]]> 166 ]]>
63 </help> 167 </help>
64 <expand macro="citations"/> 168 <expand macro="citations"/>
65 </tool> 169 </tool>