Mercurial > repos > iuc > gemini_gene_wise
comparison gemini_gene_wise.xml @ 5:4b26f6c99227 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59
author | iuc |
---|---|
date | Fri, 11 Jan 2019 17:48:39 -0500 |
parents | a26f0a30df65 |
children | e57a1b0ac6be |
comparison
equal
deleted
inserted
replaced
4:a26f0a30df65 | 5:4b26f6c99227 |
---|---|
1 <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.1"> | 1 <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@"> |
2 <description>Custom genotype filtering by gene</description> | 2 <description>Discover per-gene variant patterns across families</description> |
3 <macros> | 3 <macros> |
4 <import>gemini_macros.xml</import> | 4 <import>gemini_macros.xml</import> |
5 <token name="@BINARY@">gene_wise</token> | 5 <token name="@BINARY@">gene_wise</token> |
6 </macros> | 6 </macros> |
7 <expand macro="requirements" /> | 7 <expand macro="requirements" /> |
9 <expand macro="version_command" /> | 9 <expand macro="version_command" /> |
10 <command> | 10 <command> |
11 <![CDATA[ | 11 <![CDATA[ |
12 gemini @BINARY@ | 12 gemini @BINARY@ |
13 | 13 |
14 | |
15 #if int($min_filters) > 0: | 14 #if int($min_filters) > 0: |
16 --min_filters $min_filters | 15 --min-filters $min_filters |
17 #end if | 16 #end if |
18 | 17 |
19 #set $multiline_sql_expr = $gt_filter | 18 #for $filter in $filter_by_genotype: |
20 #set $cmdln_param = "--gt-filter" | 19 #set $multiline_sql_expr = str($filter.gt_filter) |
21 @MULTILN_SQL_EXPR_TO_CMDLN@ | 20 #if $filter.is_required: |
21 #set $cmdln_param = "--gt-filter-required" | |
22 #else: | |
23 #set $cmdln_param = "--gt-filter" | |
24 #end if | |
25 @MULTILN_SQL_EXPR_TO_CMDLN@ | |
26 #end for | |
22 | 27 |
28 #set $report = $oformat.report | |
23 @COLUMN_SELECT@ | 29 @COLUMN_SELECT@ |
24 | 30 |
25 @CMDLN_SQL_FILTER_FILTER_OPTION@ | 31 #set $where_clause_elements = [] |
32 #set $filter_cmdln_param = '--filter' | |
33 #for $cond in $constraint: | |
34 #if str($cond.filter).strip(): | |
35 #silent $where_clause_elements.append(str($cond.filter).strip()) | |
36 #if $cond.overwrite_default_filter: | |
37 #set $filter_cmdln_param = '--where' | |
38 #end if | |
39 #end if | |
40 #end for | |
26 | 41 |
27 "${ infile }" | 42 @PARSE_REGION_ELEMENTS@ |
28 > "${ outfile }" | 43 #if $region_elements: |
44 #silent $where_clause_elements.append(" OR ".join($region_elements)) | |
45 #end if | |
46 #set $filter = " AND ".join($where_clause_elements) | |
47 #if str($filter): | |
48 $filter_cmdln_param '$filter' | |
49 #end if | |
50 | |
51 '$infile' | |
52 > '$outfile' | |
29 ]]> | 53 ]]> |
30 </command> | 54 </command> |
31 <inputs> | 55 <inputs> |
32 <expand macro="infile" /> | 56 <expand macro="infile" /> |
33 | 57 <expand macro="gt_filter" default_repeat="1" min_repeat="1" max_repeat="999"> |
34 <param name="gt_filter" type="text" area="True" size="5x50" label="Restrictions to apply to genotype values" help="(--gt-filter)"> | 58 <param name="is_required" type="boolean" checked="False" |
35 <expand macro="sanitize_query" /> | 59 label="Make this an obligate filter that a variant has to pass to be considered" |
36 </param> | 60 help="By default, a variant has to pass a minimum number of genotype filters (set below) to get reported. By making a filter required, you ensure that variants that fail this one filter are always excluded. Required filters that a variant passes do not count towards its number of passed (regular) filters" /> |
37 <param name="min_filters" type="integer" value="0" min="0" label="Minimum number of filters" help="(--min-filters)" /> | 61 </expand> |
38 <expand macro="column_filter" /> | 62 <param name="min_filters" type="integer" value="1" min="1" label="Minimum number of filters" help="(--min-filters)" /> |
39 <expand macro="filter" /> | 63 <expand macro="region_filter" /> |
40 | 64 <expand macro="insert_constraint"> |
41 | 65 <expand macro="overwritable_where_default" default_where="exonic, high impact variants (SQL clause: is_exonic = 1 and impact_severity != 'LOW')" /> |
66 </expand> | |
67 <section name="oformat" title="Output - included information" expanded="true"> | |
68 <expand macro="column_filter" /> | |
69 </section> | |
42 </inputs> | 70 </inputs> |
43 <outputs> | 71 <outputs> |
44 <data name="outfile" format="tabular" /> | 72 <data name="outfile" format="tabular" /> |
45 </outputs> | 73 </outputs> |
46 <tests> | 74 <tests> |
47 <test> | 75 <test> |
48 <param name="infile" value="gemini_amend_input.db" ftype="gemini.sqlite" /> | 76 <param name="infile" value="gemini_amend_input.db" ftype="gemini.sqlite" /> |
49 <param name="gt_filter" value="((gt_depths).(*).(>=1).(all))" /> | 77 <repeat name="filter_by_genotype"> |
78 <param name="gt_filter" value="((gt_depths).(*).(>=1).(all))" /> | |
79 </repeat> | |
50 <output name="outfile"> | 80 <output name="outfile"> |
51 <assert_contents> | 81 <assert_contents> |
52 <has_line_matching expression="chrom	start	end	gene.*" /> | 82 <has_line_matching expression="variant_id	gene.*" /> |
53 </assert_contents> | 83 </assert_contents> |
54 </output> | 84 </output> |
55 </test> | 85 </test> |
56 </tests> | 86 </tests> |
57 <help> | 87 <help> |
58 <![CDATA[ | 88 <![CDATA[ |
59 **What it does** | 89 **What it does** |
60 | 90 |
61 The gemini query tool allows querying by variant and the inheritance tools described above enable querying by gene for fixed inheritance patterns. The gene_wise tool allows querying by gene with custom genotype filters to bridge the gap between these tools. | 91 This tool extends the *GEMINI inheritance pattern* tool in that it lets you |
92 search for custom gene-wise inheritance patterns of variants, instead of fixed | |
93 ones. | |
94 | |
95 See also: the `command line tool documentation | |
96 <https://gemini.readthedocs.io/en/latest/content/tools.html#gene-wise-custom-genotype-filtering-by-gene>`__ | |
97 | |
98 ----- | |
99 | |
100 *Genotype filters* | |
101 | |
102 The syntax for specifying a genotype filter (``--gt-filter`` command line | |
103 option) is the same as for the *GEMINI query* tool and is described `here | |
104 <https://gemini.readthedocs.io/en/latest/content/querying.html#gt-filter-filtering-on-genotypes>`__. | |
105 | |
106 The difference with the *gene_wise* tool is that it lets you specify multiple | |
107 such filters and, if you do, every filter can be met by a **different variant** | |
108 as long as all of them are in the **same gene**. | |
109 | |
110 This is useful if your analysis includes several families that you suspect | |
111 (based on a shared phenotype) to have the same gene affected, but not | |
112 necessarily through the same variant. In this case, you can formulate one filter | |
113 per family like, for example:: | |
114 | |
115 gt_types.fam1_kid == HET and gt_types.fam1_mom == HOM_REF and gt_types.fam1_dad == HOM_REF | |
116 | |
117 gt_types.fam2_kid == HET | |
118 | |
119 gt_types.fam3_kid == HET | |
120 | |
121 , which would allow you to find a causal gene that's affected by different | |
122 (dominant) variants in children from three different families. Note that the | |
123 first filter combines three conditions applied to family 1, which, thus, must | |
124 be met by the same variant site. | |
125 | |
126 *Regular and required filters* (``--gt-filter`` *vs* ``--gt-filter-required``) | |
127 and the *Minimum number of filters* | |
128 | |
129 For every single genotype filter you define you can specify whether it should | |
130 be applied as a regular or as a required filter. The difference is that, if a | |
131 variant doesn't pass a required filter it is excluded from further analysis. | |
132 Of the regular filters, a gene and its variants only have to pass a threshold | |
133 number defined by *Minimum number of filters* (``--min-filters``). Imagine, | |
134 with the above filters you had specified ``--min-filters`` as ``2``, then a | |
135 gene for which the child in family 2 carries one copy of a variant allele and | |
136 the child in family 3 carries a copy of a different allele would be reported | |
137 no matter if any other allele in that gene passes the first filter, *etc.* | |
138 | |
139 ----- | |
140 | |
141 *Region filters* | |
142 | |
143 They let you restrict your analysis to parts of the genome, which can be useful | |
144 if you have prior knowledge of the approximate location of the causative gene. | |
145 | |
146 If you specify more than one region filter, they get combined with a logical | |
147 *OR*, meaning variants and genes falling in *any* of the regions are reported. | |
148 | |
149 ----- | |
150 | |
151 *Additional constraints on variants* | |
152 | |
153 These get translated directly into the WHERE clause of an SQL query and, thus, | |
154 have to be expressed in valid SQL syntax. Of particular interest, here, is the | |
155 fact that, by default, the *gene-wise* tool applies the WHERE clause: | |
156 ``is_exonic = 1 and impact_severity != 'LOW'``, which means the tool only | |
157 considers variants in exons that are not of *LOW* impact severity (*i.e.*, not | |
158 silent mutations). While this can be a good and biologically justifiable | |
159 setting, you can overwrite it if you need. | |
160 | |
161 Note that in SQL syntax tests for equality use a single ``=``, while genotype | |
162 filters (discussed above) follow Python syntax and use ``==`` for the | |
163 same purpose. Also note that non-numerical values need to be enclosed in | |
164 single-quotes, *e.g.* ``'LOW'``, but numerical values must *NOT* be. | |
165 | |
62 ]]> | 166 ]]> |
63 </help> | 167 </help> |
64 <expand macro="citations"/> | 168 <expand macro="citations"/> |
65 </tool> | 169 </tool> |