Mercurial > repos > iuc > gatk2
comparison variant_select.xml @ 6:35c00763cb5c draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gatk2 commit cf399638ebca4250bcc15f468238a9964de97b33
author | iuc |
---|---|
date | Mon, 04 Jun 2018 05:38:15 -0400 |
parents | f244b8209eb8 |
children |
comparison
equal
deleted
inserted
replaced
5:84584664264c | 6:35c00763cb5c |
---|---|
1 <tool id="gatk2_variant_select" name="Select Variants" version="@VERSION@.0"> | 1 <tool id="gatk2_variant_select" name="Select Variants" version="@VERSION@.2"> |
2 <description>from VCF files</description> | 2 <description>from VCF files</description> |
3 <expand macro="requirements" /> | |
4 <macros> | 3 <macros> |
5 <import>gatk2_macros.xml</import> | 4 <import>gatk2_macros.xml</import> |
6 </macros> | 5 </macros> |
| | 6 <expand macro="requirements" /> |
| | 7 <expand macro="version_command" /> |
7 <command interpreter="python"> | 8 <command interpreter="python"> |
8 #from binascii import hexlify | 9 #from binascii import hexlify |
9 | 10 |
10 gatk2_wrapper.py | 11 gatk2_wrapper.py |
11 --stdout "${output_log}" | 12 --stdout "${output_log}" |
15 -T "SelectVariants" | 16 -T "SelectVariants" |
16 \$GATK2_SITE_OPTIONS | 17 \$GATK2_SITE_OPTIONS |
17 | 18 |
18 @THREADS@ | 19 @THREADS@ |
19 -o "${output_vcf}" | 20 -o "${output_vcf}" |
20 | 21 |
21 #if $reference_source.reference_source_selector != "history": | 22 #if $reference_source.reference_source_selector != "history": |
22 -R "${reference_source.ref_file.fields.path}" | 23 -R "${reference_source.ref_file.fields.path}" |
23 #end if | 24 #end if |
24 ' | 25 ' |
25 -p ' | 26 -p ' |
27 --concordance "${input_concordance}" | 28 --concordance "${input_concordance}" |
28 #end if | 29 #end if |
29 #if $input_discordance: | 30 #if $input_discordance: |
30 --discordance "${input_discordance}" | 31 --discordance "${input_discordance}" |
31 #end if | 32 #end if |
32 | 33 |
33 #for $exclude_sample_name in $exclude_sample_name_repeat: | 34 #for $exclude_sample_name in $exclude_sample_name_repeat: |
34 --exclude_sample_name "${exclude_sample_name.exclude_sample_name}" | 35 --exclude_sample_name "${exclude_sample_name.exclude_sample_name}" |
35 #end for | 36 #end for |
36 | 37 |
37 ${exclude_filtered} | 38 ${exclude_filtered} |
38 | 39 |
39 #for $sample_name in $sample_name_repeat: | 40 #for $sample_name in $sample_name_repeat: |
40 --sample_name "${sample_name.sample_name}" | 41 --sample_name "${sample_name.sample_name}" |
41 #end for | 42 #end for |
42 ' | 43 ' |
43 | 44 |
44 #for $select_expressions in $select_expressions_repeat: | 45 #for $select_expressions in $select_expressions_repeat: |
45 #set $select_expression = "--select_expressions '%s'" % ( str( $select_expressions.select_expressions ) ) | 46 #set $select_expression = "--select_expressions '%s'" % ( str( $select_expressions.select_expressions ) ) |
46 -o '${ hexlify( $select_expression ) }' | 47 -o '${ hexlify( $select_expression ) }' |
47 #end for | 48 #end for |
48 | 49 |
49 ##start tool specific options | 50 ##start tool specific options |
50 #if str( $analysis_param_type.analysis_param_type_selector ) == 'advanced': | 51 #if str( $analysis_param_type.analysis_param_type_selector ) == 'advanced': |
51 -p ' | 52 -p ' |
52 #for $exclude_sample_file in $analysis_param_type.exclude_sample_file_repeat: | 53 #for $esf in $analysis_param_type.exclude_sample_file: |
53 --exclude_sample_file "${exclude_sample_file.exclude_sample_file}" | 54 --exclude_sample_file "${esf}" |
54 #end for | 55 #end for |
55 | 56 |
56 #for $sample_file in $analysis_param_type.sample_file_repeat: | 57 #for $sf in $analysis_param_type.sample_file: |
57 --sample_file "${ample_file.sample_file}" | 58 --sample_file "${sf}" |
58 #end for | 59 #end for |
59 | 60 |
60 #if $analysis_param_type.input_keep_ids: | 61 #if $analysis_param_type.input_keep_ids: |
61 --keepIDs "${analysis_param_type.input_keep_ids}" | 62 --keepIDs "${analysis_param_type.input_keep_ids}" |
62 #end if | 63 #end if |
63 | 64 |
64 ${analysis_param_type.keep_original_AC} | 65 ${analysis_param_type.keep_original_AC} |
65 | 66 |
66 ${analysis_param_type.mendelian_violation} | 67 ${analysis_param_type.mendelian_violation} |
67 | 68 |
68 --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}" | 69 --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}" |
69 | 70 |
70 --remove_fraction_genotypes "${analysis_param_type.remove_fraction_genotypes}" | 71 --remove_fraction_genotypes "${analysis_param_type.remove_fraction_genotypes}" |
71 | 72 |
72 --restrictAllelesTo "${analysis_param_type.restrict_alleles_to}" | 73 --restrictAllelesTo "${analysis_param_type.restrict_alleles_to}" |
73 | 74 |
74 #if str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_fraction': | 75 #if str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_fraction': |
75 --select_random_fraction "${analysis_param_type.select_random_type.select_random_fraction}" | 76 --select_random_fraction "${analysis_param_type.select_random_type.select_random_fraction}" |
76 #elif str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_number': | 77 #elif str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_number': |
77 --select_random_number "${analysis_param_type.select_random_type.select_random_number}" | 78 --select_random_number "${analysis_param_type.select_random_type.select_random_number}" |
78 #end if | 79 #end if |
79 | 80 |
80 #if $analysis_param_type.select_type_to_include: | 81 #if $analysis_param_type.select_type_to_include: |
81 #for $type_to_include in str( $analysis_param_type.select_type_to_include ).split( ',' ): | 82 #for $type_to_include in str( $analysis_param_type.select_type_to_include ).split( ',' ): |
82 --selectTypeToInclude "${type_to_include}" | 83 --selectTypeToInclude "${type_to_include}" |
83 #end for | 84 #end for |
84 #end if | 85 #end if |
85 | 86 |
86 ${analysis_param_type.exclude_non_variants} | 87 ${analysis_param_type.exclude_non_variants} |
87 ' | 88 ' |
88 | 89 |
89 #for $sample_expressions in $analysis_param_type.sample_expressions_repeat: | 90 #for $sample_expressions in $analysis_param_type.sample_expressions_repeat: |
90 #set $sample_expression = "--sample_expressions '%s'" % ( str( $sample_expressions.sample_expressions ) ) | 91 #set $sample_expression = "--sample_expressions '%s'" % ( str( $sample_expressions.sample_expressions ) ) |
91 -o '${ hexlify( $sample_expression ) }' | 92 -o '${ hexlify( $sample_expression ) }' |
92 #end for | 93 #end for |
93 | 94 |
94 #end if | 95 #end if |
95 ##end tool specific options | 96 ##end tool specific options |
96 | 97 |
97 #include source=$standard_gatk_options# | 98 #include source=$standard_gatk_options# |
98 | |
99 | |
100 </command> | 99 </command> |
101 <inputs> | 100 <inputs> |
102 <conditional name="reference_source"> | 101 <conditional name="reference_source"> |
103 <expand macro="reference_source_selector_param" /> | 102 <expand macro="reference_source_selector_param" /> |
104 <when value="cached"> | 103 <when value="cached"> |
113 <when value="history"> <!-- FIX ME!!!! --> | 112 <when value="history"> <!-- FIX ME!!!! --> |
114 <param name="input_variant" type="data" format="vcf" label="Variant file to select" help="-V,--variant &lt;variant&gt;" /> | 113 <param name="input_variant" type="data" format="vcf" label="Variant file to select" help="-V,--variant &lt;variant&gt;" /> |
115 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;" /> | 114 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;" /> |
116 </when> | 115 </when> |
117 </conditional> | 116 </conditional> |
118 | 117 |
119 <repeat name="select_expressions_repeat" title="Criteria to use when selecting the data" help="-select,--select_expressions &lt;select_expressions&gt;"> | 118 <repeat name="select_expressions_repeat" title="Criteria to use when selecting the data" help="-select,--select_expressions &lt;select_expressions&gt;"> |
120 <param name="select_expressions" type="text" label="JEXL expression"> | 119 <param name="select_expressions" type="text" label="JEXL expression"> |
121 <sanitizer> | 120 <sanitizer> |
122 <valid initial="string.printable"> | 121 <valid initial="string.printable"> |
123 <remove value="'"/> | 122 <remove value="'"/> |
124 </valid> | 123 </valid> |
125 <mapping initial="none"/> | 124 <mapping initial="none"/> |
126 </sanitizer> | 125 </sanitizer> |
127 </param> | 126 </param> |
128 </repeat> | 127 </repeat> |
129 | 128 |
130 <param name="input_concordance" type="data" format="vcf" label="Output variants that were also called in this comparison track" optional="True" help="-conc,--concordance &lt;concordance&gt;"/> | 129 <param name="input_concordance" type="data" format="vcf" label="Output variants that were also called in this comparison track" optional="True" help="-conc,--concordance &lt;concordance&gt;"/> |
131 <param name="input_discordance" type="data" format="vcf" label="Output variants that were not called in this comparison track" optional="True" help="-disc,--discordance &lt;discordance&gt;"/> | 130 <param name="input_discordance" type="data" format="vcf" label="Output variants that were not called in this comparison track" optional="True" help="-disc,--discordance &lt;discordance&gt;"/> |
132 | 131 |
133 <repeat name="sample_name_repeat" title="Include Samples by name" help="-sn,--sample_name &lt;sample_name&gt;"> | 132 <repeat name="sample_name_repeat" title="Include Samples by name" help="-sn,--sample_name &lt;sample_name&gt;"> |
134 <param name="sample_name" type="text" label="Include genotypes from this sample"/> | 133 <param name="sample_name" type="text" label="Include genotypes from this sample"/> |
135 </repeat> | 134 </repeat> |
136 | 135 |
137 <repeat name="exclude_sample_name_repeat" title="Exclude Samples by name" help="-xl_sn,--exclude_sample_name &lt;exclude_sample_name&gt;"> | 136 <repeat name="exclude_sample_name_repeat" title="Exclude Samples by name" help="-xl_sn,--exclude_sample_name &lt;exclude_sample_name&gt;"> |
138 <param name="exclude_sample_name" type="text" label="Exclude genotypes from this sample"/> | 137 <param name="exclude_sample_name" type="text" label="Exclude genotypes from this sample"/> |
139 </repeat> | 138 </repeat> |
140 | 139 |
141 <param name="exclude_filtered" type="boolean" truevalue="--excludeFiltered" falsevalue="" label="Don't include filtered loci in the analysis" help="-ef,--excludeFiltered" /> | 140 <param name="exclude_filtered" type="boolean" truevalue="--excludeFiltered" falsevalue="" label="Don't include filtered loci in the analysis" help="-ef,--excludeFiltered" /> |
142 | 141 |
143 <expand macro="gatk_param_type_conditional" /> | 142 <expand macro="gatk_param_type_conditional" /> |
144 | 143 |
145 | |
146 <expand macro="analysis_type_conditional"> | 144 <expand macro="analysis_type_conditional"> |
147 | 145 |
148 <repeat name="exclude_sample_file_repeat" title="Exclude Samples by file" help="-xl_sf,--exclude_sample_file &lt;exclude_sample_file&gt;"> | 146 <param name="exclude_sample_file" type="data" format="txt" multiple="True" label="Exclude Samples by file" help="File containing a list of samples (one per line) to exclude (-xl_sf,--exclude_sample_file &lt;exclude_sample_file&gt;)"/> |
149 <param name="exclude_sample_file" type="data" format="txt" label="File containing a list of samples (one per line) to exclude"/> | 147 |
150 </repeat> | 148 <param name="sample_file" type="data" format="txt" multiple="True" label="Samples by file" help="File containing a list of samples (one per line) to include (-sf,--sample_file &lt;sample_file&gt;)"/> |
151 | 149 |
152 <repeat name="sample_file_repeat" title="Samples by file" help="-sf,--sample_file &lt;sample_file&gt;"> | 150 <param name="input_keep_ids" type="data" format="txt" label="Only emit sites whose ID is found in this file" optional="True" help="-IDs,--keepIDs &lt;keepIDs&gt;"/> |
153 <param name="sample_file" type="data" format="txt" label="File containing a list of samples (one per line) to include" /> | 151 |
154 </repeat> | |
155 | |
156 <param name="input_keep_ids" type="data" format="text" label="Only emit sites whose ID is found in this file" optional="True" help="-IDs,--keepIDs &lt;keepIDs&gt;"/> | |
157 | |
158 <param name="keep_original_AC" type="boolean" truevalue="--keepOriginalAC" falsevalue="" label="Don't update the AC, AF, or AN values in the INFO field after selecting" help="-keepOriginalAC,--keepOriginalAC" /> | 152 <param name="keep_original_AC" type="boolean" truevalue="--keepOriginalAC" falsevalue="" label="Don't update the AC, AF, or AN values in the INFO field after selecting" help="-keepOriginalAC,--keepOriginalAC" /> |
159 | 153 |
160 <param name="mendelian_violation" type="boolean" truevalue="--mendelianViolation" falsevalue="" label="output mendelian violation sites only" help="-mv,--mendelianViolation" /> | 154 <param name="mendelian_violation" type="boolean" truevalue="--mendelianViolation" falsevalue="" label="output mendelian violation sites only" help="-mv,--mendelianViolation" /> |
161 | 155 |
162 <param name="mendelian_violation_qual_threshold" type="float" label="Minimum genotype QUAL score for each trio member required to accept a site as a mendelian violation" value="0" help="-mvq,--mendelianViolationQualThreshold &lt;mendelianViolationQualThreshold&gt;" /> | 156 <param name="mendelian_violation_qual_threshold" type="float" label="Minimum genotype QUAL score for each trio member required to accept a site as a mendelian violation" value="0" help="-mvq,--mendelianViolationQualThreshold &lt;mendelianViolationQualThreshold&gt;" /> |
163 | 157 |
164 <param name="remove_fraction_genotypes" type="float" label="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall" value="0" min="0" max="1" help="-fractionGenotypes,--remove_fraction_genotypes &lt;remove_fraction_genotypes&gt;" /> | 158 <param name="remove_fraction_genotypes" type="float" label="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall" value="0" min="0" max="1" help="-fractionGenotypes,--remove_fraction_genotypes &lt;remove_fraction_genotypes&gt;" /> |
165 | 159 |
166 <param name="restrict_alleles_to" type="select" label="Select only variants of a particular allelicity" help="-restrictAllelesTo,--restrictAllelesTo &lt;restrictAllelesTo&gt;"> | 160 <param name="restrict_alleles_to" type="select" label="Select only variants of a particular allelicity" help="-restrictAllelesTo,--restrictAllelesTo &lt;restrictAllelesTo&gt;"> |
167 <option value="ALL" selected="True">ALL</option> | 161 <option value="ALL" selected="True">ALL</option> |
168 <option value="MULTIALLELIC">MULTIALLELIC</option> | 162 <option value="MULTIALLELIC">MULTIALLELIC</option> |
169 <option value="BIALLELIC">BIALLELIC</option> | 163 <option value="BIALLELIC">BIALLELIC</option> |
170 </param> | 164 </param> |
171 | 165 |
172 <repeat name="sample_expressions_repeat" title="Regular expression to select many samples from the ROD tracks provided" help="-se,--sample_expressions &lt;sample_expressions&gt;"> | 166 <repeat name="sample_expressions_repeat" title="Regular expression to select many samples from the ROD tracks provided" help="-se,--sample_expressions &lt;sample_expressions&gt;"> |
173 <param name="sample_expressions" type="text" label="Regular expression"> | 167 <param name="sample_expressions" type="text" label="Regular expression"> |
174 <sanitizer> | 168 <sanitizer> |
175 <valid initial="string.printable"> | 169 <valid initial="string.printable"> |
176 <remove value="'"/> | 170 <remove value="'"/> |
177 </valid> | 171 </valid> |
178 <mapping initial="none"/> | 172 <mapping initial="none"/> |
179 </sanitizer> | 173 </sanitizer> |
180 </param> | 174 </param> |
181 </repeat> | 175 </repeat> |
182 | 176 |
183 <conditional name="select_random_type"> | 177 <conditional name="select_random_type"> |
184 <param name="select_random_type_selector" type="select" label="Select a random subset of variants"> | 178 <param name="select_random_type_selector" type="select" label="Select a random subset of variants"> |
185 <option value="select_all" selected="True">Use all variants</option> | 179 <option value="select_all" selected="True">Use all variants</option> |
186 <option value="select_random_fraction">Select random fraction</option> | 180 <option value="select_random_fraction">Select random fraction</option> |
187 <option value="select_random_number">Select random number</option> | 181 <option value="select_random_number">Select random number</option> |
194 </when> | 188 </when> |
195 <when value="select_random_number"> | 189 <when value="select_random_number"> |
196 <param name="select_random_number" type="integer" value="0" label="Count" help="-number,--select_random_number &lt;select_random_number&gt;" /> | 190 <param name="select_random_number" type="integer" value="0" label="Count" help="-number,--select_random_number &lt;select_random_number&gt;" /> |
197 </when> | 191 </when> |
198 </conditional> | 192 </conditional> |
199 | 193 |
200 <param name="exclude_non_variants" type="boolean" truevalue="--excludeNonVariants" falsevalue="" label="Don't include loci found to be non-variant after the subsetting procedure" help="-env,--excludeNonVariants" /> | 194 <param name="exclude_non_variants" type="boolean" truevalue="--excludeNonVariants" falsevalue="" label="Don't include loci found to be non-variant after the subsetting procedure" help="-env,--excludeNonVariants" /> |
201 | 195 |
202 <param name="select_type_to_include" type="select" label="Select only a certain type of variants from the input file" multiple="True" display="checkboxes" help="-selectType,--selectTypeToInclude &lt;selectTypeToInclude&gt;"> | 196 <param name="select_type_to_include" type="select" label="Select only a certain type of variants from the input file" multiple="True" display="checkboxes" help="-selectType,--selectTypeToInclude &lt;selectTypeToInclude&gt;"> |
203 <option value="INDEL">INDEL</option> | 197 <option value="INDEL">INDEL</option> |
204 <option value="SNP">SNP</option> | 198 <option value="SNP">SNP</option> |
205 <option value="MIXED">MIXED</option> | 199 <option value="MIXED">MIXED</option> |
206 <option value="MNP">MNP</option> | 200 <option value="MNP">MNP</option> |
207 <option value="SYMBOLIC">SYMBOLIC</option> | 201 <option value="SYMBOLIC">SYMBOLIC</option> |
208 <option value="NO_VARIATION">NO_VARIATION</option> | 202 <option value="NO_VARIATION">NO_VARIATION</option> |
209 </param> | 203 </param> |
210 </expand> | 204 </expand> |
211 | 205 |
212 </inputs> | 206 </inputs> |
213 <outputs> | 207 <outputs> |
214 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (Variant File)" /> | 208 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (Variant File)" /> |
215 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> | 209 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> |
216 </outputs> | 210 </outputs> |
225 <param name="exclude_sample_name_repeat" value="0" /> | 219 <param name="exclude_sample_name_repeat" value="0" /> |
226 <param name="exclude_filtered" /> | 220 <param name="exclude_filtered" /> |
227 <param name="sample_name_repeat" value="0" /> | 221 <param name="sample_name_repeat" value="0" /> |
228 <param name="gatk_param_type_selector" value="basic" /> | 222 <param name="gatk_param_type_selector" value="basic" /> |
229 <param name="analysis_param_type_selector" value="basic" /> | 223 <param name="analysis_param_type_selector" value="basic" /> |
230 <output name="output_vcf" file="gatk/gatk_variant_select/gatk_variant_select_out_1.vcf" lines_diff="4" /> | 224 <output name="output_vcf" file="gatk/gatk_variant_select/gatk_variant_select_out_1.vcf" lines_diff="4" /> |
231 <output name="output_log" file="gatk/gatk_variant_select/gatk_variant_select_out_1.log.contains" compare="contains" /> | 225 <output name="output_log" file="gatk/gatk_variant_select/gatk_variant_select_out_1.log.contains" compare="contains" /> |
232 </test> | 226 </test> |
233 </tests> | 227 </tests> |
234 <help> | 228 <help> |
235 **What it does** | 229 **What it does** |
236 | 230 |
237 Often, a VCF containing many samples and/or variants will need to be subset in order to facilitate certain analyses (e.g. comparing and contrasting cases vs. controls; extracting variant or non-variant loci that meet certain requirements, displaying just a few samples in a browser like IGV, etc.). SelectVariants can be used for this purpose. Given a single VCF file, one or more samples can be extracted from the file (based on a complete sample name or a pattern match). Variants can be further selected by specifying criteria for inclusion, e.g. "DP > 1000" (depth of coverage greater than 1000x), "AF < 0.25" (sites with allele frequency less than 0.25). These JEXL expressions are documented in the `Using JEXL expressions section <http://gatkforums.broadinstitute.org/discussion/1255/what-are-jexl-expressions-and-how-can-i-use-them-with-the-gatk>`_. One can optionally include concordance or discordance tracks for use in selecting overlapping variants. | 231 Often, a VCF containing many samples and/or variants will need to be subset in order to facilitate certain analyses (e.g. comparing and contrasting cases vs. controls; extracting variant or non-variant loci that meet certain requirements, displaying just a few samples in a browser like IGV, etc.). SelectVariants can be used for this purpose. Given a single VCF file, one or more samples can be extracted from the file (based on a complete sample name or a pattern match). Variants can be further selected by specifying criteria for inclusion, e.g. "DP > 1000" (depth of coverage greater than 1000x), "AF < 0.25" (sites with allele frequency less than 0.25). These JEXL expressions are documented in the `Using JEXL expressions section <http://gatkforums.broadinstitute.org/discussion/1255/what-are-jexl-expressions-and-how-can-i-use-them-with-the-gatk>`_. One can optionally include concordance or discordance tracks for use in selecting overlapping variants. |
238 | 232 |
239 For more information on using the SelectVariants module, see this `tool specific page <http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_variantutils_SelectVariants.html>`_. | 233 For more information on using the SelectVariants module, see this `tool specific page <http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_variantutils_SelectVariants.html>`_. |
240 | 234 |
241 To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gatk/guide/topic?name=best-practices>`_. | 235 To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gatk/guide/topic?name=best-practices>`_. |
242 | 236 |