comparison variant_combine.xml @ 0:340633249b3d draft

Uploaded
author bgruening
date Mon, 02 Dec 2013 06:18:36 -0500
parents
children f244b8209eb8
comparison
equal deleted inserted replaced
-1:000000000000 0:340633249b3d
1 <tool id="gatk2_variant_combine" name="Combine Variants" version="0.0.7">
2 <description></description>
3 <expand macro="requirements" />
4 <macros>
5 <import>gatk2_macros.xml</import>
6 </macros>
<!-- Cheetah template that builds the gatk2_wrapper.py call running the GATK CombineVariants walker.
     @JAR_PATH@, @THREADS@ and $standard_gatk_options are not defined in this file; presumably they
     are supplied by gatk2_macros.xml (imported in the macros element) - TODO confirm. -->
7 <command interpreter="python">
8 gatk2_wrapper.py
## The path of the output_log dataset is handed to the wrapper as its stdout option.
9 --stdout "${output_log}"
10
## Emit one -d entry per listed input VCF and collect the user-given names, in listing order,
## so they can be joined into the rod priority list further down.
## The %(file_type)s placeholder is not a Cheetah expression; presumably the wrapper fills it in - TODO confirm.
11 #set $priority_order = []
12 #for $input_variant in $reference_source.input_variants:
13 -d "--variant:${input_variant.input_variant_name},%(file_type)s" "${input_variant.input_variant}" "${input_variant.input_variant.ext}" "input_variant_${input_variant.input_variant_name}"
14 #set $input_variant_name = str( $input_variant.input_variant_name )
## Duplicate names abort template rendering here; the form itself only validates that a name is non-empty.
15 #assert $input_variant_name not in $priority_order, "Variant Names must be unique" ##this should be handled by a validator
16 #silent $priority_order.append( $input_variant_name )
17 #end for
## Core GATK arguments, passed to the wrapper as one single-quoted -p string.
18 -p '
19 @JAR_PATH@
20 -T "CombineVariants"
21 --out "${output_variants}"
## The backslash escapes the dollar sign so Cheetah emits GATK2_SITE_OPTIONS as a literal variable reference.
22 \$GATK2_SITE_OPTIONS
23
24 @THREADS@
25
## Only a non-history (built-in) reference adds -R here, using the path field of the selected data table entry.
## NOTE(review): no -R is emitted in this template for a history reference; presumably handled elsewhere - confirm.
26 #if $reference_source.reference_source_selector != "history":
27 -R "${reference_source.ref_file.fields.path}"
28 #end if
29 --genotypemergeoption "${genotype_merge_option}"
## The priority list is the comma-joined input names in the order they were listed.
30 --rod_priority_list "${ ','.join( $priority_order ) }"
31 '
32
## Shared option handling comes from a template variable that is not defined in this file.
33 #include source=$standard_gatk_options#
34
35
36 ##start analysis specific options
## These arguments are only added when the advanced analysis parameter set is selected.
## The four bare placeholders are boolean params that expand to their flag or to an empty string.
37 #if $analysis_param_type.analysis_param_type_selector == "advanced":
38 -p '
39 --filteredrecordsmergetype "${analysis_param_type.filtered_records_merge_type}"
40 ${analysis_param_type.print_complex_merges}
41 ${analysis_param_type.filtered_are_uncalled}
42 ${analysis_param_type.minimal_vcf}
43 ${analysis_param_type.assume_identical_samples}
44
## setKey is only passed when the text field is non-empty.
45 #if str( $analysis_param_type.set_key ):
46 --setKey "${analysis_param_type.set_key}"
47 #end if
48
49 --minimumN "${analysis_param_type.minimum_n}"
50 '
51 #end if
52 </command>
<!-- User-facing parameters of the tool. -->
53 <inputs>
54
<!-- Reference choice. Both when-branches declare an identical input_variants repeat; they differ only
     in ref_file, which is a select fed by the gatk2_picard_indexes data table (cached) or a fasta
     dataset from the history (history). -->
55 <conditional name="reference_source">
56 <expand macro="reference_source_selector_param" />
57 <when value="cached">
58 <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here (-V,--variant &amp;lt;variant&amp;gt;)">
59 <param name="input_variant" type="data" format="vcf" label="Input variant file" />
<!-- The length validator only rejects an empty name; uniqueness is not validated here and is instead
     asserted while the command template is rendered. -->
60 <param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique">
61 <validator type="length" min="1" message="You must provide a unique name for this set of variants" />
62 </param>
63 </repeat>
64 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
65 <options from_data_table="gatk2_picard_indexes">
66 <!-- <filter type="data_meta" key="dbkey" ref="input_variants.input_variant" column="dbkey"/> -->
67 </options>
68 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
69 </param>
70 </when>
71 <when value="history"> <!-- FIX ME!!!! -->
72 <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here (-V,--variant &amp;lt;variant&amp;gt;)">
73 <param name="input_variant" type="data" format="vcf" label="Input variant file" />
74 <param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique">
75 <validator type="length" min="1" message="You must provide a unique name for this set of variants" />
76 </param>
77 </repeat>
78 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
79 </when>
80 </conditional>
81
<!-- Genotype merge strategy; PRIORITIZE is preselected. -->
82 <param name="genotype_merge_option" type="select" label="How should we merge genotype records across records for samples shared across the ROD files" help="-genotypeMergeOptions,--genotypemergeoption &amp;lt;genotypemergeoption&amp;gt;" >
83 <option value="UNIQUIFY" />
84 <option value="PRIORITIZE" selected="true"/>
85 <option value="UNSORTED" />
86 <option value="REQUIRE_UNIQUE" />
87 </param>
88
89 <expand macro="gatk_param_type_conditional" />
90
91
<!-- Parameters nested in this expand are read in the command template as analysis_param_type.* and
     are only used there when analysis_param_type_selector is "advanced". The macro itself is not
     defined in this file. -->
92 <expand macro="analysis_type_conditional">
93 <param name="filtered_records_merge_type" type="select" label="How should we deal with records seen at the same site in the VCF, but with different FILTER fields?" help="-filteredRecordsMergeType,--filteredrecordsmergetype &amp;lt;filteredrecordsmergetype&amp;gt;" >
94 <option value="KEEP_IF_ANY_UNFILTERED" selected="true"/>
95 <option value="KEEP_IF_ALL_UNFILTERED" />
96 </param>
97
<!-- Each boolean below carries its GATK flag as truevalue and an empty string as falsevalue, so the
     command template can insert the parameter value directly. -->
98 <param name="print_complex_merges" checked="false" type="boolean" truevalue="--printComplexMerges" falsevalue="" label="Print out interesting sites requiring complex compatibility merging" help="-printComplexMerges,--printComplexMerges" />
99 <param name="filtered_are_uncalled" checked="false" type="boolean" truevalue="--filteredAreUncalled" falsevalue="" label="If true, then filtered VCFs are treated as uncalled, so that filtered set annotation don't appear in the combined VCF" help="-filteredAreUncalled,--filteredAreUncalled" />
100 <param name="minimal_vcf" checked="false" type="boolean" truevalue="--minimalVCF" falsevalue="" label="If true, then the output VCF will contain no INFO or genotype INFO field" help="-minimalVCF,--minimalVCF" />
101
102 <param name="set_key" type="text" value="" label="Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from." help="-setKey,--setKey &amp;lt;setKey&amp;gt;"/>
103 <param name="assume_identical_samples" checked="false" type="boolean" truevalue="--assumeIdenticalSamples" falsevalue="" label="If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime." help="-assumeIdenticalSamples,--assumeIdenticalSamples" />
104 <param name="minimum_n" type="integer" value="1" label="Combine variants and output site only if variant is present in at least N input files." help="-minN,--minimumN &amp;lt;minimumN&amp;gt;"/>
105
106 </expand>
107
108
109 </inputs>
<!-- Two datasets are produced: output_variants (vcf), passed to GATK as the out argument, and
     output_log (txt), passed to the wrapper as its stdout option. -->
110 <outputs>
111 <data format="vcf" name="output_variants" label="${tool.name} on ${on_string} (variants)" />
112 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
113 </outputs>
<!-- One functional test: history reference phiX.fasta, a single named input VCF and the basic
     parameter sets. The VCF is compared with lines_diff=4; the log uses compare=contains. -->
114 <tests>
115 <test>
116 <param name="reference_source_selector" value="history" />
117 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
118 <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
119 <param name="input_variant_name" value="from_variant_annotator" />
120 <param name="genotype_merge_option" value="PRIORITIZE" />
121 <param name="gatk_param_type_selector" value="basic" />
122 <param name="analysis_param_type_selector" value="basic" />
123 <output name="output_variants" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.vcf" lines_diff="4" />
124 <output name="output_log" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.log.contains" compare="contains" />
125 </test>
126 </tests>
127 <help>
128 **What it does**
129
130 Combines VCF records from different sources; supports both full merges and set unions. Merge: combines multiple records into a single one; if sample names overlap then they are uniquified. Union: assumes each rod represents the same set of samples (although this is not enforced); using the priority list (if provided), emits a single record instance at every position represented in the rods.
131
132 For more information on using the CombineVariants module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_variantutils_CombineVariants.html&gt;`_.
133
134 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
135
136 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
137
138 ------
139
140 **Inputs**
141
142 GenomeAnalysisTK: CombineVariants accepts variant files as input.
143
144 ------
145
146 **Outputs**
147
148 The output is a combined VCF file.
149
150
151 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
152
153 -------
154
155 **Settings**::
156
157 out File to which variants should be written
158 genotypemergeoption How should we merge genotype records for samples shared across the ROD files? (UNIQUIFY|PRIORITIZE|UNSORTED|REQUIRE_UNIQUE)
159 filteredrecordsmergetype How should we deal with records seen at the same site in the VCF, but with different FILTER fields? KEEP_IF_ANY_UNFILTERED PASSes the record if any record is unfiltered, KEEP_IF_ALL_UNFILTERED requires all records to be unfiltered (KEEP_IF_ANY_UNFILTERED|KEEP_IF_ALL_UNFILTERED)
160 rod_priority_list When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided
161 printComplexMerges Print out interesting sites requiring complex compatibility merging
162 filteredAreUncalled If true, then filtered VCFs are treated as uncalled, so that filtered set annotations don't appear in the combined VCF
163 minimalVCF If true, then the output VCF will contain no INFO or genotype INFO field
164 setKey Key used in the INFO key=value tag that describes which set the combined VCF record came from ("set" by default). Set to null if you don't want the set field emitted.
165 assumeIdenticalSamples If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime.
166 minimumN Combine variants and output site only if variant is present in at least N input files.
167
168 @CITATION_SECTION@
169 </help>
170 </tool>