view variant_combine.xml @ 6:35c00763cb5c draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gatk2 commit cf399638ebca4250bcc15f468238a9964de97b33
author iuc
date Mon, 04 Jun 2018 05:38:15 -0400
parents f244b8209eb8
children
line wrap: on
line source

<tool id="gatk2_variant_combine" name="Combine Variants" version="@VERSION@.0">
  <description></description>
  <macros>
    <import>gatk2_macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <expand macro="version_command" />
  <command interpreter="python">
    gatk2_wrapper.py
    --stdout "${output_log}"

    #set $priority_order = []
    #for $input_variant in $reference_source.input_variants:
        -d "--variant:${input_variant.input_variant_name},%(file_type)s" "${input_variant.input_variant}" "${input_variant.input_variant.ext}" "input_variant_${input_variant.input_variant_name}"
        #set $input_variant_name = str( $input_variant.input_variant_name )
        #assert $input_variant_name not in $priority_order, "Variant Names must be unique" ##this should be handled by a validator
        #silent $priority_order.append( $input_variant_name )
    #end for
    -p '
    @JAR_PATH@
    -T "CombineVariants"
    --out "${output_variants}"
    \$GATK2_SITE_OPTIONS

    @THREADS@

    #if $reference_source.reference_source_selector != "history":
        -R "${reference_source.ref_file.fields.path}"
    #end if
   --genotypemergeoption "${genotype_merge_option}"
   --rod_priority_list "${ ','.join( $priority_order ) }"
   '

    #include source=$standard_gatk_options#

    ##start analysis specific options
    #if $analysis_param_type.analysis_param_type_selector == "advanced":
        -p '
        --filteredrecordsmergetype "${analysis_param_type.filtered_records_merge_type}"
        ${analysis_param_type.print_complex_merges}
        ${analysis_param_type.filtered_are_uncalled}
        ${analysis_param_type.minimal_vcf}
        ${analysis_param_type.assume_identical_samples}

        #if str( $analysis_param_type.set_key ):
            --setKey "${analysis_param_type.set_key}"
        #end if

        --minimumN "${analysis_param_type.minimum_n}"
        '
    #end if
  </command>
  <inputs>

    <conditional name="reference_source">
      <expand macro="reference_source_selector_param" />
      <when value="cached">
        <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here (-V,--variant &amp;lt;variant&amp;gt;)">
          <param name="input_variant" type="data" format="vcf" label="Input variant file" />
          <param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique">
            <validator type="length" min="1" message="You must provide a unique name for this set of variants" />
          </param>
        </repeat>
        <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
          <options from_data_table="gatk2_picard_indexes">
            <!-- <filter type="data_meta" key="dbkey" ref="input_variants.input_variant" column="dbkey"/> -->
          </options>
          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
        </param>
      </when>
      <when value="history"> <!-- FIX ME!!!! -->
        <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here (-V,--variant &amp;lt;variant&amp;gt;)">
          <param name="input_variant" type="data" format="vcf" label="Input variant file" />
          <param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique">
            <validator type="length" min="1" message="You must provide a unique name for this set of variants" />
          </param>
        </repeat>
        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
      </when>
    </conditional>

    <param name="genotype_merge_option" type="select" label="How should we merge genotype records across records for samples shared across the ROD files" help="-genotypeMergeOptions,--genotypemergeoption &amp;lt;genotypemergeoption&amp;gt;" >
      <option value="UNIQUIFY" />
      <option value="PRIORITIZE" selected="true"/>
      <option value="UNSORTED" />
      <option value="REQUIRE_UNIQUE" />
    </param>

    <expand macro="gatk_param_type_conditional" />

    <expand macro="analysis_type_conditional">
        <param name="filtered_records_merge_type" type="select" label="How should we deal with records seen at the same site in the VCF, but with different FILTER fields?" help="-filteredRecordsMergeType,--filteredrecordsmergetype &amp;lt;filteredrecordsmergetype&amp;gt;" >
          <option value="KEEP_IF_ANY_UNFILTERED" selected="true"/>
          <option value="KEEP_IF_ALL_UNFILTERED" />
        </param>

        <param name="print_complex_merges" checked="false" type="boolean" truevalue="--printComplexMerges" falsevalue="" label="Print out interesting sites requiring complex compatibility merging" help="-printComplexMerges,--printComplexMerges" />
        <param name="filtered_are_uncalled" checked="false" type="boolean" truevalue="--filteredAreUncalled" falsevalue="" label="If true, then filtered VCFs are treated as uncalled, so that filtered set annotation don't appear in the combined VCF" help="-filteredAreUncalled,--filteredAreUncalled" />
        <param name="minimal_vcf" checked="false" type="boolean" truevalue="--minimalVCF" falsevalue="" label="If true, then the output VCF will contain no INFO or genotype INFO field" help="-minimalVCF,--minimalVCF" />

        <param name="set_key" type="text" value="" label="Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from." help="-setKey,--setKey &amp;lt;setKey&amp;gt;"/>
        <param name="assume_identical_samples" checked="false" type="boolean" truevalue="--assumeIdenticalSamples" falsevalue="" label="If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime." help="-assumeIdenticalSamples,--assumeIdenticalSamples" />
        <param name="minimum_n" type="integer" value="1" label="Combine variants and output site only if variant is present in at least N input files." help="-minN,--minimumN &amp;lt;minimumN&amp;gt;"/>
    </expand>

  </inputs>
  <outputs>
    <data format="vcf" name="output_variants" label="${tool.name} on ${on_string} (variants)" />
    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
  </outputs>
  <tests>
      <test>
          <param name="reference_source_selector" value="history" />
          <param name="ref_file" value="phiX.fasta" ftype="fasta" />
          <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
          <param name="input_variant_name" value="from_variant_annotator" />
          <param name="genotype_merge_option" value="PRIORITIZE" />
          <param name="gatk_param_type_selector" value="basic" />
          <param name="analysis_param_type_selector" value="basic" />
          <output name="output_variants" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.vcf" lines_diff="4" />
          <output name="output_log" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.log.contains" compare="contains" />
      </test>
  </tests>
  <help>
**What it does**

Combines VCF records from different sources; supports both full merges and set unions. Merge: combines multiple records into a single one; if sample names overlap then they are uniquified. Union: assumes each rod represents the same set of samples (although this is not enforced); using the priority list (if provided), emits a single record instance at every position represented in the rods.

For more information on using the CombineVariants module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_variantutils_CombineVariants.html&gt;`_.

To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.

If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.

------

**Inputs**

GenomeAnalysisTK: CombineVariants accepts variant files as input.

------

**Outputs**

The output is a combined vcf file.


Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.

-------

**Settings**::

 out                         File to which variants should be written
 genotypemergeoption         How should we merge genotype records for samples shared across the ROD files? (UNIQUIFY|PRIORITIZE|UNSORTED|REQUIRE_UNIQUE)
 filteredrecordsmergetype    How should we deal with records seen at the same site in the VCF, but with different FILTER fields? KEEP_IF_ANY_UNFILTERED PASSes the record if any record is unfiltered, KEEP_IF_ALL_UNFILTERED requires all records to be unfiltered (KEEP_IF_ANY_UNFILTERED|KEEP_IF_ALL_UNFILTERED)
 rod_priority_list           When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided
 printComplexMerges          Print out interesting sites requiring complex compatibility merging
 filteredAreUncalled         If true, then filtered VCFs are treated as uncalled, so that filtered set annotation don't appear in the combined VCF
 minimalVCF                  If true, then the output VCF will contain no INFO or genotype INFO field
 setKey                      Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from.  Set to null if you don't want the set field emitted.
 assumeIdenticalSamples      If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime.
 minimumN                    Combine variants and output site only if variant is present in at least N input files.

@CITATION_SECTION@
  </help>
  <expand macro="citations" />
</tool>