Mercurial > repos > artbio > gatk4

<tool id="filtermutectcalls" name="gatk4 FilterMutectCalls" version="@WRAPPER_VERSION@" profile="21.01">
    <description>Filter variants in a GATK4 Mutect2 VCF callset</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_cmd"/>
    <!--
        Job script (Cheetah template rendered to a shell command line).
        Steps: stage the reference FASTA as reference.fa, index it and build its
        sequence dictionary; stage the unfiltered call set and its stats file under
        fixed names; index the call set when it is bgzip-compressed; run
        FilterMutectCalls. The declared outputs are collected from the working
        directory under the names filtered.vcf / filtered.vcf.gz and the matching
        *.filteringStats.tsv files.
    -->
    <command detect_errors="exit_code">
        <![CDATA[
        ## Resolve the reference FASTA once. The selector only offers 'history'
        ## (a dataset) and 'cached' (an entry of the all_fasta data table).
        #if str($reference_source.reference_source_selector) == 'history'
            #set $reference_path = str($reference_source.reference_sequence)
        #else
            #set $reference_path = str($reference_source.reference_sequence.fields.path)
        #end if

        ## Extension shared by the staged input, its stats file and the output.
        #if str($input_options.input_options_selector) == 'vcf_bgzip'
            #set $vcf_ext = 'vcf.gz'
        #else
            #set $vcf_ext = 'vcf'
        #end if

        ## Reference: symlink, then create reference.fa.fai and reference.dict next to it.
        ln -s '$reference_path' reference.fa &&
        samtools faidx reference.fa &&
        gatk CreateSequenceDictionary --REFERENCE="reference.fa" --OUTPUT="reference.dict" &&

        ## Call set and its Mutect2 stats file. The stats file is linked as
        ## <call set>.stats; presumably this is where FilterMutectCalls looks for it
        ## when --stats is not given (confirm against the GATK documentation).
        ln -s '$input_options.unfiltered_vcf_input' 'input.${vcf_ext}' &&
        ln -s '$gatk_vcf_stats' 'input.${vcf_ext}.stats' &&
        #if $vcf_ext == 'vcf.gz'
            ## Only the bgzip-compressed call set is indexed before filtering.
            gatk IndexFeatureFile --input input.vcf.gz &&
        #end if

        gatk FilterMutectCalls --QUIET
            --reference="reference.fa"
            --variant 'input.${vcf_ext}'
            --output 'filtered.${vcf_ext}'
        ]]>
    </command>
    <inputs>
        <!-- Reference genome: either a FASTA dataset from the history or an entry of the all_fasta data table. -->
        <conditional name="reference_source">
            <param name="reference_source_selector"
                   type="select"
                   label="Choose the source for the reference list">
                <option value="cached">Locally cached</option>
                <option value="history" selected="true">History</option>
            </param>
            <when value="cached">
                <param name="reference_sequence"
                       type="select"
                       label="Reference"
                       help="Reference sequence file.">
                    <options from_data_table="all_fasta">
                        <validator type="no_options"
                                   message="A built-in reference genome is not available for the build associated with the selected input file"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="reference_sequence"
                       type="data"
                       format="fasta"
                       label="Reference"
                       help="Reference sequence file."/>
            </when>
        </conditional>

        <!-- Unfiltered call set; the selected format also decides which pair of outputs is produced. -->
        <conditional name="input_options">
            <param name="input_options_selector"
                   type="select"
                   label="Format of input variant dataset">
                <option value="vcf" selected="true">vcf</option>
                <option value="vcf_bgzip">vcf_bgzip</option>
            </param>
            <when value="vcf">
                <param name="unfiltered_vcf_input"
                       type="data"
                       format="vcf"
                       label="vcf input file."/>
            </when>
            <when value="vcf_bgzip">
                <param name="unfiltered_vcf_input"
                       type="data"
                       format="vcf_bgzip"
                       label="vcf_bgzip input file"/>
            </when>
        </conditional>

        <!-- Stats file that accompanies the unfiltered call set. -->
        <param name="gatk_vcf_stats"
               type="data"
               format="tabular"
               label="gatk vcf stats"
               help="this stats file is generated by the Mutect2 tool"/>
    </inputs>
    <outputs>
        <!-- Produced only when the input format selector is "vcf". -->
        <data name="output_vcf"
              format="vcf"
              from_work_dir="filtered.vcf"
              label="${tool.name} on ${on_string}: Filtered vcf">
            <filter>input_options['input_options_selector'] == 'vcf'</filter>
        </data>
        <data name="output_vcf_stats"
              format="tabular"
              from_work_dir="filtered.vcf.filteringStats.tsv"
              label="${tool.name}: Filtered vcf statistics">
            <filter>input_options['input_options_selector'] == 'vcf'</filter>
        </data>

        <!-- Produced only when the input format selector is "vcf_bgzip". -->
        <data name="output_vcf_bgzip"
              format="vcf_bgzip"
              from_work_dir="filtered.vcf.gz"
              label="${tool.name} on ${on_string}: Filtered vcf (bgzip)">
            <filter>input_options['input_options_selector'] == 'vcf_bgzip'</filter>
        </data>
        <data name="output_vcf_bgzip_stats"
              format="tabular"
              from_work_dir="filtered.vcf.gz.filteringStats.tsv"
              label="${tool.name}: Filtered vcf (bgzip) statistics">
            <filter>input_options['input_options_selector'] == 'vcf_bgzip'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Plain VCF input, history reference: expects the "vcf" pair of outputs. -->
        <test expect_num_outputs="2">
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history" />
                <param name="reference_sequence" ftype="fasta" value="reference.fa" />
            </conditional>
            <conditional name="input_options">
                <param name="input_options_selector" value="vcf" />
                <param name="unfiltered_vcf_input" ftype="vcf" value="Mutect2-out1.vcf" />
            </conditional>
            <param name="gatk_vcf_stats" ftype="tabular" value="Mutect2-out1.vcf.stats" />
            <output name="output_vcf" file="filtered_Mutect2-out1.vcf" lines_diff="2" />
            <output name="output_vcf_stats" file="filtered_Mutect2-out1_stats.tsv" />
        </test>
        <!--
            bgzip-compressed VCF input: the output filters keep only output_vcf_bgzip and
            output_vcf_bgzip_stats in this mode, so those are the outputs to assert on.
        -->
        <test expect_num_outputs="2">
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history" />
                <param name="reference_sequence" ftype="fasta" value="chr20.fa" />
            </conditional>
            <conditional name="input_options">
                <param name="input_options_selector" value="vcf_bgzip" />
                <param name="unfiltered_vcf_input" ftype="vcf_bgzip" value="Mutect2-out6.vcf_bgzip" />
            </conditional>
            <param name="gatk_vcf_stats" ftype="tabular" value="Mutect2-out6.vcf_bgzip.stats" />
            <output name="output_vcf_bgzip" file="filtered_Mutect2-out6.vcf_bgzip" compare="sim_size" />
            <output name="output_vcf_bgzip_stats" file="filtered_Mutect2-out6_stats.tsv" />
        </test>
        <!-- Plain VCF input, using the same stats file as the bgzip test above. -->
        <test expect_num_outputs="2">
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history" />
                <param name="reference_sequence" ftype="fasta" value="chr20.fa" />
            </conditional>
            <conditional name="input_options">
                <param name="input_options_selector" value="vcf" />
                <param name="unfiltered_vcf_input" ftype="vcf" value="Mutect2-out6.vcf" />
            </conditional>
            <param name="gatk_vcf_stats" ftype="tabular" value="Mutect2-out6.vcf_bgzip.stats" />
            <output name="output_vcf" file="filtered_Mutect2-out6.vcf" lines_diff="2" />
            <output name="output_vcf_stats" file="filtered_Mutect2-out6_stats.tsv" />
        </test>
    </tests>
    <help><![CDATA[
Usage examples
~~~~~~~~~~~~~~


::

    gatk FilterMutectCalls \
      -R reference.fasta \
      -V somatic.vcf.gz \
      --contamination-table contamination.table \
      --tumor-segmentation segments.tsv \
      -O filtered.vcf.gz


When running on the unfiltered output of Mutect2 produced in ``--mitochondria-mode``, setting the
advanced ``--autosomal-coverage`` argument (default 0) activates a recommended filter against
likely erroneously mapped NuMTs (nuclear mitochondrial DNA segments -- https://en.wikipedia.org/wiki/NUMT).
As its value, provide the median coverage expected in the covered autosomal regions.


Usage
~~~~~


::

    USAGE: FilterMutectCalls [arguments]

    Filter somatic SNVs and indels called by Mutect2
    Version:4.1.7.0


    Required Arguments:

    --output,-O:String            The output filtered VCF file  Required.

    --reference,-R:GATKPathSpecifier
                                  Reference sequence file  Required.

    --variant,-V:String           A VCF file containing variants  Required.


    Optional Arguments:

    --add-output-sam-program-record,-add-output-sam-program-record:Boolean
                                  If true, adds a PG tag to created SAM/BAM/CRAM files.  Default value: true. Possible
                                  values: {true, false}

    --add-output-vcf-command-line,-add-output-vcf-command-line:Boolean
                                  If true, adds a command line header line to created VCF files.  Default value: true.
                                  Possible values: {true, false}

    --arguments_file:File         read one or more arguments files and add them to the command line  This argument may be
                                  specified 0 or more times. Default value: null.

    --cloud-index-prefetch-buffer,-CIPB:Integer
                                  Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to
                                  cloudPrefetchBuffer if unset.  Default value: -1.

    --cloud-prefetch-buffer,-CPB:Integer
                                  Size of the cloud-only prefetch buffer (in MB; 0 to disable).  Default value: 40.

    --contamination-estimate:Double
                                  Estimate of contamination.  Default value: 0.0.

    --contamination-table:File    Tables containing contamination information.  This argument may be specified 0 or more
                                  times. Default value: null.

    --create-output-bam-index,-OBI:Boolean
                                  If true, create a BAM/CRAM index when writing a coordinate-sorted BAM/CRAM file.  Default
                                  value: true. Possible values: {true, false}

    --create-output-bam-md5,-OBM:Boolean
                                  If true, create a MD5 digest for any BAM/SAM/CRAM file created  Default value: false.
                                  Possible values: {true, false}

    --create-output-variant-index,-OVI:Boolean
                                  If true, create a VCF index when writing a coordinate-sorted VCF file.  Default value:
                                  true. Possible values: {true, false}

    --create-output-variant-md5,-OVM:Boolean
                                  If true, create an MD5 digest for any VCF file created.  Default value: false. Possible
                                  values: {true, false}

    --disable-bam-index-caching,-DBIC:Boolean
                                  If true, don't cache bam indexes, this will reduce memory requirements but may harm
                                  performance if many intervals are specified.  Caching is automatically disabled if there
                                  are no intervals specified.  Default value: false. Possible values: {true, false}

    --disable-read-filter,-DF:String
                                  Read filters to be disabled before analysis  This argument may be specified 0 or more
                                  times. Default value: null. Possible Values: {WellformedReadFilter}

    --disable-sequence-dictionary-validation,-disable-sequence-dictionary-validation:Boolean
                                  If specified, do not check the sequence dictionaries from our inputs for compatibility.
                                  Use at your own risk!  Default value: false. Possible values: {true, false}

    --distance-on-haplotype:Integer
                                  On second filtering pass, variants with same PGT and PID tags as a filtered variant within
                                  this distance are filtered.  Default value: 100.

    --exclude-intervals,-XL:String
                                  One or more genomic intervals to exclude from processing  This argument may be specified 0
                                  or more times. Default value: null.

    --f-score-beta:Double         F score beta, the relative weight of recall to precision, used if OPTIMAL_F_SCORE strategy
                                  is chosen  Default value: 1.0.

    --false-discovery-rate:Double Maximum false discovery rate allowed if FALSE_DISCOVERY_RATE threshold strategy is chosen
                                  Default value: 0.05.

    --filtering-stats:String      The output filtering stats file  Default value: null.

    --gatk-config-file:String     A configuration file to use with the GATK.  Default value: null.

    --gcs-max-retries,-gcs-retries:Integer
                                  If the GCS bucket channel errors out, how many times it will attempt to re-initiate the
                                  connection  Default value: 20.

    --gcs-project-for-requester-pays:String
                                  Project to bill when accessing "requester pays" buckets. If unset, these buckets cannot be
                                  accessed.  Default value: .

    --help,-h:Boolean             display the help message  Default value: false. Possible values: {true, false}

    --initial-threshold:Double    Initial artifact probability threshold used in first iteration  Default value: 0.1.

    --input,-I:String             BAM/SAM/CRAM file containing reads  This argument may be specified 0 or more times.
                                  Default value: null.

    --interval-exclusion-padding,-ixp:Integer
                                  Amount of padding (in bp) to add to each interval you are excluding.  Default value: 0.

    --interval-merging-rule,-imr:IntervalMergingRule
                                  Interval merging rule for abutting intervals  Default value: ALL. Possible values: {ALL,
                                  OVERLAPPING_ONLY}

    --interval-padding,-ip:Integer
                                  Amount of padding (in bp) to add to each interval you are including.  Default value: 0.

    --interval-set-rule,-isr:IntervalSetRule
                                  Set merging approach to use for combining interval inputs  Default value: UNION. Possible
                                  values: {UNION, INTERSECTION}

    --intervals,-L:String         One or more genomic intervals over which to operate  This argument may be specified 0 or
                                  more times. Default value: null.

    --lenient,-LE:Boolean         Lenient processing of VCF files  Default value: false. Possible values: {true, false}

    --log-artifact-prior:Double   Initial ln prior probability that a called site is not a technical artifact  Default
                                  value: -2.302585092994046.

    --log-indel-prior:Double      Initial ln prior probability that a site has a somatic indel  Default value:
                                  -16.11809565095832.

    --log-snv-prior:Double        Initial ln prior probability that a site has a somatic SNV  Default value:
                                  -13.815510557964275.

    --long-indel-length:Integer   Indels of this length or greater are treated specially by the mapping quality filter.
                                  Default value: 5.

    --max-alt-allele-count:Integer
                                  Maximum alt alleles per site.  Default value: 1.

    --max-events-in-region:Integer
                                  Maximum events in a single assembly region.  Filter all variants if exceeded.  Default
                                  value: 2.

    --max-median-fragment-length-difference:Integer
                                  Maximum difference between median alt and ref fragment lengths  Default value: 10000.

    --max-n-ratio:Double          Maximum fraction of non-ref bases in the pileup that are N (unknown)  Default value:
                                  Infinity.

    --min-allele-fraction:Double  Minimum allele fraction required  Default value: 0.0.

    --min-median-base-quality:Integer
                                  Minimum median base quality of alt reads  Default value: 20.

    --min-median-mapping-quality:Integer
                                  Minimum median mapping quality of alt reads  Default value: 30.

    --min-median-read-position:Integer
                                  Minimum median distance of variants from the end of reads  Default value: 1.

    --min-reads-per-strand:Integer
                                  Minimum alt reads required on both forward and reverse strands  Default value: 0.

    --min-slippage-length:Integer Minimum number of reference bases in an STR to suspect polymerase slippage  Default value:
                                  8.

    --mitochondria-mode:Boolean   Set filters to mitochondrial defaults  Default value: false. Possible values: {true,
                                  false}

    --normal-p-value-threshold:Double
                                  P value threshold for normal artifact filter  Default value: 0.001.

    --orientation-bias-artifact-priors,-ob-priors:File
                                  One or more .tar.gz files containing tables of prior artifact probabilities for the read
                                  orientation filter model, one table per tumor sample  This argument may be specified 0 or
                                  more times. Default value: null.

    --pcr-slippage-rate:Double    The frequency of polymerase slippage in contexts where it is suspected  Default value:
                                  0.1.

    --QUIET:Boolean               Whether to suppress job-summary info on System.err.  Default value: false. Possible
                                  values: {true, false}

    --read-filter,-RF:String      Read filters to be applied before analysis  This argument may be specified 0 or more
                                  times. Default value: null. Possible Values: {AlignmentAgreesWithHeaderReadFilter,
                                  AllowAllReadsReadFilter, AmbiguousBaseReadFilter, CigarContainsNoNOperator,
                                  FirstOfPairReadFilter, FragmentLengthReadFilter, GoodCigarReadFilter,
                                  HasReadGroupReadFilter, IntervalOverlapReadFilter, LibraryReadFilter, MappedReadFilter,
                                  MappingQualityAvailableReadFilter, MappingQualityNotZeroReadFilter,
                                  MappingQualityReadFilter, MatchingBasesAndQualsReadFilter, MateDifferentStrandReadFilter,
                                  MateDistantReadFilter, MateOnSameContigOrNoMappedMateReadFilter,
                                  MateUnmappedAndUnmappedReadFilter, MetricsReadFilter,
                                  NonChimericOriginalAlignmentReadFilter, NonZeroFragmentLengthReadFilter,
                                  NonZeroReferenceLengthAlignmentReadFilter, NotDuplicateReadFilter,
                                  NotOpticalDuplicateReadFilter, NotProperlyPairedReadFilter,
                                  NotSecondaryAlignmentReadFilter, NotSupplementaryAlignmentReadFilter,
                                  OverclippedReadFilter, PairedReadFilter, PassesVendorQualityCheckReadFilter,
                                  PlatformReadFilter, PlatformUnitReadFilter, PrimaryLineReadFilter,
                                  ProperlyPairedReadFilter, ReadGroupBlackListReadFilter, ReadGroupReadFilter,
                                  ReadLengthEqualsCigarLengthReadFilter, ReadLengthReadFilter, ReadNameReadFilter,
                                  ReadStrandFilter, SampleReadFilter, SecondOfPairReadFilter, SeqIsStoredReadFilter,
                                  SoftClippedReadFilter, ValidAlignmentEndReadFilter, ValidAlignmentStartReadFilter,
                                  WellformedReadFilter}

    --read-index,-read-index:String
                                  Indices to use for the read inputs. If specified, an index must be provided for every read
                                  input and in the same order as the read inputs. If this argument is not specified, the
                                  path to the index for each input will be inferred automatically.  This argument may be
                                  specified 0 or more times. Default value: null.

    --read-validation-stringency,-VS:ValidationStringency
                                  Validation stringency for all SAM/BAM/CRAM/SRA files read by this program.  The default
                                  stringency value SILENT can improve performance when processing a BAM file in which
                                  variable-length data (read, qualities, tags) do not otherwise need to be decoded.  Default
                                  value: SILENT. Possible values: {STRICT, LENIENT, SILENT}

    --seconds-between-progress-updates,-seconds-between-progress-updates:Double
                                  Output traversal statistics every time this many seconds elapse  Default value: 10.0.

    --sequence-dictionary,-sequence-dictionary:String
                                  Use the given sequence dictionary as the master/canonical sequence dictionary.  Must be a
                                  .dict file.  Default value: null.

    --sites-only-vcf-output:Boolean
                                  If true, don't emit genotype fields when writing vcf file output.  Default value: false.
                                  Possible values: {true, false}

    --stats:String                The Mutect stats file output by Mutect2  Default value: null.

    --threshold-strategy:Strategy The method for optimizing the posterior probability threshold  Default value:
                                  OPTIMAL_F_SCORE. Possible values: {CONSTANT, FALSE_DISCOVERY_RATE, OPTIMAL_F_SCORE}

    --tmp-dir:GATKPathSpecifier   Temp directory to use.  Default value: null.

    --tumor-segmentation:File     Tables containing tumor segments' minor allele fractions for germline hets emitted by
                                  CalculateContamination  This argument may be specified 0 or more times. Default value:
                                  null.

    --unique-alt-read-count,-unique:Integer
                                  Minimum unique (i.e. deduplicated) reads supporting the alternate allele  Default value:
                                  0.

    --use-jdk-deflater,-jdk-deflater:Boolean
                                  Whether to use the JdkDeflater (as opposed to IntelDeflater)  Default value: false.
                                  Possible values: {true, false}

    --use-jdk-inflater,-jdk-inflater:Boolean
                                  Whether to use the JdkInflater (as opposed to IntelInflater)  Default value: false.
                                  Possible values: {true, false}

    --verbosity,-verbosity:LogLevel
                                  Control verbosity of logging.  Default value: INFO. Possible values: {ERROR, WARNING,
                                  INFO, DEBUG}

    --version:Boolean             display the version number for this tool  Default value: false. Possible values: {true,
                                  false}


    Advanced Arguments:

    --disable-tool-default-read-filters,-disable-tool-default-read-filters:Boolean
                                  Disable all tool default read filters (WARNING: many tools will not function correctly
                                  without their default read filters on)  Default value: false. Possible values: {true,
                                  false}

    --showHidden,-showHidden:Boolean
                                  display hidden arguments  Default value: false. Possible values: {true, false}

    Conditional Arguments for readFilter:

    Valid only if "AmbiguousBaseReadFilter" is specified:
    --ambig-filter-bases:Integer  Threshold number of ambiguous bases. If null, uses threshold fraction; otherwise,
                                  overrides threshold fraction.  Default value: null.  Cannot be used in conjunction with
                                  argument(s) maxAmbiguousBaseFraction

    --ambig-filter-frac:Double    Threshold fraction of ambiguous bases  Default value: 0.05.  Cannot be used in conjunction
                                  with argument(s) maxAmbiguousBases

    Valid only if "FragmentLengthReadFilter" is specified:
    --max-fragment-length:Integer Maximum length of fragment (insert size)  Default value: 1000000.

    --min-fragment-length:Integer Minimum length of fragment (insert size)  Default value: 0.

    Valid only if "IntervalOverlapReadFilter" is specified:
    --keep-intervals:String       One or more genomic intervals to keep  This argument must be specified at least once.
                                  Required.

    Valid only if "LibraryReadFilter" is specified:
    --library,-library:String     Name of the library to keep  This argument must be specified at least once. Required.

    Valid only if "MappingQualityReadFilter" is specified:
    --maximum-mapping-quality:Integer
                                  Maximum mapping quality to keep (inclusive)  Default value: null.

    --minimum-mapping-quality:Integer
                                  Minimum mapping quality to keep (inclusive)  Default value: 10.

    Valid only if "MateDistantReadFilter" is specified:
    --mate-too-distant-length:Integer
                                  Minimum start location difference at which mapped mates are considered distant  Default
                                  value: 1000.

    Valid only if "OverclippedReadFilter" is specified:
    --dont-require-soft-clips-both-ends:Boolean
                                  Allow a read to be filtered out based on having only 1 soft-clipped block. By default,
                                  both ends must have a soft-clipped block, setting this flag requires only 1 soft-clipped
                                  block  Default value: false. Possible values: {true, false}

    --filter-too-short:Integer    Minimum number of aligned bases  Default value: 30.

    Valid only if "PlatformReadFilter" is specified:
    --platform-filter-name:String Platform attribute (PL) to match  This argument must be specified at least once. Required.

    Valid only if "PlatformUnitReadFilter" is specified:
    --black-listed-lanes:String   Platform unit (PU) to filter out  This argument must be specified at least once. Required.

    Valid only if "ReadGroupBlackListReadFilter" is specified:
    --read-group-black-list:String
                                  A read group filter expression in the form "attribute:value", where "attribute" is a two
                                  character read group attribute such as "RG" or "PU".  This argument must be specified at
                                  least once. Required.

    Valid only if "ReadGroupReadFilter" is specified:
    --keep-read-group:String      The name of the read group to keep  Required.

    Valid only if "ReadLengthReadFilter" is specified:
    --max-read-length:Integer     Keep only reads with length at most equal to the specified value  Required.

    --min-read-length:Integer     Keep only reads with length at least equal to the specified value  Default value: 1.

    Valid only if "ReadNameReadFilter" is specified:
    --read-name:String            Keep only reads with this read name  Required.

    Valid only if "ReadStrandFilter" is specified:
    --keep-reverse-strand-only:Boolean
                                  Keep only reads on the reverse strand  Required. Possible values: {true, false}

    Valid only if "SampleReadFilter" is specified:
    --sample,-sample:String       The name of the sample(s) to keep, filtering out all others  This argument must be
                                  specified at least once. Required.

    Valid only if "SoftClippedReadFilter" is specified:
    --invert-soft-clip-ratio-filter:Boolean
                                  Inverts the results from this filter, causing all variants that would pass to fail and
                                  vice versa.  Default value: false. Possible values: {true, false}

    --soft-clipped-leading-trailing-ratio:Double
                                  Threshold ratio of soft clipped bases (leading / trailing the cigar string) to total bases
                                  in read for read to be filtered.  Default value: null.  Cannot be used in conjunction with
                                  argument(s) minimumSoftClippedRatio

    --soft-clipped-ratio-threshold:Double
                                  Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in
                                  read for read to be filtered.  Default value: null.  Cannot be used in conjunction with
                                  argument(s) minimumLeadingTrailingSoftClippedRatio


]]></help>
    <!-- Citation entries are supplied by the "citations" macro; presumably defined in the imported macros.xml (not visible here). -->
    <citations>
        <expand macro="citations"/>
    </citations>
</tool>
author	artbio
date	Sun, 15 Oct 2023 12:06:24 +0000
parents	c51c08cc9fcc
children