Mercurial > repos > artbio > gatk4
diff FilterMutectCalls.xml @ 0:c51c08cc9fcc draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gatk4 commit 408454e8d10befcc76f38ab446091778537d4f31"
author | artbio |
---|---|
date | Wed, 29 Dec 2021 01:36:41 +0000 |
parents | |
children | 646e6943bcd2 |
line wrap: on
line diff
<tool id="filtermutectcalls" name="gatk4 FilterMutectCalls" version="@WRAPPER_VERSION@" profile="18.05">
    <description>Filter variants in a GATK4 Mutect2 VCF callset</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_cmd"/>
    <!--
        Command outline:
          1. link the reference as reference.fa, then build its .fai index and .dict dictionary;
          2. link the unfiltered callset and the Mutect2 stats file under fixed names
             (a bgzipped callset is additionally indexed with IndexFeatureFile);
          3. run FilterMutectCalls, writing filtered.vcf or filtered.vcf.gz.
        NOTE(review): no "stats" or "filtering-stats" option is passed. The wrapper relies on
        the stats file being linked as VARIANT.stats and on the filtering statistics being
        written as OUTPUT.filteringStats.tsv (the names used by from_work_dir below);
        confirm both conventions against the GATK documentation.
    -->
    <command detect_errors="exit_code">
    <![CDATA[
        ## Reference: the history dataset itself, or the path column of the all_fasta data table.
        #if str($reference_source.reference_source_selector) == 'history'
            ln -s '$reference_source.reference_sequence' reference.fa &&
        #else
            ln -s '$reference_source.reference_sequence.fields.path' reference.fa &&
        #end if
        samtools faidx reference.fa &&
        gatk CreateSequenceDictionary --REFERENCE="reference.fa" --OUTPUT="reference.dict" &&

        ## Working file names depend only on the chosen input format.
        #if str($input_options.input_options_selector) == 'vcf_bgzip'
            #set $variant_file = 'input.vcf.gz'
            #set $filtered_file = 'filtered.vcf.gz'
        #else
            #set $variant_file = 'input.vcf'
            #set $filtered_file = 'filtered.vcf'
        #end if

        ln -s '$input_options.unfiltered_vcf_input' $variant_file &&
        ln -s '$gatk_vcf_stats' ${variant_file}.stats &&
        #if str($input_options.input_options_selector) == 'vcf_bgzip'
            gatk IndexFeatureFile --input $variant_file &&
        #end if

        gatk FilterMutectCalls --QUIET --reference="reference.fa"
            --variant $variant_file
            --output $filtered_file
    ]]>
    </command>
    <inputs>
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
                <option value="cached">Locally cached</option>
                <option value="history" selected="true">History</option>
            </param>
            <when value="cached">
                <param name="reference_sequence" type="select" label="Reference" help="Reference sequence file." >
                    <options from_data_table="all_fasta" >
                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="reference_sequence" type="data" format="fasta" label="Reference" help="Reference sequence file." />
            </when>
        </conditional>
        <conditional name="input_options">
            <param name="input_options_selector" type="select" label="Format of input variant dataset">
                <option value="vcf" selected="true">vcf</option>
                <option value="vcf_bgzip">vcf_bgzip</option>
            </param>
            <when value="vcf">
                <param name="unfiltered_vcf_input" type="data" format="vcf" label="vcf input file." />
            </when>
            <when value="vcf_bgzip">
                <param name="unfiltered_vcf_input" type="data" format="vcf_bgzip" label="vcf_bgzip input file" />
            </when>
        </conditional>
        <param format="tabular" name="gatk_vcf_stats" type="data" label="gatk vcf stats" help="this stats file is generated by the Mutect2 tool"/>
    </inputs>
    <!--
        Exactly one pair of outputs (filtered callset + filtering statistics) is kept per run:
        the filters select the pair matching the input format.
    -->
    <outputs>
        <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string}: Filtered vcf" from_work_dir="filtered.vcf" >
            <filter>input_options['input_options_selector'] == 'vcf'</filter>
        </data>
        <data format="tabular" name="output_vcf_stats" label="${tool.name}: Filtered vcf statistics" from_work_dir="filtered.vcf.filteringStats.tsv" >
            <filter>input_options['input_options_selector'] == 'vcf'</filter>
        </data>
        <data format="vcf_bgzip" name="output_vcf_bgzip" label="${tool.name} on ${on_string}: Filtered vcf (bgzip)" from_work_dir="filtered.vcf.gz" >
            <filter>input_options['input_options_selector'] == 'vcf_bgzip'</filter>
        </data>
        <data format="tabular" name="output_vcf_bgzip_stats" label="${tool.name}: Filtered vcf (bgzip) statistics" from_work_dir="filtered.vcf.gz.filteringStats.tsv" >
            <filter>input_options['input_options_selector'] == 'vcf_bgzip'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Plain VCF input with a small history reference. -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history" />
                <param name="reference_sequence" ftype="fasta" value="reference.fa" />
            </conditional>
            <conditional name="input_options">
                <param name="input_options_selector" value="vcf" />
                <param name="unfiltered_vcf_input" ftype="vcf" value="Mutect2-out1.vcf" />
            </conditional>
            <param name="gatk_vcf_stats" ftype="tabular" value="Mutect2-out1.vcf.stats" />
            <output name="output_vcf" file="filtered_Mutect2-out1.vcf" lines_diff="2" />
            <output name="output_vcf_stats" file="filtered_Mutect2-out1_stats.tsv" />
        </test>
        <!--
            Bgzipped VCF input. With this selector the plain-VCF outputs are filtered out,
            so the assertions must target the *_bgzip outputs.
        -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history" />
                <param name="reference_sequence" ftype="fasta" value="chr20.fa" />
            </conditional>
            <conditional name="input_options">
                <param name="input_options_selector" value="vcf_bgzip" />
                <param name="unfiltered_vcf_input" ftype="vcf_bgzip" value="Mutect2-out6.vcf_bgzip" />
            </conditional>
            <param name="gatk_vcf_stats" ftype="tabular" value="Mutect2-out6.vcf_bgzip.stats" />
            <output name="output_vcf_bgzip" file="filtered_Mutect2-out6.vcf_bgzip" compare="sim_size" />
            <output name="output_vcf_bgzip_stats" file="filtered_Mutect2-out6_stats.tsv" />
        </test>
        <!-- Same callset as the previous test, supplied as plain VCF. -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history" />
                <param name="reference_sequence" ftype="fasta" value="chr20.fa" />
            </conditional>
            <conditional name="input_options">
                <param name="input_options_selector" value="vcf" />
                <param name="unfiltered_vcf_input" ftype="vcf" value="Mutect2-out6.vcf" />
            </conditional>
            <param name="gatk_vcf_stats" ftype="tabular" value="Mutect2-out6.vcf_bgzip.stats" />
            <output name="output_vcf" file="filtered_Mutect2-out6.vcf" lines_diff="2" />
            <output name="output_vcf_stats" file="filtered_Mutect2-out6_stats.tsv" />
        </test>
    </tests>
    <help><![CDATA[
Usage examples
~~~~~~~~~~~~~~


::

    gatk FilterMutectCalls \
        -R reference.fasta \
        -V somatic.vcf.gz \
        --contamination-table contamination.table \
        --tumor-segmentation segments.tsv \
        -O filtered.vcf.gz


When running on unfiltered output of Mutect2 in --mitochondria mode, setting the advanced
option --autosomal-coverage argument (default 0) activates a recommended filter against
likely erroneously mapped NuMTs (nuclear mitochondrial DNA segments -- https://en.wikipedia.org/wiki/NUMT).
For the value, provide the median coverage expected in autosomal regions with coverage.


Usage
~~~~~


::

    USAGE: FilterMutectCalls [arguments]

    Filter somatic SNVs and indels called by Mutect2
    Version:4.1.7.0


    Required Arguments:

    --output,-O:String            The output filtered VCF file Required.

    --reference,-R:GATKPathSpecifier
                                  Reference sequence file Required.

    --variant,-V:String           A VCF file containing variants Required.


    Optional Arguments:

    --add-output-sam-program-record,-add-output-sam-program-record:Boolean
                                  If true, adds a PG tag to created SAM/BAM/CRAM files. Default value: true. Possible
                                  values: {true, false}

    --add-output-vcf-command-line,-add-output-vcf-command-line:Boolean
                                  If true, adds a command line header line to created VCF files. Default value: true.
                                  Possible values: {true, false}

    --arguments_file:File         read one or more arguments files and add them to the command line This argument may be
                                  specified 0 or more times. Default value: null.

    --cloud-index-prefetch-buffer,-CIPB:Integer
                                  Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to
                                  cloudPrefetchBuffer if unset. Default value: -1.

    --cloud-prefetch-buffer,-CPB:Integer
                                  Size of the cloud-only prefetch buffer (in MB; 0 to disable). Default value: 40.

    --contamination-estimate:Double
                                  Estimate of contamination. Default value: 0.0.

    --contamination-table:File    Tables containing contamination information. This argument may be specified 0 or more
                                  times. Default value: null.

    --create-output-bam-index,-OBI:Boolean
                                  If true, create a BAM/CRAM index when writing a coordinate-sorted BAM/CRAM file. Default
                                  value: true. Possible values: {true, false}

    --create-output-bam-md5,-OBM:Boolean
                                  If true, create a MD5 digest for any BAM/SAM/CRAM file created Default value: false.
                                  Possible values: {true, false}

    --create-output-variant-index,-OVI:Boolean
                                  If true, create a VCF index when writing a coordinate-sorted VCF file. Default value:
                                  true. Possible values: {true, false}

    --create-output-variant-md5,-OVM:Boolean
                                  If true, create an MD5 digest for any VCF file created. Default value: false. Possible
                                  values: {true, false}

    --disable-bam-index-caching,-DBIC:Boolean
                                  If true, don't cache bam indexes, this will reduce memory requirements but may harm
                                  performance if many intervals are specified. Caching is automatically disabled if there
                                  are no intervals specified. Default value: false. Possible values: {true, false}

    --disable-read-filter,-DF:String
                                  Read filters to be disabled before analysis This argument may be specified 0 or more
                                  times. Default value: null. Possible Values: {WellformedReadFilter}

    --disable-sequence-dictionary-validation,-disable-sequence-dictionary-validation:Boolean
                                  If specified, do not check the sequence dictionaries from our inputs for compatibility.
                                  Use at your own risk! Default value: false. Possible values: {true, false}

    --distance-on-haplotype:Integer
                                  On second filtering pass, variants with same PGT and PID tags as a filtered variant within
                                  this distance are filtered. Default value: 100.

    --exclude-intervals,-XL:String One or more genomic intervals to exclude from processing This argument may be specified 0
                                  or more times. Default value: null.

    --f-score-beta:Double         F score beta, the relative weight of recall to precision, used if OPTIMAL_F_SCORE strategy
                                  is chosen Default value: 1.0.

    --false-discovery-rate:Double Maximum false discovery rate allowed if FALSE_DISCOVERY_RATE threshold strategy is chosen
                                  Default value: 0.05.

    --filtering-stats:String      The output filtering stats file Default value: null.

    --gatk-config-file:String     A configuration file to use with the GATK. Default value: null.

    --gcs-max-retries,-gcs-retries:Integer
                                  If the GCS bucket channel errors out, how many times it will attempt to re-initiate the
                                  connection Default value: 20.

    --gcs-project-for-requester-pays:String
                                  Project to bill when accessing "requester pays" buckets. If unset, these buckets cannot be
                                  accessed. Default value: .

    --help,-h:Boolean             display the help message Default value: false. Possible values: {true, false}

    --initial-threshold:Double    Initial artifact probability threshold used in first iteration Default value: 0.1.

    --input,-I:String             BAM/SAM/CRAM file containing reads This argument may be specified 0 or more times.
                                  Default value: null.

    --interval-exclusion-padding,-ixp:Integer
                                  Amount of padding (in bp) to add to each interval you are excluding. Default value: 0.

    --interval-merging-rule,-imr:IntervalMergingRule
                                  Interval merging rule for abutting intervals Default value: ALL. Possible values: {ALL,
                                  OVERLAPPING_ONLY}

    --interval-padding,-ip:Integer Amount of padding (in bp) to add to each interval you are including. Default value: 0.

    --interval-set-rule,-isr:IntervalSetRule
                                  Set merging approach to use for combining interval inputs Default value: UNION. Possible
                                  values: {UNION, INTERSECTION}

    --intervals,-L:String         One or more genomic intervals over which to operate This argument may be specified 0 or
                                  more times. Default value: null.

    --lenient,-LE:Boolean         Lenient processing of VCF files Default value: false. Possible values: {true, false}

    --log-artifact-prior:Double   Initial ln prior probability that a called site is not a technical artifact Default
                                  value: -2.302585092994046.

    --log-indel-prior:Double      Initial ln prior probability that a site has a somatic indel Default value:
                                  -16.11809565095832.

    --log-snv-prior:Double        Initial ln prior probability that a site has a somatic SNV Default value:
                                  -13.815510557964275.

    --long-indel-length:Integer   Indels of this length or greater are treated specially by the mapping quality filter.
                                  Default value: 5.

    --max-alt-allele-count:Integer Maximum alt alleles per site. Default value: 1.

    --max-events-in-region:Integer Maximum events in a single assembly region. Filter all variants if exceeded. Default
                                  value: 2.

    --max-median-fragment-length-difference:Integer
                                  Maximum difference between median alt and ref fragment lengths Default value: 10000.

    --max-n-ratio:Double          Maximum fraction of non-ref bases in the pileup that are N (unknown) Default value:
                                  Infinity.

    --min-allele-fraction:Double  Minimum allele fraction required Default value: 0.0.

    --min-median-base-quality:Integer
                                  Minimum median base quality of alt reads Default value: 20.

    --min-median-mapping-quality:Integer
                                  Minimum median mapping quality of alt reads Default value: 30.

    --min-median-read-position:Integer
                                  Minimum median distance of variants from the end of reads Default value: 1.

    --min-reads-per-strand:Integer Minimum alt reads required on both forward and reverse strands Default value: 0.

    --min-slippage-length:Integer Minimum number of reference bases in an STR to suspect polymerase slippage Default value:
                                  8.

    --mitochondria-mode:Boolean   Set filters to mitochondrial defaults Default value: false. Possible values: {true,
                                  false}

    --normal-p-value-threshold:Double
                                  P value threshold for normal artifact filter Default value: 0.001.

    --orientation-bias-artifact-priors,-ob-priors:File
                                  One or more .tar.gz files containing tables of prior artifact probabilities for the read
                                  orientation filter model, one table per tumor sample This argument may be specified 0 or
                                  more times. Default value: null.

    --pcr-slippage-rate:Double    The frequency of polymerase slippage in contexts where it is suspected Default value:
                                  0.1.

    --QUIET:Boolean               Whether to suppress job-summary info on System.err. Default value: false. Possible
                                  values: {true, false}

    --read-filter,-RF:String      Read filters to be applied before analysis This argument may be specified 0 or more
                                  times. Default value: null. Possible Values: {AlignmentAgreesWithHeaderReadFilter,
                                  AllowAllReadsReadFilter, AmbiguousBaseReadFilter, CigarContainsNoNOperator,
                                  FirstOfPairReadFilter, FragmentLengthReadFilter, GoodCigarReadFilter,
                                  HasReadGroupReadFilter, IntervalOverlapReadFilter, LibraryReadFilter, MappedReadFilter,
                                  MappingQualityAvailableReadFilter, MappingQualityNotZeroReadFilter,
                                  MappingQualityReadFilter, MatchingBasesAndQualsReadFilter, MateDifferentStrandReadFilter,
                                  MateDistantReadFilter, MateOnSameContigOrNoMappedMateReadFilter,
                                  MateUnmappedAndUnmappedReadFilter, MetricsReadFilter,
                                  NonChimericOriginalAlignmentReadFilter, NonZeroFragmentLengthReadFilter,
                                  NonZeroReferenceLengthAlignmentReadFilter, NotDuplicateReadFilter,
                                  NotOpticalDuplicateReadFilter, NotProperlyPairedReadFilter,
                                  NotSecondaryAlignmentReadFilter, NotSupplementaryAlignmentReadFilter,
                                  OverclippedReadFilter, PairedReadFilter, PassesVendorQualityCheckReadFilter,
                                  PlatformReadFilter, PlatformUnitReadFilter, PrimaryLineReadFilter,
                                  ProperlyPairedReadFilter, ReadGroupBlackListReadFilter, ReadGroupReadFilter,
                                  ReadLengthEqualsCigarLengthReadFilter, ReadLengthReadFilter, ReadNameReadFilter,
                                  ReadStrandFilter, SampleReadFilter, SecondOfPairReadFilter, SeqIsStoredReadFilter,
                                  SoftClippedReadFilter, ValidAlignmentEndReadFilter, ValidAlignmentStartReadFilter,
                                  WellformedReadFilter}

    --read-index,-read-index:String
                                  Indices to use for the read inputs. If specified, an index must be provided for every read
                                  input and in the same order as the read inputs. If this argument is not specified, the
                                  path to the index for each input will be inferred automatically. This argument may be
                                  specified 0 or more times. Default value: null.

    --read-validation-stringency,-VS:ValidationStringency
                                  Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. The default
                                  stringency value SILENT can improve performance when processing a BAM file in which
                                  variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default
                                  value: SILENT. Possible values: {STRICT, LENIENT, SILENT}

    --seconds-between-progress-updates,-seconds-between-progress-updates:Double
                                  Output traversal statistics every time this many seconds elapse Default value: 10.0.

    --sequence-dictionary,-sequence-dictionary:String
                                  Use the given sequence dictionary as the master/canonical sequence dictionary. Must be a
                                  .dict file. Default value: null.

    --sites-only-vcf-output:Boolean
                                  If true, don't emit genotype fields when writing vcf file output. Default value: false.
                                  Possible values: {true, false}

    --stats:String                The Mutect stats file output by Mutect2 Default value: null.

    --threshold-strategy:Strategy The method for optimizing the posterior probability threshold Default value:
                                  OPTIMAL_F_SCORE. Possible values: {CONSTANT, FALSE_DISCOVERY_RATE, OPTIMAL_F_SCORE}

    --tmp-dir:GATKPathSpecifier   Temp directory to use. Default value: null.

    --tumor-segmentation:File     Tables containing tumor segments' minor allele fractions for germline hets emitted by
                                  CalculateContamination This argument may be specified 0 or more times. Default value:
                                  null.

    --unique-alt-read-count,-unique:Integer
                                  Minimum unique (i.e. deduplicated) reads supporting the alternate allele Default value:
                                  0.

    --use-jdk-deflater,-jdk-deflater:Boolean
                                  Whether to use the JdkDeflater (as opposed to IntelDeflater) Default value: false.
                                  Possible values: {true, false}

    --use-jdk-inflater,-jdk-inflater:Boolean
                                  Whether to use the JdkInflater (as opposed to IntelInflater) Default value: false.
                                  Possible values: {true, false}

    --verbosity,-verbosity:LogLevel
                                  Control verbosity of logging. Default value: INFO. Possible values: {ERROR, WARNING,
                                  INFO, DEBUG}

    --version:Boolean             display the version number for this tool Default value: false. Possible values: {true,
                                  false}


    Advanced Arguments:

    --disable-tool-default-read-filters,-disable-tool-default-read-filters:Boolean
                                  Disable all tool default read filters (WARNING: many tools will not function correctly
                                  without their default read filters on) Default value: false. Possible values: {true,
                                  false}

    --showHidden,-showHidden:Boolean
                                  display hidden arguments Default value: false. Possible values: {true, false}

    Conditional Arguments for readFilter:

    Valid only if "AmbiguousBaseReadFilter" is specified:
    --ambig-filter-bases:Integer  Threshold number of ambiguous bases. If null, uses threshold fraction; otherwise,
                                  overrides threshold fraction. Default value: null. Cannot be used in conjunction with
                                  argument(s) maxAmbiguousBaseFraction

    --ambig-filter-frac:Double    Threshold fraction of ambiguous bases Default value: 0.05. Cannot be used in conjunction
                                  with argument(s) maxAmbiguousBases

    Valid only if "FragmentLengthReadFilter" is specified:
    --max-fragment-length:Integer Maximum length of fragment (insert size) Default value: 1000000.

    --min-fragment-length:Integer Minimum length of fragment (insert size) Default value: 0.

    Valid only if "IntervalOverlapReadFilter" is specified:
    --keep-intervals:String       One or more genomic intervals to keep This argument must be specified at least once.
                                  Required.

    Valid only if "LibraryReadFilter" is specified:
    --library,-library:String     Name of the library to keep This argument must be specified at least once. Required.

    Valid only if "MappingQualityReadFilter" is specified:
    --maximum-mapping-quality:Integer
                                  Maximum mapping quality to keep (inclusive) Default value: null.

    --minimum-mapping-quality:Integer
                                  Minimum mapping quality to keep (inclusive) Default value: 10.

    Valid only if "MateDistantReadFilter" is specified:
    --mate-too-distant-length:Integer
                                  Minimum start location difference at which mapped mates are considered distant Default
                                  value: 1000.

    Valid only if "OverclippedReadFilter" is specified:
    --dont-require-soft-clips-both-ends:Boolean
                                  Allow a read to be filtered out based on having only 1 soft-clipped block. By default,
                                  both ends must have a soft-clipped block, setting this flag requires only 1 soft-clipped
                                  block Default value: false. Possible values: {true, false}

    --filter-too-short:Integer    Minimum number of aligned bases Default value: 30.

    Valid only if "PlatformReadFilter" is specified:
    --platform-filter-name:String Platform attribute (PL) to match This argument must be specified at least once. Required.

    Valid only if "PlatformUnitReadFilter" is specified:
    --black-listed-lanes:String   Platform unit (PU) to filter out This argument must be specified at least once. Required.

    Valid only if "ReadGroupBlackListReadFilter" is specified:
    --read-group-black-list:String A read group filter expression in the form "attribute:value", where "attribute" is a two
                                  character read group attribute such as "RG" or "PU". This argument must be specified at
                                  least once. Required.

    Valid only if "ReadGroupReadFilter" is specified:
    --keep-read-group:String      The name of the read group to keep Required.

    Valid only if "ReadLengthReadFilter" is specified:
    --max-read-length:Integer     Keep only reads with length at most equal to the specified value Required.

    --min-read-length:Integer     Keep only reads with length at least equal to the specified value Default value: 1.

    Valid only if "ReadNameReadFilter" is specified:
    --read-name:String            Keep only reads with this read name Required.

    Valid only if "ReadStrandFilter" is specified:
    --keep-reverse-strand-only:Boolean
                                  Keep only reads on the reverse strand Required. Possible values: {true, false}

    Valid only if "SampleReadFilter" is specified:
    --sample,-sample:String       The name of the sample(s) to keep, filtering out all others This argument must be
                                  specified at least once. Required.

    Valid only if "SoftClippedReadFilter" is specified:
    --invert-soft-clip-ratio-filter:Boolean
                                  Inverts the results from this filter, causing all variants that would pass to fail and
                                  vice versa. Default value: false. Possible values: {true, false}

    --soft-clipped-leading-trailing-ratio:Double
                                  Threshold ratio of soft clipped bases (leading / trailing the cigar string) to total bases
                                  in read for read to be filtered. Default value: null. Cannot be used in conjunction with
                                  argument(s) minimumSoftClippedRatio

    --soft-clipped-ratio-threshold:Double
                                  Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in
                                  read for read to be filtered. Default value: null. Cannot be used in conjunction with
                                  argument(s) minimumLeadingTrailingSoftClippedRatio


]]></help>
    <citations>
        <expand macro="citations"/>
    </citations>
</tool>