1.9bcftoolshtslibsamtoolsmatplotlibbcftools 2>&1 | grep 'Version:'10.1093/bioinformatics/btp352https://github.com/samtools/bcftools/wikihttp://samtools.github.io/bcftools/bcftools.html
--threads \${GALAXY_SLOTS:-4}
$input_vcf &&
bcftools index $input_vcf &&
#elif $input_file.is_of_type('vcf_bgzip')
ln -s '$input_file' $input_vcf &&
#if $input_file.metadata.tabix_index:
ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi &&
#else
bcftools index $input_vcf &&
#end if
#elif $input_file.is_of_type('bcf')
#set $input_vcf = 'input.bcf'
ln -s '$input_file' $input_vcf &&
#if $input_file.metadata.bcf_index:
ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
#else
bcftools index $input_vcf &&
#end if
#end if
]]>
$input_vcf
$input_vcf &&
bcftools index $input_vcf &&
#elif $input_file.is_of_type('vcf_bgzip')
ln -s '$input_file' $input_vcf &&
#if $input_file.metadata.tabix_index:
ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi &&
#else
bcftools index $input_vcf &&
#end if
#elif $input_file.is_of_type('bcf')
#set $input_vcf = 'input' + str($i) + '.bcf.gz'
ln -s '$input_file' $input_vcf &&
#if $input_file.metadata.bcf_index:
ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
#else
bcftools index $input_vcf &&
#end if
#end if
echo '$input_vcf' >> $vcfs_list_file &&
$input_vcfs.append($input_vcf)
#end for
]]>
#echo ' '.join($input_vcfs)#
$vcfs_list_file
## Choose the reference FASTA passed to --fasta-ref.
## $input_fa_ref is assigned outside this snippet (presumably by an earlier
## preparation step; confirm). When it is not None it takes precedence.
#if $input_fa_ref is not None:
--fasta-ref $input_fa_ref
## Otherwise fall back to the section's own fasta_ref parameter, if the
## section has one and it holds a value.
#elif 'fasta_ref' in $section and $section.fasta_ref:
--fasta-ref '${section.fasta_ref}'
#end if
## Emit --AF-file only when the section defines an AF_file parameter and it
## holds a value; the value is single-quoted for the shell.
#if 'AF_file' in $section and $section.AF_file:
--AF-file '${section.AF_file}'
#end if
## Emit --estimate-AF only when the section defines an estimate_AF parameter
## and it holds a value. The value is single-quoted, like the other options in
## this file, so the shell never expands anything inside it.
#if 'estimate_AF' in $section and $section.estimate_AF:
--estimate-AF '${section.estimate_AF}'
#end if
$exons_path &&
tabix -s 1 -b 2 -e 3 $exons_path &&
#end if
]]>
## Emit --exons when the section has an exons_file value. The option points at
## $exons_path, which is assigned outside this snippet (presumably the file
## that is indexed with tabix during preparation; confirm).
#if 'exons_file' in $section and $section.exons_file:
--exons $exons_path
#end if
## Emit --ploidy only when the section defines a ploidy_file parameter and it
## holds a value. The value is single-quoted, like the other options in this
## file, so the shell never expands anything inside it.
#if 'ploidy_file' in $section and $section.ploidy_file:
--ploidy '${section.ploidy_file}'
#end if
## Emit --collapse with the section's collapse value; nothing is emitted when
## the parameter is empty. The value is inserted unquoted.
#if $section.collapse:
--collapse ${section.collapse}
#end if
^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$
## Emit --apply-filters with the section's apply_filters value (single-quoted);
## nothing is emitted when the parameter is empty.
#if $section.apply_filters:
--apply-filters '${section.apply_filters}'
#end if
## Emit --output-type unless the parameter renders as the literal string
## "__none__" (presumably a placeholder for no selection; confirm).
#if str($output_type) != "__none__":
--output-type '${output_type}'
#end if
value.strip()not value or value.isdigit()not value or value.isdigit()
$targets_path &&
tabix -s 1 -b 2 -e 2 $targets_path &&
#end if
#elif $tgts_sec.targets_file:
#set $targets_path = 'targets_file.tab.gz'
bgzip -c "$section.targets_file" > $targets_path &&
tabix -s 1 -b 2 -e 2 $targets_path &&
#end if
]]>
^(\w+(,\w+)*)?$
## Sample selection options.
## $samples_defined records whether either option below was emitted; it is set
## here for template code outside this snippet (no reader is visible here).
#set $samples_defined = False
## Inline sample list: the value of invert_samples is written directly in front
## of the sample names inside the same quoted argument.
#if str($section.samples) != '':
#set $samples_defined = True
--samples '${section.invert_samples}${section.samples}'
#end if
## Sample list taken from a file: invert_samples_file is written directly in
## front of the path. Single-quoted like the inline list so the shell never
## expands anything inside the value.
#if $section.samples_file:
#set $samples_defined = True
--samples-file '${section.invert_samples_file}${section.samples_file}'
#end if
## Emit --sample with the section's sample value (single-quoted); nothing is
## emitted when the parameter is empty.
#if $section.sample:
--sample '${section.sample}'
#end if
^[^']*$
## Emit --include with the section's filter expression; nothing is emitted when
## the parameter is empty. The expression is wrapped in single quotes, so it
## must not itself contain a single quote.
#if $section.include:
--include '${section.include}'
#end if
^[^']*$
## Emit --exclude with the section's filter expression; nothing is emitted when
## the parameter is empty. The expression is wrapped in single quotes, so it
## must not itself contain a single quote.
#if $section.exclude:
--exclude '${section.exclude}'
#end if
^([^,]+(,[^,]+)*)?$
## Emit --columns with the section's columns value (single-quoted) unless the
## parameter compares equal to the empty string.
#if $section.columns != '':
--columns '${section.columns}'
#end if
${section.vcf_ids}
BCF conversion.
This Galaxy tool recommends using the compressed BCF format
as piping is not implemented, and uncompressed data would
use unnecessary amounts of space.
]]> als.tsv.gz && tabix -s1 -b2 -e2 als.tsv.gz
]]>
Collapse
--------
Controls how to treat records with duplicate positions and defines compatible
records across multiple input files. Here by "compatible" we mean records which
should be considered as identical by the tools. For example, when performing
line intersections, the desire may be to consider as identical all sites with
matching positions (bcftools isec -c all), or only sites with matching variant
type (bcftools isec -c snps -c indels), or only sites with all alleles
identical (bcftools isec -c none).
+------------+----------------------------------------------------------------+
| Flag value | Result |
+============+================================================================+
| none | only records with identical REF and ALT alleles are compatible |
+------------+----------------------------------------------------------------+
| some | only records where some subset of ALT alleles match are |
| | compatible |
+------------+----------------------------------------------------------------+
| all | all records are compatible, regardless of whether the ALT |
| | alleles match or not. In the case of records with the same |
| | position, only the first will be considered and appear on |
| | output. |
+------------+----------------------------------------------------------------+
| snps | any SNP records are compatible, regardless of whether the ALT |
| | alleles match or not. For duplicate positions, only the first |
| | SNP record will be considered and appear on output. |
+------------+----------------------------------------------------------------+
| indels | all indel records are compatible, regardless of whether the |
| | REF and ALT alleles match or not. For duplicate positions, |
| | only the first indel record will be considered and appear on |
| | output. |
+------------+----------------------------------------------------------------+
| both | abbreviation of "-c indels -c snps" |
+------------+----------------------------------------------------------------+
| id | only records with identical ID column are compatible. |
| | Supported by bcftools merge only. |
+------------+----------------------------------------------------------------+
, >=, <=, <, !=
- regex operators "~" and its negation "!~"
::
INFO/HAYSTACK ~ "needle"
- parentheses
::
(, )
- logical operators
::
&& (same as &), ||, |
- INFO tags, FORMAT tags, column names
::
INFO/DP or DP
FORMAT/DV, FMT/DV, or DV
FILTER, QUAL, ID, REF, ALT[0]
- 1 (or 0) to test the presence (or absence) of a flag
::
FlagA=1 && FlagB=0
- "." to test missing values
::
DP=".", DP!=".", ALT="."
- missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression
::
GT="."
- TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other)
::
TYPE="indel" | TYPE="snp"
- array subscripts, "*" for any field
::
(DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3
DP4[*] == 0
CSQ[*] ~ "missense_variant.*deleterious"
- function on FORMAT tags (over samples) and INFO tags (over vector fields)
::
MAX, MIN, AVG, SUM, STRLEN, ABS
- variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes
::
N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN
**Notes:**
- String comparisons and regular expressions are case-insensitive
- If the subscript "*" is used in regular expression search, the whole field
is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be
true for the string vector INFO/STR=AB,CD.
- Variables and function names are case-insensitive, but not tag names. For
example, "qual" can be used instead of "QUAL", "strlen()" instead of
"STRLEN()" , but not "dp" instead of "DP".
**Examples:**
::
MIN(DV)>5
MIN(DV/DP)>0.3
MIN(DP)>10 & MIN(DV)>3
FMT/DP>10 & FMT/GQ>10 .. both conditions must be satisfied within one sample
FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples
QUAL>10 | FMT/GQ>10 .. selects only GQ>10 samples
QUAL>10 || FMT/GQ>10 .. selects all samples at QUAL>10 sites
TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2)
MIN(DP)>35 && AVG(GQ)>50
ID=@file .. selects lines with ID present in the file
ID!=@~/file .. skip lines with ID present in the ~/file
MAF[0]<0.05 .. select rare variants at 5% cutoff
]]>