1.9 bcftools htslib samtools matplotlib bcftools 2>&1 | grep 'Version:' 10.1093/bioinformatics/btp352 https://github.com/samtools/bcftools/wiki http://samtools.github.io/bcftools/bcftools.html --threads \${GALAXY_SLOTS:-4} $input_vcf && bcftools index $input_vcf && #elif $input_file.is_of_type('vcf_bgzip') ln -s '$input_file' $input_vcf && #if $input_file.metadata.tabix_index: ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi && #else bcftools index $input_vcf && #end if #elif $input_file.is_of_type('bcf') #set $input_vcf = 'input.bcf' ln -s '$input_file' $input_vcf && #if $input_file.metadata.bcf_index: ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi && #else bcftools index $input_vcf && #end if #end if ]]> $input_vcf $input_vcf && bcftools index $input_vcf && #elif $input_file.is_of_type('vcf_bgzip') ln -s '$input_file' $input_vcf && #if $input_file.metadata.tabix_index: ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi && #else bcftools index $input_vcf && #end if #elif $input_file.is_of_type('bcf') #set $input_vcf = 'input' + str($i) + '.bcf.gz' ln -s '$input_file' $input_vcf && #if $input_file.metadata.bcf_index: ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi && #else bcftools index $input_vcf && #end if #end if echo '$input_vcf' >> $vcfs_list_file && $input_vcfs.append($input_vcf) #end for ]]> #echo ' '.join($input_vcfs)# $vcfs_list_file #if $input_fa_ref is not None: --fasta-ref $input_fa_ref #elif 'fasta_ref' in $section and $section.fasta_ref: --fasta-ref '${section.fasta_ref}' #end if #if 'AF_file' in $section and $section.AF_file: --AF-file '${section.AF_file}' #end if #if 'estimate_AF' in $section and $section.estimate_AF: --estimate-AF "${section.estimate_AF}" #end if $exons_path && tabix -s 1 -b 2 -e 3 $exons_path && #end if ]]> #if 'exons_file' in $section and $section.exons_file: --exons $exons_path #end if #if 'ploidy_file' in $section and $section.ploidy_file: --ploidy "${section.ploidy_file}" #end if #if 
$section.collapse: --collapse ${section.collapse} #end if ^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$ #if $section.apply_filters: --apply-filters '${section.apply_filters}' #end if #if str($output_type) != "__none__": --output-type '${output_type}' #end if value.strip() not value or value.isdigit() not value or value.isdigit() $targets_path && tabix -s 1 -b 2 -e 2 $targets_path && #end if #elif $tgts_sec.targets_file: #set $targets_path = 'targets_file.tab.gz' bgzip -c "$section.targets_file" > $targets_path && tabix -s 1 -b 2 -e 2 $targets_path && #end if ]]> ^(\w+(,\w+)*)?$ #set $samples_defined = False #if str($section.samples) != '': #set $samples_defined = True --samples '${section.invert_samples}${section.samples}' #end if #if $section.samples_file: #set $samples_defined = True --samples-file "${section.invert_samples_file}${section.samples_file}" #end if #if $section.sample: --sample '${section.sample}' #end if ^[^']*$ #if $section.include: --include '${section.include}' #end if ^[^']*$ #if $section.exclude: --exclude '${section.exclude}' #end if ^([^,]+(,[^,]+)*)?$ #if $section.columns != '': --columns '${section.columns}' #end if ${section.vcf_ids} BCF conversion. This Galaxy tool recommends using the compressed BCF format as piping is not implemented, and uncompressed data would use unnecessary amounts of space. ]]> als.tsv.gz && tabix -s1 -b2 -e2 als.tsv.gz ]]> Collapse -------- Controls how to treat records with duplicate positions and defines compatible records across multiple input files. Here by "compatible" we mean records which should be considered as identical by the tools. For example, when performing line intersections, the desire may be to consider as identical all sites with matching positions (bcftools isec -c all), or only sites with matching variant type (bcftools isec -c snps -c indels), or only sites with all alleles identical (bcftools isec -c none). 
+------------+----------------------------------------------------------------+ | Flag value | Result | +============+================================================================+ | none | only records with identical REF and ALT alleles are compatible | +------------+----------------------------------------------------------------+ | some | only records where some subset of ALT alleles match are | | | compatible | +------------+----------------------------------------------------------------+ | all | all records are compatible, regardless of whether the ALT | | | alleles match or not. In the case of records with the same | | | position, only the first will be considered and appear on | | | output. | +------------+----------------------------------------------------------------+ | snps | any SNP records are compatible, regardless of whether the ALT | | | alleles match or not. For duplicate positions, only the first | | | SNP record will be considered and appear on output. | +------------+----------------------------------------------------------------+ | indels | all indel records are compatible, regardless of whether the | | | REF and ALT alleles match or not. For duplicate positions, | | | only the first indel record will be considered and appear on | | | output. | +------------+----------------------------------------------------------------+ | both | abbreviation of "-c indels -c snps" | +------------+----------------------------------------------------------------+ | id | only records with identical ID column are compatible. | | | Supported by bcftools merge only. 
| +------------+----------------------------------------------------------------+ , >=, <=, <, != - regex operators "~" and its negation "!~" :: INFO/HAYSTACK ~ "needle" - parentheses :: (, ) - logical operators :: && (same as &), ||, | - INFO tags, FORMAT tags, column names :: INFO/DP or DP FORMAT/DV, FMT/DV, or DV FILTER, QUAL, ID, REF, ALT[0] - 1 (or 0) to test the presence (or absence) of a flag :: FlagA=1 && FlagB=0 - "." to test missing values :: DP=".", DP!=".", ALT="." - missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression :: GT="." - TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other) :: TYPE="indel" | TYPE="snp" - array subscripts, "*" for any field :: (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3 DP4[*] == 0 CSQ[*] ~ "missense_variant.*deleterious" - function on FORMAT tags (over samples) and INFO tags (over vector fields) :: MAX, MIN, AVG, SUM, STRLEN, ABS - variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes :: N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN **Notes:** - String comparisons and regular expressions are case-insensitive - If the subscript "*" is used in regular expression search, the whole field is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be true for the string vector INFO/STR=AB,CD. - Variables and function names are case-insensitive, but not tag names. For example, "qual" can be used instead of "QUAL", "strlen()" instead of "STRLEN()" , but not "dp" instead of "DP". **Examples:** :: MIN(DV)>5 MIN(DV/DP)>0.3 MIN(DP)>10 & MIN(DV)>3 FMT/DP>10 & FMT/GQ>10 .. both conditions must be satisfied within one sample FMT/DP>10 && FMT/GQ>10 .. 
the conditions can be satisfied in different samples QUAL>10 | FMT/GQ>10 .. selects only GQ>10 samples QUAL>10 || FMT/GQ>10 .. selects all samples at QUAL>10 sites TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2) MIN(DP)>35 && AVG(GQ)>50 ID=@file .. selects lines with ID present in the file ID!=@~/file .. skip lines with ID present in the ~/file MAF[0]<0.05 .. select rare variants at 5% cutoff ]]>