Mercurial > repos > iuc > bcftools_concat
view macros.xml @ 12:874cc12ae316 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 8f3fe2272c80fdb749db49be689681e4d1391bee"
author | iuc |
---|---|
date | Wed, 25 Dec 2019 12:35:36 -0500 |
parents | 81426b3cf18b |
children | d40d4bf05718 |
line wrap: on
line source
<macros>
    <!--
        Shared macros for the bcftools tool wrappers.

        Conventions used by the command-line tokens below:
          * Tokens named PREPARE_* emit shell commands (each terminated by
            "&&") that stage files in the job working directory and set
            Cheetah variables; they must be expanded before the token that
            consumes those variables.
          * Most option tokens read their parameters from a Cheetah variable
            called $section, which the including tool is expected to set to
            the section/conditional holding the corresponding parameters.
    -->

    <!-- ===== Versions, requirements, citations ===== -->
    <token name="@TOOL_VERSION@">1.10</token>
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">bcftools</requirement>
            <requirement type="package" version="1.10">htslib</requirement>
            <yield />
        </requirements>
    </xml>
    <xml name="samtools_requirement">
        <requirement type="package" version="1.10">samtools</requirement>
    </xml>
    <xml name="matplotlib_requirement">
        <requirement type="package" version="3.1.0">matplotlib</requirement>
    </xml>
    <xml name="version_command">
        <version_command>bcftools 2>&amp;1 | grep 'Version:'</version_command>
    </xml>
    <xml name="citations">
        <citations>
            <citation type="doi">10.1093/bioinformatics/btp352</citation>
            <yield />
        </citations>
    </xml>
    <token name="@BCFTOOLS_WIKI@">https://github.com/samtools/bcftools/wiki</token>
    <token name="@BCFTOOLS_MANPAGE@">http://samtools.github.io/bcftools/bcftools.html</token>

    <!-- ===== Environment ===== -->
    <!-- Thread count option; falls back to 4 when GALAXY_SLOTS is unset. -->
    <token name="@THREADS@">
--threads \${GALAXY_SLOTS:-4}
    </token>
    <!-- Derives the plugin directory from the location of the bcftools binary on PATH. -->
    <token name="@PREPARE_ENV@">
<![CDATA[
export BCFTOOLS_PLUGINS=`which bcftools | sed 's,bin/bcftools,libexec/bcftools,'`;
]]>
    </token>

    <!-- ===== Single VCF/BCF input ===== -->
    <xml name="macro_input">
        <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf" label="VCF/BCF Data" />
    </xml>
    <!--
        Stages $input_file in the working directory and sets $input_vcf:
          * vcf       : bgzip-compressed to input.vcf.gz, then indexed
          * vcf_bgzip : symlinked to input.vcf.gz; the tabix index metadata is
                        linked when present, otherwise bcftools index is run
          * bcf       : symlinked to input.bcf; the csi index metadata is
                        linked when present, otherwise bcftools index is run
    -->
    <token name="@PREPARE_INPUT_FILE@">
<![CDATA[
## May need to symlink input if there is an associated
#set $input_vcf = 'input.vcf.gz'
#if $input_file.is_of_type('vcf')
  bgzip -c '$input_file' > $input_vcf &&
  bcftools index $input_vcf &&
#elif $input_file.is_of_type('vcf_bgzip')
  ln -s '$input_file' $input_vcf &&
  #if $input_file.metadata.tabix_index:
    ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi &&
  #else
    bcftools index $input_vcf &&
  #end if
#elif $input_file.is_of_type('bcf')
  #set $input_vcf = 'input.bcf'
  ln -s '$input_file' $input_vcf &&
  #if $input_file.metadata.bcf_index:
    ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
  #else
    bcftools index $input_vcf &&
  #end if
#end if
]]>
    </token>
    <!-- Path set by @PREPARE_INPUT_FILE@. -->
    <token name="@INPUT_FILE@">
$input_vcf
    </token>

    <!-- ===== Multiple VCF/BCF inputs ===== -->
    <xml name="macro_inputs">
        <param name="input_files" type="data" format="vcf,vcf_bgzip,bcf" label="Other VCF/BCF Datasets" multiple="True" />
    </xml>
    <!--
        Same staging as @PREPARE_INPUT_FILE@, applied to every dataset in
        $input_files. Staged names are numbered by position (input0.vcf.gz,
        input1.bcf.gz, ...). Each staged name is appended to the Cheetah list
        $input_vcfs and written, one per line, to the file named by
        $vcfs_list_file.
    -->
    <token name="@PREPARE_INPUT_FILES@">
<![CDATA[
## May need to symlink input if there is an associated
#set $input_vcfs = []
#set $vcfs_list_file = 'vcfs_list'
#for (i, input_file) in enumerate($input_files):
  #set $input_vcf = 'input' + str($i) + '.vcf.gz'
  #if $input_file.is_of_type('vcf')
    bgzip -c '$input_file' > $input_vcf &&
    bcftools index $input_vcf &&
  #elif $input_file.is_of_type('vcf_bgzip')
    ln -s '$input_file' $input_vcf &&
    #if $input_file.metadata.tabix_index:
      ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi &&
    #else
      bcftools index $input_vcf &&
    #end if
  #elif $input_file.is_of_type('bcf')
    #set $input_vcf = 'input' + str($i) + '.bcf.gz'
    ln -s '$input_file' $input_vcf &&
    #if $input_file.metadata.bcf_index:
      ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
    #else
      bcftools index $input_vcf &&
    #end if
  #end if
  echo '$input_vcf' >> $vcfs_list_file &&
  $input_vcfs.append($input_vcf)
#end for
]]>
    </token>
    <!-- Space-separated list of the names staged by @PREPARE_INPUT_FILES@. -->
    <token name="@INPUT_FILES@">
#echo ' '.join($input_vcfs)#
    </token>
    <!-- Name of the list file written by @PREPARE_INPUT_FILES@. -->
    <token name="@INPUT_LIST_FILE@">
$vcfs_list_file
    </token>

    <!-- ===== Reference genome ===== -->
    <!-- Test helper: fills the reference_source conditional (defaults: history source, empty file name). -->
    <xml name="test_using_reference" token_select_from="history" token_ref="">
        <conditional name="reference_source">
            <param name="reference_source_selector" value="@SELECT_FROM@" />
            <param name="fasta_ref" ftype="fasta" value="@REF@" />
        </conditional>
    </xml>
    <xml name="macro_fasta_ref">
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
                <option value="cached">Use a built-in genome</option>
                <option value="history">Use a genome from the history</option>
            </param>
            <when value="cached">
                <param name="fasta_ref" type="select" label="Reference genome">
                    <options from_data_table="fasta_indexes">
                        <filter type="data_meta" column="dbkey" key="dbkey" ref="input_file" />
                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="fasta_ref" type="data" format="fasta" label="Reference genome" />
            </when>
        </conditional>
    </xml>
    <!--
        Sets $input_fa_ref. A history reference is symlinked to ref.fa and
        indexed with samtools faidx; a cached reference uses the path column
        of the selected data table entry. $input_fa_ref stays None when the
        section has no fasta_ref or no reference_source_selector.
    -->
    <token name="@PREPARE_FASTA_REF@">
<![CDATA[
#set $input_fa_ref = None
#if 'fasta_ref' in $section and $section.fasta_ref:
  #if 'reference_source_selector' in $section:
    #if str($section.reference_source_selector) == "history":
      #set $input_fa_ref = 'ref.fa'
      ln -s '$section.fasta_ref' $input_fa_ref &&
      samtools faidx $input_fa_ref &&
    #else:
      #set $input_fa_ref = str($section.fasta_ref.fields.path)
    #end if
  #end if
#end if
]]>
    </token>
    <!-- Requires @PREPARE_FASTA_REF@ to have been expanded first (reads $input_fa_ref). -->
    <token name="@FASTA_REF@">
#if $input_fa_ref is not None:
  --fasta-ref $input_fa_ref
#elif 'fasta_ref' in $section and $section.fasta_ref:
  --fasta-ref '${section.fasta_ref}'
#end if
    </token>

    <!-- ===== Allele frequency options ===== -->
    <xml name="macro_AF_file">
        <param name="AF_file" argument="--AF-file" type="data" format="tabular" optional="true" label="Allele frequencies file" help="Tab-delimited file containing the columns CHR,POS,REF,ALT,AF" />
    </xml>
    <!-- This may need to bgzip and tabix the file -->
    <token name="@PREPARE_AF_FILE@">
<![CDATA[
## Placeholder: currently emits nothing.
#if 'AF_file' in $section and $section.AF_file:
  #pass
#end if
]]>
    </token>
    <token name="@AF_FILE@">
#if 'AF_file' in $section and $section.AF_file:
  --AF-file '${section.AF_file}'
#end if
    </token>
    <xml name="macro_estimate_AF">
        <param name="estimate_AF" argument="--estimate-AF" type="data" format="data" optional="true" label="Estimate allele frequency" help="Calculate AC,AN counts on the fly, using either all samples (&quot;-&quot;) or samples listed in &lt;file&gt;" />
    </xml>
    <token name="@ESTIMATE_AF@">
#if 'estimate_AF' in $section and $section.estimate_AF:
  --estimate-AF "${section.estimate_AF}"
#end if
    </token>

    <!-- ===== Exons file ===== -->
    <xml name="macro_exons_file">
        <param name="exons_file" type="data" format="tabular" optional="true" label="Exons file" help="Tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)" />
    </xml>
    <!-- Compresses the exons dataset to exons_file.tab.gz, tabix-indexes it, and sets $exons_path. -->
    <token name="@PREPARE_EXONS_FILE@">
<![CDATA[
#set $exons_path = None
#if 'exons_file' in $section and $section.exons_file:
  #set $exons_path = 'exons_file.tab.gz'
  bgzip -c "$section.exons_file" > $exons_path &&
  tabix -s 1 -b 2 -e 3 $exons_path &&
#end if
]]>
    </token>
    <!-- Requires @PREPARE_EXONS_FILE@ to have been expanded first (reads $exons_path). -->
    <token name="@EXONS_FILE@">
#if 'exons_file' in $section and $section.exons_file:
  --exons $exons_path
#end if
    </token>

    <!-- ===== Ploidy file ===== -->
    <xml name="macro_ploidy_file">
        <param name="ploidy_file" type="data" format="tabular" optional="true" label="Ploidy file" help="Tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY" />
    </xml>
    <token name="@PLOIDY_FILE@">
#if 'ploidy_file' in $section and $section.ploidy_file:
  --ploidy "${section.ploidy_file}"
#end if
    </token>

    <!-- ===== Collapse ===== -->
    <!-- Extra options that a tool can yield into macro_collapse. -->
    <xml name="macro_collapse_opt_none">
        <option value="none">none - require the exact same set of alleles in all files</option>
    </xml>
    <xml name="macro_collapse_opt_id">
        <option value="id">id - only records with identical ID column are compatible. </option>
    </xml>
    <xml name="macro_collapse">
        <param name="collapse" type="select" optional="true" label="Collapse" help="Controls how to treat records with duplicate positions and defines compatible records across multiple input files">
            <option value="snps">snps - allow different alleles, as long as they all are SNPs</option>
            <option value="indels">indels - allow different alleles, as long as they all are indels</option>
            <option value="both">both - indels and snps </option>
            <option value="some">some - at least some of the ALTs must match</option>
            <option value="any">any - any combination of alleles</option>
            <yield/>
        </param>
    </xml>
    <token name="@COLLAPSE@">
#if $section.collapse:
  --collapse ${section.collapse}
#end if
    </token>

    <!-- ===== Apply filters ===== -->
    <xml name="macro_apply_filters">
        <param argument="--apply_filters" type="text" value="" optional="true" label="Apply filters" help="Skip sites where FILTER column does not contain any of the strings listed (e.g. &quot;PASS,.&quot;)">
            <validator type="regex" message="FILTER terms separated by commas">^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$</validator>
        </param>
    </xml>
    <token name="@APPLY_FILTERS@">
#if $section.apply_filters:
  --apply-filters '${section.apply_filters}'
#end if
    </token>

    <!-- ===== Output type ===== -->
    <xml name="macro_select_output_type">
        <param name="output_type" type="select">
            <option value="b">compressed BCF</option>
            <!-- no galaxy datatypes for these
            <option value="u">uncompressed BCF</option>
            <option value="z">compressed VCF</option>
            -->
            <option value="v">uncompressed VCF</option>
        </param>
    </xml>
    <token name="@OUTPUT_TYPE@">
#if str($output_type) != "__none__":
  --output-type '${output_type}'
#end if
    </token>
    <!-- Output dataset whose format follows the selected output_type. -->
    <xml name="macro_vcf_output">
        <data name="output_file" format="vcf">
            <change_format>
                <when input="output_type" value="b" format="bcf" />
                <when input="output_type" value="u" format="bcf" />
                <when input="output_type" value="z" format="vcf_bgzip" />
                <when input="output_type" value="v" format="vcf" />
            </change_format>
        </data>
    </xml>

    <!-- ===== Regions and targets: parameters ===== -->
    <xml name="macro_invert_targets">
        <param name="invert_targets_file" type="boolean" truevalue="^" falsevalue="" label="Invert Targets" help="inverts the query/filtering applied by the targets" />
    </xml>
    <!-- Repeat of chrom/start/stop entries; TYPE and LABEL_TYPE default to region/Region. -->
    <xml name="macro_restriction_spec" token_type="region" token_label_type="Region">
        <repeat name="@TYPE@s" title="@LABEL_TYPE@ Filter" default="1" min="1">
            <param name="chrom" type="text" label="@LABEL_TYPE@ chromosome">
                <validator type="expression" message="A chromosome identifier is required when specifying a @LABEL_TYPE@ filter">value.strip()</validator>
            </param>
            <param name="start" type="text" label="@LABEL_TYPE@ start position">
                <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>
            </param>
            <param name="stop" type="text" label="@LABEL_TYPE@ end position">
                <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>
            </param>
            <yield />
        </repeat>
    </xml>
    <xml name="macro_restrictions_file" token_type="region" token_label_type="Region">
        <param name="@TYPE@s_file" type="data" format="tabular" label="@LABEL_TYPE@s File" help="restrict to @LABEL_TYPE@s listed in a file" />
    </xml>
    <!-- Conditional choosing between no restriction, directly specified intervals, or a history file. -->
    <xml name="macro_restrict" token_type="region" token_label_type="Region" >
        <conditional name="@TYPE@s">
            <param name="@TYPE@s_src" type="select" label="@LABEL_TYPE@s">
                <option value="__none__">Do not restrict to @LABEL_TYPE@s</option>
                <option value="@TYPE@s">Specify one or more @LABEL_TYPE@(s) directly</option>
                <option value="@TYPE@s_file">Operate on @LABEL_TYPE@s specified in a history dataset</option>
            </param>
            <when value="__none__"/>
            <when value="@TYPE@s">
                <expand macro="macro_restriction_spec" type="@TYPE@" label_type="@LABEL_TYPE@" />
                <yield />
            </when>
            <when value="@TYPE@s_file">
                <expand macro="macro_restrictions_file" type="@TYPE@" label_type="@LABEL_TYPE@" />
                <yield />
            </when>
        </conditional>
    </xml>

    <!-- ===== Regions and targets: command line ===== -->
    <!--
        Converts the repeat entries in $intervals into $intervals_spec, a
        comma-separated list of "chrom" or "chrom:start-stop" items. A
        missing start is written as 0 when only a stop is given.
    -->
    <token name="@PARSE_INTERVALS@">
<![CDATA[
#set $components = []
#for $i in $intervals:
  #set $chrom = str($i.chrom).strip()
  #set $start = str($i.start).strip()
  #set $stop = str($i.stop).strip()
  #if $start or $stop:
    $components.append($chrom + ':' + ($start or '0') + '-' + $stop)
  #else:
    $components.append($chrom)
  #end if
#end for
#set $intervals_spec = ','.join($components)
]]>
    </token>
    <!-- Requires @PREPARE_REGIONS_FILE@ to have been expanded first (reads $regions_path). -->
    <token name="@REGIONS@">
<![CDATA[
#if $section.regions.regions_src == 'regions':
  #set $intervals = $section.regions.regions
  @PARSE_INTERVALS@
  --regions '$intervals_spec'
#elif $section.regions.regions_src == 'regions_file' and $section.regions.regions_file:
  #if $regions_path is not None:
    --regions-file '$regions_path'
  #else:
    --regions-file '$section.regions.regions_file'
  #end if
#end if
]]>
    </token>
    <!-- Requires @PREPARE_TARGETS_FILE@ to have been expanded first (reads $targets_path). -->
    <token name="@TARGETS@">
<![CDATA[
#if $targets_path:
  --targets-file "${section.targets.invert_targets_file}${targets_path}"
#elif $section.targets.targets_src == 'targets':
  #set $intervals = $section.targets.targets
  @PARSE_INTERVALS@
  --targets '${section.targets.invert_targets_file}$intervals_spec'
#elif $section.targets.targets_src == 'targets_file' and $section.targets.targets_file:
  --targets-file "${section.targets.invert_targets_file}${section.targets.targets_file}"
#end if
]]>
    </token>
    <!-- Symlinks a regions dataset whose extension starts with "bed" to regions_file.bed and sets $regions_path. -->
    <token name="@PREPARE_REGIONS_FILE@">
<![CDATA[
#set $regions_path = None
#if 'regions' in $section
  #if $section.regions.regions_src == 'regions_file' and $section.regions.regions_file:
    #if $section.regions.regions_file.ext.startswith('bed'):
      #set $regions_path = 'regions_file.bed'
      ln -s '$section.regions.regions_file' $regions_path &&
    #end if
  #end if
#end if
]]>
    </token>
    <!--
        Compresses the targets dataset to targets_file.tab.gz, tabix-indexes
        it, and sets $targets_path. Handles both layouts: a "targets"
        conditional inside the section, or a targets_file parameter placed
        directly in the section.
    -->
    <token name="@PREPARE_TARGETS_FILE@">
<![CDATA[
#set $targets_path = None
#if 'targets' in $section
  #if $section.targets.targets_src == 'targets_file':
    #set $targets_path = 'targets_file.tab.gz'
    bgzip -c "$section.targets.targets_file" > $targets_path &&
    tabix -s 1 -b 2 -e 2 $targets_path &&
  #end if
## This guard previously tested $tgts_sec, a name not defined anywhere in this
## file, while the branch body reads $section.targets_file; test $section instead.
#elif 'targets_file' in $section and $section.targets_file:
  #set $targets_path = 'targets_file.tab.gz'
  bgzip -c "$section.targets_file" > $targets_path &&
  tabix -s 1 -b 2 -e 2 $targets_path &&
#end if
]]>
    </token>
    <!-- Variant of @TARGETS@ for sections that hold targets_file and invert_targets_file directly. -->
    <token name="@TARGETS_FILE@">
<![CDATA[
#if $targets_path is not None:
  --targets-file "${section.invert_targets_file}${targets_path}"
#elif $section.targets_file:
  --targets-file "${section.invert_targets_file}${section.targets_file}"
#end if
]]>
    </token>

    <!-- ===== Samples ===== -->
    <xml name="macro_samples">
        <param argument="--samples" type="text" value="" optional="true" label="Samples" help="Comma separated list of samples to annotate (or exclude)">
            <validator type="regex" message="">^(\w+(,\w+)*)?$</validator>
        </param>
        <param name="invert_samples" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples" help="Inverts the query/filtering applied by Samples (adds &quot;^&quot; prefix to exclude)" />
        <param argument="--samples_file" type="data" format="tabular" optional="true" label="Samples file" help="File of samples to include" />
        <param name="invert_samples_file" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples file" help="inverts the query/filtering applied by Samples file" />
    </xml>
    <!-- Also sets $samples_defined to True when either sample option is emitted. -->
    <token name="@SAMPLES@">
#set $samples_defined = False
#if str($section.samples) != '':
  #set $samples_defined = True
  --samples '${section.invert_samples}${section.samples}'
#end if
#if $section.samples_file:
  #set $samples_defined = True
  --samples-file "${section.invert_samples_file}${section.samples_file}"
#end if
    </token>
    <xml name="macro_sample">
        <param name="sample" type="text" optional="true" label="Sample" help="Apply variants of the given sample" />
    </xml>
    <token name="@SAMPLE@">
#if $section.sample:
  --sample '${section.sample}'
#end if
    </token>

    <!-- ===== Include / exclude expressions ===== -->
    <!--
        Sanitizing is disabled so that expression operators reach bcftools
        unchanged; the validator rejects single quotes because the value is
        placed inside single quotes on the command line.
    -->
    <xml name="macro_include">
        <param argument="--include" type="text" optional="true" label="Include" help="Select sites for which the expression is true">
            <validator type="regex" message="Single quote not allowed">^[^']*$</validator>
            <sanitizer sanitize="False"/>
        </param>
    </xml>
    <token name="@INCLUDE@">
#if $section.include:
  --include '${section.include}'
#end if
    </token>
    <xml name="macro_exclude">
        <param argument="--exclude" type="text" optional="true" label="Exclude" help="Exclude sites for which the expression is true">
            <validator type="regex" message="Single quote not allowed">^[^']*$</validator>
            <sanitizer sanitize="False"/>
        </param>
    </xml>
    <token name="@EXCLUDE@">
#if $section.exclude:
  --exclude '${section.exclude}'
#end if
    </token>

    <!-- ===== Miscellaneous options ===== -->
    <xml name="macro_columns">
        <param name="columns" type="text" value="" optional="true" label="Columns" help="List of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details">
            <validator type="regex" message="COLUMN names separated by commas">^([^,]+(,[^,]+)*)?$</validator>
        </param>
    </xml>
    <token name="@COLUMNS@">
#if $section.columns != '':
  --columns '${section.columns}'
#end if
    </token>
    <xml name="macro_haploid2diploid">
        <param name="haploid2diploid" type="boolean" truevalue="--haploid2diploid" falsevalue="" label="Haploid2Diploid" help="convert haploid genotypes to diploid homozygotes" />
    </xml>
    <xml name="macro_vcf_ids">
        <param name="vcf_ids" type="boolean" truevalue="--vcf-ids" falsevalue="" label="Vcf Ids" help="output VCF IDs instead of CHROM:POS_REF_ALT" />
    </xml>
    <token name="@VCF_IDS@">
${section.vcf_ids}
    </token>

    <!-- ===== Help text fragments (reStructuredText; content must start in column 0) ===== -->
    <token name="@OUTPUT_HELP@">
<![CDATA[
Output Type
-----------

Output compressed BCF (b), or uncompressed VCF (v).
Use the BCF option when piping between bcftools subcommands to speed up
performance by removing unnecessary compression/decompression
and VCF<->BCF conversion.

This Galaxy tool recommends using the compressed BCF format
as piping is not implemented, and uncompressed data would
use unnecessary amounts of space.
]]></token>
    <token name="@REGIONS_HELP@">
<![CDATA[
Region Selections
-----------------

Regions can be specified in a VCF,
BED, or tab-delimited file (the default). The columns of the tab-delimited file
are: CHROM, POS, and, optionally, POS_TO, where positions are 1-based and
inclusive. Uncompressed files are stored in memory, while bgzip-compressed and
tabix-indexed region files are streamed. Note that sequence names must match
exactly, "chr20" is not the same as "20". Also note that chromosome ordering in
FILE will be respected, the VCF will be processed in the order in which
chromosomes first appear in FILE. However, within chromosomes, the VCF will
always be processed in ascending genomic coordinate order no matter what order
they appear in FILE. Note that overlapping regions in FILE can result in
duplicated out of order positions in the output. This option requires indexed
VCF/BCF files.
]]></token>
    <token name="@TARGETS_HELP@"><![CDATA[
Targets
-------

Similar to regions, but the next position is accessed by streaming the whole
VCF/BCF rather than using the tbi/csi index. Both regions and targets options can be
applied simultaneously: regions uses the index to jump to a region and targets discards
positions which are not in the targets. Unlike regions, targets can be prefixed with
"^" to request logical complement. For example, "^X,Y,MT" indicates that
sequences X, Y and MT should be skipped. Yet another difference between the two
is that regions checks both start and end positions of indels, whereas targets checks
start positions only.

For the bcftools call command, with the option -C alleles, third column of the
targets file must be comma-separated list of alleles, starting with the
reference allele. Note that the file must be compressed and indexed. Such a file
can be easily created from a VCF using::

    bcftools query -f'%CHROM\t%POS\t%REF,%ALT\n' file.vcf | bgzip -c > als.tsv.gz && tabix -s1 -b2 -e2 als.tsv.gz

]]>
    <!-- TODO: galaxy-ify -->
    </token>
    <token name="@COLLAPSE_HELP@">
Collapse
--------

Controls how to treat records with duplicate positions and defines compatible
records across multiple input files. Here by "compatible" we mean records which
should be considered as identical by the tools. For example, when performing
line intersections, the desire may be to consider as identical all sites with
matching positions (bcftools isec -c all), or only sites with matching variant
type (bcftools isec -c snps  -c indels), or only sites with all alleles
identical (bcftools isec -c none).

+------------+----------------------------------------------------------------+
| Flag value | Result                                                         |
+============+================================================================+
| none       | only records with identical REF and ALT alleles are compatible |
+------------+----------------------------------------------------------------+
| some       | only records where some subset of ALT alleles match are        |
|            | compatible                                                     |
+------------+----------------------------------------------------------------+
| all        | all records are compatible, regardless of whether the ALT      |
|            | alleles match or not. In the case of records with the same     |
|            | position, only the first will be considered and appear on      |
|            | output.                                                        |
+------------+----------------------------------------------------------------+
| snps       | any SNP records are compatible, regardless of whether the ALT  |
|            | alleles match or not. For duplicate positions, only the first  |
|            | SNP record will be considered and appear on output.            |
+------------+----------------------------------------------------------------+
| indels     | all indel records are compatible, regardless of whether the    |
|            | REF and ALT alleles match or not. For duplicate positions,     |
|            | only the first indel record will be considered and appear on   |
|            | output.                                                        |
+------------+----------------------------------------------------------------+
| both       | abbreviation of "-c indels -c snps"                            |
+------------+----------------------------------------------------------------+
| id         | only records with identical ID column are compatible.          |
|            | Supported by bcftools merge only.                              |
+------------+----------------------------------------------------------------+

    </token>
    <token name="@EXPRESSIONS_HELP@">
<![CDATA[
Expressions
-----------

Valid expressions may contain:

- numerical constants, string constants

  ::

    1, 1.0, 1e-4
    "String"

- arithmetic operators

  ::

    +,*,-,/

- comparison operators

  ::

    == (same as =), >, >=, <=, <, !=

- regex operators "~" and its negation "!~"

  ::

    INFO/HAYSTACK ~ "needle"

- parentheses

  ::

    (, )

- logical operators

  ::

    && (same as &), ||, |

- INFO tags, FORMAT tags, column names

  ::

    INFO/DP or DP
    FORMAT/DV, FMT/DV, or DV
    FILTER, QUAL, ID, REF, ALT[0]

- 1 (or 0) to test the presence (or absence) of a flag

  ::

    FlagA=1 && FlagB=0

- "." to test missing values

  ::

    DP=".", DP!=".", ALT="."

- missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression

  ::

    GT="."

- TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other)

  ::

    TYPE="indel" | TYPE="snp"

- array subscripts, "*" for any field

  ::

    (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3
    DP4[*] == 0
    CSQ[*] ~ "missense_variant.*deleterious"

- function on FORMAT tags (over samples) and INFO tags (over vector fields)

  ::

    MAX, MIN, AVG, SUM, STRLEN, ABS

- variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes

  ::

    N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN

**Notes:**

- String comparisons and regular expressions are case-insensitive
- If the subscript "*" is used in regular expression search, the whole field
  is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be
  true for the string vector INFO/STR=AB,CD.
- Variables and function names are case-insensitive, but not tag names. For
  example, "qual" can be used instead of "QUAL", "strlen()" instead of
  "STRLEN()" , but not "dp" instead of "DP".

**Examples:**

  ::

    MIN(DV)>5
    MIN(DV/DP)>0.3
    MIN(DP)>10 & MIN(DV)>3
    FMT/DP>10 & FMT/GQ>10 .. both conditions must be satisfied within one sample
    FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples
    QUAL>10 | FMT/GQ>10 .. selects only GQ>10 samples
    QUAL>10 || FMT/GQ>10 .. selects all samples at QUAL>10 sites
    TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2)
    MIN(DP)>35 && AVG(GQ)>50
    ID=@file .. selects lines with ID present in the file
    ID!=@~/file .. skip lines with ID present in the ~/file
    MAF[0]<0.05 .. select rare variants at 5% cutoff

]]></token>
</macros>