Mercurial > repos > blankenberg > naive_variant_caller
diff naive_variant_caller.xml @ 14:5c852eca82e0 draft
planemo upload for repository https://github.com/blankenberg/tools-blankenberg/tree/master/tools/naive_variant_caller commit a1f39a3e28911591f6a1ed58a43e95e0baf5e750
author | blankenberg |
---|---|
date | Wed, 28 Feb 2018 15:54:57 -0500 |
parents | |
children | aff38ea879f1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/naive_variant_caller.xml Wed Feb 28 15:54:57 2018 -0500 @@ -0,0 +1,232 @@ +<tool id="naive_variant_caller" name="Naive Variant Caller" version="0.0.3"> + <description> - tabulate variable sites from BAM datasets</description> + <requirements> + <requirement type="package" version="0.0.3">nvc</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + <exit_code range=":-1" /> + </stdio> + <version_command>naive_variant_caller.py --version</version_command> + <command>naive_variant_caller.py + -o "${output_vcf}" + + #for $input_bam in $reference_source.input_bams: + -b '${input_bam.input_bam}' + -i '${input_bam.input_bam.metadata.bam_index}' + #end for + + #if $reference_source.reference_source_selector != "history": + -r '${reference_source.ref_file.fields.path}' + #elif $reference_source.ref_file: + -r '${reference_source.ref_file}' + #end if + + #for $region in $regions: + --region '${region.chromosome}:${region.start}-${region.end}' + #end for + + #for $region_file in $region_files: + --regions_filename '${region_file.input_region}' + --regions_file_columns '${int($region_file.input_region.metadata.chromCol)-1},${int($region_file.input_region.metadata.startCol)-1},${int($region_file.input_region.metadata.endCol)-1}' + #end for + + ${variants_only} + + ${use_strand} + + --ploidy '${$ploidy}' + + --min_support_depth '${min_support_depth}' + + #if str($min_base_quality): + --min_base_quality '${min_base_quality}' + #end if + + #if str($min_mapping_quality): + --min_mapping_quality '${min_mapping_quality}' + #end if + + --allow_out_of_bounds_positions + + #if str( $advanced_options.advanced_options_selector ) == "advanced": + #if str( $advanced_options.coverage_dtype ) != "guess": + --coverage_dtype '${advanced_options.coverage_dtype}' + #end if + ${advanced_options.safe} + #end if + </command> + <inputs> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + </param> + <when value="cached"> + <repeat name="input_bams" title="BAM file" min="1" > + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> + </param> + </repeat> + <param name="ref_file" type="select" label="Using reference genome" > + <options from_data_table="sam_fa_indexes"> + <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...--> + </options> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> + </param> + </when> + <when value="history"> <!-- FIX ME!!!! --> + <repeat name="input_bams" title="BAM file" min="1" > + <param name="input_bam" type="data" format="bam" label="BAM file" > + </param> + </repeat> + <param name="ref_file" type="data" format="fasta" label="Using reference file" optional="True" /> + </when> + </conditional> + + <repeat name="regions" title="Restrict to regions" min="0" > + <param name="chromosome" type="text" value="" optional="False" label="Chromosome" /> + <param name="start" type="integer" value="" optional="True" label="Start" help="0-based, closed. (BED style)" /> + <param name="end" type="integer" value="" optional="True" label="End" help="0-based, open. (BED style)" /> + </repeat> + + <repeat name="region_files" title="Restrict to regions by file" min="0" > + <param name="input_region" type="data" format="interval" label="Genomic Regions" /> + </repeat> + + <!-- TODO: enhance filtering --> + <param name="min_support_depth" type="integer" value="0" min="0" label="Minimum number of reads needed to consider a REF/ALT" /> + <param name="min_base_quality" type="integer" value="" label="Minimum base quality" optional="True" /> + <param name="min_mapping_quality" type="integer" value="" label="Minimum mapping quality" optional="True" /> + + <param name="ploidy" type="integer" value="2" min="1" label="Ploidy" /> + <param name="variants_only" type="boolean" truevalue="--variants_only" falsevalue="" checked="False" label="Only write out positions with possible alternate alleles"/> + + <param name="use_strand" type="boolean" truevalue="--use_strand" falsevalue="" checked="False" label="Report counts by strand"/> + + <conditional name="advanced_options"> + <param name="advanced_options_selector" type="select" label="Show Advanced Options"> + <option value="basic" selected="True">Hide Advanced Options</option> + <option value="advanced">Show Advanced Options</option> + </param> + <when value="basic"> + <!-- Do nothing here --> + </when> + <when value="advanced"> + <param name="coverage_dtype" type="select" label="Choose the dtype to use for storing coverage information" help="This affects the maximum recorded value for a position, e.g. uint8 would be 255 coverage, but will require the least amount of RAM"> + <option value="guess" selected="True">Guess</option> + <option value="uint8">uint8</option> + <option value="uint16">uint16</option> + <option value="uint32">uint32</option> + <option value="uint64">uint64</option> + </param> + <param name="safe" type="boolean" truevalue="--safe" falsevalue="" checked="False" label="Be extra safe"/> + </when> + </conditional> + + </inputs> + <outputs> + <data format="vcf" name="output_vcf" /> + </outputs> + <tests> + <test> + <param name="reference_source_selector" value="history" /> + <param name="input_bam" value="fake_phiX174_reads_1.bam" ftype="bam" /> + <param name="ref_file" value="phiX174.fasta" ftype="fasta" /> + <param name="regions" value="0" /> + <param name="min_support_depth" value="0" /> + <param name="min_base_quality" value="" /> + <param name="min_mapping_quality" value="" /> + <param name="ploidy" value="2" /> + <param name="variants_only" value="False" /> + <param name="use_strand" value="False" /> + <param name="advanced_options_selector" value="advanced" /> + <param name="coverage_dtype" value="uint8" /> + <output name="output_vcf" file="fake_phiX174_reads_1_test_out_1.vcf" compare="contains" /> + </test> + </tests> + <help> +**What it does** + +This tool is a naive variant caller that processes aligned sequencing reads from the BAM format and produces a VCF file containing per position variant calls. This tool allows multiple BAM files to be provided as input and utilizes read group information to make calls for individual samples. + +User configurable options allow filtering reads that do not pass mapping or base quality thresholds and minimum per base read depth; user's can also specify the ploidy and whether to consider each strand separately. + +In addition to calling alternate alleles based upon simple ratios of nucleotides at a position, per base nucleotide counts are also provided. A custom tag, NC, is used within the Genotype fields. The NC field is a comma-separated listing of nucleotide counts in the form of <nucleotide>=<count>, where a plus or minus character is prepended to indicate strand, if the strandedness option was specified. + + +------ + +**Inputs** + +Accepts one or more BAM input files and a reference genome from the built-in list or from a FASTA file in your history. + + +**Outputs** + +The output is in VCF format. + +Example VCF output line, without reporting by strand: + ``chrM 16029 . T G,A,C . . AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095 GT:AC:AF:NC 0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:A=9,C=5,T=9629,G=15,`` + +Example VCF output line, when reporting by strand: + ``chrM 16029 . T G,A,C . . AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095 GT:AC:AF:NC 0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:+T=3972,-A=9,-C=5,-T=5657,-G=15,`` + +**Options** + +Reference Genome: + + Ensure that you have selected the correct reference genome, either from the list of built-in genomes or by selecting the corresponding FASTA file from your history. + +Restrict to regions: + + You can specify any number of regions on which you would like to receive results. You can specify just a chromosome name, or a chromosome name and start postion, or a chromosome name and start and end position for the set of desired regions. + +Minimum number of reads needed to consider a REF/ALT: + + This value declares the minimum number of reads containing a particular base at each position in order to list and use said allele in genotyping calls. Default is 0. + +Minimum base quality: + + The minimum base quality score needed for the position in a read to be used for nucleotide counts and genotyping. Default is no filter. + +Minimum mapping quality: + + The minimum mapping quality score needed to consider a read for nucleotide counts and genotyping. Default is no filter. + +Ploidy: + + The number of genotype calls to make at each reported position. + +Only write out positions with possible alternate alleles: + + When set, only positions which have at least one non-reference nucleotide which passes declare filters will be present in the output. + +Report counts by strand: + + When set, nucleotide counts (NC) will be reported in reference to the aligned read's source strand. Reported as: <strand><BASE>=<COUNT>. + +Choose the dtype to use for storing coverage information: + + This controls the maximum depth value for each nucleotide/position/strand (when specified). Smaller values require the least amount of memory, but have smaller maximal limits. + + +--------+----------------------------+ + | name | maximum coverage value | + +========+============================+ + | uint8 | 255 | + +--------+----------------------------+ + | uint16 | 65,535 | + +--------+----------------------------+ + | uint32 | 4,294,967,295 | + +--------+----------------------------+ + | uint64 | 18,446,744,073,709,551,615 | + +--------+----------------------------+ + + + </help> + <citations> + <citation type="doi">10.1186/gb4161</citation> + </citations> + +</tool>