Mercurial > repos > blankenberg > naive_variant_caller
comparison naive_variant_caller.xml @ 14:5c852eca82e0 draft
planemo upload for repository https://github.com/blankenberg/tools-blankenberg/tree/master/tools/naive_variant_caller commit a1f39a3e28911591f6a1ed58a43e95e0baf5e750
author | blankenberg |
---|---|
date | Wed, 28 Feb 2018 15:54:57 -0500 |
parents | |
children | aff38ea879f1 |
comparison
equal
deleted
inserted
replaced
13:cfc86c3fc5c8 | 14:5c852eca82e0 |
---|---|
1 <tool id="naive_variant_caller" name="Naive Variant Caller" version="0.0.3"> | |
2 <description> - tabulate variable sites from BAM datasets</description> | |
3 <requirements> | |
4 <requirement type="package" version="0.0.3">nvc</requirement> | |
5 </requirements> | |
6 <stdio> | |
7 <exit_code range="1:" /> | |
8 <exit_code range=":-1" /> | |
9 </stdio> | |
10 <version_command>naive_variant_caller.py --version</version_command> | |
11 <command>naive_variant_caller.py | |
12 -o "${output_vcf}" | |
13 | |
14 #for $input_bam in $reference_source.input_bams: | |
15 -b '${input_bam.input_bam}' | |
16 -i '${input_bam.input_bam.metadata.bam_index}' | |
17 #end for | |
18 | |
19 #if $reference_source.reference_source_selector != "history": | |
20 -r '${reference_source.ref_file.fields.path}' | |
21 #elif $reference_source.ref_file: | |
22 -r '${reference_source.ref_file}' | |
23 #end if | |
24 | |
25 #for $region in $regions: | |
26 --region '${region.chromosome}:${region.start}-${region.end}' | |
27 #end for | |
28 | |
29 #for $region_file in $region_files: | |
30 --regions_filename '${region_file.input_region}' | |
31 --regions_file_columns '${int($region_file.input_region.metadata.chromCol)-1},${int($region_file.input_region.metadata.startCol)-1},${int($region_file.input_region.metadata.endCol)-1}' | |
32 #end for | |
33 | |
34 ${variants_only} | |
35 | |
36 ${use_strand} | |
37 | |
38 --ploidy '${$ploidy}' | |
39 | |
40 --min_support_depth '${min_support_depth}' | |
41 | |
42 #if str($min_base_quality): | |
43 --min_base_quality '${min_base_quality}' | |
44 #end if | |
45 | |
46 #if str($min_mapping_quality): | |
47 --min_mapping_quality '${min_mapping_quality}' | |
48 #end if | |
49 | |
50 --allow_out_of_bounds_positions | |
51 | |
52 #if str( $advanced_options.advanced_options_selector ) == "advanced": | |
53 #if str( $advanced_options.coverage_dtype ) != "guess": | |
54 --coverage_dtype '${advanced_options.coverage_dtype}' | |
55 #end if | |
56 ${advanced_options.safe} | |
57 #end if | |
58 </command> | |
59 <inputs> | |
60 <conditional name="reference_source"> | |
61 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> | |
62 <option value="cached">Locally cached</option> | |
63 <option value="history">History</option> | |
64 </param> | |
65 <when value="cached"> | |
66 <repeat name="input_bams" title="BAM file" min="1" > | |
67 <param name="input_bam" type="data" format="bam" label="BAM file"> | |
68 <validator type="unspecified_build" /> | |
69 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> | |
70 </param> | |
71 </repeat> | |
72 <param name="ref_file" type="select" label="Using reference genome" > | |
73 <options from_data_table="sam_fa_indexes"> | |
74 <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...--> | |
75 </options> | |
76 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
77 </param> | |
78 </when> | |
79 <when value="history"> <!-- FIX ME!!!! --> | |
80 <repeat name="input_bams" title="BAM file" min="1" > | |
81 <param name="input_bam" type="data" format="bam" label="BAM file" > | |
82 </param> | |
83 </repeat> | |
84 <param name="ref_file" type="data" format="fasta" label="Using reference file" optional="True" /> | |
85 </when> | |
86 </conditional> | |
87 | |
88 <repeat name="regions" title="Restrict to regions" min="0" > | |
89 <param name="chromosome" type="text" value="" optional="False" label="Chromosome" /> | |
90 <param name="start" type="integer" value="" optional="True" label="Start" help="0-based, closed. (BED style)" /> | |
91 <param name="end" type="integer" value="" optional="True" label="End" help="0-based, open. (BED style)" /> | |
92 </repeat> | |
93 | |
94 <repeat name="region_files" title="Restrict to regions by file" min="0" > | |
95 <param name="input_region" type="data" format="interval" label="Genomic Regions" /> | |
96 </repeat> | |
97 | |
98 <!-- TODO: enhance filtering --> | |
99 <param name="min_support_depth" type="integer" value="0" min="0" label="Minimum number of reads needed to consider a REF/ALT" /> | |
100 <param name="min_base_quality" type="integer" value="" label="Minimum base quality" optional="True" /> | |
101 <param name="min_mapping_quality" type="integer" value="" label="Minimum mapping quality" optional="True" /> | |
102 | |
103 <param name="ploidy" type="integer" value="2" min="1" label="Ploidy" /> | |
104 <param name="variants_only" type="boolean" truevalue="--variants_only" falsevalue="" checked="False" label="Only write out positions with possible alternate alleles"/> | |
105 | |
106 <param name="use_strand" type="boolean" truevalue="--use_strand" falsevalue="" checked="False" label="Report counts by strand"/> | |
107 | |
108 <conditional name="advanced_options"> | |
109 <param name="advanced_options_selector" type="select" label="Show Advanced Options"> | |
110 <option value="basic" selected="True">Hide Advanced Options</option> | |
111 <option value="advanced">Show Advanced Options</option> | |
112 </param> | |
113 <when value="basic"> | |
114 <!-- Do nothing here --> | |
115 </when> | |
116 <when value="advanced"> | |
117 <param name="coverage_dtype" type="select" label="Choose the dtype to use for storing coverage information" help="This affects the maximum recorded value for a position, e.g. uint8 would be 255 coverage, but will require the least amount of RAM"> | |
118 <option value="guess" selected="True">Guess</option> | |
119 <option value="uint8">uint8</option> | |
120 <option value="uint16">uint16</option> | |
121 <option value="uint32">uint32</option> | |
122 <option value="uint64">uint64</option> | |
123 </param> | |
124 <param name="safe" type="boolean" truevalue="--safe" falsevalue="" checked="False" label="Be extra safe"/> | |
125 </when> | |
126 </conditional> | |
127 | |
128 </inputs> | |
129 <outputs> | |
130 <data format="vcf" name="output_vcf" /> | |
131 </outputs> | |
132 <tests> | |
133 <test> | |
134 <param name="reference_source_selector" value="history" /> | |
135 <param name="input_bam" value="fake_phiX174_reads_1.bam" ftype="bam" /> | |
136 <param name="ref_file" value="phiX174.fasta" ftype="fasta" /> | |
137 <param name="regions" value="0" /> | |
138 <param name="min_support_depth" value="0" /> | |
139 <param name="min_base_quality" value="" /> | |
140 <param name="min_mapping_quality" value="" /> | |
141 <param name="ploidy" value="2" /> | |
142 <param name="variants_only" value="False" /> | |
143 <param name="use_strand" value="False" /> | |
144 <param name="advanced_options_selector" value="advanced" /> | |
145 <param name="coverage_dtype" value="uint8" /> | |
146 <output name="output_vcf" file="fake_phiX174_reads_1_test_out_1.vcf" compare="contains" /> | |
147 </test> | |
148 </tests> | |
149 <help> | |
150 **What it does** | |
151 | |
152 This tool is a naive variant caller that processes aligned sequencing reads in BAM format and produces a VCF file containing per-position variant calls. This tool allows multiple BAM files to be provided as input and utilizes read group information to make calls for individual samples. | |
153 | |
154 User-configurable options allow filtering reads that do not pass mapping or base quality thresholds and minimum per-base read depth; users can also specify the ploidy and whether to consider each strand separately. | |
155 | |
156 In addition to calling alternate alleles based upon simple ratios of nucleotides at a position, per base nucleotide counts are also provided. A custom tag, NC, is used within the Genotype fields. The NC field is a comma-separated listing of nucleotide counts in the form of <nucleotide>=<count>, where a plus or minus character is prepended to indicate strand, if the strandedness option was specified. | |
157 | |
158 | |
159 ------ | |
160 | |
161 **Inputs** | |
162 | |
163 Accepts one or more BAM input files and a reference genome from the built-in list or from a FASTA file in your history. | |
164 | |
165 | |
166 **Outputs** | |
167 | |
168 The output is in VCF format. | |
169 | |
170 Example VCF output line, without reporting by strand: | |
171 ``chrM 16029 . T G,A,C . . AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095 GT:AC:AF:NC 0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:A=9,C=5,T=9629,G=15,`` | |
172 | |
173 Example VCF output line, when reporting by strand: | |
174 ``chrM 16029 . T G,A,C . . AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095 GT:AC:AF:NC 0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:+T=3972,-A=9,-C=5,-T=5657,-G=15,`` | |
175 | |
176 **Options** | |
177 | |
178 Reference Genome: | |
179 | |
180 Ensure that you have selected the correct reference genome, either from the list of built-in genomes or by selecting the corresponding FASTA file from your history. | |
181 | |
182 Restrict to regions: | |
183 | |
184 You can specify any number of regions on which you would like to receive results. You can specify just a chromosome name, or a chromosome name and start position, or a chromosome name and start and end position for the set of desired regions. | |
185 | |
186 Minimum number of reads needed to consider a REF/ALT: | |
187 | |
188 This value sets the minimum number of reads that must contain a particular base at a position for that allele to be listed and used in genotyping calls. Default is 0. | |
189 | |
190 Minimum base quality: | |
191 | |
192 The minimum base quality score needed for the position in a read to be used for nucleotide counts and genotyping. Default is no filter. | |
193 | |
194 Minimum mapping quality: | |
195 | |
196 The minimum mapping quality score needed to consider a read for nucleotide counts and genotyping. Default is no filter. | |
197 | |
198 Ploidy: | |
199 | |
200 The number of genotype calls to make at each reported position. | |
201 | |
202 Only write out positions with possible alternate alleles: | |
203 | |
204 When set, only positions that have at least one non-reference nucleotide that passes the declared filters will be present in the output. | |
205 | |
206 Report counts by strand: | |
207 | |
208 When set, nucleotide counts (NC) will be reported in reference to the aligned read's source strand. Reported as: <strand><BASE>=<COUNT>. | |
209 | |
210 Choose the dtype to use for storing coverage information: | |
211 | |
212 This controls the maximum depth value for each nucleotide/position/strand (when specified). Smaller dtypes require less memory, but have lower maximum values. | |
213 | |
214 +--------+----------------------------+ | |
215 | name | maximum coverage value | | |
216 +========+============================+ | |
217 | uint8 | 255 | | |
218 +--------+----------------------------+ | |
219 | uint16 | 65,535 | | |
220 +--------+----------------------------+ | |
221 | uint32 | 4,294,967,295 | | |
222 +--------+----------------------------+ | |
223 | uint64 | 18,446,744,073,709,551,615 | | |
224 +--------+----------------------------+ | |
225 | |
226 | |
227 </help> | |
228 <citations> | |
229 <citation type="doi">10.1186/gb4161</citation> | |
230 </citations> | |
231 | |
232 </tool> |