comparison snp_caller_caller.xml @ 0:aa82b2e54055 draft

planemo upload for repository https://github.com/wm75/mimodd_galaxy_wrappers commit b36048cd608ede0ec6f6559648525c9350caae34-dirty
author wolma
date Sat, 11 Nov 2017 18:19:22 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:aa82b2e54055
1 <tool id="mimodd_varcall" name="MiModD Variant Calling"
2 version="@MIMODD_WRAPPER_VERSION@">
3 <description>
4 generates a BCF file of position-specific variant likelihoods and coverage information based on a reference sequence and reads aligned against it
5 </description>
6 <macros>
7 <import>macros.xml</import>
<!-- Shared test assertion: the tool's stdout must match both regular
     expressions below. Presumably these are read group id / sample name
     pairs found in the test BAM files; confirm against the test data. -->
8 <macro name="test_mentions_samples">
9 <assert_stdout>
10 <has_text_matching expression="000.+N2" />
11 <has_text_matching expression="266-1.+ot266" />
12 </assert_stdout>
13 </macro>
14 </macros>
<!-- The three macros expanded below are not defined in this file; they are
     expected to be provided by the imported macros.xml. -->
15 <expand macro="requirements" />
16 <expand macro="stdio" />
17 <expand macro="version_command" />
<!-- Cheetah template for the "mimodd varcall" command line. Arguments, in order:
     1. the reference genome: the "path" field of the selected data table
        entry for a built-in genome, otherwise the history dataset itself;
     2. every dataset selected in list_input;
     3. after the index-files option, the bam_index metadata file of each of
        those datasets, listed in the same order as the datasets;
     4. the output file, the values of the two boolean parameters (each of
        which may render as an empty string) and the max-depth value.
     NOTE(review): the verbose and the quiet option are both passed
     unconditionally; confirm against the mimodd varcall CLI documentation
     that the two are meant to be combined. -->
18 <command><![CDATA[
19 mimodd varcall
20 #if str($reference.source) == "cached":
21 '$reference.genome.fields.path'
22 #else:
23 '$reference.genome'
24 #end if
25 #for $input_file in $list_input
26 '$input_file'
27 #end for
28 --index-files
29 #for $input_file in $list_input
30 '${input_file.metadata.bam_index}'
31 #end for
32 --ofile '$ofile'
33 $group_by_id
34 $adv_settings.md5_check
35 --max-depth $adv_settings.max_depth
36 --verbose
37 --quiet
38 ]]></command>
39
40 <inputs>
<!-- Reference genome selection: either an entry of the all_fasta data table
     ("cached") or a fasta dataset from the history ("history"). Both branches
     name their parameter "genome", so the command template reads
     $reference.genome in either case. -->
41 <conditional name="reference">
42 <param name="source" type="select"
43 label="Will you select a reference genome from your history or use a built-in genome?">
44 <option value="cached">Use a built-in genome</option>
45 <option value="history">Use a genome from my history</option>
46 </param>
47 <when value="cached">
48 <param name="genome" type="select"
49 label="reference genome"
50 help="The fasta reference genome that variants should be called against.">
51 <options from_data_table="all_fasta" />
52 </param>
53 </when>
54 <when value="history">
55 <param name="genome" type="data" format="fasta"
56 label="reference genome"
57 help="The fasta reference genome that variants should be called against."/>
58 </when>
59 </conditional>
<!-- One or more BAM datasets; the command template iterates over this list
     twice (once for the datasets, once for their index files). -->
60 <param name="list_input" type="data" multiple="true" format="bam"
61 label="Aligned reads input dataset(s)"
62 help="Select at least one dataset to call variants on. If you select several datasets or a dataset collection, this tool will perform joint variant calling on all of them and produce a single, possibly multisample, output dataset." />
63 <param name="group_by_id" type="boolean" truevalue="-i" falsevalue="" checked="false"
64 label="group reads based on read group id only"
65 help="If selected, this option ensures that only the read group id (but not the sample name) is considered in grouping reads in the input file(s). If turned off, read groups with identical sample names are automatically pooled and analyzed together even if they come from different NGS runs." />
66 <section name="adv_settings" title="More options" expanded="False">
<!-- Inverted flag mapping: the checked (default) state adds nothing to the
     command line, while unchecking the box emits -x. -->
67 <param name="md5_check" type="boolean" truevalue="" falsevalue="-x" checked="true"
68 label="md5 sum verification of contigs/chromosomes"
69 help="leave turned on to avoid accidental variant calling against a wrong reference genome version (see the tool help below)." />
70 <param name="max_depth" type="integer" value="250" min="0"
71 label="average sample depth cap limit (default: 250)"
72 help="only relevant for very large sample numbers and/or very high sample coverage; increase to use more of the data, decrease to save memory"/>
73 </section>
74 </inputs>
75
76 <outputs>
<!-- Single BCF output. The label spells the tool name "MiModD", matching the
     name attribute of the tool element. -->
77 <data name="ofile" format="bcf"
78 label="Variant Calls from MiModD Variant Calling on ${on_string}">
<!-- Only when a built-in genome was used: set the output's dbkey metadata to
     column 1 of the all_fasta row whose column 0 equals the selected
     reference.genome value. No dbkey is set for a history reference. -->
79 <actions>
80 <conditional name="reference.source">
81 <when value="cached">
82 <action type="metadata" name="dbkey">
83 <option type="from_data_table" name="all_fasta" column="1" offset="0">
84 <filter type="param_value" ref="reference.genome" column="0" />
85 </option>
86 </action>
87 </when>
88 </conditional>
89 </actions>
90 </data>
91 </outputs>
92
93 <tests>
<!-- Test 1: a single BAM input with a reference genome taken from the
     history; stdout is checked through the shared test_mentions_samples macro. -->
94 <test>
95 <conditional name="reference">
96 <param name="source" value="history" />
97 <param name="genome" value="a.fa" />
98 </conditional>
99 <param name="list_input" value="a.bam" />
100 <expand macro="test_mentions_samples" />
101 </test>
<!-- Test 2: two BAM inputs given together; the same stdout patterns as in
     the single-file test are expected. -->
102 <test>
103 <conditional name="reference">
104 <param name="source" value="history" />
105 <param name="genome" value="a.fa" />
106 </conditional>
107 <param name="list_input" value="a_part1.bam,a_part2.bam" />
108 <expand macro="test_mentions_samples" />
109 </test>
<!-- Test 3: non-default option values. Only the generated command line is
     asserted on (the -i and -x flags and the max-depth value); there are no
     stdout or output dataset assertions in this test. -->
110 <test>
111 <conditional name="reference">
112 <param name="source" value="history" />
113 <param name="genome" value="a.fa" />
114 </conditional>
115 <param name="list_input" value="a.bam" />
116 <param name="group_by_id" value="true" />
117 <section name="adv_settings">
118 <param name="md5_check" value="false" />
119 <param name="max_depth" value="1000" />
120 </section>
121 <assert_command>
122 <has_text text="-i" />
123 <has_text text="-x" />
124 <has_text text="--max-depth 1000" />
125 </assert_command>
126 </test>
127 </tests>
128 <help><![CDATA[
129 .. class:: infomark
130
131 **What it does**
132
133 The tool transforms the read-centered information in the aligned reads input
134 datasets into position-centered information including variant call statistics
135 (using samtools mpileup and bcftools internally).
136
137 **It produces a BCF file that serves as the basis for all further variant
138 analyses with MiModD**.
139
140 -----
141
142 **Notes on Advanced Settings:**
143
144 **MD5 checksums**
145
146 By default, the tool will check whether the input BAM dataset(s) provide(s) MD5
147 checksums for the reference genome contig/chromosome sequences used during read
148 alignment (e.g., the *MiModD Read Alignment* tool stores these in the BAM file
149 header). If it finds MD5 sums for all sequences, it will compare them to the
150 checksums of the reference genome sequences used in the current tool run and
151 abort with an error message if there is a discrepancy between them. If it finds
152 contigs/chromosomes with matching checksum, but different names in the aligned
153 reads dataset(s) and the reference genome dataset, it will use the name from
154 the reference genome in its output.
155
156 This behavior has two benefits:
157
158 1) It protects from accidental variant calling against a wrong reference genome
159 (*i.e.*, a different one than that used during the alignment step), which would
160 result in wrong calls. This is the primary reason why we recommend leaving the
161 check activated.
162
163 2) It provides an opportunity to change sequence names between aligned reads
164 files and variant call files by providing a reference genome file with altered
165 sequence names (but identical sequence data).
166
167 Since there may be rare cases where you *really* want to call variants against a
168 reference genome with different checksums (e.g., you may have edited the
169 reference sequence based on the alignment results), the check can be turned
170 off, but only do this if you know *exactly* why.
171
172
173 **Average sample depth cap limit**
174
175 For each of a total of ``M`` BAM input datasets, the tool will only pile up a
176 maximum number of reads ``N`` per position to avoid excessive memory usage with
177 very large numbers of samples sequenced at high coverage.
178 ``N`` will be calculated as the maximum of ``8000/M`` and ``DEPTH*S``, where ``S``
179 is the maximum number of samples found in a single input dataset and ``DEPTH``
180 is the *average sample depth cap limit* specified in the tool form.
181
182 This parameter thus sets the average depth of the pile-up per sample that is
183 guaranteed to be used even when there is a very large number of samples. As can
184 be seen from the formula above, however, it will rarely become relevant for any
185 regular-size analysis.
186
187
188 @HELP_FOOTER@
189 ]]></help>
190 <expand macro="citations" />
191 </tool>