Mercurial > repos > wolma > mimodd_core
comparison snp_caller_caller.xml @ 0:aa82b2e54055 draft
planemo upload for repository https://github.com/wm75/mimodd_galaxy_wrappers commit b36048cd608ede0ec6f6559648525c9350caae34-dirty
author | wolma |
---|---|
date | Sat, 11 Nov 2017 18:19:22 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:aa82b2e54055 |
---|---|
1 <tool id="mimodd_varcall" name="MiModD Variant Calling" | |
2 version="@MIMODD_WRAPPER_VERSION@"> | |
3 <description> | |
4 generates a BCF file of position-specific variant likelihoods and coverage information based on a reference sequence and reads aligned against it | |
5 </description> | |
6 <macros> <!-- tool-local macros; further definitions are imported from macros.xml --> | |
7 <import>macros.xml</import> | |
8 <macro name="test_mentions_samples"> | |
9 <assert_stdout> | |
10 <has_text_matching expression="000.+N2" /> | |
11 <has_text_matching expression="266-1.+ot266" /> | |
12 </assert_stdout> | |
13 </macro> <!-- test_mentions_samples: stdout assertion expanded by the first two tests in this file; stdout must match both regexes above --> | |
14 </macros> | |
15 <expand macro="requirements" /> <!-- not defined in this file; presumably supplied by macros.xml (same for stdio and version_command) - TODO confirm --> | |
16 <expand macro="stdio" /> | |
17 <expand macro="version_command" /> | |
18 <command><![CDATA[ | |
19 mimodd varcall | |
20 #if str($reference.source) == "cached": | |
21 '${reference.genome.fields.path}' | |
22 #else: | |
23 '${reference.genome}' | |
24 #end if | |
25 #for $bam in $list_input: | |
26 '${bam}' | |
27 #end for | |
28 --index-files | |
29 #for $bam in $list_input: | |
30 '${bam.metadata.bam_index}' | |
31 #end for | |
32 --ofile '${ofile}' | |
33 ${group_by_id} | |
34 ${adv_settings.md5_check} | |
35 --max-depth ${adv_settings.max_depth} | |
36 --verbose | |
37 --quiet | |
38 ]]></command> <!-- argument order: reference fasta path, every input BAM, the BAM index paths (same order as the BAMs) after the index-files option, then output file and option flags --> | |
39 | |
40 <inputs> | |
41 <conditional name="reference"> <!-- reference genome: either a built-in entry of the all_fasta data table or a fasta dataset from the history --> | |
42 <param type="select" name="source" | |
43 label="Will you select a reference genome from your history or use a built-in genome?"> | |
44 <option value="cached">Use a built-in genome</option> | |
45 <option value="history">Use a genome from my history</option> | |
46 </param> | |
47 <when value="cached"> | |
48 <param type="select" name="genome" | |
49 help="The fasta reference genome that variants should be called against." | |
50 label="reference genome"> | |
51 <options from_data_table="all_fasta"/> | |
52 </param> | |
53 </when> | |
54 <when value="history"> | |
55 <param type="data" format="fasta" name="genome" | |
56 help="The fasta reference genome that variants should be called against." | |
57 label="reference genome"/> | |
58 </when> | |
59 </conditional> | |
60 <param type="data" format="bam" multiple="true" name="list_input" | |
61 label="Aligned reads input dataset(s)" | |
62 help="Select at least one dataset to call variants on. If you select several datasets or a dataset collection, this tool will perform joint variant calling on all of them and produce a single, possibly multisample, output dataset."/> | |
63 <param type="boolean" name="group_by_id" checked="false" truevalue="-i" falsevalue="" | |
64 label="group reads based on read group id only" | |
65 help="If selected, this option ensures that only the read group id (but not the sample name) is considered in grouping reads in the input file(s). If turned off, read groups with identical sample names are automatically pooled and analyzed together even if they come from different NGS runs."/> <!-- emits -i on the command line when checked, nothing otherwise --> | |
66 <section name="adv_settings" title="More options" expanded="False"> | |
67 <param type="boolean" name="md5_check" checked="true" truevalue="" falsevalue="-x" | |
68 label="md5 sum verification of contigs/chromosomes" | |
69 help="leave turned on to avoid accidental variant calling against a wrong reference genome version (see the tool help below)."/> <!-- inverted flag: checked emits nothing, unchecked emits -x --> | |
70 <param type="integer" name="max_depth" value="250" min="0" | |
71 label="average sample depth cap limit (default: 250)" | |
72 help="only relevant for very large sample numbers and/or very high sample coverage; increase to use more of the data, decrease to save memory"/> | |
73 </section> | |
74 </inputs> | |
75 | |
76 <outputs> <!-- single BCF output; for built-in genomes its dbkey metadata is taken from column 1 of the all_fasta row whose column 0 matches the selected genome --> | |
77 <data name="ofile" format="bcf" | |
78 label="Variant Calls from MiModD Variant Calling on ${on_string}"> | |
79 <actions> | |
80 <conditional name="reference.source"> | |
81 <when value="cached"> | |
82 <action type="metadata" name="dbkey"> | |
83 <option type="from_data_table" name="all_fasta" column="1" offset="0"> | |
84 <filter type="param_value" ref="reference.genome" column="0" /> | |
85 </option> | |
86 </action> | |
87 </when> | |
88 </conditional> | |
89 </actions> | |
90 </data> | |
91 </outputs> | |
92 | |
93 <tests> | |
94 <test> | |
95 <conditional name="reference"> | |
96 <param name="source" value="history" /> | |
97 <param name="genome" value="a.fa" /> | |
98 </conditional> | |
99 <param name="list_input" value="a.bam" /> | |
100 <expand macro="test_mentions_samples" /> | |
101 </test> | |
102 <test> | |
103 <conditional name="reference"> | |
104 <param name="source" value="history" /> | |
105 <param name="genome" value="a.fa" /> | |
106 </conditional> | |
107 <param name="list_input" value="a_part1.bam,a_part2.bam" /> | |
108 <expand macro="test_mentions_samples" /> | |
109 </test> | |
110 <test> <!-- option flags are matched as whitespace-delimited tokens: a plain substring check for the short i flag would always pass because the index-files option is always part of the command --> | |
111 <conditional name="reference"> | |
112 <param name="source" value="history" /> | |
113 <param name="genome" value="a.fa" /> | |
114 </conditional> | |
115 <param name="list_input" value="a.bam" /> | |
116 <param name="group_by_id" value="true" /> | |
117 <section name="adv_settings"> | |
118 <param name="md5_check" value="false" /> | |
119 <param name="max_depth" value="1000" /> | |
120 </section> | |
121 <assert_command> | |
122 <has_text_matching expression="\s-i\s" /> | |
123 <has_text_matching expression="\s-x\s" /> | |
124 <has_text text="--max-depth 1000" /> | |
125 </assert_command> | |
126 </test> | |
127 </tests> | |
128 <help><![CDATA[ | |
129 .. class:: infomark | |
130 | |
131 **What it does** | |
132 | |
133 The tool transforms the read-centered information in the aligned reads input | |
134 datasets into position-centered information including variant call statistics | |
135 (using samtools mpileup and bcftools internally). | |
136 | |
137 **It produces a BCF file that serves as the basis for all further variant | |
138 analyses with MiModD**. | |
139 | |
140 ----- | |
141 | |
142 **Notes on Advanced Settings:** | |
143 | |
144 **MD5 checksums** | |
145 | |
146 By default, the tool will check whether the input BAM dataset(s) provide(s) MD5 | |
147 checksums for the reference genome contig/chromosome sequences used during read | |
148 alignment (e.g., the *MiModD Read Alignment* tool stores these in the BAM file | |
149 header). If it finds MD5 sums for all sequences, it will compare them to the | |
150 checksums of the reference genome sequences used in the current tool run and | |
151 abort with an error message if there is a discrepancy between them. If it finds | |
152 contigs/chromosomes with matching checksum, but different names in the aligned | |
153 reads dataset(s) and the reference genome dataset, it will use the name from | |
154 the reference genome in its output. | |
155 | |
156 This behavior has two benefits: | |
157 | |
158 1) It protects from accidental variant calling against a wrong reference genome | |
159 (*i.e.*, a different one than that used during the alignment step), which would | |
160 result in wrong calls. This is the primary reason why we recommend leaving the | |
161 check activated. | |
162 | |
163 2) It provides an opportunity to change sequence names between aligned reads | |
164 files and variant call files by providing a reference genome file with altered | |
165 sequence names (but identical sequence data). | |
166 | |
167 Since there may be rare cases where you *really* want to call variants against a | |
168 reference genome with different checksums (e.g., you may have edited the | |
169 reference sequence based on the alignment results), the check can be turned | |
170 off, but only do this if you know *exactly* why. | |
171 | |
172 | |
173 **Average sample depth cap limit** | |
174 | |
175 For each of a total of ``M`` BAM input datasets, the tool will only pile up a | |
176 maximum number of reads ``N`` per position to avoid excessive memory usage with | |
177 very large numbers of samples sequenced at high coverage. | |
178 ``N`` will be calculated as the maximum of ``8000/M`` and ``DEPTH*S``, where ``S`` | |
179 is the maximum number of samples found in a single input dataset and ``DEPTH`` | |
180 is the *average sample depth cap limit* specified in the tool form. | |
181 | |
182 This parameter thus sets the average depth of the pile-up per sample that is | |
183 guaranteed to be used even when there is a very large number of samples. As can | |
184 be seen from the formula above, however, it will rarely become relevant for any | |
185 regular-size analysis. | |
186 | |
187 | |
188 @HELP_FOOTER@ | |
189 ]]></help> | |
190 <expand macro="citations" /> | |
191 </tool> |