comparison stacks_genotypes.xml @ 0:26e7ae6adec0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit f3a59c91c231cc1582479109e776d05602b7f24d-dirty
author iuc
date Tue, 14 Jun 2016 14:04:40 -0400
parents
children aa333a0a7d5a
comparison
equal deleted inserted replaced
-1:000000000000 0:26e7ae6adec0
1 <tool id="stacks_genotypes" name="Stacks: genotypes" version="@WRAPPER_VERSION@.0">
2 <description>analyse haplotypes or genotypes in a genetic cross ('genotypes' program)</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <command><![CDATA[
9 ## Create the directory that genotypes is pointed at through -P.
10 mkdir stacks_outputs
11
12 &&
13 ## Link every collection element into that directory, appending ".tsv" when its name lacks the suffix.
14 #for $input_file in $input_col:
15 #set $ext = ""
16 #if not str($input_file.name).endswith('.tsv'):
17 #set $ext = ".tsv"
18 #end if
19 ln -s "${input_file}" "stacks_outputs/${input_file.name}${ext}" &&
20 #end for
21
22 genotypes
23
24 -P stacks_outputs
25 -b $advanced_options.batchid
26
27 -t $options_usage.cross_type
28 -o $options_usage.map_out.map_out_type
29 ## The enzyme parameter only exists in the "genomic" branch of the map_out conditional.
30 #if str( $options_usage.map_out.map_out_type ) == "genomic":
31 -e ${options_usage.map_out.enzyme}
32 #end if
33 ## Optional numeric filters: each flag is emitted only when the field renders as a non-empty string.
34 #if str($advanced_options.minprogeny):
35 -r $advanced_options.minprogeny
36 #end if
37
38 #if str($advanced_options.mindepth):
39 -m $advanced_options.mindepth
40 #end if
41
42 #if str($advanced_options.lnl):
43 --lnl_lim $advanced_options.lnl
44 #end if
45 ## Optional marker lists and correction file: passed only when a dataset was selected.
46 #if $advanced_options.blacklist:
47 -B "$advanced_options.blacklist"
48 #end if
49 #if $advanced_options.whitelist:
50 -W "$advanced_options.whitelist"
51 #end if
52 ## The flag is spelled --cor_path, matching the argument declared on the manual_cor parameter.
53 #if $advanced_options.manual_cor:
54 --cor_path "$advanced_options.manual_cor"
55 #end if
56 ## The three thresholds are defined only in the enabled branch of options_autocorr, so they stay inside this test.
57 #if $options_autocorr.corrections:
58 -c
59 --min_hom_seqs $options_autocorr.hom
60 --min_het_seqs $options_autocorr.het
61 --max_het_seqs $options_autocorr.hetmax
62 #end if
63
64 ## output SQL file (as denovo/refmap)
65 -s
66 ## Macro token; its expansion is not visible in this file (presumably defined in the imported macros.xml).
67 @NORM_GENOTYPES_OUTPUT_FULL@
68 ]]></command>
69 <inputs>
70 <param name="input_col" format="tabular,txt" type="data_collection" collection_type="list" label="Output from previous Stacks pipeline steps (e.g. denovo_map or refmap)" />
71 <!-- Cross type and map output format; both are always passed to genotypes -->
72 <section name="options_usage" title="Genotyping options">
73
74 <param name="cross_type" argument="-t" type="select" label="Cross type">
75 <expand macro="cross_types"/>
76 </param>
77 <!-- Only the 'genomic' output type defines an extra input (the restriction enzyme) -->
78 <conditional name="map_out">
79 <param argument="-o" name="map_out_type" type="select" label="Output file type" help="Output map file type to write" >
80 <option value="joinmap">JoinMap</option>
81 <option value="onemap">OneMap</option>
82 <option value="rqtl">R/QTL</option>
83 <option value="genomic">Genomic</option>
84 </param>
85 <when value="genomic">
86 <param name="enzyme" argument="-e" type="select" label="Restriction enzyme used" help="Only needed for Genomic output format">
87 <expand macro="enzymes"/>
88 </param>
89 </when>
90 </conditional>
91 </section>
92 <!-- Automated corrections: the three thresholds exist only while the checkbox is ticked -->
93 <conditional name="options_autocorr">
94 <param name="corrections" argument="-c" type="boolean" checked="true" truevalue="-c" falsevalue="" label="Make automated corrections to the data" />
95 <when value="-c">
96 <param name="hom" argument="--min_hom_seqs" type="integer" value="5" label="Minimum number of reads required at a stack to call a homozygous genotype" />
97 <param name="het" argument="--min_het_seqs" type="float" value="0.05" label="Heterozygote minor allele minimum frequency" help="below this minor allele frequency a stack is called a homozygote, above it (but below --max_het_seqs) it is called unknown" />
98 <param name="hetmax" argument="--max_het_seqs" type="float" value="0.1" label="Heterozygote minor allele maximum frequency" help="minimum frequency of minor allele to call a heterozygote" />
99 </when>
100 <when value="">
101 </when>
102 </conditional>
103
104 <!-- Optional filters, marker lists, manual corrections file and batch ID -->
105 <section name="advanced_options" title="advanced options" expanded="False">
106 <param name="minprogeny" type="integer" value="0" optional="true" argument="-r" label="Minimum number of progeny required to print a marker" />
107 <param name="mindepth" type="integer" value="" optional="true" argument="-m" label="Minimum stack depth required before exporting a locus in a particular individual" />
108 <param name="lnl" type="float" value="" optional="true" argument="--lnl_lim" label="Filter loci with log likelihood values below this threshold" />
109 <!-- Optional marker lists; the command passes each one only when a dataset is selected -->
110 <param name="whitelist" argument="-W" format="txt,tabular" type="data" optional="true" label="Specify a file containing Whitelisted markers to include in the export" />
111 <param name="blacklist" argument="-B" format="txt,tabular" type="data" optional="true" label="Specify a file containing Blacklisted markers to be excluded from the export" />
112
113 <param name="manual_cor" argument="--cor_path" type="data" format="tabular,txt" optional="true" label="Path to file containing manual genotype corrections from a Stacks SQL database to incorporate into output." />
114 <!-- Required integer, default 1; always passed to genotypes as -b -->
115 <param name="batchid" type="integer" value="1" label="Batch ID to examine when exporting from the catalog" help="Only useful if you analyse data that was processed outside galaxy" />
116 </section>
117 </inputs>
118 <outputs>
119 <expand macro="genotypes_output_full"/> <!-- all output datasets are declared by this macro; its definition is not in this file (presumably the imported macros.xml) -->
120 </outputs>
121
122 <tests>
123 <test>
124 <param name="input_col">
125 <collection type="list">
126 <element name="batch_1.catalog.alleles.tsv" value="genotypes/batch_1.catalog.alleles.tsv" ftype="tabular"/>
127 <element name="batch_1.catalog.snps.tsv" value="genotypes/batch_1.catalog.snps.tsv" ftype="tabular"/>
128 <element name="batch_1.catalog.tags.tsv" value="genotypes/batch_1.catalog.tags.tsv" ftype="tabular"/>
129 <element name="PopA_01.alleles.tsv" value="genotypes/PopA_01.alleles.tsv" ftype="tabular"/>
130 <element name="PopA_01.matches.tsv" value="genotypes/PopA_01.matches.tsv" ftype="tabular"/>
131 <element name="PopA_01.snps.tsv" value="genotypes/PopA_01.snps.tsv" ftype="tabular"/>
132 <element name="PopA_01.tags.tsv" value="genotypes/PopA_01.tags.tsv" ftype="tabular"/>
133 <element name="PopA_02.alleles.tsv" value="genotypes/PopA_02.alleles.tsv" ftype="tabular"/>
134 <element name="PopA_02.matches.tsv" value="genotypes/PopA_02.matches.tsv" ftype="tabular"/>
135 <element name="PopA_02.snps.tsv" value="genotypes/PopA_02.snps.tsv" ftype="tabular"/>
136 <element name="PopA_02.tags.tsv" value="genotypes/PopA_02.tags.tsv" ftype="tabular"/>
137 </collection>
138 </param>
139 <param value="CP" name="cross_type"/>
140 <param value="joinmap" name="map_out_type"/>
141 <param value="1" name="advanced_options|minprogeny"/>
142 <!-- Single case: catalog plus two samples, CP cross, JoinMap output, minprogeny 1 -->
143 <!-- Each output is only checked for a marker string, not compared against a reference file -->
144 <output name="out_generic_haplo">
145 <assert_contents>
146 <has_text text="Catalog ID"/>
147 </assert_contents>
148 </output>
149 <output name="out_sql_markers">
150 <assert_contents>
151 <has_text text="Total Genotypes"/>
152 </assert_contents>
153 </output>
154 <output name="out_joinmap">
155 <assert_contents>
156 <has_text text="batch_1.genotypes_"/>
157 </assert_contents>
158 </output>
159 <output name="out_sql_genotypes">
160 <assert_contents>
161 <has_text text="SQL ID"/>
162 </assert_contents>
163 </output>
164 </test>
165 </tests>
166 <help>
167 <![CDATA[
168 .. class:: infomark
169
170 **What it does**
171
172 This program exports a Stacks data set either as a set of observed haplotypes at each locus in the population, or with the haplotypes encoded into genotypes. The -r option allows only loci that exist in a certain number of population individuals to be exported. In a mapping context, raising or lowering this limit is an effective way to control the quality level of markers exported, as genuine markers will be found in a large number of progeny. If exporting a set of observed haplotypes in a population, the "min stack depth" option can be used to restrict exported loci to those that have a minimum depth of reads.
173
174 By default, when executing the pipeline (either denovo_map or ref_map) the genotypes program will be executed last and will identify mappable markers in the population and export both a set of observed haplotypes and a set of generic genotypes with "min number of progeny" option = 1.
175
176
177 Making Corrections
178
179 If enabled with the "make automated corrections to the data" option, the genotypes program will make automated corrections to the data. Since loci are matched up in the population, the script can correct false-negative heterozygote alleles since it knows the existence of alleles at a particular locus in the other individuals. For example, the program will identify loci with SNPs that didn’t have high enough coverage to be identified by the SNP caller. It will also check that homozygous tags have a minimum depth of coverage, since a low-coverage polymorphic locus may appear homozygous simply because the other allele wasn’t sequenced.
180
181
182 Correction Thresholds
183
184 The thresholds for automatic corrections can be modified by using the "automated corrections option" and changing the default values for the "min number of reads for homozygous genotype", "homozygote minor minimum allele frequency" and "heterozygote minor minimum allele frequency" parameters to genotypes. "min number of reads for homozygous genotype" is the minimum number of reads required to consider a stack homozygous (default of 5). The "homozygote minor minimum allele frequency" and "heterozygote minor minimum allele frequency" variables represent fractions. If the ratio of the depth of the smaller allele to the bigger allele is greater than "heterozygote minor minimum allele frequency" (default of 1/10) a stack is called a het. If the ratio is less than "homozygote minor minimum allele frequency" (default of 1/20) a stack is called homozygous. If the ratio is in between the two values it is unknown and a genotype will not be assigned.
185
186 Automated corrections made by the program are shown in the output file in capital letters.
187
188 --------
189
190 **Input files**
191
192 Output from denovo_map or ref_map
193
194 **Output files:**
195
196 - XXX.tags.tsv file:
197
198 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
199
200 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
201
202
203 - XXX.snps.tsv file:
204
205 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
206
207 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
208
209
210 - XXX.alleles.tsv file:
211
212 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
213
214
215 - XXX.matches.tsv file:
216
217 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
218
219 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
220
221
222 - other files:
223
224 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
225
226 @STACKS_INFOS@
227 ]]>
228 </help>
229 <expand macro="citation" />
230 </tool>
231