comparison average_fst.xml @ 0:2c498d40ecde

Uploaded
author miller-lab
date Mon, 09 Apr 2012 12:03:06 -0400
parents
children e29f4d801bb0
comparison
equal deleted inserted replaced
-1:000000000000 0:2c498d40ecde
1 <tool id="gd_average_fst" name="Average FST" version="1.0.0">
2 <description>of two populations</description>
3 <!-- Runs average_fst.py with: the SNP table, both population files, the data-source choice and its minimum value, the fixed-SNP and estimator choices, and the output path; then the shuffle count and randomization population file (or "0" and /dev/null when randomization is off); then one "column:name" argument per individual listed in the input dataset's metadata. -->
4 <command interpreter="python">
5 average_fst.py "$input" "$p1_input" "$p2_input" "$data_source.ds_choice" "$data_source.min_value" "$discard_fixed" "$biased" "$output"
6 #if $use_randomization.ur_choice == '1'
7 "$use_randomization.shuffles" "$use_randomization.p0_input"
8 #else
9 "0" "/dev/null"
10 #end if
11 #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
12 #set $arg = '%s:%s' % ($individual_col, $individual)
13 "$arg"
14 #end for
15 </command>
16 <!-- Tool parameters; each one is referenced by name in the command template. -->
17 <inputs>
18 <param name="input" type="data" format="wsf" label="SNP table" />
19 <param name="p1_input" type="data" format="ind" label="Population 1 individuals" />
20 <param name="p2_input" type="data" format="ind" label="Population 2 individuals" />
21 <!-- Data source: both branches define min_value, so the command always receives $data_source.min_value; only its label differs. -->
22 <conditional name="data_source">
23 <param name="ds_choice" type="select" label="Data source">
24 <option value="0" selected="true">sequence coverage and ..</option>
25 <option value="1">estimated genotype and ..</option>
26 </param>
27 <when value="0">
28 <param name="min_value" type="integer" min="1" value="1" label="Minimum total read count for a population" />
29 </when>
30 <when value="1">
31 <param name="min_value" type="integer" min="1" value="1" label="Minimum individual genotype quality" />
32 </when>
33 </conditional>
34 <!-- Whether SNPs that appear fixed in the two populations are retained (0) or deleted (1, the default). -->
35 <param name="discard_fixed" type="select" label="Apparently fixed SNPs">
36 <option value="0">Retain SNPs that appear fixed in the two populations</option>
37 <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option>
38 </param>
39 <!-- Which FST estimator to use; the selected value is passed to the script as "$biased". -->
40 <param name="biased" type="select" label="FST estimator">
41 <option value="0" selected="true">Wright's original definition</option>
42 <option value="1">Weir's unbiased estimator</option>
43 </param>
44 <!-- Optional randomization: "Yes" asks for a shuffle count and an extra population file; with "No" the command substitutes "0" and /dev/null. -->
45 <conditional name="use_randomization">
46 <param name="ur_choice" type="select" label="Use randomization">
47 <option value="0" selected="true">No</option>
48 <option value="1">Yes</option>
49 </param>
50 <when value="0" />
51 <when value="1">
52 <param name="shuffles" type="integer" min="0" value="0" label="Shuffles" />
53 <param name="p0_input" type="data" format="ind" label="Individuals for randomization" />
54 </when>
55 </conditional>
56 </inputs>
57 <!-- Single output dataset in txt format; its path is handed to average_fst.py as "$output". -->
58 <outputs>
59 <data name="output" format="txt" />
60 </outputs>
61 <!-- One functional test: data source option 0 with min_value 3, apparently fixed SNPs deleted, Wright's original definition, randomization off; the expected output file is test_out/average_fst/average_fst.txt. -->
62 <tests>
63 <test>
64 <param name="input" value="test_in/sample.wsf" ftype="wsf" />
65 <param name="p1_input" value="test_in/a.ind" ftype="ind" />
66 <param name="p2_input" value="test_in/b.ind" ftype="ind" />
67 <param name="ds_choice" value="0" />
68 <param name="min_value" value="3" />
69 <param name="discard_fixed" value="1" />
70 <param name="biased" value="0" />
71 <param name="ur_choice" value="0" />
72 <output name="output" file="test_out/average_fst/average_fst.txt" />
73 </test>
74 </tests>
75
76 <help>
77 **What it does**
78
79 The user specifies a SNP table and two "populations" of individuals,
80 both previously defined using the Galaxy tool to select individuals from
81 a SNP table. No individual can be in both populations. Other choices are
82 as follows.
83
84 Data source. The allele frequencies of a SNP in the two populations can be
85 estimated either by the total number of reads of each allele, or by adding
86 the frequencies inferred from genotypes of individuals in the populations.
87
88 After specifying the data source, the user sets lower bounds on the amount
89 of data required at a SNP. For estimating the Fst using read counts,
90 the bound is the minimum count of reads of the two alleles in a population.
91 For estimations based on genotype, the bound is the minimum reported genotype
92 quality per individual. SNPs not meeting these lower bounds are ignored.
93
94 The user specifies whether SNPs where both populations appear to be fixed
95 for the same allele should be retained or discarded.
96
97 The user chooses which definition of Fst to use: Wright's original definition
98 or Weir's unbiased estimator.
99
100 Finally, the user decides whether to use randomizations. If so, then the
101 user specifies how many randomly generated population pairs (retaining
102 the numbers of individuals of the originals) to generate, as well as the
103 "population" of additional individuals (not in the first two populations)
104 that can be used in the randomization process.
105
106 The program prints the average Fst for the original populations and the
107 number of SNPs used to compute it. If randomizations were requested,
108 it prints the average Fst for each randomly generated population pair,
109 ending with a summary that includes the maximum and average values, and the
110 highest-scoring population pair.
111 </help>
112 </tool>