comparison add_fst_column.xml @ 24:248b06e86022

Added gd_genotype datatype. Modified tools to support new datatype.
author Richard Burhans <burhans@bx.psu.edu>
date Tue, 28 May 2013 16:24:19 -0400
parents 95a05c1ef5d5
children 8997f2ca8c7a
comparison
equal deleted inserted replaced
23:66a183c44dd5 24:248b06e86022
1 <tool id="gd_add_fst_column" name="Per-SNP FSTs" version="1.1.0"> 1 <tool id="gd_add_fst_column" name="Per-SNP FSTs" version="1.2.0">
2 <description>: Compute a fixation index score for each SNP</description> 2 <description>: Compute a fixation index score for each SNP</description>
3 3
4 <command interpreter="python"> 4 <command interpreter="python">
5 add_fst_column.py "$input" "$p1_input" "$p2_input" "$data_source" "$min_reads" "$min_qual" "$retain" "$discard_fixed" "$biased" "$output" 5 add_fst_column.py "$input" "$p1_input" "$p2_input"
6 #if $input_type.choice == '0'
7 "gd_snp" "$input_type.data_source.choice"
8 #if $input_type.data_source.choice == '0'
9 "$input_type.data_source.min_reads" "$input_type.data_source.min_qual"
10 #else if $input_type.data_source.choice == '1'
11 "0" "0"
12 #end if
13 #else if $input_type.choice == '1'
14 "gd_genotype" "1" "0" "0"
15 #end if
16 "$retain" "$discard_fixed" "$biased" "$output"
6 #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) 17 #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
7 #set $arg = '%s:%s' % ($individual_col, $individual) 18 #set $arg = '%s:%s' % ($individual_col, $individual)
8 "$arg" 19 "$arg"
9 #end for 20 #end for
10 </command> 21 </command>
11 22
12 <inputs> 23 <inputs>
13 <param name="input" type="data" format="gd_snp" label="SNP dataset" /> 24 <conditional name="input_type">
25 <param name="choice" type="select" format="integer" label="Input format">
26 <option value="0" selected="true">gd_snp</option>
27 <option value="1">gd_genotype</option>
28 </param>
29
30 <when value="0">
31 <param name="input" type="data" format="gd_snp" label="SNP dataset" />
32
33 <conditional name="data_source">
34 <param name="choice" type="select" format="integer" label="Frequency metric">
35 <option value="0">sequence coverage</option>
36 <option value="1" selected="true">estimated genotype</option>
37 </param>
38 <when value="0">
39 <param name="min_reads" type="integer" min="0" value="0" label="Minimum total read count for a population" />
40 <param name="min_qual" type="integer" min="0" value="0" label="Minimum individual genotype quality" />
41 </when>
42 <when value="1"/>
43 </conditional>
44 </when>
45 <when value="1">
46 <param name="input" type="data" format="gd_genotype" label="Genotype dataset" />
47 </when>
48 </conditional>
49
14 <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" /> 50 <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" />
15 <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" /> 51 <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" />
16
17 <param name="data_source" type="select" format="integer" label="Frequency metric">
18 <option value="0">sequence coverage</option>
19 <option value="1" selected="true">estimated genotype</option>
20 </param>
21
22 <param name="min_reads" type="integer" min="0" value="0" label="Minimum total read count for a population" />
23 <param name="min_qual" type="integer" min="0" value="0" label="Minimum individual genotype quality" />
24 52
25 <param name="retain" type="select" label="If a SNP is below minimum"> 53 <param name="retain" type="select" label="If a SNP is below minimum">
26 <option value="0" selected="true">skip SNP</option> 54 <option value="0" selected="true">skip SNP</option>
27 <option value="1">set FST = -1</option> 55 <option value="1">set FST = -1</option>
28 </param> 56 </param>
39 </param> 67 </param>
40 68
41 </inputs> 69 </inputs>
42 70
43 <outputs> 71 <outputs>
44 <data name="output" format="gd_snp" metadata_source="input" /> 72 <data name="output" format="input" format_source="input" metadata_source="input" />
45 </outputs> 73 </outputs>
46 74
47 <tests> 75 <tests>
48 <test> 76 <test>
49 <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> 77 <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
61 89
62 <help> 90 <help>
63 91
64 **Dataset formats** 92 **Dataset formats**
65 93
66 The input datasets are in gd_snp_ and gd_indivs_ formats. 94 The input datasets are in gd_snp_, gd_genotype_, and gd_indivs_ formats.
67 The output dataset is in gd_snp_ format. (`Dataset missing?`_) 95 The output dataset is in gd_snp_ or gd_genotype_ format. (`Dataset missing?`_)
68 96
69 .. _gd_snp: ./static/formatHelp.html#gd_snp 97 .. _gd_snp: ./static/formatHelp.html#gd_snp
98 .. _gd_genotype: ./static/formatHelp.html#gd_genotype
70 .. _gd_indivs: ./static/formatHelp.html#gd_indivs 99 .. _gd_indivs: ./static/formatHelp.html#gd_indivs
71 .. _Dataset missing?: ./static/formatHelp.html 100 .. _Dataset missing?: ./static/formatHelp.html
72 101
73 ----- 102 -----
74 103
75 **What it does** 104 **What it does**
76 105
77 The user specifies a SNP table and two "populations" of individuals, both previously defined using the Galaxy tool to specify individuals from a SNP table. No individual can be in both populations. Other choices are as follows. 106 The user specifies a SNP table and two "populations" of individuals, both previously defined using the Galaxy tool to specify individuals from a SNP table. No individual can be in both populations. Other choices are as follows.
78 107
79 Frequency metric. The allele frequencies of a SNP in the two populations can be estimated either by the total number of reads of each allele, or by adding the frequencies inferred from genotypes of individuals in the populations. 108 Frequency metric. The allele frequencies of a SNP in the two populations can be estimated either by the total number of reads of each allele (available only when the table is in gd_snp format, not gd_genotype), or by adding the frequencies inferred from genotypes of individuals in the populations.
80 109
81 After specifying the frequency metric, the user sets lower bounds on the amount of data required at a SNP. For estimating the Fst using read counts, the bound is the minimum count of reads of the two alleles in a population. For estimations based on genotype, the bound is the minimum reported genotype quality per individual. 110 After specifying the frequency metric, the user sets lower bounds on the amount of data required at a SNP. For estimating the Fst using read counts, the bound is the minimum count of reads of the two alleles in a population. For estimations based on genotype, the bound is the minimum reported genotype quality per individual.
82 111
83 The user specifies whether the SNPs that violate the lower bound should be ignored or the Fst set to -1. 112 The user specifies whether the SNPs that violate the lower bound should be ignored or the Fst set to -1.
84 113