comparison snpFreq.xml @ 0:72ea0d13dd66 draft

Imported from capsule None
author devteam
date Mon, 28 Jul 2014 11:56:46 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:72ea0d13dd66
1 <tool id="hgv_snpFreq" name="snpFreq" version="1.0.1">
2 <description>significant SNPs in case-control data</description>
3
4 <requirements> <!-- external packages this tool declares as dependencies -->
5 <requirement type="package" version="2.11.0">R</requirement>
6 <requirement type="package" version="1.34.0">bioc_qvalue</requirement> <!-- presumably the "qvalue" package the help text cites for multiple-testing correction; confirm -->
7 </requirements>
8
9 <command interpreter="perl">
10 snpFreq2.pl $inTypeCond.inType 0.05 $inTypeCond.input $output
11 #if $inTypeCond.inType == "tab"
12 $inTypeCond.group1_1 $inTypeCond.group1_2 $inTypeCond.group1_3
13 $inTypeCond.group2_1 $inTypeCond.group2_2 $inTypeCond.group2_3 0.05
14 #else if $inTypeCond.inType == "snp"
15 $inTypeCond.group1 $inTypeCond.group2
16 #end if
17 </command> <!-- Runs snpFreq2.pl with: input type, 0.05, input dataset, output dataset, then either the six count columns plus 0.05 ("tab") or the two group datasets ("snp"). Every parameter declared inside the inTypeCond conditional is referenced with the inTypeCond. prefix. -->
18
19 <inputs>
20 <conditional name="inTypeCond"> <!-- the chosen input format decides which parameters are shown -->
21 <param name="inType" type="select" label="Format of input">
22 <option value="tab">Alleles pre-counted</option>
23 <option value="snp">SNP table</option>
24 </param>
25 <when value="tab"> <!-- one tabular dataset plus six count columns, three per group -->
26 <param name="input" type="data" format="tabular" label="Dataset" />
27 <param name="group1_1" type="data_column" data_ref="input" label="Column with genotype 1 count for group 1" />
28 <param name="group1_2" type="data_column" data_ref="input" label="Column with genotype 2 count for group 1" />
29 <param name="group1_3" type="data_column" data_ref="input" label="Column with genotype 3 count for group 1" />
30 <param name="group2_1" type="data_column" data_ref="input" label="Column with genotype 1 count for group 2" />
31 <param name="group2_2" type="data_column" data_ref="input" label="Column with genotype 2 count for group 2" />
32 <param name="group2_3" type="data_column" data_ref="input" label="Column with genotype 3 count for group 2" />
33 </when>
34 <when value="snp"> <!-- one "snp" dataset plus two "ind" datasets, one per group -->
35 <param name="input" type="data" format="snp" label="SNP Dataset" />
36 <param name="group1" type="data" format="ind" label="Group 1" />
37 <param name="group2" type="data" format="ind" label="Group 2" />
38 </when>
39 </conditional>
40 </inputs>
41
42 <outputs> <!-- single tabular result dataset; the command line passes it as $output -->
43 <data name="output" format="tabular" />
44 </outputs>
45
46 <tests>
47 <test> <!-- exercises the pre-counted ("tab") input type: group 1 counts from columns 4 to 6, group 2 counts from columns 7 to 9 -->
48 <param name="inType" value="tab" />
49 <param name="input" ftype="tabular" value="snpFreqInput.txt" dbkey="hg18" />
50 <param name="group1_1" value="4" />
51 <param name="group1_2" value="5" />
52 <param name="group1_3" value="6" />
53 <param name="group2_1" value="7" />
54 <param name="group2_2" value="8" />
55 <param name="group2_3" value="9" />
56 <output name="output" file="snpFreqTestOut.txt" /> <!-- expected output file the result is compared against -->
57 </test>
58 </tests> <!-- NOTE(review): no test covers the "snp" input type -->
59
60 <help>
61
62 **Dataset formats**
63
64 The input is tabular_, with six columns of genotype counts. The output is also tabular,
65 and includes all of the input data plus the additional columns described below.
66 (`Dataset missing?`_)
67
68 .. _tabular: ${static_path}/formatHelp.html#tab
69 .. _Dataset missing?: ${static_path}/formatHelp.html
70
71 -----
72
73 **What it does**
74
75 This tool performs a basic analysis of bi-allelic SNPs in case-control
76 data, using the R statistical environment and Fisher's exact test to
77 identify SNPs with a significant difference in the allele frequencies
78 between the two groups. R's "qvalue" package is used to correct for
79 multiple testing.
80
81 The input file includes counts for each allele combination (AA aa Aa)
82 for each group at each SNP position. The assignment of codes (1 2 3)
83 to these genotypes is arbitrary, as long as it is consistent for both
84 groups. Any other input columns are ignored in the computation, but
85 are copied to the output. The output appends eight additional columns,
86 namely the minimum expected counts of the three genotypes for each
87 group, the p-value, and the q-value.
88
89 -----
90
91 **Example**
92
93 - input file::
94
95 chr1 210 211 38 4 15 56 0 1 x
96 chr1 228 229 55 0 2 56 0 1 x
97 chr1 230 231 46 0 11 55 0 2 x
98 chr1 234 235 43 0 14 55 0 2 x
99 chr1 236 237 55 0 2 13 10 34 x
100 chr1 437 438 55 0 2 46 0 11 x
101 chr1 439 440 56 0 1 55 0 2 x
102 chr1 449 450 56 0 1 13 20 24 x
103 chr1 518 519 56 0 1 38 4 15 x
104
105 Here the group 1 genotype counts are in columns 4 - 6, while those
106 for group 2 are in columns 7 - 9.
107
108 Note that the "x" column has no meaning. It was added to this example
109 to show that extra columns can be included, and to make it easier
110 to see where the new columns are appended in the output.
111
112 - output file::
113
114 chr1 210 211 38 4 15 56 0 1 x 47 2 8 47 2 8 1.50219088598917e-05 6.32501425679652e-06
115 chr1 228 229 55 0 2 56 0 1 x 55.5 0 1.5 55.5 0 1.5 1 0.210526315789474
116 chr1 230 231 46 0 11 55 0 2 x 50.5 0 6.5 50.5 0 6.5 0.0155644201009862 0.00409590002657532
117 chr1 234 235 43 0 14 55 0 2 x 49 0 8 49 0 8 0.00210854461554067 0.000739840215979182
118 chr1 236 237 55 0 2 13 10 34 x 34 5 18 34 5 18 6.14613878554783e-17 4.31307984950725e-17
119 chr1 437 438 55 0 2 46 0 11 x 50.5 0 6.5 50.5 0 6.5 0.0155644201009862 0.00409590002657532
120 chr1 439 440 56 0 1 55 0 2 x 55.5 0 1.5 55.5 0 1.5 1 0.210526315789474
121 chr1 449 450 56 0 1 13 20 24 x 34.5 10 12.5 34.5 10 12.5 2.25757007974134e-18 2.37638955762246e-18
122 chr1 518 519 56 0 1 38 4 15 x 47 2 8 47 2 8 1.50219088598917e-05 6.32501425679652e-06
123
124 </help>
125 </tool>