Mercurial > repos > devteam > snpfreq
comparison snpFreq.xml @ 0:72ea0d13dd66 draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 28 Jul 2014 11:56:46 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:72ea0d13dd66 |
---|---|
1 <tool id="hgv_snpFreq" name="snpFreq" version="1.0.1"> | |
2 <description>significant SNPs in case-control data</description> | |
3 | |
<!-- Runtime dependencies declared as packages: R 2.11.0 and bioc_qvalue 1.34.0. -->
4 <requirements> | |
5 <requirement type="package" version="2.11.0">R</requirement> | |
6 <requirement type="package" version="1.34.0">bioc_qvalue</requirement> | |
7 </requirements> | |
8 | |
<!-- Runs snpFreq2.pl. Every parameter declared inside the "inTypeCond" conditional is referenced by its fully qualified name, so the template does not depend on unqualified (legacy) parameter lookup. -->
9 <command interpreter="perl"> | |
10 snpFreq2.pl $inTypeCond.inType 0.05 $inTypeCond.input $output | |
11 #if $inTypeCond.inType == "tab" | |
12 $inTypeCond.group1_1 $inTypeCond.group1_2 $inTypeCond.group1_3 | |
13 $inTypeCond.group2_1 $inTypeCond.group2_2 $inTypeCond.group2_3 0.05 | |
14 #else if $inTypeCond.inType == "snp" | |
15 $inTypeCond.group1 $inTypeCond.group2 | |
16 #end if | |
17 </command> | |
18 | |
<!-- Input format selector: "tab" takes one tabular dataset plus six genotype-count columns; "snp" takes a SNP dataset plus two group datasets. -->
19 <inputs> | |
20 <conditional name="inTypeCond"> | |
21 <param name="inType" type="select" label="Format of input"> | |
22 <option value="tab">Alleles pre-counted</option> | |
23 <option value="snp">SNP table</option> | |
24 </param> | |
25 <when value="tab"> | |
26 <param name="input" type="data" format="tabular" label="Dataset"/> | |
27 <param name="group1_1" type="data_column" data_ref="input" label="Column with genotype 1 count for group 1"/> | |
28 <param name="group1_2" type="data_column" data_ref="input" label="Column with genotype 2 count for group 1"/> | |
29 <param name="group1_3" type="data_column" data_ref="input" label="Column with genotype 3 count for group 1"/> | |
30 <param name="group2_1" type="data_column" data_ref="input" label="Column with genotype 1 count for group 2"/> | |
31 <param name="group2_2" type="data_column" data_ref="input" label="Column with genotype 2 count for group 2"/> | |
32 <param name="group2_3" type="data_column" data_ref="input" label="Column with genotype 3 count for group 2"/> | |
33 </when> | |
34 <when value="snp"> | |
35 <param name="input" type="data" format="snp" label="SNP Dataset"/> | |
36 <param name="group1" type="data" format="ind" label="Group 1"/> | |
37 <param name="group2" type="data" format="ind" label="Group 2"/> | |
38 </when> | |
39 </conditional> | |
40 </inputs> | |
41 | |
<!-- Single tabular result dataset, passed to the script as $output. -->
42 <outputs> | |
43 <data name="output" format="tabular"/> | |
44 </outputs> | |
45 | |
<!-- Functional test for the "tab" input format: columns 4-6 hold the group 1 counts and columns 7-9 the group 2 counts of snpFreqInput.txt; the result is compared with snpFreqTestOut.txt. -->
46 <tests> | |
47 <test> | |
48 <param name="inType" value="tab" /> | |
49 <param name="input" ftype="tabular" value="snpFreqInput.txt" dbkey="hg18" /> | |
50 <param name="group1_1" value="4" /> | |
51 <param name="group1_2" value="5" /> | |
52 <param name="group1_3" value="6" /> | |
53 <param name="group2_1" value="7" /> | |
54 <param name="group2_2" value="8" /> | |
55 <param name="group2_3" value="9" /> | |
56 <output name="output" file="snpFreqTestOut.txt" /> | |
57 </test> | |
58 </tests> | |
59 | |
60 <help> | |
61 | |
62 **Dataset formats** | |
63 | |
64 The input is tabular_, with six columns of genotype counts (three per group). The output is also tabular, | |
65 and includes all of the input data plus the additional columns described below. | |
66 (`Dataset missing?`_) | |
67 | |
68 .. _tabular: ${static_path}/formatHelp.html#tab | |
69 .. _Dataset missing?: ${static_path}/formatHelp.html | |
70 | |
71 ----- | |
72 | |
73 **What it does** | |
74 | |
75 This tool performs a basic analysis of bi-allelic SNPs in case-control | |
76 data, using the R statistical environment and Fisher's exact test to | |
77 identify SNPs with a significant difference in the allele frequencies | |
78 between the two groups. The Bioconductor "qvalue" package for R is used to correct for | |
79 multiple testing. | |
80 | |
81 The input file includes counts for each allele combination (AA aa Aa) | |
82 for each group at each SNP position. The assignment of codes (1 2 3) | |
83 to these genotypes is arbitrary, as long as it is consistent for both | |
84 groups. Any other input columns are ignored in the computation, but | |
85 are copied to the output. The output appends eight additional columns, | |
86 namely the minimum expected counts of the three genotypes for each | |
87 group (six columns), followed by the p-value and the q-value. | |
88 | |
89 ----- | |
90 | |
91 **Example** | |
92 | |
93 - input file:: | |
94 | |
95 chr1 210 211 38 4 15 56 0 1 x | |
96 chr1 228 229 55 0 2 56 0 1 x | |
97 chr1 230 231 46 0 11 55 0 2 x | |
98 chr1 234 235 43 0 14 55 0 2 x | |
99 chr1 236 237 55 0 2 13 10 34 x | |
100 chr1 437 438 55 0 2 46 0 11 x | |
101 chr1 439 440 56 0 1 55 0 2 x | |
102 chr1 449 450 56 0 1 13 20 24 x | |
103 chr1 518 519 56 0 1 38 4 15 x | |
104 | |
105 Here the group 1 genotype counts are in columns 4 - 6, while those | |
106 for group 2 are in columns 7 - 9. | |
107 | |
108 Note that the "x" column has no meaning. It was added to this example | |
109 to show that extra columns can be included, and to make it easier | |
110 to see where the new columns are appended in the output. | |
111 | |
112 - output file:: | |
113 | |
114 chr1 210 211 38 4 15 56 0 1 x 47 2 8 47 2 8 1.50219088598917e-05 6.32501425679652e-06 | |
115 chr1 228 229 55 0 2 56 0 1 x 55.5 0 1.5 55.5 0 1.5 1 0.210526315789474 | |
116 chr1 230 231 46 0 11 55 0 2 x 50.5 0 6.5 50.5 0 6.5 0.0155644201009862 0.00409590002657532 | |
117 chr1 234 235 43 0 14 55 0 2 x 49 0 8 49 0 8 0.00210854461554067 0.000739840215979182 | |
118 chr1 236 237 55 0 2 13 10 34 x 34 5 18 34 5 18 6.14613878554783e-17 4.31307984950725e-17 | |
119 chr1 437 438 55 0 2 46 0 11 x 50.5 0 6.5 50.5 0 6.5 0.0155644201009862 0.00409590002657532 | |
120 chr1 439 440 56 0 1 55 0 2 x 55.5 0 1.5 55.5 0 1.5 1 0.210526315789474 | |
121 chr1 449 450 56 0 1 13 20 24 x 34.5 10 12.5 34.5 10 12.5 2.25757007974134e-18 2.37638955762246e-18 | |
122 chr1 518 519 56 0 1 38 4 15 x 47 2 8 47 2 8 1.50219088598917e-05 6.32501425679652e-06 | |
123 | |
124 </help> | |
125 </tool> |