annotate PsiCLASS-1.0.2/samtools-0.1.19/misc/vcfutils.lua @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #!/usr/bin/env luajit
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 -----------------------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 -- BEGIN: routines from klib.lua --
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 -----------------------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 -- Description: getopt() translated from the BSD getopt(); compatible with the default Unix getopt()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 --[[ Example:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 for o, a in os.getopt(arg, 'a:b') do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 print(o, a)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 ]]--
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 function os.getopt(args, ostr)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 local arg, place = nil, 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 return function ()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 if place == 0 then -- update scanning pointer
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 place = 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 if #args == 0 or args[1]:sub(1, 1) ~= '-' then place = 0; return nil end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 if #args[1] >= 2 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 place = place + 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 if args[1]:sub(2, 2) == '-' then -- found "--"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 table.remove(args, 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 place = 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 return nil;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 local optopt = place <= #args[1] and args[1]:sub(place, place) or nil
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 place = place + 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 local oli = optopt and ostr:find(optopt) or nil
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 if optopt == ':' or oli == nil then -- unknown option
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 if optopt == '-' then return nil end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 if place > #args[1] then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 table.remove(args, 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 place = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 return '?';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 oli = oli + 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 if ostr:sub(oli, oli) ~= ':' then -- do not need argument
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 arg = nil;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 if place > #args[1] then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 table.remove(args, 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 place = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 else -- need an argument
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 if place <= #args[1] then -- no white space
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 arg = args[1]:sub(place);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 table.remove(args, 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 if #args == 0 then -- an option requiring argument is the last one
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 place = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 if ostr:sub(1, 1) == ':' then return ':' end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 return '?';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 else arg = args[1] end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 table.remove(args, 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 place = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 return optopt, arg;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 -- Description: string split
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 function string:split(sep, n)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 local a, start = {}, 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 sep = sep or "%s+";
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 repeat
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 local b, e = self:find(sep, start);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 if b == nil then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 table.insert(a, self:sub(start));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 break
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 a[#a+1] = self:sub(start, b - 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 start = e + 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 if n and #a == n then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 table.insert(a, self:sub(start));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 break
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 until start > #self;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 return a;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 -- Description: smart file open
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 function io.xopen(fn, mode)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 mode = mode or 'r';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 if fn == nil then return io.stdin;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 elseif fn == '-' then return (mode == 'r' and io.stdin) or io.stdout;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 elseif fn:sub(-3) == '.gz' then return (mode == 'r' and io.popen('gzip -dc ' .. fn, 'r')) or io.popen('gzip > ' .. fn, 'w');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 elseif fn:sub(-4) == '.bz2' then return (mode == 'r' and io.popen('bzip2 -dc ' .. fn, 'r')) or io.popen('bgzip2 > ' .. fn, 'w');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 else return io.open(fn, mode) end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 -- Description: log gamma function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 -- Required by: math.lbinom()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 -- Reference: AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 function math.lgamma(z)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 local x;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 x = 0.1659470187408462e-06 / (z+7);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 x = x + 0.9934937113930748e-05 / (z+6);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 x = x - 0.1385710331296526 / (z+5);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 x = x + 12.50734324009056 / (z+4);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 x = x - 176.6150291498386 / (z+3);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 x = x + 771.3234287757674 / (z+2);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 x = x - 1259.139216722289 / (z+1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 x = x + 676.5203681218835 / z;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 x = x + 0.9999999999995183;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 return math.log(x) - 5.58106146679532777 - z + (z-0.5) * math.log(z+6.5);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 -- Description: regularized incomplete gamma function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 -- Dependent on: math.lgamma()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 --[[
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 Formulas are taken from Wiki, with additional input from Numerical
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 Recipes in C (for modified Lentz's algorithm) and AS245
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 (http://lib.stat.cmu.edu/apstat/245).
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 A good online calculator is available at:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 http://www.danielsoper.com/statcalc/calc23.aspx
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 It calculates upper incomplete gamma function, which equals
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 math.igamma(s,z,true)*math.exp(math.lgamma(s))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 ]]--
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 function math.igamma(s, z, complement)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 local function _kf_gammap(s, z)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 local sum, x = 1, 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 for k = 1, 100 do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 x = x * z / (s + k);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 sum = sum + x;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 if x / sum < 1e-14 then break end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 return math.exp(s * math.log(z) - z - math.lgamma(s + 1.) + math.log(sum));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 local function _kf_gammaq(s, z)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 local C, D, f, TINY;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 f = 1. + z - s; C = f; D = 0.; TINY = 1e-290;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 -- Modified Lentz's algorithm for computing continued fraction. See Numerical Recipes in C, 2nd edition, section 5.2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 for j = 1, 100 do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 local d;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 local a, b = j * (s - j), j*2 + 1 + z - s;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 D = b + a * D;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 if D < TINY then D = TINY end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 C = b + a / C;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 if C < TINY then C = TINY end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 D = 1. / D;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 d = C * D;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 f = f * d;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 if math.abs(d - 1) < 1e-14 then break end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 return math.exp(s * math.log(z) - z - math.lgamma(s) - math.log(f));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 if complement then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 return ((z <= 1 or z < s) and 1 - _kf_gammap(s, z)) or _kf_gammaq(s, z);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 return ((z <= 1 or z < s) and _kf_gammap(s, z)) or (1 - _kf_gammaq(s, z));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 function math.brent(func, a, b, tol)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 local gold1, gold2, tiny, max_iter = 1.6180339887, 0.3819660113, 1e-20, 100
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 local fa, fb = func(a, data), func(b, data)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 if fb > fa then -- swap, such that f(a) > f(b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 a, b, fa, fb = b, a, fb, fa
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 local c = b + gold1 * (b - a)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 local fc = func(c) -- golden section extrapolation
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 while fb > fc do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 local bound = b + 100.0 * (c - b) -- the farthest point where we want to go
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 local r = (b - a) * (fb - fc)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 local q = (b - c) * (fb - fa)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 if math.abs(q - r) < tiny then -- avoid 0 denominator
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 tmp = q > r and tiny or 0.0 - tiny
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 else tmp = q - r end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 u = b - ((b - c) * q - (b - a) * r) / (2.0 * tmp) -- u is the parabolic extrapolation point
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 if (b > u and u > c) or (b < u and u < c) then -- u lies between b and c
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 fu = func(u)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 if fu < fc then -- (b,u,c) bracket the minimum
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 a, b, fa, fb = b, u, fb, fu
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 break
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 elseif fu > fb then -- (a,b,u) bracket the minimum
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 c, fc = u, fu
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 break
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 u = c + gold1 * (c - b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 fu = func(u) -- golden section extrapolation
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 elseif (c > u and u > bound) or (c < u and u < bound) then -- u lies between c and bound
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 fu = func(u)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 if fu < fc then -- fb > fc > fu
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 b, c, u = c, u, c + gold1 * (c - b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 fb, fc, fu = fc, fu, func(u)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 else -- (b,c,u) bracket the minimum
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 a, b, c = b, c, u
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 fa, fb, fc = fb, fc, fu
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 break
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 elseif (u > bound and bound > c) or (u < bound and bound < c) then -- u goes beyond the bound
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 u = bound
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 fu = func(u)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 else -- u goes the other way around, use golden section extrapolation
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 u = c + gold1 * (c - b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 fu = func(u)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 a, b, c = b, c, u
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 fa, fb, fc = fb, fc, fu
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 if a > c then a, c = c, a end -- swap
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 -- now, a<b<c, fa>fb and fb<fc, move on to Brent's algorithm
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 local e, d = 0, 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 local w, v, fw, fv
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 w, v = b, b
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 fw, fv = fb, fb
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 for iter = 1, max_iter do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 local mid = 0.5 * (a + c)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 local tol1 = tol * math.abs(b) + tiny
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 local tol2 = 2.0 * tol1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 if math.abs(b - mid) <= tol2 - 0.5 * (c - a) then return fb, b end -- found
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 if math.abs(e) > tol1 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 -- related to parabolic interpolation
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 local r = (b - w) * (fb - fv)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 local q = (b - v) * (fb - fw)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 local p = (b - v) * q - (b - w) * r
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 q = 2.0 * (q - r)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 if q > 0.0 then p = 0.0 - p
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 else q = 0.0 - q end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 eold, e = e, d
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 if math.abs(p) >= math.abs(0.5 * q * eold) or p <= q * (a - b) or p >= q * (c - b) then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 e = b >= mid and a - b or c - b
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 d = gold2 * e
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 d, u = p / q, b + d -- actual parabolic interpolation happens here
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 if u - a < tol2 or c - u < tol2 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 d = mid > b and tol1 or 0.0 - tol1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 else -- golden section interpolation
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 e = b >= min and a - b or c - b
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 d = gold2 * e
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 u = fabs(d) >= tol1 and b + d or b + (d > 0.0 and tol1 or -tol1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 fu = func(u)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 if fu <= fb then -- u is the minimum point so far
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 if u >= b then a = b
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 else c = b end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 v, w, b = w, b, u
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 fv, fw, fb = fw, fb, fu
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 else -- adjust (a,c) and (u,v,w)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 if u < b then a = u
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 else c = u end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 if fu <= fw or w == b then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 v, w = w, u
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 fv, fw = fw, fu
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 elseif fu <= fv or v == b or v == w then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 v, fv = u, fu;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 return fb, b
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 matrix = {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 -- Description: chi^2 test for contingency tables
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 -- Dependent on: math.igamma()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 function matrix.chi2(a)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 if #a == 2 and #a[1] == 2 then -- 2x2 table
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 local x, z
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 x = (a[1][1] + a[1][2]) * (a[2][1] + a[2][2]) * (a[1][1] + a[2][1]) * (a[1][2] + a[2][2])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 if x == 0 then return 0, 1, false end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 z = a[1][1] * a[2][2] - a[1][2] * a[2][1]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 z = (a[1][1] + a[1][2] + a[2][1] + a[2][2]) * z * z / x
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 return z, math.igamma(.5, .5 * z, true), true
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 else -- generic table
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 local rs, cs, n, m, N, z = {}, {}, #a, #a[1], 0, 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 for i = 1, n do rs[i] = 0 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 for j = 1, m do cs[j] = 0 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 for i = 1, n do -- compute column sum and row sum
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 for j = 1, m do cs[j], rs[i] = cs[j] + a[i][j], rs[i] + a[i][j] end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285 for i = 1, n do N = N + rs[i] end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 for i = 1, n do -- compute the chi^2 statistics
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 for j = 1, m do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 local E = rs[i] * cs[j] / N;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 z = z + (a[i][j] - E) * (a[i][j] - E) / E
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 return z, math.igamma(.5 * (n-1) * (m-1), .5 * z, true), true;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 ---------------------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 -- END: routines from klib.lua --
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 ---------------------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301 --------------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 -- BEGIN: misc routines --
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 --------------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 -- precompute an array for PL->probability conversion
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 -- @param m maximum PL
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 function algo_init_q2p(m)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 local q2p = {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 for i = 0, m do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 q2p[i] = math.pow(10, -i / 10)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 return q2p
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 -- given the haplotype frequency, compute r^2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 -- @param f 4 haplotype frequencies; f[] is 0-indexed.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 -- @return r^2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 function algo_r2(f)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 local p = { f[0] + f[1], f[0] + f[2] }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 local D = f[0] * f[3] - f[1] * f[2]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 return (p[1] == 0 or p[2] == 0 or 1-p[1] == 0 or 1-p[2] == 0) and 0 or D * D / (p[1] * p[2] * (1 - p[1]) * (1 - p[2]))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 -- parse a VCF line to get PL
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325 -- @param q2p is computed by algo_init_q2p()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 function text_parse_pl(t, q2p, parse_GT)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 parse_GT = parse_GT == nil and true or false
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 local ht, gt, pl = {}, {}, {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 local s, j0 = t[9]:split(':'), 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 for j = 1, #s do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 if s[j] == 'PL' then j0 = j break end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 local has_GT = (s[1] == 'GT' and parse_GT) and true or false
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 for i = 10, #t do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 if j0 > 0 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 local s = t[i]:split(':')
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 local a, b = 1, s[j0]:find(',')
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 pl[#pl+1] = q2p[tonumber(s[j0]:sub(a, b - 1))]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 a, b = b + 1, s[j0]:find(',', b + 1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 pl[#pl+1] = q2p[tonumber(s[j0]:sub(a, b - 1))]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 a, b = b + 1, s[j0]:find(',', b + 1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 pl[#pl+1] = q2p[tonumber(s[j0]:sub(a, (b and b - 1) or nil))]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 if has_GT then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 if t[i]:sub(1, 1) ~= '.' then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 local g = tonumber(t[i]:sub(1, 1)) + tonumber(t[i]:sub(3, 3));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 gt[#gt+1] = 1e-6; gt[#gt+1] = 1e-6; gt[#gt+1] = 1e-6
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 gt[#gt - 2 + g] = 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 ht[#ht+1] = tonumber(t[i]:sub(1, 1)); ht[#ht+1] = tonumber(t[i]:sub(3, 3));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 gt[#gt+1] = 1; gt[#gt+1] = 1; gt[#gt+1] = 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 ht[#ht+1] = -1; ht[#ht+1] = -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 -- print(t[i], pl[#pl-2], pl[#pl-1], pl[#pl], gt[#gt-2], gt[#gt-1], gt[#gt])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 if #pl == 0 then pl = nil end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 local x = has_GT and { t[1], t[2], ht, gt, pl } or { t[1], t[2], nil, nil, pl }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 return x
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 -- Infer haplotype frequency
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 -- @param pdg genotype likelihoods P(D|g) generated by text_parse_pl(). pdg[] is 1-indexed.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 -- @param eps precision [1e-5]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 -- @return 2-locus haplotype frequencies, 0-indexed array
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 function algo_hapfreq2(pdg, eps)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 eps = eps or 1e-5
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 local n, f = #pdg[1] / 3, {[0]=0.25, 0.25, 0.25, 0.25}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 for iter = 1, 100 do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 local F = {[0]=0, 0, 0, 0}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371 for i = 0, n - 1 do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372 local p1, p2 = {[0]=pdg[1][i*3+1], pdg[1][i*3+2], pdg[1][i*3+3]}, {[0]=pdg[2][i*3+1], pdg[2][i*3+2], pdg[2][i*3+3]}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 local u = { [0]=
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 f[0] * (f[0] * p1[0] * p2[0] + f[1] * p1[0] * p2[1] + f[2] * p1[1] * p2[0] + f[3] * p1[1] * p2[1]),
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 f[1] * (f[0] * p1[0] * p2[1] + f[1] * p1[0] * p2[2] + f[2] * p1[1] * p2[1] + f[3] * p1[1] * p2[2]),
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 f[2] * (f[0] * p1[1] * p2[0] + f[1] * p1[1] * p2[1] + f[2] * p1[2] * p2[0] + f[3] * p1[2] * p2[1]),
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 f[3] * (f[0] * p1[1] * p2[1] + f[1] * p1[1] * p2[2] + f[2] * p1[2] * p2[1] + f[3] * p1[2] * p2[2])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 local s = u[0] + u[1] + u[2] + u[3]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 s = 1 / (s * n)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 F[0] = F[0] + u[0] * s
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 F[1] = F[1] + u[1] * s
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 F[2] = F[2] + u[2] * s
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384 F[3] = F[3] + u[3] * s
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 local e = 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 for k = 0, 3 do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388 e = math.abs(f[k] - F[k]) > e and math.abs(f[k] - F[k]) or e
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 for k = 0, 3 do f[k] = F[k] end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 if e < eps then break end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 -- print(f[0], f[1], f[2], f[3])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394 return f
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 ------------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 -- END: misc routines --
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399 ------------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 ---------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 -- BEGIN: commands --
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 ---------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 -- CMD vcf2bgl: convert PL tagged VCF to Beagle input --
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 function cmd_vcf2bgl()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 if #arg == 0 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 print("\nUsage: vcf2bgl.lua <in.vcf>")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 print("\nNB: This command finds PL by matching /(\\d+),(\\d+),(\\d+)/.\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 os.exit(1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 local lookup = {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 for i = 0, 10000 do lookup[i] = string.format("%.4f", math.pow(10, -i/10)) end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 local fp = io.xopen(arg[1])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 for l in fp:lines() do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419 if l:sub(1, 2) == '##' then -- meta lines; do nothing
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420 elseif l:sub(1, 1) == '#' then -- sample lines
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 local t, s = l:split('\t'), {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 for i = 10, #t do s[#s+1] = t[i]; s[#s+1] = t[i]; s[#s+1] = t[i] end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 print('marker', 'alleleA', 'alleleB', table.concat(s, '\t'))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424 else -- data line
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 local t = l:split('\t');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 if t[5] ~= '.' and t[5]:find(",") == nil and #t[5] == 1 and #t[4] == 1 then -- biallic SNP
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 local x, z = -1, {};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428 if t[9]:find('PL') then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 for i = 10, #t do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430 local AA, Aa, aa = t[i]:match('(%d+),(%d+),(%d+)')
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431 AA = tonumber(AA); Aa = tonumber(Aa); aa = tonumber(aa);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 if AA ~= nil then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433 z[#z+1] = lookup[AA]; z[#z+1] = lookup[Aa]; z[#z+1] = lookup[aa];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434 else z[#z+1] = 1; z[#z+1] = 1; z[#z+1] = 1; end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436 print(t[1]..':'..t[2], t[4], t[5], table.concat(z, '\t'))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 elseif t[9]:find('GL') then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438 print('Error: not implemented')
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
439 os.exit(1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
440 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
441 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 fp:close()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 -- CMD bgl2vcf: convert Beagle output to VCF
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448 function cmd_bgl2vcf()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449 if #arg < 2 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450 print('Usage: bgl2vcf.lua <in.phased> <in.gprobs>')
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451 os.exit(1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454 local fpp = io.xopen(arg[1]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455 local fpg = io.xopen(arg[2]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 for lg in fpg:lines() do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457 local tp, tg, a = fpp:read():split('%s'), lg:split('%s', 4), {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458 if tp[1] == 'I' then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459 for i = 3, #tp, 2 do a[#a+1] = tp[i] end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460 print('#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT', table.concat(a, '\t'))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462 local chr, pos = tg[1]:match('(%S+):(%d+)$')
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463 a = {chr, pos, '.', tg[2], tg[3], 30, '.', '.', 'GT'}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
464 for i = 3, #tp, 2 do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
465 a[#a+1] = ((tp[i] == tg[2] and 0) or 1) .. '|' .. ((tp[i+1] == tg[2] and 0) or 1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
466 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
467 print(table.concat(a, '\t'))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
468 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
469 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
470 fpg:close(); fpp:close();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
471 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
472
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
473 -- CMD freq: count alleles in each population
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
474 function cmd_freq()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
475 -- parse the command line
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
476 local site_only = true; -- print site allele frequency or not
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477 for c in os.getopt(arg, 's') do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478 if c == 's' then site_only = false end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480 if #arg == 0 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481 print("\nUsage: vcfutils.lua freq [-s] <in.vcf> [samples.txt]\n")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482 print("NB: 1) This command only considers biallelic variants.")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 print(" 2) Apply '-s' to get the allele frequency spectrum.")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 print(" 3) 'samples.txt' is TAB-delimited with each line consisting of sample and population.")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 print("")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486 os.exit(1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 -- read the sample-population pairs
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490 local pop, sample = {}, {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 if #arg > 1 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 local fp = io.xopen(arg[2]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493 for l in fp:lines() do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494 local s, p = l:match("^(%S+)%s+(%S+)"); -- sample, population pair
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 sample[s] = p; -- FIXME: check duplications
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 if pop[p] then table.insert(pop[p], s)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497 else pop[p] = {s} end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499 fp:close();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501 pop['NA'] = {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503 -- parse VCF
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 fp = (#arg >= 2 and io.xopen(arg[1])) or io.stdin;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
505 local col, cnt = {}, {};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
506 for k in pairs(pop) do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
507 col[k], cnt[k] = {}, {[0]=0};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
508 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
509 for l in fp:lines() do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
510 if l:sub(1, 2) == '##' then -- meta lines; do nothing
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
511 elseif l:sub(1, 1) == '#' then -- the sample line
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
512 local t, del_NA = l:split('\t'), true;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
513 for i = 10, #t do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
514 local k = sample[t[i]]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
515 if k == nil then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
516 k, del_NA = 'NA', false
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
517 table.insert(pop[k], t[i])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
518 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
519 table.insert(col[k], i);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
520 table.insert(cnt[k], 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
521 table.insert(cnt[k], 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
522 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
523 if del_NA then pop['NA'], col['NA'], cnt['NA'] = nil, nil, nil end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
524 else -- data lines
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
525 local t = l:split('\t');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
526 if t[5] ~= '.' and t[5]:find(",") == nil then -- biallic
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
527 if site_only == true then io.write(t[1], '\t', t[2], '\t', t[4], '\t', t[5]) end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
528 for k, v in pairs(col) do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
529 local ac, an = 0, 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
530 for i = 1, #v do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
531 local a1, a2 = t[v[i]]:match("^(%d).(%d)");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
532 if a1 ~= nil then ac, an = ac + a1 + a2, an + 2 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
533 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
534 if site_only == true then io.write('\t', k, ':', an, ':', ac) end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
535 if an == #cnt[k] then cnt[k][ac] = cnt[k][ac] + 1 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
536 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
537 if site_only == true then io.write('\n') end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
538 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
539 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
540 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
541 fp:close();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
542
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
543 -- print
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
544 if site_only == false then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
545 for k, v in pairs(cnt) do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
546 io.write(k .. "\t" .. #v);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
547 for i = 0, #v do io.write("\t" .. v[i]) end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
548 io.write('\n');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
549 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
550 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
551 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
552
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
553 function cmd_vcf2chi2()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
554 if #arg < 3 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
555 print("Usage: vcfutils.lua vcf2chi2 <in.vcf> <group1.list> <group2.list>");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
556 os.exit(1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
557 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
558
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
559 local g = {};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
560
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
561 -- read the list of groups
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
562 local fp = io.xopen(arg[2]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
563 for l in fp:lines() do local x = l:match("^(%S+)"); g[x] = 1 end -- FIXME: check duplicate
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
564 fp:close()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
565 fp = io.xopen(arg[3]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
566 for l in fp:lines() do local x = l:match("^(%S+)"); g[x] = 2 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
567 fp:close()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
568
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
569 -- process VCF
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
570 fp = io.xopen(arg[1])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
571 local h = {{}, {}}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
572 for l in fp:lines() do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
573 if l:sub(1, 2) == '##' then print(l) -- meta lines; do nothing
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
574 elseif l:sub(1, 1) == '#' then -- sample lines
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
575 local t = l:split('\t');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
576 for i = 10, #t do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
577 if g[t[i]] == 1 then table.insert(h[1], i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
578 elseif g[t[i]] == 2 then table.insert(h[2], i) end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
579 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
580 while #t > 8 do table.remove(t) end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
581 print(table.concat(t, "\t"))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
582 else -- data line
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
583 local t = l:split('\t');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
584 if t[5] ~= '.' and t[5]:find(",") == nil then -- biallic
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
585 local a = {{0, 0}, {0, 0}}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
586 for i = 1, 2 do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
587 for _, k in pairs(h[i]) do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
588 if t[k]:find("^0.0") then a[i][1] = a[i][1] + 2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
589 elseif t[k]:find("^1.1") then a[i][2] = a[i][2] + 2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
590 elseif t[k]:find("^0.1") or t[k]:find("^1.0") then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
591 a[i][1], a[i][2] = a[i][1] + 1, a[i][2] + 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
592 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
593 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
594 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
595 local chi2, p, succ = matrix.chi2(a);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
596 while #t > 8 do table.remove(t) end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
597 --print(a[1][1], a[1][2], a[2][1], a[2][2], chi2, p);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
598 if succ then print(table.concat(t, "\t") .. ";PCHI2=" .. string.format("%.3g", p)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
599 .. string.format(';AF1=%.4g;AF2=%.4g,%.4g', (a[1][2]+a[2][2]) / (a[1][1]+a[1][2]+a[2][1]+a[2][2]),
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
600 a[1][2]/(a[1][1]+a[1][2]), a[2][2]/(a[2][1]+a[2][2])))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
601 else print(table.concat(t, "\t")) end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
602 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
603 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
604 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
605 fp:close()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
606 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
607
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
608 -- CMD: compute r^2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
609 function cmd_r2()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
610 local w, is_ht, is_gt = 1, false, false
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
611 for o, a in os.getopt(arg, 'w:hg') do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
612 if o == 'w' then w = tonumber(a)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
613 elseif o == 'h' then is_ht, is_gt = true, true
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
614 elseif o == 'g' then is_gt = true
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
615 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
616 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
617 if #arg == 0 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
618 print("Usage: vcfutils.lua r2 [-hg] [-w 1] <in.vcf>")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
619 os.exit(1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
620 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
621 local stack, fp, q2p = {}, io.xopen(arg[1]), algo_init_q2p(1023)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
622 for l in fp:lines() do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
623 if l:sub(1, 1) ~= '#' then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
624 local t = l:split('\t')
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
625 local x = text_parse_pl(t, q2p)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
626 if #t[5] == 1 and t[5] ~= '.' then -- biallelic
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
627 local r2 = {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
628 for k = 1, w do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
629 if is_gt == false then -- use PL
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
630 if stack[k] then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
631 local pdg = { stack[k][5], x[5] }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
632 r2[#r2+1] = algo_r2(algo_hapfreq2(pdg))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
633 else r2[#r2+1] = 0 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
634 elseif is_ht == false then -- use unphased GT
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
635 if stack[k] then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
636 local pdg = { stack[k][4], x[4] }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
637 r2[#r2+1] = algo_r2(algo_hapfreq2(pdg))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
638 else r2[#r2+1] = 0 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
639 else -- use phased GT
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
640 if stack[k] then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
641 local f, ht = { [0]=0, 0, 0, 0 }, { stack[k][3], x[3] }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
642 for i = 1, #ht[1] do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
643 local j = ht[1][i] * 2 + ht[2][i]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
644 f[j] = f[j] + 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
645 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
646 local sum = f[0] + f[1] + f[2] + f[3]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
647 for k = 0, 3 do f[k] = f[k] / sum end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
648 r2[#r2+1] = algo_r2(f)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
649 else r2[#r2+1] = 0 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
650 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
651 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
652 for k = 1, #r2 do
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
653 r2[k] = string.format('%.3f', r2[k])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
654 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
655 print(x[1], x[2], table.concat(r2, '\t'))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
656 if #stack == w then table.remove(stack, 1) end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
657 stack[#stack+1] = x
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
658 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
659 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
660 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
661 fp:close()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
662 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
663
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
664 -------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
665 -- END: commands --
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
666 -------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
667
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
668
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
669 -------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
670 -- MAIN FUNCTION --
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
671 -------------------
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
672
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
673 if #arg == 0 then
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
674 print("\nUsage: vcfutils.lua <command> <arguments>\n")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
675 print("Command: freq count biallelic alleles in each population")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
676 print(" r2 compute r^2")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
677 print(" vcf2chi2 compute 1-degree chi-square between two groups of samples")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
678 print(" vcf2bgl convert PL annotated VCF to Beagle input")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
679 print(" bgl2vcf convert Beagle input to VCF")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
680 print("")
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
681 os.exit(1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
682 end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
683
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
684 local cmd = arg[1]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
685 table.remove(arg, 1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
686 if cmd == 'vcf2bgl' then cmd_vcf2bgl()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
687 elseif cmd == 'bgl2vcf' then cmd_bgl2vcf()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
688 elseif cmd == 'freq' then cmd_freq()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
689 elseif cmd == 'r2' then cmd_r2()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
690 elseif cmd == 'vcf2chi2' then cmd_vcf2chi2()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
691 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
692 print('ERROR: unknown command "' .. cmd .. '"')
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
693 os.exit(1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
694 end