annotate tools/rgenetics/plinkbinJZ.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python2.4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 import optparse,os,subprocess,gzip,struct,time,commands
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 from array import array
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 #from AIMS import util
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 #from pga import util as pgautil
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 __FILE_ID__ = '$Id: plinkbinJZ.py,v 1.14 2009/07/13 20:16:50 rejpz Exp $'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 VERBOSE = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 MISSING_ALLELES = set(['N', '0', '.', '-',''])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 AUTOSOMES = set(range(1, 23) + [str(c) for c in range(1, 23)])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 MAGIC_BYTE1 = '00110110'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 MAGIC_BYTE2 = '11011000'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 FORMAT_SNP_MAJOR_BYTE = '10000000'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 FORMAT_IND_MAJOR_BYTE = '00000000'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 MAGIC1 = (0, 3, 1, 2)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 MAGIC2 = (3, 1, 2, 0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 FORMAT_SNP_MAJOR = (2, 0, 0, 0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 FORMAT_IND_MAJOR = (0, 0, 0, 0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 HEADER_LENGTH = 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 HOM0 = 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 HOM1 = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 MISS = 2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 HET = 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 HOM0_GENO = (0, 0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 HOM1_GENO = (1, 1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 HET_GENO = (0, 1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 MISS_GENO = (-9, -9)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 GENO_TO_GCODE = {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 HOM0_GENO: HOM0,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 HET_GENO: HET,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 HOM1_GENO: HOM1,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 MISS_GENO: MISS,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 CHROM_REPLACE = {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 'X': '23',
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 'Y': '24',
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 'XY': '25',
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 'MT': '26',
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 'M': '26',
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 MAP_LINE_EXCEPTION_TEXT = """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 One or more lines in the *.map file has only three fields.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 The line was:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 %s
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 If you are running rgGRR through EPMP, this is usually a
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 sign that you are using an old version of the map file.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 You can correct the problem by re-running Subject QC. If
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 you have already tried this, please contact the developers,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 or file a bug.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 INT_TO_GCODE = {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 0: array('i', (0, 0, 0, 0)), 1: array('i', (2, 0, 0, 0)), 2: array('i', (1, 0, 0, 0)), 3: array('i', (3, 0, 0, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 4: array('i', (0, 2, 0, 0)), 5: array('i', (2, 2, 0, 0)), 6: array('i', (1, 2, 0, 0)), 7: array('i', (3, 2, 0, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 8: array('i', (0, 1, 0, 0)), 9: array('i', (2, 1, 0, 0)), 10: array('i', (1, 1, 0, 0)), 11: array('i', (3, 1, 0, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 12: array('i', (0, 3, 0, 0)), 13: array('i', (2, 3, 0, 0)), 14: array('i', (1, 3, 0, 0)), 15: array('i', (3, 3, 0, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 16: array('i', (0, 0, 2, 0)), 17: array('i', (2, 0, 2, 0)), 18: array('i', (1, 0, 2, 0)), 19: array('i', (3, 0, 2, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 20: array('i', (0, 2, 2, 0)), 21: array('i', (2, 2, 2, 0)), 22: array('i', (1, 2, 2, 0)), 23: array('i', (3, 2, 2, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 24: array('i', (0, 1, 2, 0)), 25: array('i', (2, 1, 2, 0)), 26: array('i', (1, 1, 2, 0)), 27: array('i', (3, 1, 2, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 28: array('i', (0, 3, 2, 0)), 29: array('i', (2, 3, 2, 0)), 30: array('i', (1, 3, 2, 0)), 31: array('i', (3, 3, 2, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 32: array('i', (0, 0, 1, 0)), 33: array('i', (2, 0, 1, 0)), 34: array('i', (1, 0, 1, 0)), 35: array('i', (3, 0, 1, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 36: array('i', (0, 2, 1, 0)), 37: array('i', (2, 2, 1, 0)), 38: array('i', (1, 2, 1, 0)), 39: array('i', (3, 2, 1, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 40: array('i', (0, 1, 1, 0)), 41: array('i', (2, 1, 1, 0)), 42: array('i', (1, 1, 1, 0)), 43: array('i', (3, 1, 1, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 44: array('i', (0, 3, 1, 0)), 45: array('i', (2, 3, 1, 0)), 46: array('i', (1, 3, 1, 0)), 47: array('i', (3, 3, 1, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 48: array('i', (0, 0, 3, 0)), 49: array('i', (2, 0, 3, 0)), 50: array('i', (1, 0, 3, 0)), 51: array('i', (3, 0, 3, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 52: array('i', (0, 2, 3, 0)), 53: array('i', (2, 2, 3, 0)), 54: array('i', (1, 2, 3, 0)), 55: array('i', (3, 2, 3, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 56: array('i', (0, 1, 3, 0)), 57: array('i', (2, 1, 3, 0)), 58: array('i', (1, 1, 3, 0)), 59: array('i', (3, 1, 3, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 60: array('i', (0, 3, 3, 0)), 61: array('i', (2, 3, 3, 0)), 62: array('i', (1, 3, 3, 0)), 63: array('i', (3, 3, 3, 0)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 64: array('i', (0, 0, 0, 2)), 65: array('i', (2, 0, 0, 2)), 66: array('i', (1, 0, 0, 2)), 67: array('i', (3, 0, 0, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 68: array('i', (0, 2, 0, 2)), 69: array('i', (2, 2, 0, 2)), 70: array('i', (1, 2, 0, 2)), 71: array('i', (3, 2, 0, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 72: array('i', (0, 1, 0, 2)), 73: array('i', (2, 1, 0, 2)), 74: array('i', (1, 1, 0, 2)), 75: array('i', (3, 1, 0, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 76: array('i', (0, 3, 0, 2)), 77: array('i', (2, 3, 0, 2)), 78: array('i', (1, 3, 0, 2)), 79: array('i', (3, 3, 0, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 80: array('i', (0, 0, 2, 2)), 81: array('i', (2, 0, 2, 2)), 82: array('i', (1, 0, 2, 2)), 83: array('i', (3, 0, 2, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 84: array('i', (0, 2, 2, 2)), 85: array('i', (2, 2, 2, 2)), 86: array('i', (1, 2, 2, 2)), 87: array('i', (3, 2, 2, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 88: array('i', (0, 1, 2, 2)), 89: array('i', (2, 1, 2, 2)), 90: array('i', (1, 1, 2, 2)), 91: array('i', (3, 1, 2, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 92: array('i', (0, 3, 2, 2)), 93: array('i', (2, 3, 2, 2)), 94: array('i', (1, 3, 2, 2)), 95: array('i', (3, 3, 2, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 96: array('i', (0, 0, 1, 2)), 97: array('i', (2, 0, 1, 2)), 98: array('i', (1, 0, 1, 2)), 99: array('i', (3, 0, 1, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 100: array('i', (0, 2, 1, 2)), 101: array('i', (2, 2, 1, 2)), 102: array('i', (1, 2, 1, 2)), 103: array('i', (3, 2, 1, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 104: array('i', (0, 1, 1, 2)), 105: array('i', (2, 1, 1, 2)), 106: array('i', (1, 1, 1, 2)), 107: array('i', (3, 1, 1, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 108: array('i', (0, 3, 1, 2)), 109: array('i', (2, 3, 1, 2)), 110: array('i', (1, 3, 1, 2)), 111: array('i', (3, 3, 1, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 112: array('i', (0, 0, 3, 2)), 113: array('i', (2, 0, 3, 2)), 114: array('i', (1, 0, 3, 2)), 115: array('i', (3, 0, 3, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 116: array('i', (0, 2, 3, 2)), 117: array('i', (2, 2, 3, 2)), 118: array('i', (1, 2, 3, 2)), 119: array('i', (3, 2, 3, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 120: array('i', (0, 1, 3, 2)), 121: array('i', (2, 1, 3, 2)), 122: array('i', (1, 1, 3, 2)), 123: array('i', (3, 1, 3, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 124: array('i', (0, 3, 3, 2)), 125: array('i', (2, 3, 3, 2)), 126: array('i', (1, 3, 3, 2)), 127: array('i', (3, 3, 3, 2)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 128: array('i', (0, 0, 0, 1)), 129: array('i', (2, 0, 0, 1)), 130: array('i', (1, 0, 0, 1)), 131: array('i', (3, 0, 0, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 132: array('i', (0, 2, 0, 1)), 133: array('i', (2, 2, 0, 1)), 134: array('i', (1, 2, 0, 1)), 135: array('i', (3, 2, 0, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 136: array('i', (0, 1, 0, 1)), 137: array('i', (2, 1, 0, 1)), 138: array('i', (1, 1, 0, 1)), 139: array('i', (3, 1, 0, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 140: array('i', (0, 3, 0, 1)), 141: array('i', (2, 3, 0, 1)), 142: array('i', (1, 3, 0, 1)), 143: array('i', (3, 3, 0, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 144: array('i', (0, 0, 2, 1)), 145: array('i', (2, 0, 2, 1)), 146: array('i', (1, 0, 2, 1)), 147: array('i', (3, 0, 2, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 148: array('i', (0, 2, 2, 1)), 149: array('i', (2, 2, 2, 1)), 150: array('i', (1, 2, 2, 1)), 151: array('i', (3, 2, 2, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 152: array('i', (0, 1, 2, 1)), 153: array('i', (2, 1, 2, 1)), 154: array('i', (1, 1, 2, 1)), 155: array('i', (3, 1, 2, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 156: array('i', (0, 3, 2, 1)), 157: array('i', (2, 3, 2, 1)), 158: array('i', (1, 3, 2, 1)), 159: array('i', (3, 3, 2, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 160: array('i', (0, 0, 1, 1)), 161: array('i', (2, 0, 1, 1)), 162: array('i', (1, 0, 1, 1)), 163: array('i', (3, 0, 1, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 164: array('i', (0, 2, 1, 1)), 165: array('i', (2, 2, 1, 1)), 166: array('i', (1, 2, 1, 1)), 167: array('i', (3, 2, 1, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 168: array('i', (0, 1, 1, 1)), 169: array('i', (2, 1, 1, 1)), 170: array('i', (1, 1, 1, 1)), 171: array('i', (3, 1, 1, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 172: array('i', (0, 3, 1, 1)), 173: array('i', (2, 3, 1, 1)), 174: array('i', (1, 3, 1, 1)), 175: array('i', (3, 3, 1, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 176: array('i', (0, 0, 3, 1)), 177: array('i', (2, 0, 3, 1)), 178: array('i', (1, 0, 3, 1)), 179: array('i', (3, 0, 3, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 180: array('i', (0, 2, 3, 1)), 181: array('i', (2, 2, 3, 1)), 182: array('i', (1, 2, 3, 1)), 183: array('i', (3, 2, 3, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 184: array('i', (0, 1, 3, 1)), 185: array('i', (2, 1, 3, 1)), 186: array('i', (1, 1, 3, 1)), 187: array('i', (3, 1, 3, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 188: array('i', (0, 3, 3, 1)), 189: array('i', (2, 3, 3, 1)), 190: array('i', (1, 3, 3, 1)), 191: array('i', (3, 3, 3, 1)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 192: array('i', (0, 0, 0, 3)), 193: array('i', (2, 0, 0, 3)), 194: array('i', (1, 0, 0, 3)), 195: array('i', (3, 0, 0, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 196: array('i', (0, 2, 0, 3)), 197: array('i', (2, 2, 0, 3)), 198: array('i', (1, 2, 0, 3)), 199: array('i', (3, 2, 0, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 200: array('i', (0, 1, 0, 3)), 201: array('i', (2, 1, 0, 3)), 202: array('i', (1, 1, 0, 3)), 203: array('i', (3, 1, 0, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 204: array('i', (0, 3, 0, 3)), 205: array('i', (2, 3, 0, 3)), 206: array('i', (1, 3, 0, 3)), 207: array('i', (3, 3, 0, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 208: array('i', (0, 0, 2, 3)), 209: array('i', (2, 0, 2, 3)), 210: array('i', (1, 0, 2, 3)), 211: array('i', (3, 0, 2, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 212: array('i', (0, 2, 2, 3)), 213: array('i', (2, 2, 2, 3)), 214: array('i', (1, 2, 2, 3)), 215: array('i', (3, 2, 2, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 216: array('i', (0, 1, 2, 3)), 217: array('i', (2, 1, 2, 3)), 218: array('i', (1, 1, 2, 3)), 219: array('i', (3, 1, 2, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 220: array('i', (0, 3, 2, 3)), 221: array('i', (2, 3, 2, 3)), 222: array('i', (1, 3, 2, 3)), 223: array('i', (3, 3, 2, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 224: array('i', (0, 0, 1, 3)), 225: array('i', (2, 0, 1, 3)), 226: array('i', (1, 0, 1, 3)), 227: array('i', (3, 0, 1, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 228: array('i', (0, 2, 1, 3)), 229: array('i', (2, 2, 1, 3)), 230: array('i', (1, 2, 1, 3)), 231: array('i', (3, 2, 1, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 232: array('i', (0, 1, 1, 3)), 233: array('i', (2, 1, 1, 3)), 234: array('i', (1, 1, 1, 3)), 235: array('i', (3, 1, 1, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 236: array('i', (0, 3, 1, 3)), 237: array('i', (2, 3, 1, 3)), 238: array('i', (1, 3, 1, 3)), 239: array('i', (3, 3, 1, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 240: array('i', (0, 0, 3, 3)), 241: array('i', (2, 0, 3, 3)), 242: array('i', (1, 0, 3, 3)), 243: array('i', (3, 0, 3, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 244: array('i', (0, 2, 3, 3)), 245: array('i', (2, 2, 3, 3)), 246: array('i', (1, 2, 3, 3)), 247: array('i', (3, 2, 3, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 248: array('i', (0, 1, 3, 3)), 249: array('i', (2, 1, 3, 3)), 250: array('i', (1, 1, 3, 3)), 251: array('i', (3, 1, 3, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 252: array('i', (0, 3, 3, 3)), 253: array('i', (2, 3, 3, 3)), 254: array('i', (1, 3, 3, 3)), 255: array('i', (3, 3, 3, 3)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 GCODE_TO_INT = dict([(tuple(v),k) for (k,v) in INT_TO_GCODE.items()])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 ### Exceptions
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 class DuplicateMarkerInMapFile(Exception): pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 class MapLineTooShort(Exception): pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 class ThirdAllele(Exception): pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 class PedError(Exception): pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 class BadMagic(Exception):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 """ Raised when one of the MAGIC bytes in a bed file does not match
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 class BedError(Exception):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 """ Raised when parsing a bed file runs into problems
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 class UnknownGenocode(Exception):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 """ Raised when we get a 2-bit genotype that is undecipherable (is it possible?)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 class UnknownGeno(Exception): pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 ### Utility functions
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 def timenow():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 """return current time as a string
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 def ceiling(n, k):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 ''' Return the least multiple of k which is greater than n
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 '''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 m = n % k
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 if m == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 return n
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168 return n + k - m
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 def nbytes(n):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 ''' Return the number of bytes required for n subjects
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172 '''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 return 2*ceiling(n, 4)/8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175 ### Primary module functionality
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 class LPed:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 """ The uber-class for processing the Linkage-format *.ped/*.map files
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179 def __init__(self, base):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 self.base = base
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181 self._ped = Ped('%s.ped' % (self.base))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182 self._map = Map('%s.map' % (self.base))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184 self._markers = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
185 self._ordered_markers = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
186 self._marker_allele_lookup = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
187 self._autosomal_indices = set()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
188
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189 self._subjects = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
190 self._ordered_subjects = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
191
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
192 self._genotypes = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
193
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
194 def parse(self):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
195 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
196 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
197 if VERBOSE: print 'plinkbinJZ: Analysis started: %s' % (timenow())
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
198 self._map.parse()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
199 self._markers = self._map._markers
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
200 self._ordered_markers = self._map._ordered_markers
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
201 self._autosomal_indices = self._map._autosomal_indices
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
202
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
203 self._ped.parse(self._ordered_markers)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
204 self._subjects = self._ped._subjects
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
205 self._ordered_subjects = self._ped._ordered_subjects
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
206 self._genotypes = self._ped._genotypes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
207 self._marker_allele_lookup = self._ped._marker_allele_lookup
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
208
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
209 ### Adjust self._markers based on the allele information
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
210 ### we got from parsing the ped file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
211 for m, name in enumerate(self._ordered_markers):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
212 a1, a2 = self._marker_allele_lookup[m][HET]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
213 self._markers[name][-2] = a1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
214 self._markers[name][-1] = a2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
215 if VERBOSE: print 'plinkbinJZ: Analysis finished: %s' % (timenow())
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
216
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
217 def getSubjectInfo(self, fid, oiid):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
218 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
219 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
220 return self._subject_info[(fid, oiid)]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
221
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
222 def getSubjectInfoByLine(self, line):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
223 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
224 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
225 return self._subject_info[self._ordered_subjects[line]]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
226
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
227 def getGenotypesByIndices(self, s, mlist, format):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
228 """ needed for grr if lped - deprecated but..
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
229 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
230 mlist = dict(zip(mlist,[True,]*len(mlist))) # hash quicker than 'in' ?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
231 raw_array = array('i', [row[s] for m,row in enumerate(self._genotypes) if mlist.get(m,None)])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
232 if format == 'raw':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
233 return raw_array
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
234 elif format == 'ref':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
235 result = array('i', [0]*len(mlist))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
236 for m, gcode in enumerate(raw_array):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
237 if gcode == HOM0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
238 nref = 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
239 elif gcode == HET:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
240 nref = 2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
241 elif gcode == HOM1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
242 nref = 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
243 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
244 nref = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
245 result[m] = nref
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
246 return result
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
247 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
248 result = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
249 for m, gcode in enumerate(raw_array):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
250 result.append(self._marker_allele_lookup[m][gcode])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
251 return result
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
252
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
253 def writebed(self, base):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
254 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
255 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
256 dst_name = '%s.fam' % (base)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
257 print 'Writing pedigree information to [ %s ]' % (dst_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
258 dst = open(dst_name, 'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
259 for skey in self._ordered_subjects:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
260 (fid, iid, did, mid, sex, phe, sid, d_sid, m_sid) = self._subjects[skey]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
261 dst.write('%s %s %s %s %s %s\n' % (fid, iid, did, mid, sex, phe))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
262 dst.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
263
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
264 dst_name = '%s.bim' % (base)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
265 print 'Writing map (extended format) information to [ %s ]' % (dst_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
266 dst = open(dst_name, 'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
267 for m, marker in enumerate(self._ordered_markers):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
268 chrom, name, genpos, abspos, a1, a2 = self._markers[marker]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
269 dst.write('%s\t%s\t%s\t%s\t%s\t%s\n' % (chrom, name, genpos, abspos, a1, a2))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
270 dst.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
271
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
272 bed_name = '%s.bed' % (base)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
273 print 'Writing genotype bitfile to [ %s ]' % (bed_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
274 print 'Using (default) SNP-major mode'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
275 bed = open(bed_name, 'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
276
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
277 ### Write the 3 header bytes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
278 bed.write(struct.pack('B', int(''.join(reversed(MAGIC_BYTE1)), 2)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
279 bed.write(struct.pack('B', int(''.join(reversed(MAGIC_BYTE2)), 2)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
280 bed.write(struct.pack('B', int(''.join(reversed(FORMAT_SNP_MAJOR_BYTE)), 2)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
281
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
282 ### Calculate how many "pad bits" we should add after the last subject
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
283 nsubjects = len(self._ordered_subjects)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
284 nmarkers = len(self._ordered_markers)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
285 total_bytes = nbytes(nsubjects)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
286 nbits = nsubjects * 2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
287 pad_nibbles = ((total_bytes * 8) - nbits)/2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
288 pad = array('i', [0]*pad_nibbles)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
289
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
290 ### And now write genotypes to the file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
291 for m in xrange(nmarkers):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
292 geno = self._genotypes[m]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
293 geno.extend(pad)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
294 bytes = len(geno)/4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
295 for b in range(bytes):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
296 idx = b*4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
297 gcode = tuple(geno[idx:idx+4])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
298 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
299 byte = struct.pack('B', GCODE_TO_INT[gcode])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
300 except KeyError:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
301 print m, b, gcode
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
302 raise
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
303 bed.write(byte)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
304 bed.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
305
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
306 def autosomal_indices(self):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
307 """ Return the indices of markers in this ped/map that are autosomal.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
308 This is used by rgGRR so that it can select a random set of markers
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
309 from the autosomes (sex chroms screw up the plot)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
310 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
311 return self._autosomal_indices
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
312
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
313 class Ped:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
314 def __init__(self, path):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
315 self.path = path
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
316 self._subjects = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
317 self._ordered_subjects = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
318 self._genotypes = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
319 self._marker_allele_lookup = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
320
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
321 def lineCount(self,infile):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
322 """ count the number of lines in a file - efficiently using wget
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
323 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
324 return int(commands.getoutput('wc -l %s' % (infile)).split()[0])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
325
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
326
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
327 def parse(self, markers):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
328 """ Parse a given file -- this needs to be memory-efficient so that large
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
329 files can be parsed (~1 million markers on ~5000 subjects?). It
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
330 should also be fast, if possible.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
331 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
332
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
333 ### Find out how many lines are in the file so we can ...
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
334 nsubjects = self.lineCount(self.path)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
335 ### ... Pre-allocate the genotype arrays
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
336 nmarkers = len(markers)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
337 _marker_alleles = [['0', '0'] for _ in xrange(nmarkers)]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
338 self._genotypes = [array('i', [-1]*nsubjects) for _ in xrange(nmarkers)]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
339
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
340 if self.path.endswith('.gz'):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
341 pfile = gzip.open(self.path, 'r')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
342 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
343 pfile = open(self.path, 'r')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
344
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
345 for s, line in enumerate(pfile):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
346 line = line.strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
347 if not line:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
348 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
349
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
350 fid, iid, did, mid, sex, phe, genos = line.split(None, 6)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
351 sid = iid.split('.')[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
352 d_sid = did.split('.')[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
353 m_sid = mid.split('.')[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
354
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
355 skey = (fid, iid)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
356 self._subjects[skey] = (fid, iid, did, mid, sex, phe, sid, d_sid, m_sid)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
357 self._ordered_subjects.append(skey)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
358
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
359 genotypes = genos.split()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
360
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
361 for m, marker in enumerate(markers):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
362 idx = m*2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
363 a1, a2 = genotypes[idx:idx+2] # Alleles for subject s, marker m
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
364 s1, s2 = seen = _marker_alleles[m] # Alleles seen for marker m
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
365
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
366 ### FIXME: I think this can still be faster, and simpler to read
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
367 # Two pieces of logic intertwined here: first, we need to code
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
368 # this genotype as HOM0, HOM1, HET or MISS. Second, we need to
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
369 # keep an ongoing record of the genotypes seen for this marker
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
370 if a1 == a2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
371 if a1 in MISSING_ALLELES:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
372 geno = MISS_GENO
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
373 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
374 if s1 == '0':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
375 seen[0] = a1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
376 elif s1 == a1 or s2 == a2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
377 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
378 elif s2 == '0':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
379 seen[1] = a1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
380 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
381 raise ThirdAllele('a1=a2=%s, seen=%s?' % (a1, str(seen)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
382
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
383 if a1 == seen[0]:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
384 geno = HOM0_GENO
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
385 elif a1 == seen[1]:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
386 geno = HOM1_GENO
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
387 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
388 raise PedError('Cannot assign geno for a1=a2=%s from seen=%s' % (a1, str(seen)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
389 elif a1 in MISSING_ALLELES or a2 in MISSING_ALLELES:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
390 geno = MISS_GENO
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
391 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
392 geno = HET_GENO
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
393 if s1 == '0':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
394 seen[0] = a1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
395 seen[1] = a2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
396 elif s2 == '0':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
397 if s1 == a1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
398 seen[1] = a2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
399 elif s1 == a2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
400 seen[1] = a1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
401 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
402 raise ThirdAllele('a1=%s, a2=%s, seen=%s?' % (a1, a2, str(seen)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
403 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
404 if sorted(seen) != sorted((a1, a2)):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
405 raise ThirdAllele('a1=%s, a2=%s, seen=%s?' % (a1, a2, str(seen)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
406
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
407 gcode = GENO_TO_GCODE.get(geno, None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
408 if gcode is None:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
409 raise UnknownGeno(str(geno))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
410 self._genotypes[m][s] = gcode
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
411
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
412 # Build the _marker_allele_lookup table
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
413 for m, alleles in enumerate(_marker_alleles):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
414 if len(alleles) == 2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
415 a1, a2 = alleles
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
416 elif len(alleles) == 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
417 a1 = alleles[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
418 a2 = '0'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
419 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
420 print 'All alleles blank for %s: %s' % (m, str(alleles))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
421 raise
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
422
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
423 self._marker_allele_lookup[m] = {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
424 HOM0: (a2, a2),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
425 HOM1: (a1, a1),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
426 HET : (a1, a2),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
427 MISS: ('0','0'),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
428 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
429
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
430 if VERBOSE: print '%s(%s) individuals read from [ %s ]' % (len(self._subjects), nsubjects, self.path)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
431
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
432 class Map:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
433 def __init__(self, path=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
434 self.path = path
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
435 self._markers = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
436 self._ordered_markers = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
437 self._autosomal_indices = set()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
438
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
439 def __len__(self):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
440 return len(self._markers)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
441
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
442 def parse(self):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
443 """ Parse a Linkage-format map file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
444 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
445 if self.path.endswith('.gz'):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
446 fh = gzip.open(self.path, 'r')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
447 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
448 fh = open(self.path, 'r')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
449
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
450 for i, line in enumerate(fh):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
451 line = line.strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
452 if not line:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
453 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
454
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
455 fields = line.split()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
456 if len(fields) < 4:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
457 raise MapLineTooShort(MAP_LINE_EXCEPTION_TEXT % (str(line), len(fields)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
458 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
459 chrom, name, genpos, abspos = fields
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
460 if name in self._markers:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
461 raise DuplicateMarkerInMapFile('Marker %s was found twice in map file %s' % (name, self.path))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
462 abspos = int(abspos)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
463 if abspos < 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
464 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
465 if chrom in AUTOSOMES:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
466 self._autosomal_indices.add(i)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
467 chrom = CHROM_REPLACE.get(chrom, chrom)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
468 self._markers[name] = [chrom, name, genpos, abspos, None, None]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
469 self._ordered_markers.append(name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
470 fh.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
471 if VERBOSE: print '%s (of %s) markers to be included from [ %s ]' % (len(self._ordered_markers), i, self.path)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
472
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
473 class BPed:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
474 """ The uber-class for processing Plink's Binary Ped file format *.bed/*.bim/*.fam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
475 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
476 def __init__(self, base):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
477 self.base = base
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
478 self._bed = Bed('%s.bed' % (self.base))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
479 self._bim = Bim('%s.bim' % (self.base))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
480 self._fam = Fam('%s.fam' % (self.base))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
481
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
482 self._markers = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
483 self._ordered_markers = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
484 self._marker_allele_lookup = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
485 self._autosomal_indices = set()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
486
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
487 self._subjects = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
488 self._ordered_subjects = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
489
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
490 self._genotypes = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
491
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
492 def parse(self, quick=False):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
493 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
494 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
495 self._quick = quick
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
496
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
497 self._bim.parse()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
498 self._markers = self._bim._markers
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
499 self._ordered_markers = self._bim._ordered_markers
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
500 self._marker_allele_lookup = self._bim._marker_allele_lookup
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
501 self._autosomal_indices = self._bim._autosomal_indices
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
502
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
503 self._fam.parse()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
504 self._subjects = self._fam._subjects
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
505 self._ordered_subjects = self._fam._ordered_subjects
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
506
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
507 self._bed.parse(self._ordered_subjects, self._ordered_markers, quick=quick)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
508 self._bedf = self._bed._fh
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
509 self._genotypes = self._bed._genotypes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
510 self.nsubjects = len(self._ordered_subjects)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
511 self.nmarkers = len(self._ordered_markers)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
512 self._bytes_per_marker = nbytes(self.nsubjects)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
513
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
514 def writeped(self, path=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
515 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
516 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
517 path = self.path = path or self.path
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
518
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
519 map_name = self.path.replace('.bed', '.map')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
520 print 'Writing map file [ %s ]' % (map_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
521 dst = open(map_name, 'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
522 for m in self._ordered_markers:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
523 chrom, snp, genpos, abspos, a1, a2 = self._markers[m]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
524 dst.write('%s\t%s\t%s\t%s\n' % (chrom, snp, genpos, abspos))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
525 dst.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
526
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
527 ped_name = self.path.replace('.bed', '.ped')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
528 print 'Writing ped file [ %s ]' % (ped_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
529 ped = open(ped_name, 'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
530 firstyikes = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
531 for s, skey in enumerate(self._ordered_subjects):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
532 idx = s*2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
533 (fid, iid, did, mid, sex, phe, oiid, odid, omid) = self._subjects[skey]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
534 ped.write('%s %s %s %s %s %s' % (fid, iid, odid, omid, sex, phe))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
535 genotypes_for_subject = self.getGenotypesForSubject(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
536 for m, snp in enumerate(self._ordered_markers):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
537 #a1, a2 = self.getGenotypeByIndices(s, m)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
538 a1,a2 = genotypes_for_subject[m]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
539 ped.write(' %s %s' % (a1, a2))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
540 ped.write('\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
541 ped.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
542
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
543 def getGenotype(self, subject, marker):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
544 """ Retrieve a genotype for a particular subject/marker pair
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
545 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
546 m = self._ordered_markers.index(marker)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
547 s = self._ordered_subjects.index(subject)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
548 return self.getGenotypeByIndices(s, m)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
549
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
550 def getGenotypesForSubject(self, s, raw=False):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
551 """ Returns list of genotypes for all m markers
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
552 for subject s. If raw==True, then an array
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
553 of raw integer gcodes is returned instead
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
554 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
555 if self._quick:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
556 nmarkers = len(self._markers)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
557 raw_array = array('i', [0]*nmarkers)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
558 seek_nibble = s % 4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
559 for m in xrange(nmarkers):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
560 seek_byte = m * self._bytes_per_marker + s/4 + HEADER_LENGTH
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
561 self._bedf.seek(seek_byte)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
562 geno = struct.unpack('B', self._bedf.read(1))[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
563 quartet = INT_TO_GCODE[geno]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
564 gcode = quartet[seek_nibble]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
565 raw_array[m] = gcode
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
566 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
567 raw_array = array('i', [row[s] for row in self._genotypes])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
568
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
569 if raw:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
570 return raw_array
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
571 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
572 result = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
573 for m, gcode in enumerate(raw_array):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
574 result.append(self._marker_allele_lookup[m][gcode])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
575 return result
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
576
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
577 def getGenotypeByIndices(self, s, m):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
578 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
579 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
580 if self._quick:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
581 # Determine which byte we need to seek to, and
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
582 # which nibble within the byte we need
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
583 seek_byte = m * self._bytes_per_marker + s/4 + HEADER_LENGTH
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
584 seek_nibble = s % 4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
585 self._bedf.seek(seek_byte)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
586 geno = struct.unpack('B', self._bedf.read(1))[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
587 quartet = INT_TO_GCODE[geno]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
588 gcode = quartet[seek_nibble]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
589 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
590 # Otherwise, just grab the genotypes from the
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
591 # list of arrays
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
592 genos_for_marker = self._genotypes[m]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
593 gcode = genos_for_marker[s]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
594
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
595 return self._marker_allele_lookup[m][gcode]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
596
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
597 def getGenotypesByIndices(self, s, mlist, format):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
598 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
599 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
600 if self._quick:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
601 raw_array = array('i', [0]*len(mlist))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
602 seek_nibble = s % 4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
603 for i,m in enumerate(mlist):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
604 seek_byte = m * self._bytes_per_marker + s/4 + HEADER_LENGTH
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
605 self._bedf.seek(seek_byte)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
606 geno = struct.unpack('B', self._bedf.read(1))[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
607 quartet = INT_TO_GCODE[geno]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
608 gcode = quartet[seek_nibble]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
609 raw_array[i] = gcode
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
610 mlist = set(mlist)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
611 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
612 mlist = set(mlist)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
613 raw_array = array('i', [row[s] for m,row in enumerate(self._genotypes) if m in mlist])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
614
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
615 if format == 'raw':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
616 return raw_array
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
617 elif format == 'ref':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
618 result = array('i', [0]*len(mlist))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
619 for m, gcode in enumerate(raw_array):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
620 if gcode == HOM0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
621 nref = 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
622 elif gcode == HET:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
623 nref = 2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
624 elif gcode == HOM1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
625 nref = 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
626 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
627 nref = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
628 result[m] = nref
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
629 return result
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
630 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
631 result = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
632 for m, gcode in enumerate(raw_array):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
633 result.append(self._marker_allele_lookup[m][gcode])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
634 return result
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
635
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
636 def getSubject(self, s):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
637 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
638 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
639 skey = self._ordered_subjects[s]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
640 return self._subjects[skey]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
641
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
642 def autosomal_indices(self):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
643 """ Return the indices of markers in this ped/map that are autosomal.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
644 This is used by rgGRR so that it can select a random set of markers
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
645 from the autosomes (sex chroms screw up the plot)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
646 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
647 return self._autosomal_indices
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
648
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
649 class Bed:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
650
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
651 def __init__(self, path):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
652 self.path = path
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
653 self._genotypes = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
654 self._fh = None
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
655
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
656 def parse(self, subjects, markers, quick=False):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
657 """ Parse the bed file, indicated either by the path parameter,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
658 or as the self.path indicated in __init__. If quick is
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
659 True, then just parse the bim and fam, then genotypes will
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
660 be looked up dynamically by indices
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
661 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
662 self._quick = quick
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
663
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
664 ordered_markers = markers
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
665 ordered_subjects = subjects
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
666 nsubjects = len(ordered_subjects)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
667 nmarkers = len(ordered_markers)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
668
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
669 bed = open(self.path, 'rb')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
670 self._fh = bed
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
671
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
672 byte1 = bed.read(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
673 byte2 = bed.read(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
674 byte3 = bed.read(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
675 format_flag = struct.unpack('B', byte3)[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
676
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
677 h1 = tuple(INT_TO_GCODE[struct.unpack('B', byte1)[0]])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
678 h2 = tuple(INT_TO_GCODE[struct.unpack('B', byte2)[0]])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
679 h3 = tuple(INT_TO_GCODE[format_flag])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
680
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
681 if h1 != MAGIC1 or h2 != MAGIC2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
682 raise BadMagic('One or both MAGIC bytes is wrong: %s==%s or %s==%s' % (h1, MAGIC1, h2, MAGIC2))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
683 if format_flag:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
684 print 'Detected that binary PED file is v1.00 SNP-major mode (%s, "%s")\n' % (format_flag, h3)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
685 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
686 raise 'BAD_FORMAT_FLAG? (%s, "%s")\n' % (format_flag, h3)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
687
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
688 print 'Parsing binary ped file for %s markers and %s subjects' % (nmarkers, nsubjects)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
689
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
690 ### If quick mode was specified, we're done ...
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
691 self._quick = quick
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
692 if quick:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
693 return
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
694
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
695 ### ... Otherwise, parse genotypes into an array, and append that
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
696 ### array to self._genotypes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
697 ngcodes = ceiling(nsubjects, 4)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
698 bytes_per_marker = nbytes(nsubjects)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
699 for m in xrange(nmarkers):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
700 genotype_array = array('i', [-1]*(ngcodes))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
701 for byte in xrange(bytes_per_marker):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
702 intval = struct.unpack('B', bed.read(1))[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
703 idx = byte*4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
704 genotype_array[idx:idx+4] = INT_TO_GCODE[intval]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
705 self._genotypes.append(genotype_array)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
706
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
707 class Bim:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
708 def __init__(self, path):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
709 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
710 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
711 self.path = path
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
712 self._markers = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
713 self._ordered_markers = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
714 self._marker_allele_lookup = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
715 self._autosomal_indices = set()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
716
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
717 def parse(self):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
718 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
719 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
720 print 'Reading map (extended format) from [ %s ]' % (self.path)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
721 bim = open(self.path, 'r')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
722 for m, line in enumerate(bim):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
723 chrom, snp, gpos, apos, a1, a2 = line.strip().split()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
724 self._markers[snp] = (chrom, snp, gpos, apos, a1, a2)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
725 self._marker_allele_lookup[m] = {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
726 HOM0: (a2, a2),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
727 HOM1: (a1, a1),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
728 HET : (a1, a2),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
729 MISS: ('0','0'),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
730 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
731 self._ordered_markers.append(snp)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
732 if chrom in AUTOSOMES:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
733 self._autosomal_indices.add(m)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
734 bim.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
735 print '%s markers to be included from [ %s ]' % (m+1, self.path)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
736
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
737 class Fam:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
738 def __init__(self, path):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
739 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
740 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
741 self.path = path
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
742 self._subjects = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
743 self._ordered_subjects = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
744
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
745 def parse(self):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
746 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
747 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
748 print 'Reading pedigree information from [ %s ]' % (self.path)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
749 fam = open(self.path, 'r')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
750 for s, line in enumerate(fam):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
751 fid, iid, did, mid, sex, phe = line.strip().split()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
752 sid = iid.split('.')[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
753 d_sid = did.split('.')[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
754 m_sid = mid.split('.')[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
755 skey = (fid, iid)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
756 self._ordered_subjects.append(skey)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
757 self._subjects[skey] = (fid, iid, did, mid, sex, phe, sid, d_sid, m_sid)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
758 fam.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
759 print '%s individuals read from [ %s ]' % (s+1, self.path)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
760
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
761 ### Command-line functionality and testing
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
762 def test(arg):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
763 '''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
764 '''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
765
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
766 import time
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
767
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
768 if arg == 'CAMP_AFFY.ped':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
769 print 'Testing bed.parse(quick=True)'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
770 s = time.time()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
771 bed = Bed(arg.replace('.ped', '.bed'))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
772 bed.parse(quick=True)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
773 print bed.getGenotype(('400118', '10300283'), 'rs2000467')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
774 print bed.getGenotype(('400118', '10101384'), 'rs2294019')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
775 print bed.getGenotype(('400121', '10101149'), 'rs2294019')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
776 print bed.getGenotype(('400123', '10200290'), 'rs2294019')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
777 assert bed.getGenotype(('400118', '10101384'), 'rs2294019') == ('4','4')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
778 e = time.time()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
779 print 'e-s = %s\n' % (e-s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
780
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
781 print 'Testing bed.parse'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
782 s = time.time()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
783 bed = BPed(arg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
784 bed.parse(quick=False)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
785 e = time.time()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
786 print 'e-s = %s\n' % (e-s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
787
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
788 print 'Testing bed.writeped'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
789 s = time.time()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
790 outname = '%s_BEDTEST' % (arg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
791 bed.writeped(outname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
792 e = time.time()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
793 print 'e-s = %s\n' % (e-s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
794 del(bed)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
795
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
796 print 'Testing ped.parse'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
797 s = time.time()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
798 ped = LPed(arg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
799 ped.parse()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
800 e = time.time()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
801 print 'e-s = %s\n' % (e-s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
802
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
803 print 'Testing ped.writebed'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
804 s = time.time()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
805 outname = '%s_PEDTEST' % (arg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
806 ped.writebed(outname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
807 e = time.time()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
808 print 'e-s = %s\n' % (e-s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
809 del(ped)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
810
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
811 def profile_bed(arg):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
812 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
813 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
814 bed = BPed(arg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
815 bed.parse(quick=False)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
816 outname = '%s_BEDPROFILE' % (arg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
817 bed.writeped(outname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
818
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
819 def profile_ped(arg):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
820 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
821 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
822 ped = LPed(arg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
823 ped.parse()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
824 outname = '%s_PEDPROFILE' % (arg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
825 ped.writebed(outname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
826
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
827 if __name__ == '__main__':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
828 """ Run as a command-line, this script should get one or more arguments,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
829 each one a ped file to be parsed with the PedParser (unit tests?)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
830 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
831 op = optparse.OptionParser()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
832 op.add_option('--profile-bed', action='store_true', default=False)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
833 op.add_option('--profile-ped', action='store_true', default=False)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
834 opts, args = op.parse_args()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
835
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
836 if opts.profile_bed:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
837 import profile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
838 import pstats
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
839 profile.run('profile_bed(args[0])', 'fooprof')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
840 p = pstats.Stats('fooprof')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
841 p.sort_stats('cumulative').print_stats(10)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
842 elif opts.profile_ped:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
843 import profile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
844 import pstats
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
845 profile.run('profile_ped(args[0])', 'fooprof')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
846 p = pstats.Stats('fooprof')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
847 p.sort_stats('cumulative').print_stats(10)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
848 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
849 for arg in args:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
850 test(arg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
851
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
852 ### Code used to generate the INT_TO_GCODE dictionary
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
853 #print '{\n ',
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
854 #for i in range(256):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
855 # b = INT2BIN[i]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
856 # ints = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
857 # s = str(i).rjust(3)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
858 # #print b
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
859 # for j in range(4):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
860 # idx = j*2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
861 # #print i, j, idx, b[idx:idx+2], int(b[idx:idx+2], 2)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
862 # ints.append(int(b[idx:idx+2], 2))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
863 # print '%s: array(\'i\', %s),' % (s,tuple(ints)),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
864 # if i > 0 and (i+1) % 4 == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
865 # print '\n ',
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
866 #print '}'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
867
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
868