Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/Cpp/ncListCreator.cpp @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 #include "ncListCreator.hpp" | |
2 | |
3 NCListCreator::NCListCreator(string inputFileName): inputFileName(inputFileName) {} | |
4 | |
5 string NCListCreator::getFileName(string chromosome) { | |
6 return inputFileName.substr(0, inputFileName.find_last_of('.')) + "_" + chromosome + ".bed"; | |
7 } | |
8 | |
9 void NCListCreator::splitFile () { | |
10 Chromosomes chromosomes; | |
11 GenomicInterval interval; | |
12 map <string, ofstream> splittedFiles; | |
13 map <string, ofstream>::iterator it; | |
14 ifstream file; | |
15 string line, fileName; | |
16 string chromosome; | |
17 file.open(inputFileName.c_str()); | |
18 if (file.is_open()) { | |
19 while (file.good()) { | |
20 getline(file, line); | |
21 if (line.size() > 0) { | |
22 interval.parseFromLine(line); | |
23 chromosomes.insert(interval.chromosome); | |
24 fileName = getFileName(interval.chromosome); | |
25 it = splittedFiles.find(interval.chromosome); | |
26 if (it == splittedFiles.end()) { | |
27 ofstream outputFile; | |
28 outputFile.open(fileName.c_str(), ios::out | ios::binary); | |
29 interval.writeBinary(outputFile); | |
30 splittedFiles[chromosome] = outputFile; | |
31 } | |
32 else { | |
33 it->second << line << "\n"; | |
34 } | |
35 } | |
36 } | |
37 file.close(); | |
38 for (it = splittedFiles.begin(); it != splittedFiles.end(); it++) { | |
39 it->second.close(); | |
40 } | |
41 } | |
42 else { | |
43 cout << "Unable to open file" << inputFileName; | |
44 } | |
45 } | |
46 | |
47 void NCListCreator::run() { | |
48 for (Chromosomes::iterator it = chromosomes.begin(); splittedFiles != chromosomes.end(); splittedFiles++) { | |
49 buildLists(*it); | |
50 } | |
51 } | |
52 | |
53 void NCListCreator::buildLists(string chromosome) { | |
54 createTables(chromosome); | |
55 labelLists(); | |
56 computeSubStart(); | |
57 computeAbsPosition(); | |
58 cleanFiles(); | |
59 transfer(chromosome); | |
60 } | |
61 | |
62 void NCListCreator::createTables(string chromosome) { | |
63 initLists(chromosome); | |
64 h = new Table(H_CELL_SIZE, nbLists); | |
65 t = new Table(T_CELL_SIZE, nbLines); | |
66 l = new Table(L_CELL_SIZE, nbLines); | |
67 fillTables(chromosome); | |
68 } | |
69 | |
70 void NCListCreator::initLists (string chromosome) { | |
71 nbLists = 0; | |
72 nbLines = 0; | |
73 ifstream file; | |
74 file.open(getFileName(chromosome).c_str(), ios::in | ios::binary); | |
75 Interval currentInterval, previousInterval; | |
76 if (file.is_open()) { | |
77 while (file.good()) { | |
78 if (currentInterval.parseBinary(file)) { | |
79 nbLines++; | |
80 if (previousInterval.include(currentInterval)) { | |
81 nbLists++; | |
82 } | |
83 } | |
84 previousInterval = currentInterval; | |
85 } | |
86 } | |
87 file.close(); | |
88 } | |
89 | |
90 void NCListCreator::fillTables (string chromosome) { | |
91 ifstream file; | |
92 file.open(getFileName(chromosome).c_str(), ios::in | ios::binary); | |
93 Interval currentInterval, previousInterval; | |
94 unsigned int i = 0; | |
95 if (file.is_open()) { | |
96 while (file.good()) { | |
97 if (currentInterval.parseBinary(file)) { | |
98 t->write(currentInterval.start, i, 0); | |
99 t->writeHere(currentInterval.end); | |
100 t->writeHere(-1); | |
101 t->writeHere(-1); | |
102 } | |
103 i++; | |
104 } | |
105 file.close(); | |
106 } | |
107 t->write(SENTINEL, LIST, -1); | |
108 l->write(SENTINEL, LIST, 0); | |
109 t->write(SENTINEL, NEW, -1); | |
110 } | |
111 | |
112 void NCListCreator::labelLists () { | |
113 unsigned int nextL = 0, thisL, length; | |
114 unsigned int p; | |
115 Interval current, parent; | |
116 for (unsigned int i = 0; i < nbLines; i++) { | |
117 p = i - 1; | |
118 t->moveTo(p, 0); | |
119 parent.readBinary(t->file); | |
120 t->moveTo(i, 0); | |
121 current.readBinary(t->file); | |
122 while ((p != SENTINEL) && (! parent.include(current))) { | |
123 p = t->read(p, PARENT); | |
124 t->moveTo(p, 0); | |
125 parent.readBinary(t->file); | |
126 } | |
127 thisL = t->read(p, LIST); | |
128 if (thisL == SENTINEL) { | |
129 thisL = nextL; | |
130 nextL++; | |
131 length = 0; | |
132 t->write(p, LIST, thisL); | |
133 } | |
134 else { | |
135 length = h->read(thisL, LENGTH); | |
136 } | |
137 t->write(i, PARENT, p); | |
138 h->write(thisL, LENGTH, length+1); | |
139 } | |
140 } | |
141 | |
142 void NCListCreator::computeSubStart () { | |
143 unsigned int total = 0; | |
144 for (unsigned int i = 0; i < nbLists; i++) { | |
145 h->write(i, START, total); | |
146 total += h->read(i, LENGTH); | |
147 h->write(i, LENGTH, 0); | |
148 } | |
149 } | |
150 | |
151 void NCListCreator::computeAbsPosition () { | |
152 Value s, e, pt, hp, pl, nb, lp; | |
153 for (unsigned int i = 0; i < nbLines; i++) { | |
154 s = t->read(i, START); | |
155 e = t->read(i, END); | |
156 pt = t->read(i, PARENT); | |
157 hp = t->read(pt, LIST); | |
158 pl = t->read(pt, NEW); | |
159 nb = h->read(hp, LENGTH); | |
160 lp = h->read(hp, START) + nb; | |
161 t->write(i, NEW, lp); | |
162 l->write(lp, START, s); | |
163 l->write(lp, END, e); | |
164 l->write(lp, LIST, SENTINEL); | |
165 l->write(lp, PARENT, pl); | |
166 h->write(lp, LENGTH, nb+1); | |
167 if (nb == 0) { | |
168 l->write(pl, LIST, hp); | |
169 } | |
170 } | |
171 } | |
172 | |
173 void NCListCreator::cleanFiles () { | |
174 t->destroy(); | |
175 } | |
176 | |
177 void NCListCreator::transfer (string chromosome) { | |
178 ncLists[chromosome] = NCList(h, l); | |
179 } |