| 18 | 1 #include "ncListCreator.hpp" | 
|  | 2 | 
|  | 3 NCListCreator::NCListCreator(string inputFileName): inputFileName(inputFileName) {} | 
|  | 4 | 
|  | 5 string NCListCreator::getFileName(string chromosome) { | 
|  | 6     return inputFileName.substr(0, inputFileName.find_last_of('.')) + "_" + chromosome + ".bed"; | 
|  | 7 } | 
|  | 8 | 
|  | 9 void NCListCreator::splitFile () { | 
|  | 10     Chromosomes chromosomes; | 
|  | 11     GenomicInterval interval; | 
|  | 12     map <string, ofstream> splittedFiles; | 
|  | 13     map <string, ofstream>::iterator it; | 
|  | 14     ifstream file; | 
|  | 15     string line, fileName; | 
|  | 16     string chromosome; | 
|  | 17     file.open(inputFileName.c_str()); | 
|  | 18     if (file.is_open()) { | 
|  | 19         while (file.good()) { | 
|  | 20             getline(file, line); | 
|  | 21             if (line.size() > 0) { | 
|  | 22                 interval.parseFromLine(line); | 
|  | 23                 chromosomes.insert(interval.chromosome); | 
|  | 24                 fileName = getFileName(interval.chromosome); | 
|  | 25                 it = splittedFiles.find(interval.chromosome); | 
|  | 26                 if (it == splittedFiles.end()) { | 
|  | 27                     ofstream outputFile; | 
|  | 28                     outputFile.open(fileName.c_str(), ios::out | ios::binary); | 
|  | 29                     interval.writeBinary(outputFile); | 
|  | 30                     splittedFiles[chromosome] = outputFile; | 
|  | 31                 } | 
|  | 32                 else { | 
|  | 33                     it->second << line << "\n"; | 
|  | 34                 } | 
|  | 35             } | 
|  | 36         } | 
|  | 37         file.close(); | 
|  | 38         for (it = splittedFiles.begin(); it != splittedFiles.end(); it++) { | 
|  | 39             it->second.close(); | 
|  | 40         } | 
|  | 41     } | 
|  | 42     else { | 
|  | 43         cout << "Unable to open file" << inputFileName; | 
|  | 44     } | 
|  | 45 } | 
|  | 46 | 
|  | 47 void NCListCreator::run() { | 
|  | 48     for (Chromosomes::iterator it = chromosomes.begin(); splittedFiles != chromosomes.end(); splittedFiles++) { | 
|  | 49         buildLists(*it); | 
|  | 50     } | 
|  | 51 } | 
|  | 52 | 
|  | 53 void NCListCreator::buildLists(string chromosome) { | 
|  | 54     createTables(chromosome); | 
|  | 55     labelLists(); | 
|  | 56     computeSubStart(); | 
|  | 57     computeAbsPosition(); | 
|  | 58     cleanFiles(); | 
|  | 59     transfer(chromosome); | 
|  | 60 } | 
|  | 61 | 
|  | 62 void NCListCreator::createTables(string chromosome) { | 
|  | 63     initLists(chromosome); | 
|  | 64     h = new Table(H_CELL_SIZE, nbLists); | 
|  | 65     t = new Table(T_CELL_SIZE, nbLines); | 
|  | 66     l = new Table(L_CELL_SIZE, nbLines); | 
|  | 67     fillTables(chromosome); | 
|  | 68 } | 
|  | 69 | 
|  | 70 void NCListCreator::initLists (string chromosome) { | 
|  | 71     nbLists = 0; | 
|  | 72     nbLines = 0; | 
|  | 73     ifstream file; | 
|  | 74     file.open(getFileName(chromosome).c_str(), ios::in | ios::binary); | 
|  | 75     Interval currentInterval, previousInterval; | 
|  | 76     if (file.is_open()) { | 
|  | 77         while (file.good()) { | 
|  | 78             if (currentInterval.parseBinary(file)) { | 
|  | 79                 nbLines++; | 
|  | 80                 if (previousInterval.include(currentInterval)) { | 
|  | 81                     nbLists++; | 
|  | 82                 } | 
|  | 83             } | 
|  | 84             previousInterval = currentInterval; | 
|  | 85         } | 
|  | 86     } | 
|  | 87     file.close(); | 
|  | 88 } | 
|  | 89 | 
|  | 90 void NCListCreator::fillTables (string chromosome) { | 
|  | 91     ifstream file; | 
|  | 92     file.open(getFileName(chromosome).c_str(), ios::in | ios::binary); | 
|  | 93     Interval currentInterval, previousInterval; | 
|  | 94     unsigned int i = 0; | 
|  | 95     if (file.is_open()) { | 
|  | 96         while (file.good()) { | 
|  | 97             if (currentInterval.parseBinary(file)) { | 
|  | 98                 t->write(currentInterval.start, i, 0); | 
|  | 99                 t->writeHere(currentInterval.end); | 
|  | 100                 t->writeHere(-1); | 
|  | 101                 t->writeHere(-1); | 
|  | 102             } | 
|  | 103             i++; | 
|  | 104         } | 
|  | 105         file.close(); | 
|  | 106     } | 
|  | 107     t->write(SENTINEL, LIST, -1); | 
|  | 108     l->write(SENTINEL, LIST,  0); | 
|  | 109     t->write(SENTINEL, NEW,  -1); | 
|  | 110 } | 
|  | 111 | 
|  | 112 void NCListCreator::labelLists () { | 
|  | 113     unsigned int nextL = 0, thisL, length; | 
|  | 114     unsigned int p; | 
|  | 115     Interval current, parent; | 
|  | 116     for (unsigned int i = 0; i < nbLines; i++) { | 
|  | 117         p = i - 1; | 
|  | 118         t->moveTo(p, 0); | 
|  | 119         parent.readBinary(t->file); | 
|  | 120         t->moveTo(i, 0); | 
|  | 121         current.readBinary(t->file); | 
|  | 122         while ((p != SENTINEL) && (! parent.include(current))) { | 
|  | 123             p = t->read(p, PARENT); | 
|  | 124             t->moveTo(p, 0); | 
|  | 125             parent.readBinary(t->file); | 
|  | 126         } | 
|  | 127         thisL = t->read(p, LIST); | 
|  | 128         if (thisL == SENTINEL) { | 
|  | 129             thisL = nextL; | 
|  | 130             nextL++; | 
|  | 131             length = 0; | 
|  | 132             t->write(p, LIST, thisL); | 
|  | 133         } | 
|  | 134         else { | 
|  | 135             length = h->read(thisL, LENGTH); | 
|  | 136         } | 
|  | 137         t->write(i, PARENT, p); | 
|  | 138         h->write(thisL, LENGTH, length+1); | 
|  | 139     } | 
|  | 140 } | 
|  | 141 | 
|  | 142 void NCListCreator::computeSubStart () { | 
|  | 143     unsigned int total = 0; | 
|  | 144     for (unsigned int i = 0; i < nbLists; i++) { | 
|  | 145         h->write(i, START, total); | 
|  | 146         total += h->read(i, LENGTH); | 
|  | 147         h->write(i, LENGTH, 0); | 
|  | 148     } | 
|  | 149 } | 
|  | 150 | 
|  | 151 void NCListCreator::computeAbsPosition () { | 
|  | 152     Value s, e, pt, hp, pl, nb, lp; | 
|  | 153     for (unsigned int i = 0; i < nbLines; i++) { | 
|  | 154         s = t->read(i,   START); | 
|  | 155         e  = t->read(i,  END); | 
|  | 156         pt = t->read(i,  PARENT); | 
|  | 157         hp = t->read(pt, LIST); | 
|  | 158         pl = t->read(pt, NEW); | 
|  | 159         nb = h->read(hp, LENGTH); | 
|  | 160         lp = h->read(hp, START) + nb; | 
|  | 161         t->write(i,  NEW,    lp); | 
|  | 162         l->write(lp, START,  s); | 
|  | 163         l->write(lp, END,    e); | 
|  | 164         l->write(lp, LIST,   SENTINEL); | 
|  | 165         l->write(lp, PARENT, pl); | 
|  | 166         h->write(lp, LENGTH, nb+1); | 
|  | 167         if (nb == 0) { | 
|  | 168             l->write(pl, LIST, hp); | 
|  | 169         } | 
|  | 170     } | 
|  | 171 } | 
|  | 172 | 
|  | 173 void NCListCreator::cleanFiles () { | 
|  | 174     t->destroy(); | 
|  | 175 } | 
|  | 176 | 
|  | 177 void NCListCreator::transfer (string chromosome) { | 
|  | 178     ncLists[chromosome] = NCList(h, l); | 
|  | 179 } |