view SMART/Java/Python/Cpp/ncListCreator.cpp @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line source

#include "ncListCreator.hpp"

/**
 * Builds a creator for the given input file.
 *
 * Only stores the path in the `inputFileName` member; the constructor body is
 * empty, so no file is read here.
 *
 * @param inputFileName path of the file that splitFile() will read
 */
NCListCreator::NCListCreator(string inputFileName): inputFileName(inputFileName) {}

/**
 * Returns the name of the per-chromosome file derived from the input file.
 *
 * The result is the input file name without its extension, followed by
 * "_", the chromosome name and ".bed".
 *
 * @param chromosome chromosome name to embed in the file name
 * @return the derived file name
 */
string NCListCreator::getFileName(string chromosome) {
    // A '.' only marks an extension when it belongs to the last path
    // component; otherwise "../data/input" would be cut down to ".".
    const string::size_type lastSeparator = inputFileName.find_last_of("/\\");
    string::size_type lastDot = inputFileName.find_last_of('.');
    if ((lastDot != string::npos) && (lastSeparator != string::npos) && (lastDot < lastSeparator)) {
        lastDot = string::npos;
    }
    // substr(0, npos) keeps the whole name when there is no extension.
    return inputFileName.substr(0, lastDot) + "_" + chromosome + ".bed";
}

void NCListCreator::splitFile () {
    Chromosomes chromosomes;
    GenomicInterval interval;
    map <string, ofstream> splittedFiles;
    map <string, ofstream>::iterator it;
    ifstream file;
    string line, fileName;
    string chromosome;
    file.open(inputFileName.c_str());
    if (file.is_open()) {
        while (file.good()) {
            getline(file, line);
            if (line.size() > 0) {
                interval.parseFromLine(line);
                chromosomes.insert(interval.chromosome);
                fileName = getFileName(interval.chromosome);
                it = splittedFiles.find(interval.chromosome);
                if (it == splittedFiles.end()) {
                    ofstream outputFile;
                    outputFile.open(fileName.c_str(), ios::out | ios::binary);
                    interval.writeBinary(outputFile);
                    splittedFiles[chromosome] = outputFile;
                }
                else {
                    it->second << line << "\n";
                }
            }
        }
        file.close();
        for (it = splittedFiles.begin(); it != splittedFiles.end(); it++) {
            it->second.close();
        }
    }
    else {
        cout << "Unable to open file" << inputFileName;
    }
}

/**
 * Builds the lists of every chromosome stored in `chromosomes`, calling
 * buildLists() once per chromosome in the set's iteration order.
 */
void NCListCreator::run() {
    // The iterator itself must drive the loop: both the end test and the
    // increment apply to `it`.
    for (Chromosomes::iterator it = chromosomes.begin(); it != chromosomes.end(); ++it) {
        buildLists(*it);
    }
}

/**
 * Runs the whole construction pipeline for one chromosome.
 *
 * The steps share state through the h, t and l tables and must run in this
 * order.
 *
 * @param chromosome name of the chromosome whose split file is processed
 */
void NCListCreator::buildLists(string chromosome) {
    // Allocate h, t and l and load the chromosome's intervals into t.
    createTables(chromosome);
    // Assign a parent row and a list id to every row of t; count list sizes in h.
    labelLists();
    // Turn the list sizes in h into start offsets and reset the sizes to 0.
    computeSubStart();
    // Copy every interval to its final position in l.
    computeAbsPosition();
    // Calls destroy() on t.
    cleanFiles();
    // Store NCList(h, l) under this chromosome in ncLists.
    transfer(chromosome);
}

/**
 * Allocates the three working tables for one chromosome and fills t.
 *
 * initLists() must run first because it computes nbLists and nbLines, which
 * are passed to the Table constructors below.
 *
 * NOTE(review): h, t and l are allocated with `new` on every call and nothing
 * in the visible part of this file deletes them (cleanFiles() only calls
 * t->destroy(), transfer() passes h and l to NCList) -- confirm who owns them.
 *
 * @param chromosome name of the chromosome whose split file is read
 */
void NCListCreator::createTables(string chromosome) {
    initLists(chromosome);
    h = new Table(H_CELL_SIZE, nbLists);
    t = new Table(T_CELL_SIZE, nbLines);
    l = new Table(L_CELL_SIZE, nbLines);
    fillTables(chromosome);
}
 
void NCListCreator::initLists (string chromosome) {
    nbLists = 0;
    nbLines = 0;
    ifstream file;
    file.open(getFileName(chromosome).c_str(), ios::in | ios::binary);
    Interval currentInterval, previousInterval;
    if (file.is_open()) {
        while (file.good()) {
            if (currentInterval.parseBinary(file)) {
                nbLines++;
                if (previousInterval.include(currentInterval)) {
                    nbLists++;
                }
            }
            previousInterval = currentInterval;
        }
    }
    file.close();
}

/**
 * Loads the records of a chromosome's split file into the t table and writes
 * the sentinel cells of t and l.
 *
 * For every record parsed successfully, one row of t receives the start, the
 * end and two -1 placeholders. The three sentinel writes happen even when the
 * file cannot be opened.
 *
 * @param chromosome name of the chromosome whose split file is read
 */
void NCListCreator::fillTables (string chromosome) {
    ifstream file;
    file.open(getFileName(chromosome).c_str(), ios::in | ios::binary);
    Interval currentInterval;
    unsigned int i = 0;
    if (file.is_open()) {
        while (file.good()) {
            if (currentInterval.parseBinary(file)) {
                // NOTE(review): this call passes (value, row, column) while
                // every other Table::write call in this file passes
                // (row, column, value) -- confirm against Table::write.
                t->write(currentInterval.start, i, 0);
                t->writeHere(currentInterval.end);
                t->writeHere(-1);
                t->writeHere(-1);
                // Advance only once a record has been stored, so the row
                // index matches the way initLists() counts nbLines.
                i++;
            }
        }
        file.close();
    }
    t->write(SENTINEL, LIST, -1);
    l->write(SENTINEL, LIST,  0);
    t->write(SENTINEL, NEW,  -1);
}

/**
 * Gives every row of t a parent row and a list id, and counts list sizes in h.
 *
 * For each row i, the search for a parent starts at row i - 1 and follows the
 * PARENT column until a row whose interval includes row i's interval is
 * found, or SENTINEL is reached. The list id is stored in the LIST column of
 * the parent row; the number of rows per list is kept in h's LENGTH column.
 */
void NCListCreator::labelLists () {
    unsigned int nextL = 0, thisL, length;
    unsigned int p;
    Interval current, parent;
    for (unsigned int i = 0; i < nbLines; i++) {
        // For i == 0 this unsigned subtraction wraps around.
        // NOTE(review): presumably the wrapped value equals SENTINEL -- confirm.
        p = i - 1;
        // NOTE(review): the parent record is read before p is compared with
        // SENTINEL, so a read at the sentinel row can happen -- confirm that
        // Table::moveTo accepts that row.
        t->moveTo(p, 0);
        parent.readBinary(t->file);
        t->moveTo(i, 0);
        current.readBinary(t->file);
        // Climb the chain of parents until one includes the current interval.
        while ((p != SENTINEL) && (! parent.include(current))) {
            p = t->read(p, PARENT);
            t->moveTo(p, 0);
            parent.readBinary(t->file);
        }
        thisL = t->read(p, LIST);
        if (thisL == SENTINEL) {
            // Row p has no list yet: take the next free id and record it on p.
            thisL = nextL;
            nextL++;
            length = 0;
            t->write(p, LIST, thisL);
        }
        else {
            length = h->read(thisL, LENGTH);
        }
        t->write(i, PARENT, p);
        // One more row belongs to list thisL.
        h->write(thisL, LENGTH, length+1);
    }
}

/**
 * Converts the list sizes stored in h into start offsets.
 *
 * For each of the nbLists rows of h, START receives the sum of the LENGTH
 * values of all previous rows, and LENGTH is then reset to 0.
 */
void NCListCreator::computeSubStart () {
    unsigned int offset = 0;
    unsigned int list = 0;
    while (list < nbLists) {
        h->write(list, START, offset);
        // LENGTH must be read before it is cleared on the next line.
        offset += h->read(list, LENGTH);
        h->write(list, LENGTH, 0);
        ++list;
    }
}

/**
 * Copies every row of t to its final position in l.
 *
 * For row i of t, the target row in l is the START offset of the list of its
 * parent plus the number of rows already placed in that list, which is kept
 * in h's LENGTH column (reset to 0 by computeSubStart()). The new position is
 * also recorded in t's NEW column so that children can refer to it.
 */
void NCListCreator::computeAbsPosition () {
    Value s, e, pt, hp, pl, nb, lp;
    for (unsigned int i = 0; i < nbLines; i++) {
        s  = t->read(i,  START);
        e  = t->read(i,  END);
        pt = t->read(i,  PARENT);   // parent row in t
        hp = t->read(pt, LIST);     // list (row of h) this interval belongs to
        pl = t->read(pt, NEW);      // position of the parent in l
        nb = h->read(hp, LENGTH);   // rows of this list already placed
        lp = h->read(hp, START) + nb;
        t->write(i,  NEW,    lp);
        l->write(lp, START,  s);
        l->write(lp, END,    e);
        l->write(lp, LIST,   SENTINEL);
        l->write(lp, PARENT, pl);
        // The counter belongs to list hp, the row it was read from; lp is an
        // index into l, not into h.
        h->write(hp, LENGTH, nb+1);
        if (nb == 0) {
            // First row placed in this list: link the parent to the list.
            l->write(pl, LIST, hp);
        }
    }
}

/**
 * Calls destroy() on the t table once its content has been copied to l.
 *
 * NOTE(review): presumably releases the storage behind the table -- confirm
 * against Table::destroy. The Table object that t points to is not deleted
 * here.
 */
void NCListCreator::cleanFiles () {
    t->destroy();
}

/**
 * Stores the result for one chromosome: an NCList built from the current h
 * and l tables is assigned to ncLists under the chromosome's name.
 *
 * @param chromosome key under which the list is stored
 */
void NCListCreator::transfer (string chromosome) {
    ncLists[chromosome] = NCList(h, l);
}