view commons/core/parsing/BlatFileParser.py @ 9:1eb55963fe39

Updated CompareOverlappingSmall*.py
author m-zytnicki
date Thu, 14 Mar 2013 05:23:05 -0400
parents 769e306b7933
children
line wrap: on
line source

from commons.core.parsing.BlatParser import BlatParser
import os

class BlatFileParser(object):
    """Read a BLAT result file and build one BlatParser object per hit line.

    A line is treated as a hit when its first tab-separated field parses as
    an integer; every other line (headers, separators, blank lines) is
    skipped. Results accumulate on the instance, so calling a parse method
    twice on the same instance adds the hits a second time.
    """

    def __init__(self, blatFileName = None):
        """Store the path of the file to parse and create empty containers.

        blatFileName: path handed to open() by the parse methods.
        """
        self._blatFileName = blatFileName
        # Hits in file order, filled by parseBlatFile().
        self._lBlatHits = []
        # Query name -> list of hits, filled by parseBlatFileByQueries().
        self._dBlatHitsByQueries = {}
        # Query name -> 1; used as a set of the query names seen so far.
        self._dQueries = {}
        
    def getDictOfQueries(self):
        """Return the dict whose keys are the query names seen while parsing."""
        return self._dQueries
    
    def getResultLinesOfOneQuery(self, queryName):
        """Return the list of hits stored for queryName.

        Raises KeyError when no hit was recorded for that query by
        parseBlatFileByQueries().
        """
        return self._dBlatHitsByQueries[queryName]
    
    def getDictOfBlatHitsByQueries(self):
        """Return the dict mapping each query name to its list of hits."""
        return self._dBlatHitsByQueries
    
    def getListsOfHits(self):
        """Return the list of hits collected by parseBlatFile()."""
        return self._lBlatHits
    
    def parseBlatFile(self):
        """Parse the file and append every hit to the internal list.

        Returns the internal list of BlatParser objects, in file order.
        """
        for _queryHeader, iBlatParser in self._iterBlatHits():
            self._lBlatHits.append(iBlatParser)
        return self._lBlatHits
    
    def parseBlatFileByQueries(self):
        """Parse the file and group the hits by query name.

        Returns the internal dict mapping each query name to the list of its
        BlatParser objects, in file order.
        """
        for queryHeader, iBlatParser in self._iterBlatHits():
            # setdefault replaces dict.has_key(), which no longer exists in
            # Python 3, and behaves the same on Python 2.
            self._dBlatHitsByQueries.setdefault(queryHeader, []).append(iBlatParser)
        return self._dBlatHitsByQueries

    def _iterBlatHits(self):
        """Yield (query name, BlatParser) for each hit line of the file.

        Also records every query name in self._dQueries. The file is opened
        in a with-block so the handle is released even if parsing a line
        raises.
        """
        with open(self._blatFileName, 'r') as blatFile:
            # n is the 1-based line number, passed on to the BlatParser.
            for n, line in enumerate(blatFile, 1):
                if not self._isInteger(line.split("\t")[0]):
                    continue
                iBlatParser = BlatParser()
                iBlatParser.setAttributesFromString(line, n)
                queryHeader = iBlatParser.getQName()
                self._dQueries[queryHeader] = 1
                yield queryHeader, iBlatParser
        
    def _isInteger(self, string):
        """Return True when int(string) succeeds, False on ValueError."""
        try:
            int(string)
            return True
        except ValueError:
            return False