view smart_toolShed/commons/core/parsing/BlatFileParser.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox that manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line source

from commons.core.parsing.BlatParser import BlatParser
import os

class BlatFileParser(object):
    """Read a BLAT result file and expose its hits as BlatParser objects.

    A line is treated as a hit when its first tab-separated field parses as
    an integer; every other line (headers, separators, blank lines) is
    skipped.  Results accumulate on the instance, so calling a parse method
    a second time adds to what is already stored.
    """

    def __init__(self, blatFileName = None):
        """Store the path of the file to parse and create empty result containers.

        blatFileName -- path of the BLAT result file; it is only opened when
                        one of the parse methods is called.
        """
        self._blatFileName = blatFileName
        # Every hit, in file order (filled by parseBlatFile).
        self._lBlatHits = []
        # Query name -> list of hits for that query (filled by parseBlatFileByQueries).
        self._dBlatHitsByQueries = {}
        # Query names seen so far; used as a set, every value is 1.
        self._dQueries = {}
        
    def getDictOfQueries(self):
        """Return the dictionary whose keys are the query names seen while parsing."""
        return self._dQueries
    
    def getResultLinesOfOneQuery(self, queryName):
        """Return the list of hits stored for queryName.

        Raises KeyError when no hit was recorded for that query by
        parseBlatFileByQueries.
        """
        return self._dBlatHitsByQueries[queryName]
    
    def getDictOfBlatHitsByQueries(self):
        """Return the dictionary mapping each query name to its list of hits."""
        return self._dBlatHitsByQueries
    
    def getListsOfHits(self):
        """Return the flat list of hits collected by parseBlatFile."""
        return self._lBlatHits
    
    def parseBlatFile(self):
        """Parse the file and append every hit to the flat hit list.

        Also records each query name in the dictionary of queries.
        Returns the (accumulated) list of BlatParser objects.
        """
        for queryHeader, iBlatParser in self._iterBlatHits():
            self._lBlatHits.append(iBlatParser)
        return self._lBlatHits
    
    def parseBlatFileByQueries(self):
        """Parse the file and group the hits by query name.

        Also records each query name in the dictionary of queries.
        Returns the (accumulated) dictionary query name -> list of
        BlatParser objects.
        """
        for queryHeader, iBlatParser in self._iterBlatHits():
            # setdefault replaces dict.has_key, which no longer exists in Python 3.
            self._dBlatHitsByQueries.setdefault(queryHeader, []).append(iBlatParser)
        return self._dBlatHitsByQueries

    def _iterBlatHits(self):
        """Yield (queryName, BlatParser) for each hit line of the file.

        The file is opened in a 'with' block so that it is closed even when
        building a BlatParser raises.  Line numbers passed to BlatParser are
        1-based and count every line of the file, hit or not.
        """
        with open(self._blatFileName, 'r') as blatFile:
            for lineNumber, line in enumerate(blatFile, 1):
                if not self._isInteger(line.split("\t")[0]):
                    continue
                iBlatParser = BlatParser()
                iBlatParser.setAttributesFromString(line, lineNumber)
                queryHeader = iBlatParser.getQName()
                self._dQueries[queryHeader] = 1
                yield queryHeader, iBlatParser
        
    def _isInteger(self, string):
        """Return True when int(string) succeeds, False when it raises ValueError."""
        try:
            int(string)
            return True
        except ValueError:
            return False