view commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line source

from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils
import re
import sys


class CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber(object):
    '''
    Check whether the profiles listed in a file are present in a profiles
    databank and, for those that are not, build the getz command that
    retrieves them.

    By default the file is read as a list of profile names (one per line);
    call setPfamAccNumberKeys() to treat the entries as accession numbers.
    '''

    def __init__(self):
        '''
        Create a checker working on profile names, with no file configured yet.
        '''
        self.profilesDBUtils = ProfilesDatabankUtils()
        # Result of the last generateNotExistingProfilesList() call.
        self.profilesToAddNotInDB = []
        # False: entries are profile names; True: entries are accession numbers.
        self._pfamAccNumber = False

    def setProfilesDBFile(self, profilesDBFile):
        '''
        Set the path of the profiles databank file to check against.
        '''
        self._profilesDBFile = profilesDBFile

    def setProfilesToAdd(self, profilesFile):
        '''
        Set the path of the file listing the wanted profiles, one per line.
        '''
        self._profilesToAdd = profilesFile

    def setPfamAccNumberKeys(self):
        '''
        Treat the entries of the profiles file as accession numbers
        instead of profile names.
        '''
        self._pfamAccNumber = True

    def _IsProfilInDB(self, pfamDBList, profil):
        '''
        Tell whether a profile is present in the databank.

        @param pfamDBList: object whose getList() yields the databank
                           entries; each entry exposes `name` and `accNumber`
        @param profil: profile name or accession number (without version)
        @return: True if one databank entry matches, False otherwise
        '''
        if self._pfamAccNumber:
            # Databank accession numbers carry a ".<version>" suffix that the
            # requested key does not have. The key is escaped so that it is
            # matched literally, and the pattern is compiled once per lookup.
            versionedAccNumber = re.compile(re.escape(profil) + r"\.\d+")
            return any(versionedAccNumber.match(profilInstance.accNumber)
                       for profilInstance in pfamDBList.getList())
        return any(profilInstance.name == profil
                   for profilInstance in pfamDBList.getList())

    def _generateProfilesList(self):
        '''
        Read the profiles file and return its raw lines (line endings kept).
        '''
        # The context manager guarantees the handle is closed, even on error.
        with open(self._profilesToAdd) as profilesFile:
            return profilesFile.readlines()

    def generateNotExistingProfilesList(self):
        '''
        Build the list of profiles from the profiles file that are not in
        the profiles databank.

        Blank lines (including whitespace-only ones) are ignored. When the
        databank contains no entry at all, the returned list is empty.

        @return: list of names or accession numbers missing from the databank
                 (also stored in self.profilesToAddNotInDB)
        '''
        self.profilesToAddNotInDB = []
        profilesToAddList = self._generateProfilesList()
        pfamDBList = self.profilesDBUtils.read(self._profilesDBFile)
        if not pfamDBList.getList():
            return self.profilesToAddNotInDB
        for line in profilesToAddList:
            profil = line.strip()
            # Skip empty entries so that a blank, whitespace-only or "\r\n"
            # line is never reported as a missing profile.
            if not profil:
                continue
            if not self._IsProfilInDB(pfamDBList, profil):
                self.profilesToAddNotInDB.append(profil)
        return self.profilesToAddNotInDB

    def generateGetzCmdProfilesList(self, profilesList):
        '''
        Build the getz command retrieving the given profiles.

        @param profilesList: list of profile names or accession numbers,
                             according to the current key mode
        @return: the getz command line; the query part is empty
                 ("getz -e ''") when profilesList is empty
        '''
        if self._pfamAccNumber:
            queries = ["[pfamhmm-AccNumber:\"" + profileAccNumber + "\"]"
                       for profileAccNumber in profilesList]
        else:
            # Names are searched with a trailing wildcard.
            queries = ["[pfamhmm-Id:\"" + profileName + "*\"]"
                       for profileName in profilesList]
        # join() puts the separator only between the queries, so nothing has
        # to be trimmed afterwards and an empty list keeps the prefix intact.
        return "getz -e '" + " | ".join(queries) + "'"

    def CmdToCompleteProfileDB(self):
        '''
        Build the getz command retrieving the profiles of the profiles file
        that are not yet in the profiles databank.
        '''
        profilesList2Add = self.generateNotExistingProfilesList()
        return self.generateGetzCmdProfilesList(profilesList2Add)