Mercurial > repos > yufei-luo > s_mart
view commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line source
from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils
import re
import sys


class CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber(object):
    """Check which profiles of a list are absent from a profiles databank.

    The profiles list is read from a text file holding one identifier per
    line.  By default the identifiers are profile names; after a call to
    ``setPfamAccNumberKeys`` they are treated as accession numbers instead.
    For the profiles that are not in the databank, a ``getz`` command
    retrieving them can be generated.
    """

    def __init__(self):
        """Create a checker that compares profiles by name by default."""
        self.profilesDBUtils = ProfilesDatabankUtils()
        # Result of the last call to generateNotExistingProfilesList().
        self.profilesToAddNotInDB = []
        # False: identifiers are profile names; True: accession numbers.
        self._pfamAccNumber = False

    def setProfilesDBFile(self, profilesDBFile):
        """Set the profiles databank file handed to ``profilesDBUtils.read``."""
        self._profilesDBFile = profilesDBFile

    def setProfilesToAdd(self, profilesFile):
        """Set the path of the file listing one profile identifier per line."""
        self._profilesToAdd = profilesFile

    def setPfamAccNumberKeys(self):
        """Treat the listed identifiers as accession numbers, not names."""
        self._pfamAccNumber = True

    def _IsProfilInDB(self, pfamDBList, profil):
        """Return True if ``profil`` matches an entry of ``pfamDBList``.

        In name mode an entry matches when its ``name`` equals ``profil``.
        In accession-number mode an entry matches when its ``accNumber``
        starts with ``profil`` followed by a dot and at least one digit.
        """
        if self._pfamAccNumber:
            # re.escape keeps the identifier literal; the raw string avoids
            # the invalid "\." and "\d" escapes of a plain string literal.
            versionedAccNumber = re.compile(re.escape(profil) + r"\.\d+")
            return any(
                versionedAccNumber.match(entry.accNumber)
                for entry in pfamDBList.getList()
            )
        return any(entry.name == profil for entry in pfamDBList.getList())

    def _generateProfilesList(self):
        """Return the raw lines (line endings included) of the profiles file."""
        # The context manager closes the file even if reading fails.
        with open(self._profilesToAdd) as profilesFile:
            return profilesFile.readlines()

    def generateNotExistingProfilesList(self):
        """Return the listed profiles that are not in the profiles databank.

        Trailing whitespace is removed from every line of the profiles file
        and lines that are empty afterwards are ignored.  The result is also
        stored in ``self.profilesToAddNotInDB``.

        If the databank contains no profile at all, an empty list is
        returned and no listed profile is reported as missing.
        """
        self.profilesToAddNotInDB = []
        profilesToAddList = self._generateProfilesList()
        pfamDBList = self.profilesDBUtils.read(self._profilesDBFile)
        # NOTE(review): an empty databank reports nothing as missing; this
        # is the historical behaviour -- confirm it is intended.
        if not pfamDBList.getList():
            return self.profilesToAddNotInDB
        for line in profilesToAddList:
            profil = line.rstrip()
            if not profil:
                # Blank, whitespace-only or "\r\n" lines are not profiles;
                # letting "" through would produce a catch-all "*" query.
                continue
            sys.stdout.flush()
            if not self._IsProfilInDB(pfamDBList, profil):
                self.profilesToAddNotInDB.append(profil)
        return self.profilesToAddNotInDB

    def generateGetzCmdProfilesList(self, profilesList):
        """Return the getz command retrieving every profile of ``profilesList``.

        In name mode each profile is queried on ``pfamhmm-Id`` with a
        trailing ``*`` wildcard; in accession-number mode it is queried on
        ``pfamhmm-AccNumber`` without wildcard.  The individual queries are
        combined with ``|``.  An empty ``profilesList`` gives ``getz -e ''``.
        """
        if self._pfamAccNumber:
            queries = [
                "[pfamhmm-AccNumber:\"" + profileAccNumber + "\"]"
                for profileAccNumber in profilesList
            ]
        else:
            queries = [
                "[pfamhmm-Id:\"" + profileName + "*\"]"
                for profileName in profilesList
            ]
        # Joining avoids trimming a trailing separator, which used to eat
        # into the "getz -e '" prefix when the list was empty.
        return "getz -e '" + " | ".join(queries) + "'"

    def CmdToCompleteProfileDB(self):
        """Return the getz command for the listed profiles missing from the DB."""
        profilesList2Add = self.generateNotExistingProfilesList()
        return self.generateGetzCmdProfilesList(profilesList2Add)