Mercurial > repos > yufei-luo > s_mart
diff commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line diff
# Reconstructed from the changeset diff that adds
# commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py
# (Mon Apr 29 03:20:15 2013 -0400).
from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils
import re
import sys


class CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber(object):
    '''
    Check if the profiles listed in a file are present in a profiles DB and,
    for those that are not, generate the getz command that retrieves them.

    By default the entries of the list file are compared with profile names;
    after setPfamAccNumberKeys() they are compared with accession numbers.
    '''

    def __init__(self):
        '''
        Constructor: name mode is the default, no file is configured yet.
        '''
        self.profilesDBUtils = ProfilesDatabankUtils()
        self.profilesToAddNotInDB = []
        self._pfamAccNumber = False

    def setProfilesDBFile(self, profilesDBFile):
        '''
        Set the profiles DB file handed to ProfilesDatabankUtils.read().
        '''
        self._profilesDBFile = profilesDBFile

    def setProfilesToAdd(self, profilesFile):
        '''
        Set the path of the file listing one profile per line.
        '''
        self._profilesToAdd = profilesFile

    def setPfamAccNumberKeys(self):
        '''
        Switch from profile names to accession numbers as lookup keys.
        '''
        self._pfamAccNumber = True

    def _IsProfilInDB(self, pfamDBList, profil):
        '''
        Return True if profil matches one entry of pfamDBList.getList().

        In name mode an entry matches when its name equals profil.
        In accession mode an entry matches when its accNumber starts with
        profil followed by a dot and at least one digit.
        '''
        if self._pfamAccNumber:
            # profil is matched literally: escape it so that characters such
            # as "." or "+" in the list file are not interpreted as regex.
            accPattern = re.compile(re.escape(profil) + r"\.\d+")
            return any(accPattern.match(entry.accNumber)
                       for entry in pfamDBList.getList())
        return any(entry.name == profil for entry in pfamDBList.getList())

    def _generateProfilesList(self):
        '''
        Return the raw lines (line endings included) of the profiles list file.
        '''
        # The context manager closes the file even if reading fails.
        with open(self._profilesToAdd) as profilesFile:
            return profilesFile.readlines()

    def generateNotExistingProfilesList(self):
        '''
        generate the profiles list of profiles not in profiles DB among profiles in a list of name or accession number

        The result is also stored in self.profilesToAddNotInDB.
        Lines that are empty once trailing whitespace is removed are ignored.
        '''
        self.profilesToAddNotInDB = []
        profilesToAddList = self._generateProfilesList()
        pfamDBList = self.profilesDBUtils.read(self._profilesDBFile)
        # NOTE(review): when the DB list is empty nothing is reported as
        # missing -- confirm this is intended and not an oversight.
        if pfamDBList.getList() != []:
            for line in profilesToAddList:
                profil = line.rstrip()
                if not profil:
                    # blank line ("\n", "\r\n", spaces only): not a profile
                    continue
                # NOTE(review): nothing is written to stdout in this loop;
                # presumably a leftover -- confirm before removing.
                sys.stdout.flush()
                if not self._IsProfilInDB(pfamDBList, profil):
                    self.profilesToAddNotInDB.append(profil)
        return self.profilesToAddNotInDB

    def generateGetzCmdProfilesList(self, profilesList):
        '''
        generate the getz command to retrieve profiles list of name or accession number

        Each profile becomes one bracketed query term; terms are separated
        by " | " and the whole query is wrapped in single quotes.
        An empty profilesList gives "getz -e ''".
        '''
        if self._pfamAccNumber:
            queries = ["[pfamhmm-AccNumber:\"" + profileAccNumber + "\"]"
                       for profileAccNumber in profilesList]
        else:
            queries = ["[pfamhmm-Id:\"" + profileName + "*\"]"
                       for profileName in profilesList]
        # join() puts the separator only between terms, so there is no
        # trailing " | " to cut off (cutting it corrupted the command when
        # the list was empty).
        return "getz -e '" + " | ".join(queries) + "'"

    def CmdToCompleteProfileDB(self):
        '''
        generate the getz command to retrieve profiles list of name or accession number if the profile is not yet in profiles DB
        '''
        profilesList2Add = self.generateNotExistingProfilesList()
        return self.generateGetzCmdProfilesList(profilesList2Add)