diff commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,79 @@
+from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils
+import re
+import sys
+
+
+class CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber(object):
+    '''
+    Check if profiles from a profiles list are present in profiles DB, if not you can generate the getz command to add them.
+    By default the profiles list is a list of profiles names    
+    '''
+
+    def __init__(self):
+        '''
+        Constructor
+        '''
+        self.profilesDBUtils = ProfilesDatabankUtils()
+        self.profilesToAddNotInDB = []
+        self._pfamAccNumber = False
+
+    def setProfilesDBFile ( self, profilesDBFile ):
+        self._profilesDBFile = profilesDBFile
+        
+    def setProfilesToAdd ( self, profilesFile ):
+        self._profilesToAdd = profilesFile
+        
+    def setPfamAccNumberKeys ( self ):
+        self._pfamAccNumber = True
+
+    def _IsProfilInDB(self, pfamDBList, profil):
+        IsProfilInDB = False
+        for profilInstance in pfamDBList.getList():
+            if (self._pfamAccNumber == False and profilInstance.name == profil) or (self._pfamAccNumber == True and re.match(profil + "\.\d+", profilInstance.accNumber)):
+                IsProfilInDB = True
+                break 
+        return IsProfilInDB
+    
+    def _generateProfilesList(self):
+        f = open(self._profilesToAdd)
+        profilesToAddList = f.readlines()
+        return profilesToAddList
+
+    def generateNotExistingProfilesList ( self ):
+        '''
+        generate the profiles list of profiles not in profiles DB among profiles in a list of name or accession number
+        '''        
+        self.profilesToAddNotInDB = []
+        profilesToAddList = self._generateProfilesList()
+        pfamDBList = self.profilesDBUtils.read( self._profilesDBFile )
+        if pfamDBList.getList( ) != []:           
+            for profil in profilesToAddList: 
+                if profil != "\n":
+                    sys.stdout.flush()
+                    profil = profil.rstrip( )
+                    IsProfilInDB = self._IsProfilInDB(pfamDBList, profil)
+                    if ( IsProfilInDB == False):
+                        self.profilesToAddNotInDB.append( profil )
+        return ( self.profilesToAddNotInDB )    
+      
+    def generateGetzCmdProfilesList ( self, profilesList ):
+        '''
+        generate the getz command to retrieve profiles list of name or accession number
+        ''' 
+        getzCmd = "getz -e \'"
+        if (self._pfamAccNumber == False):
+            for profileName in profilesList:
+                getzCmd += "[pfamhmm-Id:\"" + profileName + "*\"] | "
+        else:
+            for profileAccNumber in profilesList:
+                getzCmd += "[pfamhmm-AccNumber:\"" + profileAccNumber + "\"] | "
+        getzCmd = getzCmd[ 0:len( getzCmd )-3 ]
+        getzCmd += "\'"
+        return getzCmd
+    
+    def CmdToCompleteProfileDB (self):
+        '''
+        generate the getz command to retrieve profiles list of name or accession number if the profile is not yet in profiles DB
+        ''' 
+        profilesList2Add = self.generateNotExistingProfilesList()
+        return self.generateGetzCmdProfilesList ( profilesList2Add )