18
|
1 from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils
|
|
2 import re
|
|
3 import sys
|
|
4
|
|
5
|
|
class CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber(object):
    '''
    Check whether the profiles listed in a file are present in a profiles DB
    and, for those that are missing, build the getz command that retrieves them.

    By default the entries of the list are profile names; call
    setPfamAccNumberKeys() to treat them as Pfam accession numbers instead.
    '''

    def __init__(self):
        '''
        Constructor
        '''
        # Reader used to load the profiles databank file.
        self.profilesDBUtils = ProfilesDatabankUtils()
        # Result of the last generateNotExistingProfilesList() call.
        self.profilesToAddNotInDB = []
        # False: entries are profile names; True: entries are accession numbers.
        self._pfamAccNumber = False

    def setProfilesDBFile(self, profilesDBFile):
        '''
        Set the path of the profiles databank file to check against.
        '''
        self._profilesDBFile = profilesDBFile

    def setProfilesToAdd(self, profilesFile):
        '''
        Set the path of the file listing the profiles to look for (one per line).
        '''
        self._profilesToAdd = profilesFile

    def setPfamAccNumberKeys(self):
        '''
        Switch to accession-number mode: listed entries are compared with the
        accNumber of the databank profiles instead of their name.
        '''
        self._pfamAccNumber = True

    def _IsProfilInDB(self, pfamDBList, profil):
        '''
        Tell whether profil is present in pfamDBList.

        In name mode the profile name must be equal to profil. In accession
        number mode the databank accession number must start with profil
        followed by a dot and a version number (e.g. "PF00001" matches
        "PF00001.12").

        @param pfamDBList: object whose getList() returns the databank profiles
        @param profil: profile name or accession number (without version)
        @return: True if a matching profile exists, False otherwise
        '''
        if self._pfamAccNumber:
            # Escape the accession number so that it is matched literally and
            # use a raw string for the version suffix.
            versionedAccNumber = re.compile(re.escape(profil) + r"\.\d+")
            return any(versionedAccNumber.match(profilInstance.accNumber)
                       for profilInstance in pfamDBList.getList())
        return any(profilInstance.name == profil
                   for profilInstance in pfamDBList.getList())

    def _generateProfilesList(self):
        '''
        Read the file of profiles to add.

        @return: list of the raw lines of the file (line endings included)
        '''
        # The context manager closes the file even if reading fails.
        with open(self._profilesToAdd) as profilesFile:
            return profilesFile.readlines()

    def generateNotExistingProfilesList(self):
        '''
        generate the profiles list of profiles not in profiles DB among profiles in a list of name or accession number

        The result is also stored in self.profilesToAddNotInDB. When the
        databank holds no profile at all the result stays empty, i.e. nothing
        is reported as missing.

        @return: list of the listed names or accession numbers not found in DB
        '''
        self.profilesToAddNotInDB = []
        profilesToAddList = self._generateProfilesList()
        pfamDBList = self.profilesDBUtils.read(self._profilesDBFile)
        if pfamDBList.getList() == []:
            return self.profilesToAddNotInDB
        for line in profilesToAddList:
            profil = line.strip()
            # Skip blank and whitespace-only lines: an empty key would
            # otherwise be reported as a missing profile.
            if not profil:
                continue
            if not self._IsProfilInDB(pfamDBList, profil):
                self.profilesToAddNotInDB.append(profil)
        return self.profilesToAddNotInDB

    def generateGetzCmdProfilesList(self, profilesList):
        '''
        generate the getz command to retrieve profiles list of name or accession number

        Names are queried on pfamhmm-Id with a trailing wildcard, accession
        numbers on pfamhmm-AccNumber; the queries are combined with " | ".
        Entries are inserted verbatim, no shell quoting is applied to them.

        @param profilesList: list of profile names or accession numbers
        @return: the getz command line as a string; the query is empty
                 ("getz -e ''") when profilesList is empty
        '''
        if self._pfamAccNumber:
            queries = ["[pfamhmm-AccNumber:\"" + profileAccNumber + "\"]"
                       for profileAccNumber in profilesList]
        else:
            queries = ["[pfamhmm-Id:\"" + profileName + "*\"]"
                       for profileName in profilesList]
        # join() puts the separator only between queries, so nothing has to be
        # trimmed afterwards and an empty list cannot damage the command prefix.
        return "getz -e '" + " | ".join(queries) + "'"

    def CmdToCompleteProfileDB(self):
        '''
        generate the getz command to retrieve profiles list of name or accession number if the profile is not yet in profiles DB

        @return: the getz command line as a string
        '''
        profilesList2Add = self.generateNotExistingProfilesList()
        return self.generateGetzCmdProfilesList(profilesList2Add)
|