comparison commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils
2 import re
3 import sys
4
5
class CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber(object):
    '''
    Check if profiles from a profiles list are present in a profiles DB and,
    if not, generate the getz command to add them.

    By default the profiles list is a list of profile names; call
    setPfamAccNumberKeys() to treat the entries as accession numbers instead.
    setProfilesDBFile() and setProfilesToAdd() must be called before
    generateNotExistingProfilesList() or CmdToCompleteProfileDB().
    '''

    def __init__(self):
        '''
        Constructor
        '''
        # Reader used to load the profiles DB file (see generateNotExistingProfilesList).
        self.profilesDBUtils = ProfilesDatabankUtils()
        # Result of the last generateNotExistingProfilesList() call.
        self.profilesToAddNotInDB = []
        # False: list entries are profile names; True: accession numbers.
        self._pfamAccNumber = False

    def setProfilesDBFile(self, profilesDBFile):
        '''
        Set the path of the profiles DB file to check against.
        '''
        self._profilesDBFile = profilesDBFile

    def setProfilesToAdd(self, profilesFile):
        '''
        Set the path of the file listing the profiles to look for, one per line.
        '''
        self._profilesToAdd = profilesFile

    def setPfamAccNumberKeys(self):
        '''
        Treat the entries of the profiles list as accession numbers instead of names.
        '''
        self._pfamAccNumber = True

    def _IsProfilInDB(self, pfamDBList, profil):
        '''
        Return True if profil matches one of the entries of pfamDBList.getList().

        In name mode an entry matches when its name equals profil. In accession
        number mode an entry matches when its accNumber starts with profil
        followed by a dot and at least one digit.
        '''
        if not self._pfamAccNumber:
            return any(entry.name == profil for entry in pfamDBList.getList())
        # profil is escaped so that it is compared literally (a "." in it must
        # not act as a wildcard); the pattern is built once, outside the loop.
        accNumberPattern = re.compile(re.escape(profil) + r"\.\d+")
        return any(accNumberPattern.match(entry.accNumber) is not None
                   for entry in pfamDBList.getList())

    def _generateProfilesList(self):
        '''
        Return the raw lines (line endings included) of the profiles list file.
        '''
        # The context manager closes the file even if reading fails.
        with open(self._profilesToAdd) as profilesFile:
            return profilesFile.readlines()

    def generateNotExistingProfilesList(self):
        '''
        generate the profiles list of profiles not in profiles DB among profiles in a list of name or accession number

        The result is also stored in self.profilesToAddNotInDB. Blank and
        whitespace-only lines of the profiles list are ignored.
        '''
        self.profilesToAddNotInDB = []
        profilesToAddList = self._generateProfilesList()
        pfamDBList = self.profilesDBUtils.read(self._profilesDBFile)
        # NOTE(review): when the DB list is empty nothing is reported as
        # missing (behaviour kept from the original code) - confirm this is
        # intended rather than reporting every listed profile.
        if pfamDBList.getList() != []:
            for line in profilesToAddList:
                profil = line.rstrip()
                if not profil:
                    # An empty entry would otherwise end up in the getz query
                    # as a bare wildcard in name mode.
                    continue
                if not self._IsProfilInDB(pfamDBList, profil):
                    self.profilesToAddNotInDB.append(profil)
        return self.profilesToAddNotInDB

    def generateGetzCmdProfilesList(self, profilesList):
        '''
        generate the getz command to retrieve profiles list of name or accession number

        Queries are joined with " | ". An empty profilesList yields a command
        with an empty query expression ("getz -e ''").
        '''
        # NOTE(review): entries are inserted verbatim into the command string;
        # quote characters in an entry are not escaped.
        if not self._pfamAccNumber:
            queries = ["[pfamhmm-Id:\"" + profileName + "*\"]"
                       for profileName in profilesList]
        else:
            queries = ["[pfamhmm-AccNumber:\"" + profileAccNumber + "\"]"
                       for profileAccNumber in profilesList]
        return "getz -e '" + " | ".join(queries) + "'"

    def CmdToCompleteProfileDB(self):
        '''
        generate the getz command to retrieve profiles list of name or accession number if the profile is not yet in profiles DB
        '''
        profilesList2Add = self.generateNotExistingProfilesList()
        return self.generateGetzCmdProfilesList(profilesList2Add)