annotate commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2 import re
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
class CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber(object):
    '''
    Check whether the profiles listed in a file are present in a profiles
    databank and, for those that are not, build the getz command that
    retrieves them.

    By default the list contains profile names; call setPfamAccNumberKeys()
    to treat its entries as accession numbers instead.
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self.profilesDBUtils = ProfilesDatabankUtils()
        # Result of the last generateNotExistingProfilesList() call.
        self.profilesToAddNotInDB = []
        # False: entries are profile names; True: entries are accession numbers.
        self._pfamAccNumber = False

    def setProfilesDBFile(self, profilesDBFile):
        '''
        Set the path of the profiles databank file to check against.
        '''
        self._profilesDBFile = profilesDBFile

    def setProfilesToAdd(self, profilesFile):
        '''
        Set the path of the file listing the wanted profiles, one per line.
        '''
        self._profilesToAdd = profilesFile

    def setPfamAccNumberKeys(self):
        '''
        Interpret the wanted profiles as accession numbers rather than names.
        '''
        self._pfamAccNumber = True

    def _IsProfilInDB(self, pfamDBList, profil):
        '''
        Tell whether one wanted profile is present in the databank.

        @param pfamDBList: object whose getList() returns the databank
                           profiles (each exposing name and accNumber)
        @param profil: profile name, or accession number without version
        @return: True if a databank profile matches, False otherwise
        '''
        if not self._pfamAccNumber:
            return any(profilInstance.name == profil
                       for profilInstance in pfamDBList.getList())
        # The wanted accession carries no version while the databank one is
        # matched as "<accession>.<digits>". The accession is escaped so it is
        # compared literally, and the pattern is compiled once per lookup.
        versionedAccNumber = re.compile(re.escape(profil) + r"\.\d+")
        return any(versionedAccNumber.match(profilInstance.accNumber)
                   for profilInstance in pfamDBList.getList())

    def _generateProfilesList(self):
        '''
        Read the wanted-profiles file.

        @return: the raw lines of the file, line endings included
        '''
        # The context manager closes the handle even if reading fails.
        with open(self._profilesToAdd) as profilesFile:
            return profilesFile.readlines()

    def generateNotExistingProfilesList(self):
        '''
        Generate the list of wanted profiles (names or accession numbers)
        that are not in the profiles databank.

        @return: list of missing profiles, also stored in
                 self.profilesToAddNotInDB
        '''
        self.profilesToAddNotInDB = []
        profilesToAddList = self._generateProfilesList()
        pfamDBList = self.profilesDBUtils.read(self._profilesDBFile)
        # When the databank yields no profile at all, nothing is reported as
        # missing and the result stays empty.
        if pfamDBList.getList() != []:
            for line in profilesToAddList:
                profil = line.rstrip()
                # Skip blank and whitespace-only lines so that an empty
                # profile is never reported as missing.
                if not profil:
                    continue
                sys.stdout.flush()
                if not self._IsProfilInDB(pfamDBList, profil):
                    self.profilesToAddNotInDB.append(profil)
        return self.profilesToAddNotInDB

    def generateGetzCmdProfilesList(self, profilesList):
        '''
        Generate the getz command retrieving a list of profiles given by
        name or by accession number.

        @param profilesList: list of profile names or accession numbers
        @return: the getz command line; an empty list gives "getz -e ''"
        '''
        if self._pfamAccNumber:
            prefix, suffix = "[pfamhmm-AccNumber:\"", "\"]"
        else:
            # Names are queried with a trailing wildcard.
            prefix, suffix = "[pfamhmm-Id:\"", "*\"]"
        # Joining avoids trimming a trailing separator, which corrupted the
        # command when the list was empty.
        # NOTE(review): entries are inserted verbatim inside single quotes;
        # confirm they cannot contain quote characters before running this
        # string through a shell.
        query = " | ".join(prefix + profile + suffix for profile in profilesList)
        return "getz -e \'" + query + "\'"

    def CmdToCompleteProfileDB(self):
        '''
        Generate the getz command retrieving the wanted profiles (names or
        accession numbers) that are not yet in the profiles databank.

        @return: the getz command line
        '''
        profilesList2Add = self.generateNotExistingProfilesList()
        return self.generateGetzCmdProfilesList(profilesList2Add)