Mercurial > repos > yufei-luo > s_mart
comparison commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils | |
2 import re | |
3 import sys | |
4 | |
5 | |
class CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber(object):
    '''
    Check if profiles from a profiles list are present in profiles DB, if not you can generate the getz command to add them.
    By default the profiles list is a list of profiles names; call setPfamAccNumberKeys()
    to have it interpreted as a list of accession numbers instead.
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self.profilesDBUtils = ProfilesDatabankUtils()
        # Result of the last call to generateNotExistingProfilesList().
        self.profilesToAddNotInDB = []
        # False: profiles are identified by name; True: by accession number.
        self._pfamAccNumber = False

    def setProfilesDBFile(self, profilesDBFile):
        '''
        Set the profiles DB file that is handed to ProfilesDatabankUtils.read().
        '''
        self._profilesDBFile = profilesDBFile

    def setProfilesToAdd(self, profilesFile):
        '''
        Set the path of the text file listing the wanted profiles, one per line.
        '''
        self._profilesToAdd = profilesFile

    def setPfamAccNumberKeys(self):
        '''
        Switch to accession-number mode (the default mode uses profiles names).
        '''
        self._pfamAccNumber = True

    def _IsProfilInDB(self, pfamDBList, profil):
        '''
        Tell whether a profile is already in the profiles DB.

        @param pfamDBList: object whose getList() returns profiles exposing
                           "name" and "accNumber" attributes
        @param profil: profile name, or accession number without its version
                       suffix when in accession-number mode
        @return: True if one profile of the DB matches, False otherwise
        '''
        if self._pfamAccNumber:
            # The accession number is literal text, not a pattern: escape it so
            # that characters such as "." cannot match something else. The DB
            # entry must start with "<accNumber>.<digits>".
            accNumberPattern = re.compile(re.escape(profil) + r"\.\d+")
            return any(accNumberPattern.match(profilInstance.accNumber)
                       for profilInstance in pfamDBList.getList())
        return any(profilInstance.name == profil
                   for profilInstance in pfamDBList.getList())

    def _generateProfilesList(self):
        '''
        Read the file set with setProfilesToAdd().

        @return: the raw lines of the file, line endings included
        '''
        # "with" guarantees the file handle is closed, even if reading fails.
        with open(self._profilesToAdd) as profilesFile:
            return profilesFile.readlines()

    def generateNotExistingProfilesList(self):
        '''
        generate the profiles list of profiles not in profiles DB among profiles in a list of name or accession number

        The result is also stored in self.profilesToAddNotInDB.
        If the profiles DB contains no profile at all, the returned list is empty.

        @return: list of profiles names (or accession numbers) missing from the DB
        '''
        self.profilesToAddNotInDB = []
        profilesToAddList = self._generateProfilesList()
        pfamDBList = self.profilesDBUtils.read(self._profilesDBFile)
        if pfamDBList.getList() != []:
            for line in profilesToAddList:
                profil = line.rstrip()
                # Skip blank lines, including whitespace-only and "\r\n" ones,
                # which would otherwise be reported as a profile named "".
                if not profil:
                    continue
                if not self._IsProfilInDB(pfamDBList, profil):
                    self.profilesToAddNotInDB.append(profil)
        return self.profilesToAddNotInDB

    def generateGetzCmdProfilesList(self, profilesList):
        '''
        generate the getz command to retrieve profiles list of name or accession number

        The entries of profilesList are inserted verbatim in the command, no
        shell quoting is applied to them.

        @param profilesList: list of profiles names or accession numbers
        @return: the getz command line; "getz -e ''" when profilesList is empty
        '''
        if self._pfamAccNumber == False:
            queries = ["[pfamhmm-Id:\"" + profileName + "*\"]"
                       for profileName in profilesList]
        else:
            queries = ["[pfamhmm-AccNumber:\"" + profileAccNumber + "\"]"
                       for profileAccNumber in profilesList]
        # Joining puts the " | " separator only between queries, so an empty
        # list cannot corrupt the "getz -e '" prefix.
        return "getz -e \'" + " | ".join(queries) + "\'"

    def CmdToCompleteProfileDB(self):
        '''
        generate the getz command to retrieve profiles list of name or accession number if the profile is not yet in profiles DB

        @return: the getz command line for the profiles missing from the DB
        '''
        profilesList2Add = self.generateNotExistingProfilesList()
        return self.generateGetzCmdProfilesList(profilesList2Add)