comparison commons/pyRepetUnit/blastnForClassifierStep1/RepbaseBLRnForClassifierStep1.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 """
2 Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table.
3 """
4
5 import os
6 import ConfigParser
7 from commons.core.utils.FileUtils import FileUtils
8 from commons.core.LoggerFactory import LoggerFactory
9
# Prefix of the logger name: the class name is appended to it ("<LOG_DEPTH>.<ClassName>") when the logger is created.
LOG_DEPTH = "repet.tools"
11
class RepbaseBLRnForClassifierStep1( object ):
    """
    Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table.

    Constructor parameters:

    @param inFileName: name of the input fasta file
    @type inFileName: string

    @param launch_1: generic command at the beginning of a specific command
    @type launch_1: string

    @param launch_2: generic command at the end of a specific command
    @type launch_2: string

    @param cDir: current directory (where to retrieve the result files)
    @type cDir: string

    @param tmpDir: temporary directory (where the job will run)
    @type tmpDir: string

    @param configFileName: configuration file name
    @type configFileName: string

    @param verbose: verbose(0/1/2)
    @type verbose: int

    @param pL: program launcher
    @type pL: programLauncher Instance

    @param project: project name
    @type project: string
    """

    def __init__(self, inFileName, launch_1, launch_2, cDir, tmpDir, configFileName, verbose, pL, project):
        """
        Constructor

        Store the parameters, parse the configuration file and read the bank
        name from option 'TE_nucl_bank' of section 'detect_features'.
        """
        self._inFileName = inFileName
        self._launch_1 = launch_1
        self._launch_2 = launch_2
        self._cDir = cDir
        self._tmpDir = tmpDir
        self._verbose = verbose
        self._pL = pL
        self._project = project
        self._fileUtils = FileUtils()
        self._config = ConfigParser.ConfigParser()
        self._configFileName = configFileName
        # Close the configuration file as soon as it is parsed instead of
        # leaving the handle open until garbage collection.
        with open(self._configFileName) as configFile:
            self._config.readfp(configFile)
        self._bank = self._config.get("detect_features", "TE_nucl_bank")
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbose)

    def _useWublast(self):
        """
        Tell whether option 'wublast' of section 'detect_features' is set to 'yes'.

        @rtype: boolean
        """
        return self._config.get("detect_features", "wublast") == "yes"

    def _moveIfAbsentSnippet(self, fileName):
        """
        Build the two generated lines that move 'fileName' into the current
        directory unless a file of that name is already there.

        @param fileName: name of the file to move
        @type fileName: string

        @rtype: string
        """
        snippet = "if not os.path.exists( \"%s/%s\" ):\n" % (self._cDir, fileName)
        snippet += "\tos.system( \"mv %s %s\" )\n" % (fileName, self._cDir)
        return snippet

    @staticmethod
    def _removeIfPresentSnippet(fileName):
        """
        Build the two generated lines that remove 'fileName' if it exists.

        @param fileName: name of the file to remove
        @type fileName: string

        @rtype: string
        """
        snippet = "if os.path.exists( \"%s\" ):\n" % (fileName)
        snippet += "\tos.remove( \"%s\" )\n" % (fileName)
        return snippet

    def formatRepbase_ntIfNecessary( self ):
        """
        Format Repbase (make 'cut' files).

        Nothing is done when '<bank>_cut' already exists.
        """
        if os.path.exists("%s_cut" % (self._bank)):
            return
        self._log.debug("prepare bank '%s'..." % ( self._bank ))
        prg = os.environ["REPET_PATH"] + "/bin/blaster"
        cmd = prg
        cmd += " -s %s" % (self._bank)
        cmd += " -n blastn"
        if self._useWublast():
            cmd += " -W"
        cmd += " -r"
        cmd += " -P"
        self._pL.launch(prg, cmd)
        # Remove the '<bank>-blastn-*.param' files once the bank is prepared.
        os.system("rm -f %s-blastn-*.param" % (self._bank))

    def createCmdToLaunch( self ):
        """
        Build the text of the job: a 'blaster' command, generated lines that
        keep the '.param' file and delete the intermediate files, a 'matcher'
        command, and generated lines that keep the '.path' and '.param' results
        and delete the other matcher outputs. Each command is wrapped between
        'launch_1' and 'launch_2'. When the temporary directory differs from
        the current one, lines removing the local bank file are appended.

        @return: all the commands to run the job
        @rtype: string
        """
        binDir = os.environ["REPET_PATH"] + "/bin/"
        bankInCDir = "%s/%s" % (self._cDir, self._bank)
        # Common prefix of every file produced for this input/bank pair.
        blrnPrefix = "%s_BLRn_%s" % (self._inFileName, self._bank)
        alignFile = "%s.align" % (blrnPrefix)

        # --- blaster ---
        parts = [self._launch_1, binDir, "blaster"]
        parts.append(" -q %s" % (self._inFileName))
        parts.append(" -s %s" % (bankInCDir))
        parts.append(" -B %s" % (blrnPrefix))
        parts.append(" -n blastn")
        if self._useWublast():
            parts.append(" -W")
        parts.append(" -r")
        parts.append(" -v 1")
        parts.append(self._launch_2)

        parts.append(self._moveIfAbsentSnippet("%s.param" % (blrnPrefix)))
        # The 'cut' files are removed through a shell glob, hence os.system.
        parts.append("if os.path.exists( \"%s_cut\" ):\n" % (self._inFileName))
        parts.append("\tos.system( \"rm -f %s_cut*\" )\n" % (self._inFileName))
        parts.append(self._removeIfPresentSnippet("%s.Nstretch.map" % (self._inFileName)))
        parts.append(self._removeIfPresentSnippet("%s.raw" % (blrnPrefix)))
        parts.append(self._removeIfPresentSnippet("%s.seq_treated" % (blrnPrefix)))

        # --- matcher ---
        parts.extend([self._launch_1, binDir, "matcher"])
        parts.append(" -m %s" % (alignFile))
        parts.append(" -q %s" % (self._inFileName))
        parts.append(" -s %s" % (bankInCDir))
        parts.append(" -j")
        parts.append(" -v 1")
        parts.append(self._launch_2)

        parts.append(self._moveIfAbsentSnippet("%s.clean_match.path" % (alignFile)))
        parts.append(self._moveIfAbsentSnippet("%s.clean_match.param" % (alignFile)))
        parts.append(self._removeIfPresentSnippet(alignFile))
        for suffix in ("fa", "map", "tab"):
            parts.append(self._removeIfPresentSnippet("%s.clean_match.%s" % (alignFile, suffix)))

        if self._tmpDir != self._cDir:
            parts.append(self._removeIfPresentSnippet(self._bank))

        return "".join(parts)

    def collectRepbaseBLRn( self ):
        """
        Concatenate the outputs of blastn, adapt the ID and load the results into a table.
        """
        # Only the file name of the bank is used in the result file names.
        bank = os.path.basename(self._bank)
        self._concatPathFile(bank)
        self._adaptIDInPathFile(bank)
        self._loadPathFileInTable(bank)
        self._findAndRemoveUselessFiles(bank)

    def _concatPathFile(self, bank):
        """
        Concatenate the '../batch_*.fa_BLRn_<bank>.align.clean_match.path' files
        into '<project>_BLRn_<bank>.align.clean_match.path.tmp'.

        @param bank: file name of the bank (without directory)
        @type bank: string
        """
        FileUtils.catFilesByPattern("../batch_*.fa_BLRn_%s.align.clean_match.path" % bank,
                                    "%s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank))

    def _adaptIDInPathFile(self, bank):
        """
        Launch 'pathnum2id' on the concatenated '.path.tmp' file to write the
        final '.path' file. The program 'pathnum2id' is used (with a verbosity
        option) when it exists in $REPET_PATH/bin, otherwise 'pathnum2id.py'
        is used (without verbosity option).

        @param bank: file name of the bank (without directory)
        @type bank: string
        """
        pathFile = "%s_BLRn_%s.align.clean_match.path" % (self._project, bank)
        ioArgs = " -i %s.tmp" % (pathFile)
        ioArgs += " -o %s" % (pathFile)

        prg = os.environ["REPET_PATH"] + "/bin/pathnum2id"
        if os.path.exists(prg):
            cmd = prg + ioArgs
            cmd += " -v %i" % (self._verbose - 1)
        else:
            prg += ".py"
            cmd = prg + ioArgs
        self._pL.launch(prg, cmd)

    def _loadPathFileInTable(self, bank):
        """
        Launch 'srptCreateTable.py' to load the '.path' file into table
        '<project>_TE_BLRn_path', using the configuration file looked up in
        the parent directory.

        @param bank: file name of the bank (without directory)
        @type bank: string
        """
        prg = os.environ["REPET_PATH"] + "/bin/srptCreateTable.py"
        cmd = prg
        cmd += " -f %s_BLRn_%s.align.clean_match.path" % (self._project, bank)
        cmd += " -n %s_TE_BLRn_path" % (self._project)
        cmd += " -t path"
        cmd += " -c ../%s" % (self._configFileName)
        self._pL.launch(prg, cmd)

    def _findAndRemoveUselessFiles(self, bank):
        """
        Remove the per-batch result files found from the parent directory and
        the temporary concatenated '.path.tmp' file.

        @param bank: file name of the bank (without directory)
        @type bank: string
        """
        prg = "find"
        cmd = prg
        # The backslash is escaped explicitly: "\;" is an invalid escape sequence
        # in a non-raw string literal although it yields the same characters.
        cmd += " .. -name \"batch_*.fa_BLRn_%s.*\" -exec rm {} \\;" % (bank)
        self._pL.launch(prg, cmd)
        prg = "rm"
        cmd = prg
        cmd += " %s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank)
        self._pL.launch(prg, cmd)