Mercurial > repos > yufei-luo > s_mart
comparison commons/core/launcher/Launcher2.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
comparison
equal
deleted
inserted
replaced
30:5677346472b5 | 31:0ab839023fe4 |
---|---|
1 from commons.tools.CleanClusterNodesAfterRepet import CleanClusterNodesAfterRepet | |
2 from commons.core.stat.Stat import Stat | |
3 from commons.core.launcher.WriteScript import WriteScript | |
4 from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory | |
5 from commons.core.sql.Job import Job | |
6 import stat | |
7 import os | |
8 import re | |
9 import sys | |
10 import time | |
11 import glob | |
12 | |
13 class LauncherParameter(object): | |
14 | |
15 def __init__(self, jobDB): | |
16 self._jobDB = jobDB | |
17 | |
18 def getJobDB(self): | |
19 return self._jobDB | |
20 | |
21 def setQuery(self, query): | |
22 self._query = query | |
23 | |
24 def setSubject(self, subject): | |
25 self._subject = subject | |
26 | |
27 def setParam(self, param): | |
28 self._param = param | |
29 | |
30 def setCurrentDir(self, currentDir): | |
31 self._currentDir = currentDir | |
32 | |
33 def getCurrentDir(self): | |
34 return self._currentDir | |
35 | |
36 def setTempDir(self, tempDir): | |
37 self._tempDir = tempDir | |
38 | |
39 def getTempDir(self): | |
40 return self._tempDir | |
41 | |
42 def setJobTable(self, jobTable): | |
43 self._jobTable = jobTable | |
44 | |
45 def setQueue(self, queue): | |
46 self._queue = queue | |
47 | |
48 def getQueue(self): | |
49 return self._queue | |
50 | |
51 def setGroupId(self, groupId): | |
52 self._groupId = groupId | |
53 | |
54 def getGroupId(self): | |
55 return self._groupId | |
56 | |
57 def setAcronym(self, acronym): | |
58 self._acronym = acronym | |
59 | |
60 def getAcronym(self): | |
61 return self._acronym | |
62 | |
63 @staticmethod | |
64 def createParameter(jobdb, groupid, acronym): | |
65 launcherParameter = LauncherParameter(jobdb) | |
66 launcherParameter.setQuery(os.getcwd()) | |
67 launcherParameter.setSubject("") | |
68 launcherParameter.setParam("") | |
69 launcherParameter.setCurrentDir(os.getcwd()) | |
70 launcherParameter.setTempDir(os.getcwd()) | |
71 launcherParameter.setJobTable("") | |
72 launcherParameter.setQueue("") | |
73 launcherParameter.setGroupId(groupid) | |
74 launcherParameter.setAcronym(acronym) | |
75 return launcherParameter | |
76 | |
77 | |
78 class Launcher2(object): | |
79 | |
80 #TODO: remove unused parameters : query="", subject="", param="", job_table="" | |
81 def __init__(self, iLauncherParameter): | |
82 jobdb = iLauncherParameter.getJobDB() | |
83 cdir = iLauncherParameter.getCurrentDir() | |
84 if jobdb.__class__.__name__ == "RepetJob": | |
85 self.jobdb = TableJobAdaptatorFactory.createInstance(jobdb, "jobs") | |
86 else: | |
87 self.jobdb = jobdb | |
88 self.jobdb.checkJobTable() | |
89 if cdir == "": | |
90 cdir = os.getcwd() | |
91 self.cdir = cdir | |
92 self.tmpdir = iLauncherParameter.getTempDir() | |
93 self.groupid = iLauncherParameter.getGroupId() | |
94 self.acronyme = iLauncherParameter.getAcronym() | |
95 self._chooseTemplateWithCopy = False | |
96 self._chooseTemplateLight = False | |
97 self.queue, self.lResources = self.getQueueNameAndResources(iLauncherParameter.getQueue()) | |
98 self._createJobInstance() | |
99 self._nbJobs = 0 | |
100 | |
101 def getQueueNameAndResources(self, configQueue): | |
102 tokens = configQueue.replace("'","").split(" ") | |
103 queueName = "" | |
104 lResources = [] | |
105 if tokens[0] != "": | |
106 if re.match(".*\.q", tokens[0]): | |
107 queueName = tokens[0] | |
108 lResources = tokens[1:] | |
109 else: | |
110 lResources = tokens | |
111 return queueName, lResources | |
112 | |
113 def createGroupidIfItNotExist(self): | |
114 if self.groupid == "": | |
115 self.job.groupid = str(os.getpid()) | |
116 else: | |
117 self.job.groupid = self.groupid | |
118 | |
119 def beginRun( self ): | |
120 self.createGroupidIfItNotExist() | |
121 if self.jobdb.hasUnfinishedJob(self.job.groupid): | |
122 self.jobdb.waitJobGroup(self.job.groupid) | |
123 else: | |
124 self.jobdb.cleanJobGroup(self.job.groupid) | |
125 | |
126 ## Launch one job in parallel | |
127 # | |
128 # @param cmdStart string command-line for the job to be launched | |
129 # @param cmdFinish string command to retrieve result files | |
130 # @warning the jobname has to be defined outside from this method | |
131 # | |
132 def runSingleJob(self, cmdStart, cmdFinish = "", cmdSize = "", cmdCopy = ""): | |
133 if self._nbJobs == 0: | |
134 self._nbJobs = 1 | |
135 pid = str(os.getpid()) | |
136 now = time.localtime() | |
137 #TODO: rename ClusterLauncher_ ... | |
138 pyFileName = self.cdir + "/ClusterLauncher_" + self.job.groupid + "_" +\ | |
139 self.job.jobname + "_" + str(now[0]) + "-" + str(now[1]) +\ | |
140 "-" + str(now[2]) + "_" + pid + ".py" | |
141 self.job.launcher = pyFileName | |
142 | |
143 #TODO: to remove when refactoring is done | |
144 cmdStart = self._indentCmd(cmdStart) | |
145 cmdFinish = self._indentCmd(cmdFinish) | |
146 | |
147 iWriteScript = WriteScript(self.job, self.jobdb, self.cdir, self.tmpdir, self._chooseTemplateWithCopy, self._chooseTemplateLight) | |
148 iWriteScript.run(cmdStart, cmdFinish, pyFileName, cmdSize, cmdCopy) | |
149 os.chmod(pyFileName, stat.S_IRWXU+stat.S_IRGRP+stat.S_IXGRP+stat.S_IROTH+stat.S_IXOTH) | |
150 sys.stdout.flush() | |
151 log = self.jobdb.submitJob(self.job) | |
152 if log != 0: | |
153 print "ERROR while submitting job to the cluster" | |
154 sys.exit(1) | |
155 | |
156 def endRun(self, cleanNodes = False): | |
157 string = "waiting for %i job(s) with groupid '%s' (%s)" % (self._nbJobs, self.job.groupid, time.strftime("%Y-%m-%d %H:%M:%S")) | |
158 print string; sys.stdout.flush() | |
159 self.jobdb.waitJobGroup(self.job.groupid) | |
160 if self._nbJobs > 1: | |
161 string = "all jobs with groupid '%s' are finished (%s)" % (self.job.groupid, time.strftime("%Y-%m-%d %H:%M:%S")) | |
162 print string; sys.stdout.flush() | |
163 | |
164 if cleanNodes: | |
165 string = "start cleaning cluster nodes (%s)" % time.strftime("%Y-%m-%d %H:%M:%S") | |
166 print string; sys.stdout.flush() | |
167 self.cleanNodes() | |
168 string = "end cleaning cluster nodes (%s)" % time.strftime("%Y-%m-%d %H:%M:%S") | |
169 print string; sys.stdout.flush() | |
170 | |
171 statsExecutionTime = self.getStatsOfExecutionTime() | |
172 if self._nbJobs > 1: | |
173 print "execution time of all jobs (seconds): %f" % statsExecutionTime.getSum() | |
174 print "execution time per job: %s" % statsExecutionTime.string() | |
175 sys.stdout.flush() | |
176 self.jobdb.cleanJobGroup(self.job.groupid) | |
177 | |
178 def getStatsOfExecutionTime(self, acronyme = ""): | |
179 stat = Stat() | |
180 if acronyme == "": | |
181 pattern = "%s*.o*" % self.acronyme | |
182 else: | |
183 pattern = "%s*.o*" % acronyme | |
184 lJobFiles = glob.glob(pattern) | |
185 for f in lJobFiles: | |
186 fH = open(f, "r") | |
187 while True: | |
188 line = fH.readline() | |
189 if line == "": | |
190 break | |
191 if "executionTime" in line: | |
192 stat.add( float(line[:-1].split("=")[1] ) ) | |
193 break | |
194 fH.close() | |
195 return stat | |
196 | |
197 def clean( self, acronyme = "", stdout = True, stderr = True ): | |
198 lFileToRemove = [] | |
199 if acronyme == "": | |
200 acronyme = self.acronyme | |
201 pattern = "ClusterLauncher*%s*.py" % ( acronyme ) | |
202 lFileToRemove.extend(glob.glob( pattern )) | |
203 if stdout: | |
204 pattern = "%s*.o*" % ( acronyme ) | |
205 lFileToRemove.extend(glob.glob( pattern )) | |
206 if stderr: | |
207 pattern = "%s*.e*" % ( acronyme ) | |
208 lFileToRemove.extend(glob.glob( pattern )) | |
209 for file in lFileToRemove: | |
210 os.remove(file) | |
211 | |
212 #TODO: handle of nodesMustBeCleaned => class attribute ? | |
213 def runLauncherForMultipleJobs(self, acronymPrefix, lCmdsTuples, cleanMustBeDone = True, nodesMustBeCleaned = False): | |
214 self.beginRun() | |
215 print "submitting job(s) with groupid '%s' (%s)" % (self.job.groupid, time.strftime("%Y-%m-%d %H:%M:%S")) | |
216 for cmdsTuple in lCmdsTuples: | |
217 self._nbJobs += 1 | |
218 self.acronyme = "%s_%s" % (acronymPrefix, self._nbJobs) | |
219 self.job.jobname = self.acronyme | |
220 if len(cmdsTuple) == 2: | |
221 self.runSingleJob(cmdsTuple[0], cmdsTuple[1]) | |
222 else: | |
223 self.runSingleJob(cmdsTuple[0], cmdsTuple[1], cmdsTuple[2], cmdsTuple[3]) | |
224 self._createJobInstance() | |
225 self.createGroupidIfItNotExist() | |
226 self.acronyme = acronymPrefix | |
227 self.endRun(nodesMustBeCleaned) | |
228 if cleanMustBeDone: | |
229 self.clean("%s_" % acronymPrefix) | |
230 self.jobdb.close() | |
231 | |
232 def prepareCommands(self, lCmds, lCmdStart = [], lCmdFinish = [], lCmdSize = [], lCmdCopy = []): | |
233 cmdStart = "" | |
234 for cmd in lCmdStart: | |
235 cmdStart += "%s\n\t" % cmd | |
236 for cmd in lCmds: | |
237 cmdStart += "%s\n\t" % cmd | |
238 cmdFinish = "" | |
239 for cmd in lCmdFinish: | |
240 cmdFinish += "%s\n\t" % cmd | |
241 cmdSize = "" | |
242 for cmd in lCmdSize: | |
243 cmdSize += "%s\n\t\t" % cmd | |
244 cmdCopy = "" | |
245 for cmd in lCmdCopy: | |
246 cmdCopy += "%s\n\t\t" % cmd | |
247 return (cmdStart, cmdFinish, cmdSize, cmdCopy) | |
248 | |
249 #TODO: to remove when refactoring is done | |
250 def prepareCommands_withoutIndentation(self, lCmds, lCmdStart = [], lCmdFinish = [], lCmdSize = [], lCmdCopy = []): | |
251 cmdStart = "" | |
252 for cmd in lCmdStart: | |
253 cmdStart += "%s\n" % cmd | |
254 for cmd in lCmds: | |
255 cmdStart += "%s\n" % cmd | |
256 cmdFinish = "" | |
257 for cmd in lCmdFinish: | |
258 cmdFinish += "%s\n" % cmd | |
259 cmdSize = "" | |
260 for cmd in lCmdSize: | |
261 cmdSize += "%s\n\t\t" % cmd | |
262 cmdCopy = "" | |
263 for cmd in lCmdCopy: | |
264 cmdCopy += "%s\n\t\t" % cmd | |
265 return (cmdStart, cmdFinish, cmdSize, cmdCopy) | |
266 | |
267 def getSystemCommand(self, prg, lArgs): | |
268 systemCmd = "log = os.system(\"" + prg | |
269 for arg in lArgs: | |
270 systemCmd += " " + arg | |
271 systemCmd += "\")" | |
272 return systemCmd | |
273 | |
274 def cleanNodes(self): | |
275 iCleanClusterNodeAfterRepet = CleanClusterNodesAfterRepet() | |
276 iCleanClusterNodeAfterRepet.setLNodes(self.jobdb.getNodesListByGroupId(self.groupid)) | |
277 iCleanClusterNodeAfterRepet.setTempDirectory(self.tmpdir) | |
278 iCleanClusterNodeAfterRepet.setPattern("%s*" % self.groupid) | |
279 iCleanClusterNodeAfterRepet.run() | |
280 | |
281 #TODO: to remove when refactoring is done | |
282 def _indentCmd(self, cmd): | |
283 lCmd = cmd.split("\n") | |
284 cmd_Tab = "%s\n" % lCmd[0] | |
285 for line in lCmd[1:-1]: | |
286 cmd_Tab += "\t%s\n" % line | |
287 return cmd_Tab | |
288 | |
289 def _createJobInstance(self): | |
290 if self.lResources == []: | |
291 #To have mem_free=1G: | |
292 self.job = Job(queue=self.queue) | |
293 else: | |
294 self.job = Job(queue=self.queue, lResources=self.lResources) |