# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software.  You can  use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and,  more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.


import os
import glob
import shutil
import sys
import re
import math

# hashlib is optional: FileUtils.getMd5SecureHash() returns an empty string
# when the module could not be loaded. Only a failed import is tolerated, so
# unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed.
try:
    import hashlib
except ImportError:
    pass


## Collection of static helpers to inspect, compare, concatenate, clean up
# and split text files.
#
class FileUtils(object):

    ## Return the number of lines in the given file
    #
    # @note a last line made only of an end-of-line symbol is not counted
    #
    # @param fileName string name of the input file
    #
    # @return int number of lines
    #
    @staticmethod
    def getNbLinesInSingleFile(fileName):
        nbLines = 0
        lastLine = None
        with open(fileName, "r") as fileHandler:
            for lastLine in fileHandler:
                nbLines += 1
        if lastLine == "\n":
            return nbLines - 1
        return nbLines

    ## Return the number of lines in the files in the given list
    #
    # @param lFileNames list of file names
    #
    # @return int sum of the line counts given by getNbLinesInSingleFile()
    #
    @staticmethod
    def getNbLinesInFileList(lFileNames):
        return sum(FileUtils.getNbLinesInSingleFile(fileName) for fileName in lFileNames)

    ## Return True if the given file exists, False otherwise
    #
    # @param fileName string path to test (checked with os.path.exists)
    #
    @staticmethod
    def isRessourceExists(fileName):
        return os.path.exists(fileName)

    ## Return True if the given file is empty, False otherwise
    #
    # @note "empty" means that getNbLinesInSingleFile() returns 0, hence a
    # file containing a single end-of-line symbol is considered empty
    #
    # @param fileName string name of the input file
    #
    @staticmethod
    def isEmpty(fileName):
        return FileUtils.getNbLinesInSingleFile(fileName) == 0

    ## Return True if both files are identical, False otherwise
    #
    # @note the contents are compared byte by byte in pure Python: no shell
    # command is launched and no temporary file is created
    #
    # @param file1 string name of the first file
    # @param file2 string name of the second file
    #
    # @return bool False is also returned (after a warning on stdout) when
    # one of the files cannot be read
    #
    @staticmethod
    def are2FilesIdentical(file1, file2):
        chunkSize = 65536
        try:
            with open(file1, "rb") as handler1:
                with open(file2, "rb") as handler2:
                    while True:
                        chunk1 = handler1.read(chunkSize)
                        chunk2 = handler2.read(chunkSize)
                        if chunk1 != chunk2:
                            return False
                        if not chunk1:
                            # both files reached their end at the same time
                            return True
        except (IOError, OSError) as error:
            print("WARNING: can't compare '%s' and '%s' (%s)" % (file1, file2, error))
            return False

    ## Return a string with all the content of the files in the given list
    #
    # @note files are read in sorted name order; the given list itself is
    # left untouched
    #
    # @param lFiles list of file names
    #
    # @return string concatenated content
    #
    @staticmethod
    def getFileContent(lFiles):
        lContents = []
        for fileName in sorted(lFiles):
            with open(fileName, "r") as currentFile:
                lContents.append(currentFile.read())
        return "".join(lContents)

    ## Save content of the given file after having sorted it
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file (default: overwrite the input file)
    #
    @staticmethod
    def sortFileContent(inFile, outFile=""):
        with open(inFile, "r") as inFileHandler:
            lines = sorted(inFileHandler)
        if outFile == "":
            outFile = inFile
        with open(outFile, "w") as outFileHandler:
            outFileHandler.writelines(lines)

    ## Add end-of-line symbol to the given file content if necessary
    #
    # @param fileContent string content to complete
    #
    # @return string the content, ending with an end-of-line symbol unless it is empty
    #
    @staticmethod
    def addNewLineAtTheEndOfFileContent(fileContent):
        if fileContent and not fileContent.endswith("\n"):
            fileContent += "\n"
        return fileContent

    ## Concatenate files in the given list
    #
    # @note the output file is opened in append mode, hence an existing
    # content is kept; the given list itself is left untouched
    #
    # @param lFiles list of input file names
    # @param outFile string name of the output file
    # @param sort bool process the input files in sorted name order
    # @param skipHeaders bool skip the first line of each input file
    # @param separator string written between two consecutive input files
    #
    @staticmethod
    def catFilesFromList(lFiles, outFile, sort=True, skipHeaders=False, separator=""):
        if sort:
            lFiles = sorted(lFiles)
        with open(outFile, "a") as outFileHandler:
            for index, singleFile in enumerate(lFiles):
                if index > 0:
                    outFileHandler.write(separator)
                with open(singleFile, "r") as singleFileHandler:
                    if skipHeaders:
                        singleFileHandler.readline()
                    shutil.copyfileobj(singleFileHandler, outFileHandler)

    ## Concatenate files according to the given pattern
    #
    # @param pattern string glob pattern selecting the input files
    # @param outFile string name of the output file
    # @param skipHeaders bool skip the first line of each input file
    # @param separator string written between two consecutive input files
    #
    @staticmethod
    def catFilesByPattern(pattern, outFile, skipHeaders=False, separator=""):
        lFiles = glob.glob(pattern)
        FileUtils.catFilesFromList(lFiles, outFile, skipHeaders=skipHeaders, separator=separator)

    ## Remove files listed according to the given pattern
    #
    # @example prefix="/home/tmp/dummy*.txt"
    #
    # @param prefix string glob pattern selecting the files to remove
    #
    @staticmethod
    def removeFilesByPattern(prefix):
        for fileName in glob.glob(prefix):
            os.remove(fileName)

    ## Remove files listed according to the suffixes in the given list
    #
    # @param targetPath string directory in which files are removed (with or
    # without trailing separator; an empty string means the current directory)
    # @param lSuffixes list of suffixes, each one being expanded to "*<suffix>"
    #
    @staticmethod
    def removeFilesBySuffixList(targetPath, lSuffixes):
        for suffix in lSuffixes:
            pattern = os.path.join(targetPath, "*%s" % suffix)
            FileUtils.removeFilesByPattern(pattern)

    ## Remove repeated blanks in the given file
    #
    # @note only runs of the space character are squeezed (as "tr -s ' '"
    # does); the result is first written to "<outFile>.tmp" and then moved
    # to the output file, so the input can safely be its own output
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file (default: overwrite the input file)
    #
    @staticmethod
    def removeRepeatedBlanks(inFile, outFile=""):
        if outFile == "":
            outFile = inFile
        tmpFile = "%s.tmp" % outFile
        repeatedBlanks = re.compile(" {2,}")
        with open(inFile, "r") as inFileHandler:
            with open(tmpFile, "w") as tmpFileHandler:
                for line in inFileHandler:
                    tmpFileHandler.write(repeatedBlanks.sub(" ", line))
        shutil.move(tmpFile, outFile)

    ## Remove files in the given list
    #
    # @param lFiles list of file names (each one has to exist)
    #
    @staticmethod
    def removeFilesFromList(lFiles):
        for fileName in lFiles:
            os.remove(fileName)

    ## Remove files in the given list if exist
    #
    # @param lFiles list of file names (missing ones are skipped)
    #
    @staticmethod
    def removeFilesFromListIfExist(lFiles):
        for fileName in lFiles:
            if FileUtils.isRessourceExists(fileName):
                os.remove(fileName)

    ## Append the content of a file to another file
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file
    #
    @staticmethod
    def appendFileContent(inFile, outFile):
        with open(outFile, "a") as outFileHandler:
            with open(inFile, "r") as inFileHandler:
                shutil.copyfileobj(inFileHandler, outFileHandler)

    ## Replace Windows end-of-line by Unix end-of-line
    #
    # @note the conversion is done on bytes so that "\r\n" is seen whatever
    # the platform; the input file is replaced only once the converted copy
    # ("<inFile>.tmp") has been completely written
    #
    # @param inFile string name of the file to convert in place
    #
    @staticmethod
    def fromWindowsToUnixEof(inFile):
        tmpFile = "%s.tmp" % inFile
        with open(inFile, "rb") as inFileHandler:
            with open(tmpFile, "wb") as tmpFileHandler:
                for line in inFileHandler:
                    tmpFileHandler.write(line.replace(b"\r\n", b"\n"))
        shutil.move(tmpFile, inFile)

    ## Remove duplicated lines in a file
    #
    # @note it preserves the initial order and handles blank lines
    # @note every line written back ends with an end-of-line symbol
    #
    # @param inFile string name of the file to clean in place
    #
    @staticmethod
    def removeDuplicatedLines(inFile):
        with open(inFile, "r") as inFileHandler:
            lLines = inFileHandler.read().split("\n")
        # the split leaves an empty last item when the content ends with "\n"
        if lLines[-1] == "":
            del lLines[-1]
        sSeenLines = set()
        with open(inFile, "w") as outFileHandler:
            for line in lLines:
                if line not in sSeenLines:
                    sSeenLines.add(line)
                    outFileHandler.write("%s\n" % line)

    ## Write a list of lines in a given file
    #
    # @note lines are written as they are, no end-of-line symbol is added
    #
    # @param inFile string name of the file to write
    # @param lLines list of strings
    #
    @staticmethod
    def writeLineListInFile(inFile, lLines):
        with open(inFile, "w") as inFileHandler:
            inFileHandler.writelines(lLines)

    ## Give the list of path of each directory in the given directory
    #
    # @note returned paths are built by globbing rootPath + "/*", hence they
    # are absolute only if rootPath is absolute
    #
    # @param rootPath string absolute path of the given directory
    #
    # @return lDirPath list of absolute directory path
    #
    @staticmethod
    def getAbsoluteDirectoryPathList(rootPath):
        return [ressource for ressource in glob.glob(rootPath + "/*") if os.path.isdir(ressource)]

    ## Get a sublist of which each element matches/doesn't match a pattern
    #
    # @note the pattern is searched anywhere in each path (re.search), so
    # alternations such as "a|b" behave as a whole
    #
    # @param lPath string list of paths
    #
    # @param pattern string pattern (regular expression)
    #
    # @param match bool keep matching paths if True, non-matching ones otherwise
    #
    # @return lPathMatching list of path matching pattern
    #
    @staticmethod
    def getSubListAccordingToPattern(lPath, pattern, match=True):
        regex = re.compile(pattern)
        keepMatching = bool(match)
        return [path for path in lPath if (regex.search(path) is not None) == keepMatching]

    ## Give the list of file names found in the given directory
    #
    # @param dirPath string absolute path of the given directory
    #
    # @param patternFileFilter string regular expression that has to match
    # the beginning of the file name (re.match)
    #
    # @return lFilesInDir list of file names
    #
    @staticmethod
    def getFileNamesList(dirPath, patternFileFilter=".*"):
        regex = re.compile(patternFileFilter)
        lFilesInDir = []
        for ressource in glob.glob(dirPath + "/*"):
            if os.path.isfile(ressource):
                fileName = os.path.basename(ressource)
                if regex.match(fileName):
                    lFilesInDir.append(fileName)
        return lFilesInDir

    ## Return the MD5 sum of a file
    #
    # @note the file is read as bytes, by chunks
    #
    # @param inFile string name of the input file
    #
    # @return string hexadecimal digest, or "" if hashlib is not loaded
    #
    @staticmethod
    def getMd5SecureHash(inFile):
        if "hashlib" not in sys.modules:
            return ""
        md5 = hashlib.md5()
        with open(inFile, "rb") as inFileHandler:
            for chunk in iter(lambda: inFileHandler.read(65536), b""):
                md5.update(chunk)
        return md5.hexdigest()

    ## Cat all files of a given directory
    #
    # @param dir string directory name
    # @param outFileName string output file name
    #
    @staticmethod
    def catFilesOfDir(dir, outFileName):
        lFilePaths = [dir + "/" + fileName for fileName in FileUtils.getFileNamesList(dir)]
        FileUtils.catFilesFromList(lFilePaths, outFileName)

    ## Return True if size file > 0 octet
    #
    # @param fileName string file name
    #
    @staticmethod
    def isSizeNotNull(fileName):
        return os.path.getsize(fileName) > 0

    ## Split one file into N Files by lines
    #
    # @note output files are written in the current directory and named
    # "<prefix>-<i><extension>", i going from 1 to N
    # @note N is lowered to the number of lines when it is greater; each
    # output file receives ceil(nbLines / N) lines, so the last ones can be
    # shorter or even empty
    # @note an empty input file, or N equal to 0, gives a single output file
    #
    # @param fileName string file name
    # @param N int number of files to create
    #
    @staticmethod
    def splitFileIntoNFiles(fileName, N):
        nbLine = FileUtils.getNbLinesInSingleFile(fileName)
        nbLinesInEachFile = nbLine
        if N > nbLine:
            N = nbLine
        if N != 0:
            # int() because math.ceil returns a float with Python 2
            nbLinesInEachFile = int(math.ceil(float(nbLine) / N))
        else:
            N = 1
        filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
        with open(fileName, "r") as fileHandler:
            for i in range(1, N + 1):
                with open("%s-%s%s" % (filePrefix, i, fileExt), "w") as f:
                    for _ in range(nbLinesInEachFile):
                        f.write(fileHandler.readline())

    ## Split one file into files of N lines
    #
    # @note output files are written in the current directory and named
    # "<prefix>-<i><extension>", i starting from 1
    # @note an empty input file, or N lower than or equal to 0, gives a
    # single output file being a copy of the input
    #
    # @param fileName string input file name
    # @param N int lines number per files
    #
    @staticmethod
    def splitFileAccordingToLineNumber(fileName, N):
        filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
        with open(fileName) as inF:
            fileNb = 1
            line = inF.readline()
            # N <= 0 (and not only N == 0): a negative N would never consume
            # any line and would create output files endlessly
            if not line or N <= 0:
                outFileName = "%s-%s%s" % (filePrefix, fileNb, fileExt)
                with open(fileName, "rb") as srcF:
                    with open(outFileName, "wb") as copyF:
                        shutil.copyfileobj(srcF, copyF)
            else:
                while line:
                    outFileName = "%s-%s%s" % (filePrefix, fileNb, fileExt)
                    with open(outFileName, "w") as outF:
                        lineNb = 1
                        while lineNb <= N and line:
                            outF.write(line)
                            line = inF.readline()
                            lineNb += 1
                    fileNb += 1