comparison directory_copier.py @ 0:b737d0ed42be draft default tip

Uploaded
author brenninc
date Tue, 21 Jun 2016 03:38:52 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b737d0ed42be
1 import gzip
2 import optparse # using optparse as hydra still python 2.6
3 import os.path
4 import re
5 import shutil
6 import sys
7
def report_error(*args):
    """Print an error message to stderr and terminate with exit status 1.

    Every positional argument is converted with ``str`` and the pieces are
    joined with single spaces. A trailing newline is added and stderr is
    flushed before ``sys.exit(1)`` is called.
    """
    message = ' '.join([str(part) for part in args])
    sys.stderr.write(message + '\n')
    sys.stderr.flush()
    sys.exit(1)
12
13
def check_pattern_get_new_name(a_file, ending, options):
    """Apply the name filters to a file and work out its output name.

    Arguments:
        a_file: File name (no directory part) that is already known to end
            with ``ending``.
        ending: The ending that matched ``a_file``.
        options: Object exposing ``start``, ``last``, ``regex``,
            ``new_ending`` and ``decompress`` attributes; a falsy value
            disables the corresponding filter or rewrite.

    Returns:
        ``None`` if the file is rejected by the ``start``, ``last`` or
        ``regex`` filter. Otherwise the new name:
          * with ``new_ending`` set, the base name (ending and one trailing
            "." removed) plus "." plus the new ending;
          * else with ``decompress`` set and a ".gz" file, the file name
            without the ".gz";
          * else the file name unchanged.
    """
    if options.start and not a_file.startswith(options.start):
        return None

    # Slice by explicit length: a_file[:-len(ending)] would collapse to
    # a_file[:0] (an empty name) when the ending is the empty string.
    name = a_file[:len(a_file) - len(ending)]
    if name.endswith("."):
        name = name[:-1]

    # The filter value lives on options; the bare name `last` was undefined
    # and raised NameError whenever --last was used.
    if options.last and not name.endswith(options.last):
        return None

    # The regex is matched against the base name, not the full file name.
    if options.regex and re.search(options.regex, name) is None:
        return None

    if options.new_ending:
        if options.new_ending.startswith("."):
            return name + options.new_ending
        return name + "." + options.new_ending

    if options.decompress and a_file.endswith(".gz"):
        return a_file[:-3]
    return a_file
37
38
def check_and_get_new_name(a_file, options):
    """Return the output name for ``a_file``, or ``None`` if it is not wanted.

    The endings in ``options.endings`` are tried in order. The first one
    that ``a_file`` ends with is handed to ``check_pattern_get_new_name``,
    whose result (a new name, or ``None`` when a filter rejects the file) is
    returned; later endings are not tried. ``None`` is also returned when no
    ending matches or when no endings were configured.
    """
    # optparse's "append" action leaves the attribute as None when the
    # option was never given; treat that as "no ending can match" instead
    # of failing with a TypeError while iterating None.
    for ending in options.endings or ():
        if a_file.endswith(ending):
            return check_pattern_get_new_name(a_file, ending, options)
    return None
44
45
def link(a_file, new_name, path):
    """Hard-link a file from ``path`` into the "output" directory.

    Arguments:
        a_file: Name of the existing file inside ``path``.
        new_name: Name the link gets inside the "output" directory.
        path: Directory holding ``a_file``.

    Both directories are resolved with ``os.path.realpath`` first; "output"
    is a relative path, so it is resolved against the current working
    directory. The link is made with ``os.link`` (a hard link).
    """
    source_dir = os.path.realpath(path)
    output_dir = os.path.realpath("output")
    os.link(os.path.join(source_dir, a_file),
            os.path.join(output_dir, new_name))
50
51
def decompress(a_file, new_name, path):
    """Gunzip a file from ``path`` into the "output" directory.

    Arguments:
        a_file: Name of the gzip-compressed file inside ``path``.
        new_name: Name of the decompressed file written to "output".
        path: Directory holding ``a_file``.

    "output" is a relative path, so it is resolved against the current
    working directory. The data is streamed with ``shutil.copyfileobj``
    rather than read into memory in one piece.
    """
    source = os.path.join(os.path.realpath(path), a_file)
    destination = os.path.join(os.path.realpath("output"), new_name)
    with gzip.open(source, 'rb') as compressed:
        with open(destination, 'wb') as expanded:
            shutil.copyfileobj(compressed, expanded)
57
58
def copy_and_link(path, options):
    """List the wanted files in ``path`` and optionally link or gunzip them.

    Arguments:
        path: Directory whose entries are examined (not recursive).
        options: Object exposing ``list`` (path of the listing file to
            write), ``decompress`` and ``link`` flags, plus whatever
            ``check_and_get_new_name`` reads.

    When ``decompress`` or ``link`` is set, an "output" directory is created
    in the current working directory first; ``os.mkdir`` raises if it
    already exists. Entries are processed in sorted order. For each entry
    that ``check_and_get_new_name`` accepts, its new name is written on its
    own line to the listing file, and then:
      * with ``decompress`` set, ".gz" files are decompressed into "output"
        and all other files are linked there;
      * otherwise, with ``link`` set, the file is linked into "output".
    """
    populate_output = options.decompress or options.link
    if populate_output:
        os.mkdir("output")

    with open(options.list, 'w') as listing:
        for entry in sorted(os.listdir(path)):
            target = check_and_get_new_name(entry, options)
            if not target:
                continue
            listing.write(target + "\n")
            if options.decompress and entry.endswith(".gz"):
                decompress(entry, target, path)
            elif populate_output:
                # Reached for non-gz files in decompress mode and for
                # every accepted file in link-only mode.
                link(entry, target, path)
77
78
if __name__ == '__main__':
    # Command-line entry point: parse the options, validate the ones the
    # script cannot run without, then list (and optionally link or
    # decompress) the matching files of the requested directory.
    parser = optparse.OptionParser()
    parser.add_option("--path", action="store", type="string",
                      help="Path of directory to check. ")
    parser.add_option("--ending", action="append", type="string", dest="endings",
                      help="Ending that can be listed and if requested linked or decompressed. ")
    parser.add_option("--start", action="store", type="string",
                      help="String that must be at the start of the file name ")
    parser.add_option("--last", action="store", type="string",
                      help="String that must be the last bit of the file name before the endings")
    parser.add_option("--regex", action="store", type="string",
                      help="Regex for file names not including the endings")
    parser.add_option("--new_ending", action="store", type="string",
                      help="New ending to replace any previous ending in list and if required links or decompressions. Note: If not set decompression will auto remove the compressioned part of the ending")
    parser.add_option("--list", action="store", type="string",
                      help="Path to where all files should be listed. ")
    parser.add_option("--link", action="store_true", default=False,
                      help="If set will cause links to be added in output directory. ")
    parser.add_option("--decompress", action="store_true", default=False,
                      help="If set will cause gz files to be decompressed or if not a supported decompression ending linked.")
    (options, args) = parser.parse_args()

    # optparse leaves options that were not given as None. Fail with a
    # readable message instead of an AttributeError/IndexError/TypeError
    # traceback further down.
    if not options.path or not options.path.strip():
        report_error("No --path provided.")
    if not options.list:
        report_error("No --list provided.")
    if not options.endings:
        report_error("No --ending provided.")

    # Make sure the directory path carries a trailing slash.
    path = options.path.strip()
    if not path.endswith('/'):
        path = path + "/"
    copy_and_link(path, options)
106