0
|
1 import gzip
|
|
2 import optparse # using optparse as hydra still python 2.6
|
|
3 import os.path
|
1
|
4 import re
|
0
|
5 import shutil
|
|
6 import sys
|
|
7
|
|
def report_error(*args):
    """Print an error message to stderr and terminate the script.

    All positional arguments are converted with ``str`` and joined with a
    single space to form one message line.  The process then exits with
    status 1 (``sys.exit`` raises ``SystemExit``), so this never returns.
    """
    message = ' '.join(str(part) for part in args)
    sys.stderr.write(message + '\n')
    # Flush explicitly so the message is emitted before the interpreter exits.
    sys.stderr.flush()
    sys.exit(1)
|
|
12
|
|
13
|
|
def check_pattern_get_new_name(a_file, ending, options):
    """Apply the name filters to ``a_file`` and work out its output name.

    Args:
        a_file: File name (no directory part) that is known to end with
            ``ending``.
        ending: The ending that matched ``a_file``.
        options: Parsed options object; the attributes read here are
            ``start``, ``last``, ``regex``, ``new_ending`` and
            ``decompress``.

    Returns:
        ``None`` when the file is rejected by the ``start``, ``last`` or
        ``regex`` filter.  Otherwise the new name:

        * the base name plus ``options.new_ending`` (a separating "." is
          inserted when ``new_ending`` does not start with one), or
        * ``a_file`` without its trailing ".gz" when ``options.decompress``
          is set and the file is gzip-named, or
        * ``a_file`` unchanged.
    """
    if options.start and not a_file.startswith(options.start):
        return None

    # Base name = file name without the matched ending.  An empty ending is
    # special-cased because a_file[:-0] would be "" instead of the full name.
    name = a_file[:-len(ending)] if ending else a_file
    # Endings may be given without their leading dot; drop the dot left over.
    if name.endswith("."):
        name = name[:-1]

    # The "last" filter is matched against the base name, i.e. the text
    # immediately before the ending.  It must be read from ``options``.
    if options.last and not name.endswith(options.last):
        return None

    # The regex is searched (not anchored) within the base name only.
    if options.regex and re.search(options.regex, name) is None:
        return None

    if options.new_ending:
        if options.new_ending.startswith("."):
            return name + options.new_ending
        return name + "." + options.new_ending

    # Without an explicit new ending, decompression only strips ".gz".
    if options.decompress and a_file.endswith(".gz"):
        return a_file[:-3]
    return a_file
|
|
37
|
|
38
|
|
def check_and_get_new_name(a_file, options):
    """Return the output name for ``a_file`` or ``None`` if it is not wanted.

    The first entry of ``options.endings`` that ``a_file`` ends with is
    selected and the decision is delegated to
    ``check_pattern_get_new_name``.  When no ending matches, ``None`` is
    returned.
    """
    candidates = (suffix for suffix in options.endings
                  if a_file.endswith(suffix))
    matched = next(candidates, None)
    if matched is None:
        return None
    return check_pattern_get_new_name(a_file, matched, options)
|
|
44
|
|
45
|
|
def link(a_file, new_name, path):
    """Create a hard link to ``path``/``a_file`` named ``output``/``new_name``.

    Both directories are resolved with ``os.path.realpath``; "output" is
    taken relative to the current working directory.  The link is made with
    ``os.link`` (a hard link, not a symbolic link).
    """
    source = os.path.join(os.path.realpath(path), a_file)
    output_dir = os.path.realpath("output")
    os.link(source, os.path.join(output_dir, new_name))
|
|
50
|
|
51
|
|
def decompress(a_file, new_name, path):
    """Gunzip ``path``/``a_file`` into ``output``/``new_name``.

    Both directories are resolved with ``os.path.realpath``; "output" is
    taken relative to the current working directory.  The data is streamed
    with ``shutil.copyfileobj`` rather than read into memory in one go.
    """
    source = os.path.join(os.path.realpath(path), a_file)
    destination = os.path.join(os.path.realpath("output"), new_name)
    with gzip.open(source, 'rb') as compressed:
        with open(destination, 'wb') as plain:
            shutil.copyfileobj(compressed, plain)
|
|
57
|
|
58
|
|
def copy_and_link(path, options):
    """List the wanted files in ``path`` and optionally link/decompress them.

    The "output" directory is created in the current working directory when
    ``options.decompress`` or ``options.link`` is set.  The directory entries
    of ``path`` are processed in sorted order; for every entry accepted by
    ``check_and_get_new_name`` the new name is written as one line to the
    file ``options.list``.  Then:

    * with ``options.decompress``: ".gz" entries are decompressed into
      "output", all other accepted entries are linked there;
    * otherwise, with ``options.link``: the entry is linked into "output";
    * with neither option the entry is only listed.
    """
    needs_output_dir = options.decompress or options.link
    if needs_output_dir:
        os.mkdir("output")

    with open(options.list, 'w') as listing:
        for entry in sorted(os.listdir(path)):
            target = check_and_get_new_name(entry, options)
            if not target:
                continue
            listing.write(target + "\n")
            if options.decompress and entry.endswith(".gz"):
                decompress(entry, target, path)
            elif options.decompress or options.link:
                # Either plain linking was requested, or decompression was
                # requested but this entry is not gzip-named.
                link(entry, target, path)
|
|
77
|
|
78
|
|
if __name__ == '__main__':
    # Command-line entry point: parse the options, validate the ones the
    # script cannot run without, then list (and optionally link/decompress)
    # the matching files of the requested directory.
    parser = optparse.OptionParser()
    parser.add_option("--path", action="store", type="string",
                      help="Path of directory to check. ")
    parser.add_option("--ending", action="append", type="string", dest="endings",
                      help="Ending that can be listed and if requested linked or decompressed. ")
    parser.add_option("--start", action="store", type="string",
                      help="String that must be at the start of the file name ")
    parser.add_option("--last", action="store", type="string",
                      help="String that must be the last bit of the file name before the endings")
    parser.add_option("--regex", action="store", type="string",
                      help="Regex for file names not including the endings")
    parser.add_option("--new_ending", action="store", type="string",
                      help="New ending to replace any previous ending in list and if required links or decompressions. Note: If not set decompression will auto remove the compressioned part of the ending")
    parser.add_option("--list", action="store", type="string",
                      help="Path to where all files should be listed. ")
    parser.add_option("--link", action="store_true", default=False,
                      help="If set will cause links to be added in output directory. ")
    parser.add_option("--decompress", action="store_true", default=False,
                      help="If set will cause gz files to be decompressed or if not a supported decompression ending linked.")
    (options, args) = parser.parse_args()

    # optparse leaves unset options as None; fail with a clear message
    # instead of an AttributeError/TypeError/IndexError traceback later on.
    if not options.path or not options.path.strip():
        report_error("No --path provided")
    if not options.list:
        report_error("No --list provided")
    if not options.endings:
        report_error("At least one --ending must be provided")

    path = options.path.strip()
    if not os.path.isdir(path):
        report_error("Path", path, "is not a directory")
    if not path.endswith('/'):
        path = path + "/"
    copy_and_link(path, options)
|
|
106
|