Mercurial > repos > iss > eurl_vtec_wgs_pt
comparison scripts/ReMatCh/modules/utils.py @ 0:c6bab5103a14 draft
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author | iss |
---|---|
date | Mon, 21 Mar 2022 15:23:09 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c6bab5103a14 |
---|---|
1 import pickle | |
2 from traceback import format_exception as traceback_format_exception | |
3 import shlex | |
4 import subprocess | |
5 from threading import Timer | |
6 import shutil | |
7 import time | |
8 from functools import wraps as functools_wraps | |
9 import os.path | |
10 import sys | |
11 | |
12 | |
def start_logger(workdir):
    """
    Replace sys.stdout with a Logger that also writes to a log file

    Parameters
    ----------
    workdir : str
        Directory in which the log file is created

    Returns
    -------
    logfile : str
        Path to the log file
    time_str : str
        Timestamp (format YYYYmmdd-HHMMSS) embedded in the log file name
    """
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    # Logger captures the current sys.stdout in its constructor, so it must be
    # built before sys.stdout is rebound
    tee = Logger(workdir, timestamp)
    sys.stdout = tee
    return tee.getLogFile(), timestamp
18 | |
19 | |
class Logger(object):
    """
    Stream-like object that duplicates every message to the stream that was
    installed as sys.stdout at construction time and to a log file

    Parameters
    ----------
    out_directory : str
        Directory where the log file "run.<time_str>.log" is created
    time_str : str
        String embedded in the log file name
    """

    def __init__(self, out_directory, time_str):
        self.logfile = os.path.join(out_directory, 'run.' + time_str + '.log')
        # Whatever stream is currently installed as stdout keeps receiving the output
        self.terminal = sys.stdout
        self.log = open(self.logfile, "w")

    def write(self, message):
        """Write message to the wrapped stream and to the log file"""
        self.terminal.write(message)
        self.log.write(message)
        # Flush on every write so the log file is up to date even after a crash
        self.log.flush()

    def flush(self):
        """Flush the wrapped stream and the log file"""
        # Forward the flush: a no-op here would silently ignore explicit
        # sys.stdout.flush() / print(..., flush=True) requests
        self.terminal.flush()
        if not self.log.closed:
            self.log.flush()

    def getLogFile(self):
        """Return the path to the log file"""
        return self.logfile
36 | |
37 | |
def get_cpu_information(outdir, time_str):
    """
    Save the content of /proc/cpuinfo and the SLURM_* environment variables

    Two files are written into outdir:
    "cpu_information.<time_str>.cpu.txt" with the output of "cat /proc/cpuinfo"
    (left empty when the command fails) and
    "cpu_information.<time_str>.slurm.txt" with one "#NAME" line followed by
    one value line for each environment variable starting with "SLURM_"

    Parameters
    ----------
    outdir : str
        Directory where both files are written
    time_str : str
        String embedded in both file names

    Returns
    -------
    None
    """
    file_prefix = 'cpu_information.' + time_str

    cpu_file = os.path.join(outdir, file_prefix + '.cpu.txt')
    with open(cpu_file, 'wt') as writer:
        run_successfully, stdout, stderr = run_command_popen_communicate(['cat', '/proc/cpuinfo'], False, None,
                                                                         False)
        if run_successfully:
            writer.write(stdout)

    slurm_file = os.path.join(outdir, file_prefix + '.slurm.txt')
    with open(slurm_file, 'wt') as writer:
        slurm_variables = [name for name in sorted(os.environ) if name.startswith('SLURM_')]
        for name in slurm_variables:
            writer.write('#' + name + '\n' + os.environ[name] + '\n')
49 | |
50 | |
def setPATHvariable(doNotUseProvidedSoftware, script_path):
    """
    Optionally prepend the bundled software folders to PATH and print PATH

    Parameters
    ----------
    doNotUseProvidedSoftware : bool
        When True the PATH environment variable is left untouched
    script_path : str
        Path to the main script; the bundled software is looked for inside the
        "src" folder located next to it

    Returns
    -------
    None
    """
    original_path = os.environ['PATH']
    script_folder = os.path.dirname(script_path)

    if not doNotUseProvidedSoftware:
        # Bundled tools go first so they take precedence over system-wide ones
        bundled_folders = [
            os.path.join(script_folder, 'src', 'bowtie2-2.2.9'),
            os.path.join(script_folder, 'src', 'samtools-1.3.1', 'bin'),
            os.path.join(script_folder, 'src', 'bcftools-1.3.1', 'bin'),
        ]
        os.environ['PATH'] = str(':'.join(bundled_folders + [original_path]))

    print('\n' + 'PATH variable:')
    print(os.environ['PATH'])
65 | |
66 | |
def checkPrograms(programs_version_dictionary):
    """
    Check that the required programs are in PATH and have a suitable version

    Parameters
    ----------
    programs_version_dictionary : dict
        Program name as key and, as value, a list with: the command line flag
        that prints the program version (None to skip the version check), the
        comparison to perform ('>=' for a minimum version, anything else is
        treated as an exact match) and the required version string.
        For programs whose name ends with ".jar" the path found is appended to
        that list.

    Returns
    -------
    listMissings : list
        One message for each program not found or with an unsuitable version
        (empty when every requirement is fulfilled)
    """
    print('\n' + 'Checking dependencies...')
    programs = programs_version_dictionary
    listMissings = []
    for program in programs:
        run_successfully, stdout, stderr = run_command_popen_communicate(['which', program], False, None, False)
        if not run_successfully:
            listMissings.append(program + ' not found in PATH.')
            continue

        program_path = stdout.splitlines()[0]
        print(program_path)
        if programs[program][0] is None:
            print(program + ' (impossible to determine programme version) found at: ' + program_path)
            continue

        if program.endswith('.jar'):
            check_version = ['java', '-jar', program_path, programs[program][0]]
            programs[program].append(program_path)
        else:
            check_version = [program_path, programs[program][0]]
        run_successfully, stdout, stderr = run_command_popen_communicate(check_version, False, None, False)
        # Some programs print the version information to stderr
        if stdout == '':
            stdout = stderr

        # The position of the version number in the output depends on the program
        if program in ['wget', 'awk']:
            version_line = stdout.splitlines()[0].split(' ', 3)[2]
        elif program in ['prefetch', 'fastq-dump']:
            version_line = stdout.splitlines()[1].split(' ')[-1]
        else:
            version_line = stdout.splitlines()[0].split(' ')[-1]
        for character in ['"', 'v', 'V', '+', ',']:
            version_line = version_line.replace(character, '')
        print(program + ' (' + version_line + ') found')

        if programs[program][1] == '>=':
            program_found_version = version_line.split('.')
            program_version_required = programs[program][2].split('.')
            if len(program_version_required) == 3:
                if len(program_found_version) == 2:
                    # Treat "X.Y" as "X.Y.0"
                    program_found_version.append(0)
                else:
                    # Drop suffixes such as "_something" from the third field
                    program_found_version[2] = program_found_version[2].split('_')[0]
            # Compare field by field: the first field that differs decides
            for i, required_field in enumerate(program_version_required):
                found_field = int(program_found_version[i])
                required_field = int(required_field)
                if found_field > required_field:
                    break
                elif found_field == required_field:
                    continue
                else:
                    listMissings.append('It is required ' + program + ' with version ' +
                                        programs[program][1] + ' ' + programs[program][2])
                    # Stop here, otherwise the same program could be reported
                    # again by the less significant fields
                    break
        else:
            if version_line != programs[program][2]:
                listMissings.append('It is required ' + program + ' with version ' + programs[program][1] +
                                    ' ' + programs[program][2])
    return listMissings
121 | |
122 | |
def requiredPrograms(asperaKey, downloadCramBam, SRA, SRAopt):
    """
    Check the programs required for the requested options and exit when any
    is missing or has an unsuitable version

    Parameters
    ----------
    asperaKey : str or None
        When not None, ascp is also required
    downloadCramBam : bool
        Not used by this function
    SRA : bool
        When True, prefetch, fastq-dump and awk are also required
    SRAopt : bool
        When True, prefetch, fastq-dump and awk are also required

    Returns
    -------
    None
    """
    programs_version_dictionary = {
        'wget': ['--version', '>=', '1.12'],
        'gzip': ['--version', '>=', '1.6'],
        'bowtie2': ['--version', '>=', '2.2.9'],
        'samtools': ['--version', '==', '1.3.1'],
        'bcftools': ['--version', '==', '1.3.1'],
    }
    if asperaKey is not None:
        programs_version_dictionary['ascp'] = ['--version', '>=', '3.6.1']
    if SRA or SRAopt:
        programs_version_dictionary['prefetch'] = ['--version', '>=', '2.8.2']
        programs_version_dictionary['fastq-dump'] = ['--version', '>=', '2.8.2']
        programs_version_dictionary['awk'] = ['--version', '>=', '3.0.4']

    missingPrograms = checkPrograms(programs_version_dictionary)
    if missingPrograms:
        sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms))
139 | |
140 | |
def general_information(logfile, version, outdir, time_str, doNotUseProvidedSoftware, asperaKey, downloadCramBam, SRA, SRAopt):
    """
    Print the run header, record CPU information, set PATH and check the
    required programs

    Parameters
    ----------
    logfile : str
        Path to the log file (only printed)
    version : str
        Version of the script
    outdir : str
        Directory where the CPU information files are written
    time_str : str
        String embedded in the CPU information file names
    doNotUseProvidedSoftware : bool
        True to leave the PATH environment variable untouched
    asperaKey : str or None
        Passed to requiredPrograms
    downloadCramBam : bool
        Passed to requiredPrograms
    SRA : bool
        Passed to requiredPrograms
    SRAopt : bool
        Passed to requiredPrograms

    Returns
    -------
    script_path : str
        Path to "rematch.py", located in the parent folder of the folder
        containing this module
    """
    def print_title(title):
        print('\n' + title)

    print_title('==========> ReMatCh <==========')
    print_title('Program start: ' + time.ctime())

    # Where the log file is stored
    print_title('LOGFILE:')
    print(logfile)

    # Command line used
    print_title('COMMAND:')
    package_folder = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    script_path = os.path.join(package_folder, 'rematch.py')
    print(sys.executable + ' ' + ' '.join(sys.argv))

    # Directory from which the program was launched
    print_title('PRESENT DIRECTORY:')
    present_directory = os.path.abspath(os.getcwd())
    print(present_directory)

    # Program version
    print_title('VERSION:')
    script_version_git(version, present_directory, script_path)

    get_cpu_information(outdir, time_str)

    setPATHvariable(doNotUseProvidedSoftware, script_path)

    requiredPrograms(asperaKey, downloadCramBam, SRA, SRAopt)

    return script_path
175 | |
176 | |
def script_version_git(version, current_directory, script_path, no_git_info=False):
    """
    Print script version and get GitHub commit information

    Parameters
    ----------
    version : str
        Version of the script, e.g. "4.0"
    current_directory : str
        Path to the directory where the script started to run (the working
        directory is restored to it afterwards)
    script_path : str
        Path to the script running
    no_git_info : bool, default False
        True if it is not necessary to retrieve the GitHub commit information

    Returns
    -------
    None
    """
    print('Version {}'.format(version))

    if not no_git_info:
        try:
            # git is run from the parent of the folder containing the script
            os.chdir(os.path.dirname(os.path.dirname(script_path)))
            command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"']
            run_successfully, stdout, stderr = run_command_popen_communicate(command, False, 15, False)
            print(stdout)
            command = ['git', 'remote', 'show', 'origin']
            run_successfully, stdout, stderr = run_command_popen_communicate(command, False, 15, False)
            print(stdout)
        except Exception:
            # Best effort only. Exception (instead of a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            print('HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be'
                  ' obtained.')
        finally:
            os.chdir(current_directory)
212 | |
213 | |
def run_time(start_time):
    """
    Print the time elapsed since start_time and return it

    Parameters
    ----------
    start_time : float
        Starting point, as returned by time.time()

    Returns
    -------
    time_taken : float
        Seconds elapsed since start_time, rounded to two decimal places
    """
    time_taken = time.time() - start_time
    hours, remainder = divmod(time_taken, 3600)
    minutes, seconds = divmod(remainder, 60)
    print('Runtime :{}h:{}m:{}s'.format(str(hours), str(minutes), str(round(seconds, 2))))
    return round(time_taken, 2)
221 | |
222 | |
def timer(function, name):
    """
    Wrap function so that its run time is printed and returned

    Parameters
    ----------
    function : callable
        Function to be timed; it must return an iterable
    name : str
        Name printed in the "RUNNING" and "END" messages

    Returns
    -------
    wrapper : callable
        Function returning a list with the time taken (in seconds) as first
        element, followed by the elements returned by function
    """
    @functools_wraps(function)
    def wrapper(*args, **kwargs):
        print('\n' + 'RUNNING {0}\n'.format(name))
        start_time = time.time()

        # Materialise the results before stopping the clock
        function_results = list(function(*args, **kwargs))

        time_taken = run_time(start_time)
        print('END {0}'.format(name))

        return [time_taken] + function_results
    return wrapper
237 | |
238 | |
def remove_directory(directory):
    """
    Remove a directory tree, doing nothing when it is not an existing directory

    Parameters
    ----------
    directory : str
        Path to the directory to remove

    Returns
    -------
    None
    """
    if not os.path.isdir(directory):
        return
    shutil.rmtree(directory)
242 | |
243 | |
def save_variable_to_pickle(variableToStore, outdir, prefix):
    """
    Pickle a variable into the file "<prefix>.pkl" inside outdir

    Parameters
    ----------
    variableToStore : object
        Object to be pickled
    outdir : str
        Directory where the pickle file is written
    prefix : str
        Name of the pickle file, without the ".pkl" extension

    Returns
    -------
    None
    """
    file_name = str(prefix + '.pkl')
    with open(os.path.join(outdir, file_name), 'wb') as pickle_writer:
        pickle.dump(variableToStore, pickle_writer)
248 | |
249 | |
def extract_variable_from_pickle(pickleFile):
    """
    Load and return the object stored in a pickle file

    Parameters
    ----------
    pickleFile : str
        Path to the pickle file

    Returns
    -------
    variable : object
        Object stored in the pickle file
    """
    # NOTE: unpickling can execute arbitrary code; only use with trusted files
    with open(pickleFile, 'rb') as pickle_reader:
        return pickle.load(pickle_reader)
254 | |
255 | |
def trace_unhandled_exceptions(func):
    """
    Decorator that prints the traceback of any exception raised by func
    before letting it propagate

    Parameters
    ----------
    func : callable
        Function to be wrapped

    Returns
    -------
    wrapped_func : callable
        Function that behaves like func (same arguments and return value) but
        prints the exception and its traceback when func raises
    """
    @functools_wraps(func)
    def wrapped_func(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print('Exception in ' + func.__name__)
            print(e)

            exc_type, exc_value, exc_tb = sys.exc_info()
            print(''.join(traceback_format_exception(exc_type, exc_value, exc_tb)))

            # Re-raise the original exception: rebuilding it with
            # exc_type(exc_value) fails for exception types whose constructor
            # requires several arguments and loses the original traceback
            raise

    return wrapped_func
271 | |
272 | |
def kill_subprocess_Popen(subprocess_Popen, command):
    """
    Report that a command ran out of time and kill its process

    Parameters
    ----------
    subprocess_Popen : subprocess.Popen
        Process to be killed
    command : list or str
        Command being run (only printed)

    Returns
    -------
    None
    """
    timeout_message = 'Command run out of time: ' + str(command)
    print(timeout_message)
    subprocess_Popen.kill()
276 | |
277 | |
def run_command_popen_communicate(command, shell_True, timeout_sec_None, print_comand_True):
    """
    Run a command, wait for it to finish and return its outcome

    Parameters
    ----------
    command : list or str
        Command to run. A list is joined with spaces and the resulting string
        is tokenised again with shlex.split, therefore arguments containing
        spaces must be quoted
    shell_True : bool
        True to run the command through the shell
    timeout_sec_None : int, float or None
        Seconds to wait before killing the command; None to wait indefinitely
    print_comand_True : bool
        True to print the command before running it

    Returns
    -------
    run_successfully : bool
        True if the command exited with return code 0
    stdout : str
        Standard output of the command, decoded as UTF-8
    stderr : str
        Standard error of the command, decoded as UTF-8
    """
    if not isinstance(command, str):
        command = ' '.join(command)
    command = shlex.split(command)

    if print_comand_True:
        print('Running: ' + ' '.join(command))

    if shell_True:
        command = ' '.join(command)
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    else:
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # communicate(timeout=...) replaces the previous Timer based watchdog,
    # whose Timer.isAlive() call no longer exists since Python 3.9 and was a
    # racy way of finding out whether the process had been killed
    killed_by_timeout = False
    try:
        stdout, stderr = proc.communicate(timeout=timeout_sec_None)
    except subprocess.TimeoutExpired:
        killed_by_timeout = True
        print('Command run out of time: ' + str(command))
        proc.kill()
        # Collect whatever was produced and reap the killed process
        stdout, stderr = proc.communicate()

    stdout = stdout.decode("utf-8")
    stderr = stderr.decode("utf-8")

    run_successfully = proc.returncode == 0
    if not run_successfully:
        # The timeout message already shows the command
        if not print_comand_True and not killed_by_timeout:
            print('Running: ' + str(command))
        if len(stdout) > 0:
            print('STDOUT')
            print(stdout)
        if len(stderr) > 0:
            print('STDERR')
            print(stderr)
    return run_successfully, stdout, stderr
315 | |
316 | |
def rchop(string, ending):
    """
    Remove ending from the end of string, if present

    Parameters
    ----------
    string : str
        String to be trimmed
    ending : str
        Suffix to remove

    Returns
    -------
    string : str
        String without the suffix; returned unchanged when it does not end
        with ending or when ending is empty
    """
    # An empty ending must be skipped: every string "ends with" '' and
    # string[:-0] would return an empty string
    if ending and string.endswith(ending):
        string = string[:-len(ending)]
    return string
321 | |
322 | |
def reverse_complement(seq):
    """
    Return the reverse complement of a nucleotide sequence

    Parameters
    ----------
    seq : str
        Nucleotide sequence (any case). Besides A, C, G, T and N, the IUPAC
        ambiguity codes (R, Y, K, M, S, W, B, V, D, H) are supported

    Returns
    -------
    reverse_complement_string : str
        Upper case reverse complement of seq

    Raises
    ------
    KeyError
        If seq contains a character that is not a supported nucleotide code
    """
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N',
                  'R': 'Y', 'Y': 'R', 'K': 'M', 'M': 'K', 'S': 'S', 'W': 'W',
                  'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D'}

    return ''.join(complement[base] for base in reversed(seq.upper()))