comparison scripts/ReMatCh/modules/utils.py @ 0:c6bab5103a14 draft

"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author iss
date Mon, 21 Mar 2022 15:23:09 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c6bab5103a14
1 import pickle
2 from traceback import format_exception as traceback_format_exception
3 import shlex
4 import subprocess
5 from threading import Timer
6 import shutil
7 import time
8 from functools import wraps as functools_wraps
9 import os.path
10 import sys
11
12
def start_logger(workdir):
    """
    Route ``sys.stdout`` through a Logger so printed output is also saved to a log file

    Parameters
    ----------
    workdir : str
        Path to the directory where the log file is created

    Returns
    -------
    logfile : str
        Path to the log file
    time_str : str
        Timestamp (``YYYYmmdd-HHMMSS``) that is part of the log file name
    """
    run_timestamp = time.strftime("%Y%m%d-%H%M%S")
    tee = Logger(workdir, run_timestamp)
    # From here on every print() goes both to the previous stdout and to the log file
    sys.stdout = tee
    return tee.getLogFile(), run_timestamp
18
19
class Logger(object):
    """
    Minimal file-like object that duplicates everything written to it

    Each message is sent to the stream that was ``sys.stdout`` when the Logger was created and to a log file
    named ``run.<time_str>.log`` inside ``out_directory``.
    """

    def __init__(self, out_directory, time_str):
        """
        Parameters
        ----------
        out_directory : str
            Path to the directory where the log file is created
        time_str : str
            Timestamp used in the log file name
        """
        self.logfile = os.path.join(out_directory, str('run.' + time_str + '.log'))
        # Keep the stream being replaced so messages still reach it
        self.terminal = sys.stdout
        self.log = open(self.logfile, "w")

    def write(self, message):
        """Write message to the original stream and to the log file"""
        self.terminal.write(message)
        self.log.write(message)
        # Flush on every write so the log file is up to date even if the run is interrupted
        self.log.flush()

    def flush(self):
        """Flush both underlying streams (callers of ``sys.stdout.flush()`` expect buffered output to be emitted)"""
        self.terminal.flush()
        self.log.flush()

    def getLogFile(self):
        """Return the path to the log file"""
        return self.logfile
36
37
def get_cpu_information(outdir, time_str):
    """
    Save CPU and SLURM environment information to text files

    Two files are written to ``outdir``: ``cpu_information.<time_str>.cpu.txt`` with the output of
    ``cat /proc/cpuinfo`` (left empty when the command fails) and ``cpu_information.<time_str>.slurm.txt``
    with every environment variable whose name starts with ``SLURM_``.

    Parameters
    ----------
    outdir : str
        Path to the directory where the files are written
    time_str : str
        Timestamp used in the file names

    Returns
    -------

    """
    cpu_file = os.path.join(outdir, 'cpu_information.' + time_str + '.cpu.txt')
    with open(cpu_file, 'wt') as cpu_writer:
        succeeded, cpu_info, _ = run_command_popen_communicate(['cat', '/proc/cpuinfo'], False, None, False)
        if succeeded:
            cpu_writer.write(cpu_info)

    slurm_file = os.path.join(outdir, 'cpu_information.' + time_str + '.slurm.txt')
    with open(slurm_file, 'wt') as slurm_writer:
        slurm_variables = [name for name in sorted(os.environ) if name.startswith('SLURM_')]
        for name in slurm_variables:
            # One "#NAME" header line followed by the value line
            slurm_writer.write('#' + name + '\n' + os.environ[name] + '\n')
49
50
def setPATHvariable(doNotUseProvidedSoftware, script_path):
    """
    Put the bundled bowtie2, samtools and bcftools directories first in PATH and print PATH

    Parameters
    ----------
    doNotUseProvidedSoftware : bool
        True to leave PATH untouched
    script_path : str
        Path to the running script; the bundled software is looked for in the ``src`` folder next to it

    Returns
    -------

    """
    original_path = os.environ['PATH']

    if not doNotUseProvidedSoftware:
        bundled_root = os.path.join(os.path.dirname(script_path), 'src')
        bundled_directories = [
            os.path.join(bundled_root, 'bowtie2-2.2.9'),
            os.path.join(bundled_root, 'samtools-1.3.1', 'bin'),
            os.path.join(bundled_root, 'bcftools-1.3.1', 'bin'),
        ]
        # Bundled directories go first so they take precedence over system-wide installations
        os.environ['PATH'] = str(':'.join(bundled_directories + [original_path]))

    print('\n' + 'PATH variable:')
    print(os.environ['PATH'])
65
66
def checkPrograms(programs_version_dictionary):
    """
    Check that the required programs are in PATH and that their versions are acceptable

    Parameters
    ----------
    programs_version_dictionary : dict
        Program name as key and a list as value: the option that makes the program print its version
        (None if the version cannot be determined), the comparison to apply ('>=' for a minimum version,
        anything else for an exact match) and the required version string.
        For programs whose name ends with '.jar', the path found is appended to the list.

    Returns
    -------
    listMissings : list
        One message per program that was not found or whose version does not fulfil the requirement
    """
    print('\n' + 'Checking dependencies...')
    programs = programs_version_dictionary
    which_program = ['which', '']
    listMissings = []
    for program in programs:
        which_program[1] = program
        run_successfully, stdout, stderr = run_command_popen_communicate(which_program, False, None, False)
        if not run_successfully:
            listMissings.append(program + ' not found in PATH.')
            continue

        program_path = stdout.splitlines()[0]
        print(program_path)
        if programs[program][0] is None:
            print(program + ' (impossible to determine programme version) found at: ' + program_path)
            continue

        if program.endswith('.jar'):
            check_version = ['java', '-jar', program_path, programs[program][0]]
            programs[program].append(program_path)
        else:
            check_version = [program_path, programs[program][0]]
        run_successfully, stdout, stderr = run_command_popen_communicate(check_version, False, None, False)
        # Fall back to stderr when the version command prints nothing on stdout
        if stdout == '':
            stdout = stderr

        if program in ['wget', 'awk']:
            # Version is the third space-separated field of the first line
            version_line = stdout.splitlines()[0].split(' ', 3)[2]
        elif program in ['prefetch', 'fastq-dump']:
            # Version is the last field of the second line
            version_line = stdout.splitlines()[1].split(' ')[-1]
        else:
            version_line = stdout.splitlines()[0].split(' ')[-1]
        for character in ['"', 'v', 'V', '+', ',']:
            version_line = version_line.replace(character, '')
        print(program + ' (' + version_line + ') found')

        requirement_message = ('It is required ' + program + ' with version ' + programs[program][1] + ' ' +
                               programs[program][2])
        if programs[program][1] == '>=':
            program_found_version = version_line.split('.')
            program_version_required = programs[program][2].split('.')
            if len(program_version_required) == 3:
                if len(program_found_version) == 2:
                    # Treat "X.Y" as "X.Y.0"
                    program_found_version.append(0)
                else:
                    # Drop suffixes such as "_something" from the third component
                    program_found_version[2] = program_found_version[2].split('_')[0]
            for i, required_component in enumerate(program_version_required):
                found_number = int(program_found_version[i])
                required_number = int(required_component)
                if found_number > required_number:
                    break
                if found_number < required_number:
                    listMissings.append(requirement_message)
                    # The first differing component decides: stop here so the program is reported only once
                    # and less significant components cannot override the result
                    break
        elif version_line != programs[program][2]:
            listMissings.append(requirement_message)
    return listMissings
121
122
def requiredPrograms(asperaKey, downloadCramBam, SRA, SRAopt):
    """
    Build the list of required programs for the requested options and exit if any requirement is not met

    Parameters
    ----------
    asperaKey : str or None
        When not None, ascp is also required
    downloadCramBam : bool
        Not used by this function
    SRA : bool
        When True (or when SRAopt is True), prefetch, fastq-dump and awk are also required
    SRAopt : bool
        See SRA

    Returns
    -------

    """
    # Value layout: [option that prints the version, comparison, required version]
    programs_version_dictionary = {
        'wget': ['--version', '>=', '1.12'],
        'gzip': ['--version', '>=', '1.6'],
        'bowtie2': ['--version', '>=', '2.2.9'],
        'samtools': ['--version', '==', '1.3.1'],
        'bcftools': ['--version', '==', '1.3.1'],
    }
    if asperaKey is not None:
        programs_version_dictionary['ascp'] = ['--version', '>=', '3.6.1']
    if SRA or SRAopt:
        programs_version_dictionary['prefetch'] = ['--version', '>=', '2.8.2']
        programs_version_dictionary['fastq-dump'] = ['--version', '>=', '2.8.2']
        programs_version_dictionary['awk'] = ['--version', '>=', '3.0.4']

    missingPrograms = checkPrograms(programs_version_dictionary)
    if missingPrograms:
        sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms))
139
140
def general_information(logfile, version, outdir, time_str, doNotUseProvidedSoftware, asperaKey, downloadCramBam,
                        SRA, SRAopt):
    """
    Print the run header, record CPU information, set PATH and check the required programs

    Parameters
    ----------
    logfile : str
        Path to the log file (only printed)
    version : str
        Version of the script
    outdir : str
        Path to the directory where the CPU information files are written
    time_str : str
        Timestamp used in the CPU information file names
    doNotUseProvidedSoftware : bool
        True to leave PATH untouched
    asperaKey : str or None
        Passed to requiredPrograms
    downloadCramBam : bool
        Passed to requiredPrograms
    SRA : bool
        Passed to requiredPrograms
    SRAopt : bool
        Passed to requiredPrograms

    Returns
    -------
    script_path : str
        Path to ``rematch.py`` located in the parent folder of this module's folder
    """

    def print_title(title):
        # Every section title is preceded by an empty line
        print('\n' + title)

    package_folder = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    script_path = os.path.join(package_folder, 'rematch.py')

    print_title('==========> ReMatCh <==========')
    print_title('Program start: ' + time.ctime())

    # Tell where the log file is stored
    print_title('LOGFILE:')
    print(logfile)

    # Print the command used to start the run
    print_title('COMMAND:')
    print(sys.executable + ' ' + ' '.join(sys.argv))

    # Print the directory from which the program was launched
    print_title('PRESENT DIRECTORY:')
    present_directory = os.path.abspath(os.getcwd())
    print(present_directory)

    # Print the program version
    print_title('VERSION:')
    script_version_git(version, present_directory, script_path)

    # Save CPU information
    get_cpu_information(outdir, time_str)

    # Set and print the PATH variable
    setPATHvariable(doNotUseProvidedSoftware, script_path)

    # Check the required programs
    requiredPrograms(asperaKey, downloadCramBam, SRA, SRAopt)

    return script_path
175
176
def script_version_git(version, current_directory, script_path, no_git_info=False):
    """
    Print script version and get GitHub commit information

    Parameters
    ----------
    version : str
        Version of the script, e.g. "4.0"
    current_directory : str
        Path to the directory where the script started to run (the working directory is restored to it)
    script_path : str
        Path to the script running
    no_git_info : bool, default False
        True if it is not necessary to retrieve the GitHub commit information

    Returns
    -------

    """
    print('Version {}'.format(version))

    if not no_git_info:
        try:
            # The git commands are run from the parent of the folder that contains the script
            os.chdir(os.path.dirname(os.path.dirname(script_path)))
            command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"']
            run_successfully, stdout, stderr = run_command_popen_communicate(command, False, 15, False)
            print(stdout)
            command = ['git', 'remote', 'show', 'origin']
            run_successfully, stdout, stderr = run_command_popen_communicate(command, False, 15, False)
            print(stdout)
        except Exception:
            # Best effort only: failing to get the git information must not stop the run.
            # KeyboardInterrupt and SystemExit are deliberately not caught here.
            print('HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be'
                  ' obtained.')
        finally:
            os.chdir(current_directory)
212
213
def run_time(start_time):
    """
    Print the time elapsed since start_time and return it

    Parameters
    ----------
    start_time : float
        Start time as returned by ``time.time()``

    Returns
    -------
    time_taken : float
        Elapsed time in seconds, rounded to two decimal places
    """
    elapsed_seconds = time.time() - start_time
    whole_hours, remainder = divmod(elapsed_seconds, 3600)
    whole_minutes, leftover_seconds = divmod(remainder, 60)
    report_parts = [
        'Runtime :',
        str(whole_hours), 'h:',
        str(whole_minutes), 'm:',
        str(round(leftover_seconds, 2)), 's',
    ]
    print(''.join(report_parts))
    return round(elapsed_seconds, 2)
221
222
def timer(function, name):
    """
    Wrap function so that its running time is printed and returned

    Parameters
    ----------
    function : callable
        Function to time; what it returns must be iterable because it is converted to a list
    name : str
        Name printed in the RUNNING and END messages

    Returns
    -------
    wrapper : callable
        Function returning a list with the running time in seconds followed by the items returned by function
    """
    @functools_wraps(function)
    def wrapper(*args, **kwargs):
        print('\n' + 'RUNNING {0}\n'.format(name))
        started_at = time.time()

        # Converted to a list so the running time can be put in front of the results
        outputs = list(function(*args, **kwargs))

        elapsed = run_time(started_at)
        print('END {0}'.format(name))

        return [elapsed] + outputs
    return wrapper
237
238
def remove_directory(directory):
    """
    Delete a directory and everything inside it; do nothing when it is not an existing directory

    Parameters
    ----------
    directory : str
        Path to the directory to delete

    Returns
    -------

    """
    if not os.path.isdir(directory):
        return
    shutil.rmtree(directory)
242
243
def save_variable_to_pickle(variableToStore, outdir, prefix):
    """
    Pickle a variable into ``<outdir>/<prefix>.pkl``

    Parameters
    ----------
    variableToStore : object
        Object to pickle
    outdir : str
        Path to the directory where the pickle file is written
    prefix : str
        Name of the pickle file without the ``.pkl`` extension

    Returns
    -------

    """
    file_name = str(prefix + '.pkl')
    destination = os.path.join(outdir, file_name)
    with open(destination, 'wb') as pickle_handle:
        pickle.dump(variableToStore, pickle_handle)
248
249
def extract_variable_from_pickle(pickleFile):
    """
    Load and return the object stored in a pickle file

    Parameters
    ----------
    pickleFile : str
        Path to the pickle file

    Returns
    -------
    variable : object
        Object stored in the pickle file
    """
    # NOTE: unpickling can execute arbitrary code; only load files produced by a trusted run
    with open(pickleFile, 'rb') as pickle_handle:
        return pickle.load(pickle_handle)
254
255
def trace_unhandled_exceptions(func):
    """
    Decorator that prints the name of the function, the exception and the full traceback before re-raising

    Parameters
    ----------
    func : callable
        Function to wrap

    Returns
    -------
    wrapped_func : callable
        Function that returns whatever func returns and re-raises whatever func raises
    """
    @functools_wraps(func)
    def wrapped_func(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print('Exception in ' + func.__name__)
            print(e)

            exc_type, exc_value, exc_tb = sys.exc_info()
            print(''.join(traceback_format_exception(exc_type, exc_value, exc_tb)))

            # Re-raise the original exception: building a new one with exc_type(exc_value) loses the traceback
            # and fails for exception classes whose constructor needs several arguments
            raise

    return wrapped_func
271
272
def kill_subprocess_Popen(subprocess_Popen, command):
    """
    Report that a command ran out of time and kill its process

    Parameters
    ----------
    subprocess_Popen : subprocess.Popen
        Process to kill
    command : list or str
        Command that was being run (only used in the printed message)

    Returns
    -------

    """
    timeout_message = 'Command run out of time: ' + str(command)
    print(timeout_message)
    subprocess_Popen.kill()
276
277
def run_command_popen_communicate(command, shell_True, timeout_sec_None, print_comand_True):
    """
    Run a command, optionally with a timeout, and capture what it prints

    Parameters
    ----------
    command : list or str
        Command to run. A list is joined with spaces and then split again with ``shlex.split``, so an item
        that contains spaces must carry its own quotes to be kept as a single argument
    shell_True : bool
        True to run the command through the shell
    timeout_sec_None : int, float or None
        Number of seconds after which the process is killed; None to wait indefinitely
    print_comand_True : bool
        True to print the command before running it

    Returns
    -------
    run_successfully : bool
        True if the process exited with return code 0
    stdout : str
        Standard output decoded as UTF-8
    stderr : str
        Standard error decoded as UTF-8
    """
    run_successfully = False
    if not isinstance(command, str):
        command = ' '.join(command)
    command = shlex.split(command)

    if print_comand_True:
        print('Running: ' + ' '.join(command))

    if shell_True:
        command = ' '.join(command)
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    else:
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    not_killed_by_timer = True
    if timeout_sec_None is None:
        stdout, stderr = proc.communicate()
    else:
        # Filled by the timer callback, so whether the timeout fired is known for certain. Asking the timer
        # thread whether it is alive right after cancel() is racy, and Timer.isAlive() no longer exists in
        # Python >= 3.9
        timer_fired = []

        def kill_on_timeout():
            timer_fired.append(True)
            kill_subprocess_Popen(proc, command)

        time_counter = Timer(timeout_sec_None, kill_on_timeout)
        time_counter.start()
        try:
            stdout, stderr = proc.communicate()
        finally:
            # Never leave the timer running, even if communicate() is interrupted
            time_counter.cancel()
        not_killed_by_timer = not timer_fired

    stdout = stdout.decode("utf-8")
    stderr = stderr.decode("utf-8")

    if proc.returncode == 0:
        run_successfully = True
    else:
        # When the timer killed the process, the command was already printed by the kill function
        if not print_comand_True and not_killed_by_timer:
            print('Running: ' + str(command))
        if len(stdout) > 0:
            print('STDOUT')
            print(stdout)
        if len(stderr) > 0:
            print('STDERR')
            print(stderr)
    return run_successfully, stdout, stderr
315
316
def rchop(string, ending):
    """
    Remove ending from the end of string, if present

    Parameters
    ----------
    string : str
        String to trim
    ending : str
        Suffix to remove

    Returns
    -------
    string : str
        String without the suffix, or the unchanged string when it does not end with ending
    """
    # An empty ending must be skipped: every string "ends with" '' and string[:-0] would return ''
    if ending and string.endswith(ending):
        return string[:-len(ending)]
    return string
321
322
def reverse_complement(seq):
    """
    Return the reverse complement of a DNA sequence

    Parameters
    ----------
    seq : str
        DNA sequence (case insensitive). IUPAC ambiguity codes are supported

    Returns
    -------
    reverse_complement_string : str
        Reverse complement of the sequence, in upper case

    Raises
    ------
    KeyError
        If the sequence contains a character that is not an IUPAC nucleotide code
    """
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N',
                  # IUPAC ambiguity codes
                  'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W', 'K': 'M', 'M': 'K',
                  'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D'}

    # Join once instead of growing a string base by base
    return ''.join(complement[base] for base in reversed(seq.upper()))