Mercurial > repos > petr-novak > dante_ltr
comparison: detect_putative_ltr_wrapper.py @ 13:559940c04c44 (draft)
"planemo upload commit 139c041f671459192beb10ae45a8b371367c23b6"

| author   | petr-novak                      |
|----------|---------------------------------|
| date     | Thu, 11 Aug 2022 07:29:06 +0000 |
| parents  | ff01d4263391                    |
| children |                                 |
--- 12:ff01d4263391
+++ 13:559940c04c44
#!/usr/bin/env python
"""This wrapper is intended to be used on large genomes and large DANTE input to
minimize memory usage. It splits the input files into pieces and analyzes them
one by one with detect_putative_ltr.R.
-If input does not exceed memory limit, it will run detect_putative_ltr.R directly
+If input does not exceed the specified max_chunk_size, it will run
+detect_putative_ltr.R directly
"""

import argparse
import os
import sys
import tempfile
from itertools import cycle
import subprocess


+class Gff3Feature:
+    """
+    Class for gff3 feature
+    """
+
+    def __init__(self, line):
+        self.line = line
+        self.items = line.strip().split('\t')
+        self.header = self.items[0]
+        self.source = self.items[1]
+        self.type = self.items[2]
+        self.start = int(self.items[3])
+        self.end = int(self.items[4])
+        self.score = self.items[5]
+        self.strand = self.items[6]
+        self.frame = self.items[7]
+        self.attributes = self.items[8]
+        self.attributes_dict = {}
+        for item in self.attributes.split(';'):
+            if item != '':
+                key, value = item.split('=')
+                self.attributes_dict[key] = value
+
+        self.attributes_str = ';'.join(
+            ['{}={}'.format(key, value) for key, value in self.attributes_dict.items()]
+        )
+
+    def __str__(self):
+        return '\t'.join(
+            [self.header, self.source, self.type, str(self.start), str(self.end),
+             self.score, self.strand, self.frame, self.attributes_str]
+        ) + '\n'
+
+    def __repr__(self):
+        return '\t'.join(
+            [self.header, self.source, self.type, str(self.start), str(self.end),
+             self.score, self.strand, self.frame, self.attributes_str]
+        ) + '\n'
+
+    def __eq__(self, other):
+        return self.line_recalculated() == other.line_recalculated()
+
+    def __hash__(self):
+        return hash(self.line_recalculated())
+
+    def get_line(self):
+        """returns original line"""
+        return self.line
+
+    def overlap(self, other):
+        """
+        Check if two features overlap
+        :param other:
+        :return:
+        """
+        if self.start <= other.end and self.end >= other.start:
+            return True
+        else:
+            return False
+
+    def line_recalculated(self):
+        """
+        :return:
+            string with recalculated line
+        """
+        return '\t'.join(
+            [self.header, self.source, self.type, str(self.start), str(self.end),
+             self.score, self.strand, self.frame, self.attributes_str]
+        ) + '\n'
+
+    def __lt__(self, other):
+        width = self.end - self.start
+        other_width = other.end - other.start
+        return width < other_width
+
+    def __gt__(self, other):
+        width = self.end - self.start
+        other_width = other.end - other.start
+        return width > other_width
+
+    def identical_region(self, other):
+        """
+        Check if two features are identical (same header, start and end)
+        :param other:
+        :return:
+        """
+        if self.start == other.start and self.end == other.end and self.header == \
+                other.header:
+            return True
+        else:
+            return False
+
+
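# --- illustrative sketch (editor's addition, not part of the changeset) ---
# How Gff3Feature is used below: it parses one tab-separated GFF3 line, and
# overlap() plus the width-based comparisons drive get_unique_features().
# The coordinates, source column and IDs are made up for the example.
line_a = 'chr1\tdante_ltr\ttransposable_element\t100\t900\t.\t+\t.\tID=TE1\n'
line_b = 'chr1\tdante_ltr\ttransposable_element\t500\t1600\t.\t+\t.\tID=TE2\n'
feat_a, feat_b = Gff3Feature(line_a), Gff3Feature(line_b)
assert feat_a.overlap(feat_b)                    # 100-900 intersects 500-1600
assert feat_b > feat_a                           # feat_b spans a wider region
assert feat_a.attributes_dict['ID'] == 'TE1'
# ---------------------------------------------------------------------------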
def get_arguments():
    """
    Get arguments from command line
    :return:
        args
[...]
             "considered for retrotransposon detection"
    )
    parser.add_argument(
        '-S', '--max_chunk_size', default=100000000, required=False, type=int,
        help='If size of reference sequence is greater than this value, reference is '
-             'analyzed in chunks of this size. This is just an approximate value - '
-             'sequences which are longer are not split, default is %(default)s'
+             'analyzed in chunks of this size. Default is %(default)s. '
+             'Setting this value too small will slow down the analysis.'
    )
    args = parser.parse_args()
    return args


[...]
        filepaths
    """
    temp_files = []
    for i in range(number_of_files):
        temp_files.append(tempfile.NamedTemporaryFile(delete=False).name)
+        os.remove(temp_files[-1])
    return temp_files


def sum_up_stats_files(files):
    """
[...]
        statistics_str.append(classification + '\t' + '\t'.join([str(x) for x in counts]))
    sorted_stat_with_header = ['\t'.join(header)] + sorted(statistics_str)
    return sorted_stat_with_header


+def read_single_fasta_to_dictionary(fh):
+    """
+    Read fasta file into dictionary
+    :param fh:
+    :return:
+        fasta_dict
+    """
+    fasta_dict = {}
+    for line in fh:
+        if line[0] == '>':
+            header = line.strip().split(' ')[0][1:]  # remove part of name after space
+            fasta_dict[header] = []
+        else:
+            fasta_dict[header] += [line.strip()]
+    fasta_dict = {k: ''.join(v) for k, v in fasta_dict.items()}
+    return fasta_dict
+
+
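# --- illustrative sketch (editor's addition, not part of the changeset) ---
# The reader keeps only the part of each header before the first space, so
# '>chr1 assembly=v2' is stored under the key 'chr1'. io.StringIO stands in
# for an open file handle here.
import io
fasta = io.StringIO('>chr1 assembly=v2\nACGT\nACGT\n>chr2\nTTTT\n')
assert read_single_fasta_to_dictionary(fasta) == {'chr1': 'ACGTACGT', 'chr2': 'TTTT'}
# ---------------------------------------------------------------------------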
+def split_fasta_to_chunks(fasta_file, chunk_size=100000000, overlap=100000):
+    """
+    Split fasta file into chunks; sequences longer than the chunk size are
+    split into overlapping pieces. If sequences are shorter, chunks with
+    multiple sequences are created.
+    :param fasta_file:
+    :param chunk_size:
+    :param overlap:
+    :return:
+        fasta_file_split
+        matching_table
+    """
+    min_chunk_size = chunk_size * 2
+    fasta_dict = read_fasta_sequence_size(fasta_file)
+    # calculate ranges for splitting of fasta files and store them in a list
+    matching_table = []
+    fasta_file_split = tempfile.NamedTemporaryFile(delete=False).name
+    for header, size in fasta_dict.items():
+        if size > min_chunk_size:
+            number_of_chunks = int(size / chunk_size)
+            adjusted_chunk_size = int(size / number_of_chunks)
+            for i in range(number_of_chunks):
+                start = i * adjusted_chunk_size
+                end = ((i + 1) * adjusted_chunk_size
+                       + overlap) if i + 1 < number_of_chunks else size
+                new_header = header + '_' + str(i)
+                matching_table.append([header, i, start, end, new_header])
+        else:
+            new_header = header + '_0'
+            matching_table.append([header, 0, 0, size, new_header])
+    # read sequences from fasta file and split them into chunks according to
+    # the matching table; use with statement to close the output file
+    fasta_dict = read_single_fasta_to_dictionary(open(fasta_file, 'r'))
+    with open(fasta_file_split, 'w') as fh_out:
+        for header in fasta_dict:
+            matching_table_part = [x for x in matching_table if x[0] == header]
+            for header2, i, start, end, new_header in matching_table_part:
+                fh_out.write('>' + new_header + '\n')
+                fh_out.write(fasta_dict[header][start:end] + '\n')
+    return fasta_file_split, matching_table
+
+
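# --- illustrative sketch (editor's addition, not part of the changeset) ---
# Shape of the matching table for a toy 250 kb sequence with chunk_size=100000
# and overlap=10000: min_chunk_size is 200000, number_of_chunks is
# int(250000 / 100000) = 2 and adjusted_chunk_size is 125000, giving
#   [['chr1', 0,      0, 135000, 'chr1_0'],   # 0..125000 plus 10 kb overlap
#    ['chr1', 1, 125000, 250000, 'chr1_1']]   # last chunk runs to the end
# A 50 kb sequence is below min_chunk_size and stays whole:
#   [['chr2', 0, 0, 50000, 'chr2_0']]
# ---------------------------------------------------------------------------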
+def get_new_header_and_coordinates(header, start, end, matching_table):
+    """
+    Get new header and coordinates for sequence
+    :param header:
+    :param start:
+    :param end:
+    :param matching_table:
+    :return:
+        new_coords - list of [new_header, new_start, new_end, new_sequence_length]
+    """
+    matching_table_part = [x for x in matching_table if x[0] == header]
+    new_coords = []
+    for chunk in matching_table_part:
+        if chunk[2] <= start < chunk[3]:
+            new_header = chunk[4]
+            new_start = start - chunk[2]
+            new_end = end - chunk[2]
+            new_sequence_length = chunk[3] - chunk[2]
+            new_coords.append([new_header, new_start, new_end, new_sequence_length])
+    return new_coords
+
+
+def get_original_header_and_coordinates(new_header, new_start, new_end, matching_table):
+    """
+    Get original header and coordinates for sequence
+    :param new_header:
+    :param new_start:
+    :param new_end:
+    :param matching_table:
+    :return:
+        original_header
+        original_start
+        original_end
+    """
+    matching_table_part = [x for x in matching_table if x[4] == new_header]
+    ori_header = matching_table_part[0][0]
+    start = matching_table_part[0][2]
+    ori_start = new_start + start
+    ori_end = new_end + start
+    return ori_header, ori_start, ori_end
+
+
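# --- illustrative sketch (editor's addition, not part of the changeset) ---
# Round trip through the two mappings, using the toy matching table from the
# sketch above. A feature at chr1:130000-131000 lies in chunk 'chr1_1' (which
# starts at 125000) and also in the 10 kb overlap tail of 'chr1_0', so
#   get_new_header_and_coordinates('chr1', 130000, 131000, table)
# returns [['chr1_0', 130000, 131000, 135000], ['chr1_1', 5000, 6000, 125000]]
# and
#   get_original_header_and_coordinates('chr1_1', 5000, 6000, table)
# maps the second entry back to ('chr1', 130000, 131000). Note that
# recalculate_gff3_coordinates below keeps only mappings that fit entirely
# inside a chunk (new_start >= 1 and new_end <= sequence_length); a feature
# crossing a chunk boundary is expected to be caught in the overlap of the
# neighboring chunk.
# ---------------------------------------------------------------------------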
+# recalculate gff3 coordinates, use gff3_feature class
+def recalculate_gff3_coordinates(gff3_file, matching_table):
+    """
+    Recalculate gff3 coordinates, use gff3_feature class
+    :param gff3_file:
+    :param matching_table:
+    :return:
+        gff3_file_recalculated
+    """
+    gff3_file_recalculated = tempfile.NamedTemporaryFile(delete=False).name
+
+    with open(gff3_file, 'r') as fh_in:
+        with open(gff3_file_recalculated, 'w') as fh_out:
+            for line in fh_in:
+                if line[0] == '#':
+                    fh_out.write(line)
+                else:
+                    feature = Gff3Feature(line)
+                    new_coords = get_new_header_and_coordinates(
+                        feature.header, feature.start, feature.end, matching_table
+                    )
+                    for new_header, new_start, new_end, sequence_length in new_coords:
+                        if new_start >= 1 and new_end <= sequence_length:
+                            feature.header = new_header
+                            feature.start = new_start
+                            feature.end = new_end
+                            fh_out.write(str(feature))
+    return gff3_file_recalculated
+
+
+# recalculate gff3 back to original coordinates, use gff3_feature class
+def recalculate_gff3_back_to_original_coordinates(gff3_file, matching_table):
+    """
+    Recalculate gff3 back to original coordinates, use gff3_feature class
+    :param gff3_file:
+    :param matching_table:
+    :return:
+        gff3_file_recalculated
+    """
+    gff3_file_recalculated = tempfile.NamedTemporaryFile(delete=False).name
+    with open(gff3_file, 'r') as fh_in:
+        with open(gff3_file_recalculated, 'w') as fh_out:
+            for line in fh_in:
+                if line[0] == '#':
+                    fh_out.write(line)
+                else:
+                    feature = Gff3Feature(line)
+                    ori_header, ori_start, ori_end = get_original_header_and_coordinates(
+                        feature.header, feature.start, feature.end, matching_table
+                    )
+                    feature.header = ori_header
+                    feature.start = ori_start
+                    feature.end = ori_end
+                    fh_out.write(str(feature))
+    return gff3_file_recalculated
+
+
+def get_feature_attributes(line):
+    """
+    Get attributes as dictionary from gff3 list
+    :param line:
+    :return:
+        attributes_dict
+    """
+    attributes_dict = {}
+    for item in line[8].split(';'):
+        if item.strip():
+            key, value = item.strip().split('=')
+            attributes_dict[key] = value
+    return attributes_dict
+
+
+def get_unique_features(gff3_file):
+    """
+    Return list of IDs of non-overlapping features.
+
+    :param gff3_file:
+    :return:
+        good_id
+    """
+    good_id = []
+    feature_list = []
+    with open(gff3_file, 'r') as fh:
+        for line in fh:
+            if line[0] == '#':
+                continue
+            feature = Gff3Feature(line)
+            if feature.type != 'transposable_element':
+                continue
+            feature_list.append(feature)
+    # sort by start position and header
+    feature_list.sort(key=lambda x: (x.start, x.header))
+    i = 0
+    while i < len(feature_list) - 1:
+        ch = feature_list[i].header == feature_list[i + 1].header
+        if not ch:
+            good_id.append(feature_list[i].attributes_dict['ID'])
+            i += 1
+            continue
+        if feature_list[i].identical_region(feature_list[i + 1]):
+            # identical position
+            good_id.append(feature_list[i].attributes_dict['ID'])
+            i += 2
+            continue
+        if feature_list[i].overlap(feature_list[i + 1]):
+            # overlap
+            if feature_list[i] > feature_list[i + 1]:
+                good_id.append(feature_list[i].attributes_dict['ID'])
+                i += 2
+                continue
+            else:
+                good_id.append(feature_list[i + 1].attributes_dict['ID'])
+                i += 2
+                continue
+        else:
+            good_id.append(feature_list[i].attributes_dict['ID'])
+            i += 1
+    if i == len(feature_list) - 1:
+        good_id.append(feature_list[i].attributes_dict['ID'])
+    return good_id
+
+
+def filter_gff3_file(gff3_file, good_id, output_file):
+    """
+    Filter gff3 file by good_id
+    :param gff3_file:
+    :param good_id:
+    :param output_file:
+    :return:
+        filtered_gff3_file
+    """
+    with open(gff3_file, 'r') as fh, open(output_file, 'w') as fout:
+        for line in fh:
+            if line[0] == '#':
+                fout.write(line)
+            else:
+                feature = Gff3Feature(line)
+                if ('ID' in feature.attributes_dict and
+                        feature.attributes_dict['ID'] in good_id):
+                    fout.write(line)
+                    continue
+                if 'Parent' in feature.attributes_dict:
+                    if feature.attributes_dict['Parent'] in good_id:
+                        fout.write(line)
+                        continue
+
+
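# --- illustrative sketch (editor's addition, not part of the changeset) ---
# The two functions above work as a pair. With the features from the
# Gff3Feature sketch, TE1 (chr1:100-900) and TE2 (chr1:500-1600) overlap and
# TE2 is wider, so only TE2 survives; file names below are placeholders.
#   good_id = get_unique_features('unfiltered.gff3')          # -> ['TE2']
#   filter_gff3_file('unfiltered.gff3', good_id, 'filtered.gff3')
# filter_gff3_file also keeps child features whose Parent is in good_id.
# ---------------------------------------------------------------------------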
def main():
    """
    Main function
    """
    args = get_arguments()
    # locate directory of current script
    tool_path = os.path.dirname(os.path.realpath(__file__))
-    fasta_seq_size = read_fasta_sequence_size(args.reference_sequence)
+    # split fasta file to chunks
+    fasta_file_split, matching_table = split_fasta_to_chunks(
+        args.reference_sequence, chunk_size=args.max_chunk_size
+    )
+    # recalculate gff3 coordinates
+    gff3_file_recalculated = recalculate_gff3_coordinates(args.gff3, matching_table)
+
+    fasta_seq_size = read_fasta_sequence_size(fasta_file_split)
    total_size = sum(fasta_seq_size.values())
    number_of_sequences = len(fasta_seq_size)
    if total_size > args.max_chunk_size and number_of_sequences > 1:
+        print('running analysis on chunks of ~ {} bp'.format(args.max_chunk_size))
        # sort dictionary by values
        seq_id_size_sorted = [i[0] for i in sorted(
            fasta_seq_size.items(), key=lambda x: int(x[1]), reverse=True
        )]
        number_of_temp_files = int(total_size / args.max_chunk_size) + 1
[...]
        file_handles = [open(temp_file, 'w') for temp_file in temp_files_fasta]
        # make dictionary with seq_id_sorted as keys and file handles as values
        seq_id_file_handle_dict = dict(zip(seq_id_size_sorted, cycle(file_handles)))

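# --- illustrative sketch (editor's addition, not part of the changeset) ---
# dict(zip(seq_id_size_sorted, cycle(file_handles))) deals the size-sorted
# sequences round-robin over the chunk files: with sequences s1 >= s2 >= s3
# >= s4 and two temp files f0, f1 it yields {s1: f0, s2: f1, s3: f0, s4: f1},
# which roughly balances total bases per chunk file.
# ---------------------------------------------------------------------------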
        # write sequences to temporary files
-        with open(args.reference_sequence, 'r') as f:
+        with open(fasta_file_split, 'r') as f:
            for line in f:
                if line[0] == '>':
                    header = line.strip().split(' ')[0][1:]
-                    print(header)
                    seq_id_file_handle_dict[header].write(line)
                else:
                    seq_id_file_handle_dict[header].write(line)
+        os.remove(fasta_file_split)
        # close file handles
        for file_handle in file_handles:
            file_handle.close()

        # split gff3 file to temporary files -
[...]
        temp_files_gff = make_temp_files(number_of_temp_files)
        file_handles = [open(temp_file, 'w') for temp_file in temp_files_gff]
        # make dictionary with seq_id_sorted as keys and file handles as values
        seq_id_file_handle_dict = dict(zip(seq_id_size_sorted, cycle(file_handles)))
        # write gff lines to chunks
-        with open(args.gff3, 'r') as f:
+        with open(gff3_file_recalculated, 'r') as f:
            for line in f:
                if line[0] == '#':
                    continue
                else:
                    header = line.strip().split('\t')[0]
                    seq_id_file_handle_dict[header].write(line)
        # close file handles
        for file_handle in file_handles:
            file_handle.close()
+        os.remove(gff3_file_recalculated)
        # run retrotransposon detection on each temporary file
        output_files = make_temp_files(number_of_temp_files)
        for i in range(number_of_temp_files):
            print('Running retrotransposon detection on file ' + str(i))
            subprocess.check_call(
                [f'{tool_path}/detect_putative_ltr.R', '-s', temp_files_fasta[i], '-g',
                 temp_files_gff[i], '-o', output_files[i], '-c', str(args.cpu), '-M',
                 str(args.max_missing_domains), '-L', str(args.min_relative_length)]
            )

        # remove all temporary input files
        for temp_file in temp_files_fasta + temp_files_gff:
            os.remove(temp_file)

        # concatenate output files
        output_file_suffixes = ['_D.fasta', '_DL.fasta', '_DLT.fasta', '_DLTP.fasta',
[...]
                with open(args.output + suffix, 'w') as f:
                    f.write("\n".join(new_statistics))
                # remove parsed temporary statistics files
                for file in stat_files:
                    os.remove(file)
+            elif suffix == '.gff3':
+                tmp_gff_unfiltered = tempfile.NamedTemporaryFile(delete=False).name
+                with open(tmp_gff_unfiltered, 'w') as f:
+                    for i in range(number_of_temp_files):
+                        tmp_gff = recalculate_gff3_back_to_original_coordinates(
+                            output_files[i] + suffix, matching_table
+                        )
+                        # remove temporary gff3 file
+                        os.remove(output_files[i] + suffix)
+                        with open(tmp_gff, 'r') as f_tmp:
+                            for line in f_tmp:
+                                f.write(line)
+                        os.remove(tmp_gff)
+                # filter overlapping features
+                good_id = get_unique_features(tmp_gff_unfiltered)
+                filter_gff3_file(
+                    tmp_gff_unfiltered, good_id, args.output + suffix
+                )
+                # remove temporary gff3 file
+                os.remove(tmp_gff_unfiltered)
            else:
                with open(args.output + suffix, 'w') as f:
                    for i in range(number_of_temp_files):
                        # some files may not exist, so we need to check
                        try:
                            with open(output_files[i] + suffix, 'r') as g:
                                for line in g:
                                    f.write(line)
                            # remove parsed temporary output files
-                            os.remove(output_files[i])
+                            os.remove(output_files[i] + suffix)
                        except FileNotFoundError:
                            pass
    else:
+        print('running analysis on whole input')
        # no need to split sequences into chunks
        subprocess.check_call(
            [f'{tool_path}/detect_putative_ltr.R', '-s', args.reference_sequence, '-g',
             args.gff3, '-o', args.output, '-c', str(args.cpu), '-M',
             str(args.max_missing_domains), '-L', str(args.min_relative_length)]
[...]