Mercurial > repos > iss > eurl_vtec_wgs_pt
comparison scripts/modules/typing.py @ 0:c6bab5103a14 draft
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author | iss |
---|---|
date | Mon, 21 Mar 2022 15:23:09 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c6bab5103a14 |
---|---|
1 import os.path | |
2 import functools | |
3 | |
4 try: | |
5 import modules.utils as utils | |
6 except ImportError: | |
7 from pathotyping.modules import utils as utils | |
8 | |
9 | |
10 # {1: {'gene_identity': 0, 'gene_mean_read_coverage': 0.0, 'gene_number_positions_multiple_alleles': 0, 'header': 'fyuA', 'gene_coverage': 0.0, 'gene_low_coverage': 100.0}} | |
11 | |
12 | |
13 def simplify_data_by_gene(data_by_gene): | |
14 cleaned_data_by_gene = {} | |
15 for counter, data in list(data_by_gene.items()): | |
16 cleaned_data_by_gene[data['header'].lower()] = {'gene_identity': data['gene_identity'], | |
17 'gene_coverage': data['gene_coverage'], | |
18 'gene_depth': data['gene_mean_read_coverage']} | |
19 return cleaned_data_by_gene | |
20 | |
21 | |
22 def possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth): | |
23 data_by_gene = simplify_data_by_gene(data_by_gene) | |
24 | |
25 possible_pathotypes = [] | |
26 genes_present = [] | |
27 with open(typing_rules_file, 'rtU') as reader: | |
28 genes = [] | |
29 for line in reader: | |
30 line = line.splitlines()[0] | |
31 if len(line) > 0: | |
32 line = line.split('\t') | |
33 if not line[0].startswith('##'): | |
34 if line[0].startswith('#'): | |
35 genes = line[1:] | |
36 else: | |
37 profile = line[1:] | |
38 congruence = [] | |
39 for x, gene_requirement in enumerate(profile): | |
40 if data_by_gene[genes[x].lower()]['gene_coverage'] >= min_gene_coverage and \ | |
41 data_by_gene[genes[x].lower()]['gene_identity'] >= min_gene_identity and \ | |
42 data_by_gene[genes[x].lower()]['gene_depth'] >= min_gene_depth: | |
43 gene_present = True | |
44 genes_present.append(genes[x]) | |
45 else: | |
46 gene_present = False | |
47 | |
48 gene_requirement = \ | |
49 True if gene_requirement == '1' else False if gene_requirement == '0' else None | |
50 if gene_requirement is not None: | |
51 if gene_present == gene_requirement: | |
52 congruence.append(True) | |
53 else: | |
54 congruence.append(False) | |
55 else: | |
56 congruence.append(True) | |
57 if all(congruence): | |
58 possible_pathotypes.append(line[0]) | |
59 return possible_pathotypes, list(set(genes_present)) | |
60 | |
61 | |
62 module_timer = functools.partial(utils.timer, name='Module Typing') | |
63 | |
64 | |
65 @module_timer | |
66 def typing(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth, outdir): | |
67 possible_pathotypes, genes_present = possible_types(data_by_gene, typing_rules_file, min_gene_coverage, | |
68 min_gene_identity, min_gene_depth) | |
69 with open(os.path.join(outdir, 'patho_typing.report.txt'), 'wt') as writer_report: | |
70 with open(os.path.join(outdir, 'patho_typing.extended_report.txt'), 'wt') as writer_extended_report: | |
71 writer_extended_report.write('#Pathotypes_found' + '\n') | |
72 if len(possible_pathotypes) > 0: | |
73 writer_report.write('\n'.join(possible_pathotypes) + '\n') | |
74 writer_extended_report.write('\n'.join(possible_pathotypes) + '\n') | |
75 print('\n' + 'Pathotypes found:' + '\n') | |
76 print('\n'.join(possible_pathotypes) + '\n') | |
77 else: | |
78 writer_report.write('TND' + '\n') # Type Not Determined | |
79 writer_extended_report.write('TND' + '\n') # Type Not Determined | |
80 print('\n' + 'It was not possible to identify any possible pathotype match' + '\n') | |
81 writer_extended_report.write('\n' + '#Genes_present' + '\n') | |
82 writer_extended_report.write('\n'.join(genes_present) + '\n') | |
83 | |
84 return None, None |