|
493
|
1 #!/usr/bin/env python3
|
|
|
2 """
|
|
|
3 Comprehensive test suite for GPR translation functionality in COBRAxy.
|
|
|
4
|
|
|
5 This test suite covers:
|
|
|
6 - Basic 1:1, 1:many, many:1 gene mappings
|
|
|
7 - Complex GPR expressions with AND/OR logic
|
|
|
8 - Translation issues tracking
|
|
|
9 - OR-only GPR flattening functionality
|
|
|
10 - Edge cases and nested expressions
|
|
|
11 - Statistical reporting
|
|
|
12 """
|
|
|
13
|
|
|
14 import cobra
|
|
|
15 import pandas as pd
|
|
|
16 import sys
|
|
|
17 import os
|
|
|
18 import logging
|
|
|
19 from typing import Dict, List, Tuple
|
|
|
20 import re
|
|
|
21
|
|
|
22 # Add the COBRAxy utils directory to the path
|
|
|
23 sys.path.append('/hdd/home/flapi/COBRAxy')
|
|
|
24 from utils import model_utils
|
|
|
25
|
|
|
26 # Configure logging
|
|
|
27 logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
|
|
28 logger = logging.getLogger(__name__)
|
|
|
29
|
|
|
30 class GPRTranslationTester:
|
|
|
31 """Comprehensive GPR translation test suite"""
|
|
|
32
|
|
|
33 def __init__(self):
|
|
|
34 self.test_results = {}
|
|
|
35 self.failed_tests = []
|
|
|
36
|
|
|
37 def create_comprehensive_test_model(self) -> cobra.Model:
|
|
|
38 """Create a comprehensive test model with diverse GPR patterns"""
|
|
|
39 model = cobra.Model('comprehensive_test_model')
|
|
|
40
|
|
|
41 # Create metabolites
|
|
|
42 metabolites = []
|
|
|
43 for i in range(30):
|
|
|
44 met = cobra.Metabolite(f'met_{chr(65+i%26)}{i//26}', compartment='c')
|
|
|
45 metabolites.append(met)
|
|
|
46
|
|
|
47 reactions_data = [
|
|
|
48 # === BASIC CASES ===
|
|
|
49 ('BASIC_1to1', 'GENE1', 0, 1), # Simple 1:1 mapping
|
|
|
50 ('BASIC_1tomany', 'GENE2', 1, 2), # 1:many mapping
|
|
|
51 ('BASIC_manyto1', 'GENE3', 2, 3), # many:1 mapping
|
|
|
52 ('BASIC_unmapped', 'UNMAPPED_GENE', 3, 4), # unmapped gene
|
|
|
53
|
|
|
54 # === SIMPLE OR CASES (candidates for flattening) ===
|
|
|
55 ('OR_simple', 'GENE4 or GENE5', 4, 5), # Simple OR with many:1
|
|
|
56 ('OR_three', 'GENE6 or GENE7 or GENE8', 5, 6), # Three genes OR
|
|
|
57 ('OR_parentheses', '(GENE9 or GENE10)', 6, 7), # OR with parentheses
|
|
|
58 ('OR_duplicates', 'GENE11 or GENE12 or GENE11', 7, 8), # OR with duplicates after translation
|
|
|
59
|
|
|
60 # === COMPLEX OR CASES (candidates for flattening) ===
|
|
|
61 ('OR_nested_simple', '(GENE13 or GENE14) or (GENE15 or GENE16)', 8, 9), # Nested OR only
|
|
|
62 ('OR_many_parentheses', '((GENE17 or GENE18) or GENE19) or GENE20', 9, 10), # Multiple nesting levels
|
|
|
63 ('OR_mixed_mapping', 'GENE21 or GENE22 or GENE23', 10, 11), # Mixed 1:1, 1:many, many:1
|
|
|
64
|
|
|
65 # === AND CASES (should NOT be flattened) ===
|
|
|
66 ('AND_simple', 'GENE24 and GENE25', 11, 12), # Simple AND
|
|
|
67 ('AND_complex', '(GENE26 and GENE27) and GENE28', 12, 13), # Complex AND
|
|
|
68
|
|
|
69 # === MIXED AND/OR (should NOT be flattened) ===
|
|
|
70 ('MIXED_basic', 'GENE29 and (GENE30 or GENE31)', 13, 14), # AND with OR
|
|
|
71 ('MIXED_complex', '(GENE32 or GENE33) and (GENE34 or GENE35)', 14, 15), # OR and AND
|
|
|
72 ('MIXED_nested', '((GENE36 and GENE37) or GENE38) and GENE39', 15, 16), # Complex nesting
|
|
|
73
|
|
|
74 # === EDGE CASES ===
|
|
|
75 ('EDGE_single', 'GENE40', 16, 17), # Single gene
|
|
|
76 ('EDGE_empty', '', 17, 18), # Empty GPR
|
|
|
77 ('EDGE_whitespace', ' GENE41 or GENE42 ', 18, 19), # Whitespace
|
|
|
78 ('EDGE_case_sensitive', 'Gene43 OR gene44', 19, 20), # Case variations
|
|
|
79
|
|
|
80 # === STRESS TESTS ===
|
|
|
81 ('STRESS_long_or', 'GENE45 or GENE46 or GENE47 or GENE48 or GENE49 or GENE50', 20, 21), # Long OR chain
|
|
|
82 ('STRESS_deep_nest', '(((GENE51 or GENE52) or GENE53) or GENE54)', 21, 22), # Deep nesting
|
|
|
83 ('STRESS_complex', '(GENE55 or (GENE56 or GENE57)) or ((GENE58 or GENE59) or GENE60)', 22, 23), # Complex structure
|
|
|
84
|
|
|
85 # === TRANSLATION ISSUE TRIGGERS ===
|
|
|
86 ('ISSUE_1many_or', 'GENE61 or GENE62', 23, 24), # 1:many in OR (should be flattened)
|
|
|
87 ('ISSUE_manyto1_and', 'GENE63 and GENE64', 24, 25), # many:1 in AND (should NOT be flattened)
|
|
|
88 ('ISSUE_mixed_problems', '(GENE65 or GENE66) and GENE67', 25, 26), # Mixed problems
|
|
|
89
|
|
|
90 # === REAL-WORLD INSPIRED CASES ===
|
|
|
91 ('REAL_metabolism', '(ENSG001 or ENSG002) or (ENSG003 or ENSG004)', 26, 27), # Metabolic pathway
|
|
|
92 ('REAL_transport', 'TRANSPORTER1 and (COFACTOR1 or COFACTOR2)', 27, 28), # Transport reaction
|
|
|
93 ('REAL_complex_enzyme', '((SUBUNIT1 and SUBUNIT2) or SUBUNIT3) and COFACTOR3', 28, 29), # Complex enzyme
|
|
|
94 ]
|
|
|
95
|
|
|
96 # Create reactions
|
|
|
97 for rxn_id, gpr, met_in, met_out in reactions_data:
|
|
|
98 rxn = cobra.Reaction(rxn_id)
|
|
|
99 if met_in < len(metabolites) and met_out < len(metabolites):
|
|
|
100 rxn.add_metabolites({metabolites[met_in]: -1, metabolites[met_out]: 1})
|
|
|
101 rxn.gene_reaction_rule = gpr
|
|
|
102 model.add_reactions([rxn])
|
|
|
103
|
|
|
104 return model
|
|
|
105
|
|
|
106 def create_comprehensive_mapping(self) -> pd.DataFrame:
|
|
|
107 """Create a comprehensive gene mapping covering all test scenarios"""
|
|
|
108 mapping_data = {
|
|
|
109 'hgnc_symbol': [],
|
|
|
110 'ensg': []
|
|
|
111 }
|
|
|
112
|
|
|
113 # === BASIC MAPPINGS ===
|
|
|
114 # 1:1 mappings
|
|
|
115 one_to_one = [
|
|
|
116 ('GENE1', 'TARGET1'),
|
|
|
117 ('GENE24', 'TARGET24'),
|
|
|
118 ('GENE25', 'TARGET25'),
|
|
|
119 ('GENE26', 'TARGET26'),
|
|
|
120 ('GENE27', 'TARGET27'),
|
|
|
121 ('GENE28', 'TARGET28'),
|
|
|
122 ('GENE29', 'TARGET29'),
|
|
|
123 ('GENE40', 'TARGET40'),
|
|
|
124 ('GENE41', 'TARGET41'),
|
|
|
125 ('GENE42', 'TARGET42'),
|
|
|
126 ]
|
|
|
127
|
|
|
128 # 1:many mappings (one source gene maps to multiple targets)
|
|
|
129 one_to_many = [
|
|
|
130 ('GENE2', 'TARGET2A'),
|
|
|
131 ('GENE2', 'TARGET2B'),
|
|
|
132 ('GENE30', 'TARGET30A'),
|
|
|
133 ('GENE30', 'TARGET30B'),
|
|
|
134 ('GENE61', 'TARGET61A'),
|
|
|
135 ('GENE61', 'TARGET61B'),
|
|
|
136 ('GENE61', 'TARGET61C'), # Maps to 3 targets
|
|
|
137 ('GENE65', 'TARGET65A'),
|
|
|
138 ('GENE65', 'TARGET65B'),
|
|
|
139 ]
|
|
|
140
|
|
|
141 # many:1 mappings (multiple source genes map to one target)
|
|
|
142 many_to_one = [
|
|
|
143 ('GENE3', 'SHARED_TARGET1'),
|
|
|
144 ('GENE4', 'SHARED_TARGET1'),
|
|
|
145 ('GENE5', 'SHARED_TARGET1'),
|
|
|
146 ('GENE6', 'SHARED_TARGET2'),
|
|
|
147 ('GENE7', 'SHARED_TARGET2'),
|
|
|
148 ('GENE8', 'SHARED_TARGET2'),
|
|
|
149 ('GENE9', 'SHARED_TARGET3'),
|
|
|
150 ('GENE10', 'SHARED_TARGET3'),
|
|
|
151 ('GENE11', 'SHARED_TARGET4'),
|
|
|
152 ('GENE12', 'SHARED_TARGET4'),
|
|
|
153 ('GENE13', 'SHARED_TARGET5'),
|
|
|
154 ('GENE14', 'SHARED_TARGET5'),
|
|
|
155 ('GENE15', 'SHARED_TARGET5'),
|
|
|
156 ('GENE16', 'SHARED_TARGET5'),
|
|
|
157 ('GENE17', 'SHARED_TARGET6'),
|
|
|
158 ('GENE18', 'SHARED_TARGET6'),
|
|
|
159 ('GENE19', 'SHARED_TARGET6'),
|
|
|
160 ('GENE20', 'SHARED_TARGET6'),
|
|
|
161 ('GENE45', 'SHARED_TARGET7'),
|
|
|
162 ('GENE46', 'SHARED_TARGET7'),
|
|
|
163 ('GENE47', 'SHARED_TARGET7'),
|
|
|
164 ('GENE48', 'SHARED_TARGET7'),
|
|
|
165 ('GENE49', 'SHARED_TARGET7'),
|
|
|
166 ('GENE50', 'SHARED_TARGET7'),
|
|
|
167 ('GENE51', 'SHARED_TARGET8'),
|
|
|
168 ('GENE52', 'SHARED_TARGET8'),
|
|
|
169 ('GENE53', 'SHARED_TARGET8'),
|
|
|
170 ('GENE54', 'SHARED_TARGET8'),
|
|
|
171 ('GENE55', 'SHARED_TARGET9'),
|
|
|
172 ('GENE56', 'SHARED_TARGET9'),
|
|
|
173 ('GENE57', 'SHARED_TARGET9'),
|
|
|
174 ('GENE58', 'SHARED_TARGET9'),
|
|
|
175 ('GENE59', 'SHARED_TARGET9'),
|
|
|
176 ('GENE60', 'SHARED_TARGET9'),
|
|
|
177 ('GENE63', 'SHARED_TARGET10'),
|
|
|
178 ('GENE64', 'SHARED_TARGET10'),
|
|
|
179 ('GENE66', 'SHARED_TARGET11'),
|
|
|
180 ]
|
|
|
181
|
|
|
182 # Mixed mappings for complex cases
|
|
|
183 mixed_mappings = [
|
|
|
184 ('GENE21', 'TARGET21'), # 1:1
|
|
|
185 ('GENE22', 'TARGET22A'), # 1:many
|
|
|
186 ('GENE22', 'TARGET22B'),
|
|
|
187 ('GENE23', 'SHARED_TARGET1'), # many:1 (shares with GENE3-5)
|
|
|
188 ('GENE31', 'SHARED_TARGET12'),
|
|
|
189 ('GENE32', 'SHARED_TARGET13'),
|
|
|
190 ('GENE33', 'SHARED_TARGET13'),
|
|
|
191 ('GENE34', 'TARGET34'),
|
|
|
192 ('GENE35', 'TARGET35'),
|
|
|
193 ('GENE36', 'TARGET36'),
|
|
|
194 ('GENE37', 'TARGET37'),
|
|
|
195 ('GENE38', 'TARGET38'),
|
|
|
196 ('GENE39', 'TARGET39'),
|
|
|
197 ('GENE62', 'TARGET62A'),
|
|
|
198 ('GENE62', 'TARGET62B'),
|
|
|
199 ('GENE67', 'TARGET67'),
|
|
|
200 ]
|
|
|
201
|
|
|
202 # Case sensitivity tests
|
|
|
203 case_mappings = [
|
|
|
204 ('Gene43', 'TARGET43'),
|
|
|
205 ('gene44', 'TARGET44'),
|
|
|
206 ]
|
|
|
207
|
|
|
208 # Real-world inspired mappings
|
|
|
209 real_mappings = [
|
|
|
210 ('ENSG001', 'HUMAN_GENE1'),
|
|
|
211 ('ENSG002', 'HUMAN_GENE2'),
|
|
|
212 ('ENSG003', 'HUMAN_GENE1'), # many:1
|
|
|
213 ('ENSG004', 'HUMAN_GENE2'), # many:1
|
|
|
214 ('TRANSPORTER1', 'SLC_FAMILY1'),
|
|
|
215 ('COFACTOR1', 'COFACTOR_A'),
|
|
|
216 ('COFACTOR2', 'COFACTOR_A'), # many:1
|
|
|
217 ('COFACTOR3', 'COFACTOR_B'),
|
|
|
218 ('SUBUNIT1', 'COMPLEX_SUBUNIT1'),
|
|
|
219 ('SUBUNIT2', 'COMPLEX_SUBUNIT2'),
|
|
|
220 ('SUBUNIT3', 'COMPLEX_ALTERNATIVE'),
|
|
|
221 ]
|
|
|
222
|
|
|
223 # Combine all mappings
|
|
|
224 all_mappings = one_to_one + one_to_many + many_to_one + mixed_mappings + case_mappings + real_mappings
|
|
|
225
|
|
|
226 for source, target in all_mappings:
|
|
|
227 mapping_data['hgnc_symbol'].append(source)
|
|
|
228 mapping_data['ensg'].append(target)
|
|
|
229
|
|
|
230 return pd.DataFrame(mapping_data)
|
|
|
231
|
|
|
232 def analyze_mapping_statistics(self, mapping_df: pd.DataFrame) -> Dict:
|
|
|
233 """Analyze mapping statistics"""
|
|
|
234 stats = {}
|
|
|
235
|
|
|
236 source_counts = mapping_df.groupby('hgnc_symbol')['ensg'].count()
|
|
|
237 target_counts = mapping_df.groupby('ensg')['hgnc_symbol'].count()
|
|
|
238
|
|
|
239 stats['total_mappings'] = len(mapping_df)
|
|
|
240 stats['unique_sources'] = len(source_counts)
|
|
|
241 stats['unique_targets'] = len(target_counts)
|
|
|
242
|
|
|
243 stats['one_to_one'] = (source_counts == 1).sum()
|
|
|
244 stats['one_to_many'] = (source_counts > 1).sum()
|
|
|
245 stats['many_to_one_targets'] = (target_counts > 1).sum()
|
|
|
246
|
|
|
247 stats['one_to_many_details'] = {}
|
|
|
248 for gene, count in source_counts[source_counts > 1].items():
|
|
|
249 targets = mapping_df[mapping_df['hgnc_symbol'] == gene]['ensg'].tolist()
|
|
|
250 stats['one_to_many_details'][gene] = targets
|
|
|
251
|
|
|
252 stats['many_to_one_details'] = {}
|
|
|
253 for target, count in target_counts[target_counts > 1].items():
|
|
|
254 sources = mapping_df[mapping_df['ensg'] == target]['hgnc_symbol'].tolist()
|
|
|
255 stats['many_to_one_details'][target] = sources
|
|
|
256
|
|
|
257 return stats
|
|
|
258
|
|
|
259 def predict_translation_issues(self, model: cobra.Model, mapping_df: pd.DataFrame) -> Dict:
|
|
|
260 """Predict which reactions will have translation issues"""
|
|
|
261 predictions = {}
|
|
|
262 mapping_dict = {}
|
|
|
263
|
|
|
264 # Build mapping dictionary
|
|
|
265 for _, row in mapping_df.iterrows():
|
|
|
266 source = row['hgnc_symbol']
|
|
|
267 target = row['ensg']
|
|
|
268 if source not in mapping_dict:
|
|
|
269 mapping_dict[source] = []
|
|
|
270 mapping_dict[source].append(target)
|
|
|
271
|
|
|
272 for rxn in model.reactions:
|
|
|
273 if not rxn.gene_reaction_rule or rxn.gene_reaction_rule.strip() == '':
|
|
|
274 continue
|
|
|
275
|
|
|
276 # Extract genes from GPR
|
|
|
277 token_pattern = r'\b[A-Za-z0-9:_.-]+\b'
|
|
|
278 tokens = re.findall(token_pattern, rxn.gene_reaction_rule)
|
|
|
279 logical_operators = {'and', 'or', 'AND', 'OR', '(', ')'}
|
|
|
280 genes = [t for t in tokens if t not in logical_operators]
|
|
|
281
|
|
|
282 issues = []
|
|
|
283 has_1_to_many = False
|
|
|
284 has_many_to_1 = False
|
|
|
285 has_unmapped = False
|
|
|
286
|
|
|
287 for gene in set(genes):
|
|
|
288 norm_gene = model_utils._normalize_gene_id(gene)
|
|
|
289 if norm_gene in mapping_dict:
|
|
|
290 targets = mapping_dict[norm_gene]
|
|
|
291 if len(targets) > 1:
|
|
|
292 has_1_to_many = True
|
|
|
293 issues.append(f"1:many - {gene} -> {targets}")
|
|
|
294 else:
|
|
|
295 has_unmapped = True
|
|
|
296 issues.append(f"unmapped - {gene}")
|
|
|
297
|
|
|
298 # Check for many:1 mappings
|
|
|
299 target_to_sources = {}
|
|
|
300 for gene in set(genes):
|
|
|
301 norm_gene = model_utils._normalize_gene_id(gene)
|
|
|
302 if norm_gene in mapping_dict:
|
|
|
303 for target in mapping_dict[norm_gene]:
|
|
|
304 if target not in target_to_sources:
|
|
|
305 target_to_sources[target] = []
|
|
|
306 target_to_sources[target].append(gene)
|
|
|
307
|
|
|
308 for target, sources in target_to_sources.items():
|
|
|
309 if len(sources) > 1:
|
|
|
310 has_many_to_1 = True
|
|
|
311 issues.append(f"many:1 - {sources} -> {target}")
|
|
|
312
|
|
|
313 if issues:
|
|
|
314 predictions[rxn.id] = {
|
|
|
315 'issues': issues,
|
|
|
316 'has_1_to_many': has_1_to_many,
|
|
|
317 'has_many_to_1': has_many_to_1,
|
|
|
318 'has_unmapped': has_unmapped,
|
|
|
319 'is_or_only': self._check_if_or_only(rxn.gene_reaction_rule),
|
|
|
320 'predicted_flattening': has_1_to_many or has_many_to_1 and self._check_if_or_only(rxn.gene_reaction_rule)
|
|
|
321 }
|
|
|
322
|
|
|
323 return predictions
|
|
|
324
|
|
|
325 def _check_if_or_only(self, gpr: str) -> bool:
|
|
|
326 """Check if GPR contains only OR operators (and parentheses)"""
|
|
|
327 if not gpr or gpr.strip() == '':
|
|
|
328 return False
|
|
|
329
|
|
|
330 # Remove gene names and whitespace, keep only logical operators
|
|
|
331 token_pattern = r'\b[A-Za-z0-9:_.-]+\b'
|
|
|
332 logic_only = re.sub(token_pattern, '', gpr)
|
|
|
333 logic_only = re.sub(r'\s+', ' ', logic_only.strip())
|
|
|
334
|
|
|
335 # Check for AND operators
|
|
|
336 and_pattern = r'\b(and|AND)\b'
|
|
|
337 return not bool(re.search(and_pattern, logic_only))
|
|
|
338
|
|
|
339 def run_comprehensive_test(self) -> Dict:
|
|
|
340 """Run the comprehensive translation test"""
|
|
|
341 print("="*80)
|
|
|
342 print("COMPREHENSIVE GPR TRANSLATION TEST SUITE")
|
|
|
343 print("="*80)
|
|
|
344
|
|
|
345 # Create test model and mapping
|
|
|
346 print("\n1. Creating test model and mapping...")
|
|
|
347 model = self.create_comprehensive_test_model()
|
|
|
348 mapping_df = self.create_comprehensive_mapping()
|
|
|
349
|
|
|
350 print(f" ā Created model with {len(model.reactions)} reactions")
|
|
|
351 print(f" ā Created mapping with {len(mapping_df)} entries")
|
|
|
352
|
|
|
353 # Analyze mapping statistics
|
|
|
354 print("\n2. Analyzing mapping statistics...")
|
|
|
355 mapping_stats = self.analyze_mapping_statistics(mapping_df)
|
|
|
356 print(f" ā Unique source genes: {mapping_stats['unique_sources']}")
|
|
|
357 print(f" ā Unique target genes: {mapping_stats['unique_targets']}")
|
|
|
358 print(f" ā 1:1 mappings: {mapping_stats['one_to_one']}")
|
|
|
359 print(f" ā 1:many mappings: {mapping_stats['one_to_many']}")
|
|
|
360 print(f" ā Many:1 target genes: {mapping_stats['many_to_one_targets']}")
|
|
|
361
|
|
|
362 # Predict translation issues
|
|
|
363 print("\n3. Predicting translation issues...")
|
|
|
364 predicted_issues = self.predict_translation_issues(model, mapping_df)
|
|
|
365 predicted_or_only = sum(1 for pred in predicted_issues.values() if pred['is_or_only'])
|
|
|
366 predicted_flattening = sum(1 for pred in predicted_issues.values() if pred['predicted_flattening'])
|
|
|
367
|
|
|
368 print(f" ā Reactions with predicted issues: {len(predicted_issues)}")
|
|
|
369 print(f" ā OR-only reactions: {predicted_or_only}")
|
|
|
370 print(f" ā Predicted for flattening: {predicted_flattening}")
|
|
|
371
|
|
|
372 # Display original GPRs
|
|
|
373 print("\n4. Original model GPRs:")
|
|
|
374 for rxn in sorted(model.reactions, key=lambda x: x.id):
|
|
|
375 status = "š" if rxn.id in predicted_issues else "ā"
|
|
|
376 or_only = "š" if predicted_issues.get(rxn.id, {}).get('is_or_only', False) else " "
|
|
|
377 print(f" {status}{or_only} {rxn.id:20} : {rxn.gene_reaction_rule}")
|
|
|
378
|
|
|
379 # Run translation
|
|
|
380 print("\n5. Running translation...")
|
|
|
381 try:
|
|
|
382 translated_model, translation_issues = model_utils.translate_model_genes(
|
|
|
383 model=model,
|
|
|
384 mapping_df=mapping_df,
|
|
|
385 target_nomenclature='ensg',
|
|
|
386 source_nomenclature='hgnc_symbol',
|
|
|
387 allow_many_to_one=True
|
|
|
388 )
|
|
|
389 print(" ā Translation completed successfully")
|
|
|
390 except Exception as e:
|
|
|
391 print(f" ā Translation failed: {e}")
|
|
|
392 import traceback
|
|
|
393 traceback.print_exc()
|
|
|
394 return {'success': False, 'error': str(e)}
|
|
|
395
|
|
|
396 # Display translated GPRs
|
|
|
397 print("\n6. Translated model GPRs:")
|
|
|
398 for rxn in sorted(translated_model.reactions, key=lambda x: x.id):
|
|
|
399 has_issues = "šØ" if rxn.id in translation_issues else "ā"
|
|
|
400 print(f" {has_issues} {rxn.id:20} : {rxn.gene_reaction_rule}")
|
|
|
401
|
|
|
402 # Analyze translation issues
|
|
|
403 print("\n7. Translation issues analysis:")
|
|
|
404 if translation_issues:
|
|
|
405 for rxn_id, issues_str in sorted(translation_issues.items()):
|
|
|
406 predicted = predicted_issues.get(rxn_id, {})
|
|
|
407 prediction_status = "ā PREDICTED" if rxn_id in predicted_issues else "ā UNEXPECTED"
|
|
|
408 print(f" šØ {rxn_id:20} ({prediction_status})")
|
|
|
409 # Split issues string by semicolon separator
|
|
|
410 if issues_str:
|
|
|
411 issues_list = [issue.strip() for issue in issues_str.split(';') if issue.strip()]
|
|
|
412 for issue in issues_list:
|
|
|
413 print(f" - {issue}")
|
|
|
414 else:
|
|
|
415 print(f" - No specific issues reported")
|
|
|
416 else:
|
|
|
417 print(" ā
No translation issues detected")
|
|
|
418
|
|
|
419 # Compare predictions vs actual
|
|
|
420 print("\n8. Prediction accuracy:")
|
|
|
421 true_positive = set(predicted_issues.keys()) & set(translation_issues.keys())
|
|
|
422 false_positive = set(predicted_issues.keys()) - set(translation_issues.keys())
|
|
|
423 false_negative = set(translation_issues.keys()) - set(predicted_issues.keys())
|
|
|
424
|
|
|
425 print(f" ā Correctly predicted issues: {len(true_positive)}")
|
|
|
426 print(f" ā False positives: {len(false_positive)}")
|
|
|
427 print(f" ā False negatives: {len(false_negative)}")
|
|
|
428
|
|
|
429 if false_positive:
|
|
|
430 print(" False positive reactions:")
|
|
|
431 for rxn_id in false_positive:
|
|
|
432 print(f" - {rxn_id}")
|
|
|
433
|
|
|
434 if false_negative:
|
|
|
435 print(" False negative reactions:")
|
|
|
436 for rxn_id in false_negative:
|
|
|
437 print(f" - {rxn_id}")
|
|
|
438
|
|
|
439 # Test specific functionality
|
|
|
440 print("\n9. Testing OR-only GPR flattening...")
|
|
|
441 flattening_tests = self.test_or_only_flattening(translated_model, translation_issues)
|
|
|
442
|
|
|
443 # Summary statistics
|
|
|
444 print("\n10. Summary:")
|
|
|
445 results = {
|
|
|
446 'success': True,
|
|
|
447 'model_reactions': len(model.reactions),
|
|
|
448 'mapping_entries': len(mapping_df),
|
|
|
449 'predicted_issues': len(predicted_issues),
|
|
|
450 'actual_issues': len(translation_issues),
|
|
|
451 'prediction_accuracy': {
|
|
|
452 'true_positive': len(true_positive),
|
|
|
453 'false_positive': len(false_positive),
|
|
|
454 'false_negative': len(false_negative),
|
|
|
455 'precision': len(true_positive) / len(predicted_issues) if predicted_issues else 0,
|
|
|
456 'recall': len(true_positive) / len(translation_issues) if translation_issues else 0,
|
|
|
457 },
|
|
|
458 'mapping_stats': mapping_stats,
|
|
|
459 'flattening_tests': flattening_tests,
|
|
|
460 'models': {
|
|
|
461 'original': model,
|
|
|
462 'translated': translated_model
|
|
|
463 },
|
|
|
464 'issues': {
|
|
|
465 'predicted': predicted_issues,
|
|
|
466 'actual': translation_issues
|
|
|
467 }
|
|
|
468 }
|
|
|
469
|
|
|
470 precision = results['prediction_accuracy']['precision']
|
|
|
471 recall = results['prediction_accuracy']['recall']
|
|
|
472 f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
|
|
|
473
|
|
|
474 print(f" š Total reactions: {len(model.reactions)}")
|
|
|
475 print(f" š Reactions with issues: {len(translation_issues)}")
|
|
|
476 print(f" š Prediction precision: {precision:.2%}")
|
|
|
477 print(f" š Prediction recall: {recall:.2%}")
|
|
|
478 print(f" š Prediction F1-score: {f1:.2%}")
|
|
|
479 print(f" š OR-only flattening tests: {flattening_tests['passed']}/{flattening_tests['total']}")
|
|
|
480
|
|
|
481 print("\n" + "="*80)
|
|
|
482 print("TEST SUITE COMPLETED")
|
|
|
483 print("="*80)
|
|
|
484
|
|
|
485 return results
|
|
|
486
|
|
|
487 def test_or_only_flattening(self, model: cobra.Model, translation_issues: Dict) -> Dict:
|
|
|
488 """Test the OR-only GPR flattening functionality"""
|
|
|
489 test_cases = [
|
|
|
490 # (original_gpr, expected_after_flattening, should_be_flattened)
|
|
|
491 ("SHARED_TARGET1 or SHARED_TARGET1", "SHARED_TARGET1", True),
|
|
|
492 ("(SHARED_TARGET2 or SHARED_TARGET2) or SHARED_TARGET2", "SHARED_TARGET2", True),
|
|
|
493 ("TARGET1 or TARGET2 or TARGET1", "TARGET1 or TARGET2", True),
|
|
|
494 ("(TARGET1 or TARGET2) and TARGET3", "(TARGET1 or TARGET2) and TARGET3", False), # Contains AND
|
|
|
495 ("TARGET1 and TARGET1", "TARGET1", True), # Should simplify AND duplicates too
|
|
|
496 ]
|
|
|
497
|
|
|
498 results = {'total': 0, 'passed': 0, 'failed': [], 'details': []}
|
|
|
499
|
|
|
500 print(" Testing OR-only flattening functionality:")
|
|
|
501
|
|
|
502 # Test the helper functions directly
|
|
|
503 for original, expected, should_flatten in test_cases:
|
|
|
504 results['total'] += 1
|
|
|
505
|
|
|
506 # Test _is_or_only_expression
|
|
|
507 is_or_only = model_utils._is_or_only_expression(original)
|
|
|
508
|
|
|
509 # Test _flatten_or_only_gpr if it should be OR-only
|
|
|
510 if should_flatten and 'and' not in original.lower():
|
|
|
511 flattened = model_utils._flatten_or_only_gpr(original)
|
|
|
512 passed = flattened == expected
|
|
|
513 else:
|
|
|
514 passed = not should_flatten or is_or_only == (not 'and' in original.lower())
|
|
|
515 flattened = original
|
|
|
516
|
|
|
517 status = "ā" if passed else "ā"
|
|
|
518 results['details'].append({
|
|
|
519 'original': original,
|
|
|
520 'expected': expected,
|
|
|
521 'flattened': flattened,
|
|
|
522 'is_or_only': is_or_only,
|
|
|
523 'should_flatten': should_flatten,
|
|
|
524 'passed': passed
|
|
|
525 })
|
|
|
526
|
|
|
527 if passed:
|
|
|
528 results['passed'] += 1
|
|
|
529 else:
|
|
|
530 results['failed'].append(f"{original} -> {flattened} (expected: {expected})")
|
|
|
531
|
|
|
532 print(f" {status} '{original}' -> '{flattened}' (OR-only: {is_or_only})")
|
|
|
533
|
|
|
534 # Test actual model reactions that should have been flattened
|
|
|
535 for rxn in model.reactions:
|
|
|
536 if rxn.id in translation_issues:
|
|
|
537 original_gpr = rxn.gene_reaction_rule
|
|
|
538 is_or_only = model_utils._is_or_only_expression(original_gpr)
|
|
|
539 if is_or_only:
|
|
|
540 print(f" š Real case: {rxn.id} has OR-only GPR: '{original_gpr}'")
|
|
|
541
|
|
|
542 return results
|
|
|
543
|
|
|
544 def run_individual_tests():
|
|
|
545 """Run individual component tests"""
|
|
|
546 print("\n" + "="*80)
|
|
|
547 print("INDIVIDUAL COMPONENT TESTS")
|
|
|
548 print("="*80)
|
|
|
549
|
|
|
550 # Test 1: OR-only detection
|
|
|
551 print("\n1. Testing OR-only detection...")
|
|
|
552 or_only_cases = [
|
|
|
553 ("GENE1 or GENE2", True),
|
|
|
554 ("(GENE1 or GENE2)", True),
|
|
|
555 ("GENE1 or GENE2 or GENE3", True),
|
|
|
556 ("(GENE1 or GENE2) or GENE3", True),
|
|
|
557 ("((GENE1 or GENE2) or GENE3) or GENE4", True),
|
|
|
558 ("GENE1 and GENE2", False),
|
|
|
559 ("GENE1 or (GENE2 and GENE3)", False),
|
|
|
560 ("(GENE1 or GENE2) and GENE3", False),
|
|
|
561 ("GENE1", False), # Single gene
|
|
|
562 ("", False), # Empty
|
|
|
563 ]
|
|
|
564
|
|
|
565 for gpr, expected in or_only_cases:
|
|
|
566 result = model_utils._is_or_only_expression(gpr)
|
|
|
567 status = "ā" if result == expected else "ā"
|
|
|
568 print(f" {status} '{gpr}' -> {result} (expected: {expected})")
|
|
|
569
|
|
|
570 # Test 2: GPR flattening
|
|
|
571 print("\n2. Testing GPR flattening...")
|
|
|
572 flattening_cases = [
|
|
|
573 ("GENE1 or GENE1", "GENE1"),
|
|
|
574 ("(GENE1 or GENE1) or GENE2", "GENE1 or GENE2"),
|
|
|
575 ("GENE1 or GENE2 or GENE1", "GENE1 or GENE2"),
|
|
|
576 ("(GENE1 or GENE2) or (GENE1 or GENE3)", "GENE1 or GENE2 or GENE3"),
|
|
|
577 ("((A or A) or B) or C", "A or B or C"),
|
|
|
578 ]
|
|
|
579
|
|
|
580 for original, expected in flattening_cases:
|
|
|
581 result = model_utils._flatten_or_only_gpr(original)
|
|
|
582 status = "ā" if result == expected else "ā"
|
|
|
583 print(f" {status} '{original}' -> '{result}' (expected: '{expected}')")
|
|
|
584
|
|
|
585 def main():
|
|
|
586 """Main test function"""
|
|
|
587 print("COBRAxy GPR Translation Comprehensive Test Suite")
|
|
|
588 print("=" * 80)
|
|
|
589
|
|
|
590 # Run individual component tests first
|
|
|
591 run_individual_tests()
|
|
|
592
|
|
|
593 # Run comprehensive test suite
|
|
|
594 tester = GPRTranslationTester()
|
|
|
595 results = tester.run_comprehensive_test()
|
|
|
596
|
|
|
597 # Save results for further analysis if needed
|
|
|
598 if results['success']:
|
|
|
599 print(f"\nā
All tests completed successfully!")
|
|
|
600 print(f"š Test models and results available in results object")
|
|
|
601
|
|
|
602 # Optionally save to file
|
|
|
603 try:
|
|
|
604 import pickle
|
|
|
605 with open('/tmp/gpr_translation_test_results.pkl', 'wb') as f:
|
|
|
606 pickle.dump(results, f)
|
|
|
607 print(f"š Detailed results saved to /tmp/gpr_translation_test_results.pkl")
|
|
|
608 except:
|
|
|
609 pass
|
|
|
610 else:
|
|
|
611 print(f"\nā Tests failed: {results.get('error', 'Unknown error')}")
|
|
|
612 return False
|
|
|
613
|
|
|
614 return True
|
|
|
615
|
|
|
616 if __name__ == "__main__":
|
|
|
617 success = main()
|
|
|
618 sys.exit(0 if success else 1) |