comparison COBRAxy/test_gpr_translation_comprehensive.py @ 493:a92d21f92956 draft

Uploaded
author francesco_lapi
date Tue, 30 Sep 2025 15:00:21 +0000
parents
children
comparison
equal deleted inserted replaced
492:4ed95023af20 493:a92d21f92956
1 #!/usr/bin/env python3
2 """
3 Comprehensive test suite for GPR translation functionality in COBRAxy.
4
5 This test suite covers:
6 - Basic 1:1, 1:many, many:1 gene mappings
7 - Complex GPR expressions with AND/OR logic
8 - Translation issues tracking
9 - OR-only GPR flattening functionality
10 - Edge cases and nested expressions
11 - Statistical reporting
12 """
13
14 import cobra
15 import pandas as pd
16 import sys
17 import os
18 import logging
19 from typing import Dict, List, Tuple
20 import re
21
22 # Add the COBRAxy utils directory to the path
23 sys.path.append('/hdd/home/flapi/COBRAxy')
24 from utils import model_utils
25
26 # Configure logging
27 logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
28 logger = logging.getLogger(__name__)
29
30 class GPRTranslationTester:
31 """Comprehensive GPR translation test suite"""
32
33 def __init__(self):
34 self.test_results = {}
35 self.failed_tests = []
36
37 def create_comprehensive_test_model(self) -> cobra.Model:
38 """Create a comprehensive test model with diverse GPR patterns"""
39 model = cobra.Model('comprehensive_test_model')
40
41 # Create metabolites
42 metabolites = []
43 for i in range(30):
44 met = cobra.Metabolite(f'met_{chr(65+i%26)}{i//26}', compartment='c')
45 metabolites.append(met)
46
47 reactions_data = [
48 # === BASIC CASES ===
49 ('BASIC_1to1', 'GENE1', 0, 1), # Simple 1:1 mapping
50 ('BASIC_1tomany', 'GENE2', 1, 2), # 1:many mapping
51 ('BASIC_manyto1', 'GENE3', 2, 3), # many:1 mapping
52 ('BASIC_unmapped', 'UNMAPPED_GENE', 3, 4), # unmapped gene
53
54 # === SIMPLE OR CASES (candidates for flattening) ===
55 ('OR_simple', 'GENE4 or GENE5', 4, 5), # Simple OR with many:1
56 ('OR_three', 'GENE6 or GENE7 or GENE8', 5, 6), # Three genes OR
57 ('OR_parentheses', '(GENE9 or GENE10)', 6, 7), # OR with parentheses
58 ('OR_duplicates', 'GENE11 or GENE12 or GENE11', 7, 8), # OR with duplicates after translation
59
60 # === COMPLEX OR CASES (candidates for flattening) ===
61 ('OR_nested_simple', '(GENE13 or GENE14) or (GENE15 or GENE16)', 8, 9), # Nested OR only
62 ('OR_many_parentheses', '((GENE17 or GENE18) or GENE19) or GENE20', 9, 10), # Multiple nesting levels
63 ('OR_mixed_mapping', 'GENE21 or GENE22 or GENE23', 10, 11), # Mixed 1:1, 1:many, many:1
64
65 # === AND CASES (should NOT be flattened) ===
66 ('AND_simple', 'GENE24 and GENE25', 11, 12), # Simple AND
67 ('AND_complex', '(GENE26 and GENE27) and GENE28', 12, 13), # Complex AND
68
69 # === MIXED AND/OR (should NOT be flattened) ===
70 ('MIXED_basic', 'GENE29 and (GENE30 or GENE31)', 13, 14), # AND with OR
71 ('MIXED_complex', '(GENE32 or GENE33) and (GENE34 or GENE35)', 14, 15), # OR and AND
72 ('MIXED_nested', '((GENE36 and GENE37) or GENE38) and GENE39', 15, 16), # Complex nesting
73
74 # === EDGE CASES ===
75 ('EDGE_single', 'GENE40', 16, 17), # Single gene
76 ('EDGE_empty', '', 17, 18), # Empty GPR
77 ('EDGE_whitespace', ' GENE41 or GENE42 ', 18, 19), # Whitespace
78 ('EDGE_case_sensitive', 'Gene43 OR gene44', 19, 20), # Case variations
79
80 # === STRESS TESTS ===
81 ('STRESS_long_or', 'GENE45 or GENE46 or GENE47 or GENE48 or GENE49 or GENE50', 20, 21), # Long OR chain
82 ('STRESS_deep_nest', '(((GENE51 or GENE52) or GENE53) or GENE54)', 21, 22), # Deep nesting
83 ('STRESS_complex', '(GENE55 or (GENE56 or GENE57)) or ((GENE58 or GENE59) or GENE60)', 22, 23), # Complex structure
84
85 # === TRANSLATION ISSUE TRIGGERS ===
86 ('ISSUE_1many_or', 'GENE61 or GENE62', 23, 24), # 1:many in OR (should be flattened)
87 ('ISSUE_manyto1_and', 'GENE63 and GENE64', 24, 25), # many:1 in AND (should NOT be flattened)
88 ('ISSUE_mixed_problems', '(GENE65 or GENE66) and GENE67', 25, 26), # Mixed problems
89
90 # === REAL-WORLD INSPIRED CASES ===
91 ('REAL_metabolism', '(ENSG001 or ENSG002) or (ENSG003 or ENSG004)', 26, 27), # Metabolic pathway
92 ('REAL_transport', 'TRANSPORTER1 and (COFACTOR1 or COFACTOR2)', 27, 28), # Transport reaction
93 ('REAL_complex_enzyme', '((SUBUNIT1 and SUBUNIT2) or SUBUNIT3) and COFACTOR3', 28, 29), # Complex enzyme
94 ]
95
96 # Create reactions
97 for rxn_id, gpr, met_in, met_out in reactions_data:
98 rxn = cobra.Reaction(rxn_id)
99 if met_in < len(metabolites) and met_out < len(metabolites):
100 rxn.add_metabolites({metabolites[met_in]: -1, metabolites[met_out]: 1})
101 rxn.gene_reaction_rule = gpr
102 model.add_reactions([rxn])
103
104 return model
105
106 def create_comprehensive_mapping(self) -> pd.DataFrame:
107 """Create a comprehensive gene mapping covering all test scenarios"""
108 mapping_data = {
109 'hgnc_symbol': [],
110 'ensg': []
111 }
112
113 # === BASIC MAPPINGS ===
114 # 1:1 mappings
115 one_to_one = [
116 ('GENE1', 'TARGET1'),
117 ('GENE24', 'TARGET24'),
118 ('GENE25', 'TARGET25'),
119 ('GENE26', 'TARGET26'),
120 ('GENE27', 'TARGET27'),
121 ('GENE28', 'TARGET28'),
122 ('GENE29', 'TARGET29'),
123 ('GENE40', 'TARGET40'),
124 ('GENE41', 'TARGET41'),
125 ('GENE42', 'TARGET42'),
126 ]
127
128 # 1:many mappings (one source gene maps to multiple targets)
129 one_to_many = [
130 ('GENE2', 'TARGET2A'),
131 ('GENE2', 'TARGET2B'),
132 ('GENE30', 'TARGET30A'),
133 ('GENE30', 'TARGET30B'),
134 ('GENE61', 'TARGET61A'),
135 ('GENE61', 'TARGET61B'),
136 ('GENE61', 'TARGET61C'), # Maps to 3 targets
137 ('GENE65', 'TARGET65A'),
138 ('GENE65', 'TARGET65B'),
139 ]
140
141 # many:1 mappings (multiple source genes map to one target)
142 many_to_one = [
143 ('GENE3', 'SHARED_TARGET1'),
144 ('GENE4', 'SHARED_TARGET1'),
145 ('GENE5', 'SHARED_TARGET1'),
146 ('GENE6', 'SHARED_TARGET2'),
147 ('GENE7', 'SHARED_TARGET2'),
148 ('GENE8', 'SHARED_TARGET2'),
149 ('GENE9', 'SHARED_TARGET3'),
150 ('GENE10', 'SHARED_TARGET3'),
151 ('GENE11', 'SHARED_TARGET4'),
152 ('GENE12', 'SHARED_TARGET4'),
153 ('GENE13', 'SHARED_TARGET5'),
154 ('GENE14', 'SHARED_TARGET5'),
155 ('GENE15', 'SHARED_TARGET5'),
156 ('GENE16', 'SHARED_TARGET5'),
157 ('GENE17', 'SHARED_TARGET6'),
158 ('GENE18', 'SHARED_TARGET6'),
159 ('GENE19', 'SHARED_TARGET6'),
160 ('GENE20', 'SHARED_TARGET6'),
161 ('GENE45', 'SHARED_TARGET7'),
162 ('GENE46', 'SHARED_TARGET7'),
163 ('GENE47', 'SHARED_TARGET7'),
164 ('GENE48', 'SHARED_TARGET7'),
165 ('GENE49', 'SHARED_TARGET7'),
166 ('GENE50', 'SHARED_TARGET7'),
167 ('GENE51', 'SHARED_TARGET8'),
168 ('GENE52', 'SHARED_TARGET8'),
169 ('GENE53', 'SHARED_TARGET8'),
170 ('GENE54', 'SHARED_TARGET8'),
171 ('GENE55', 'SHARED_TARGET9'),
172 ('GENE56', 'SHARED_TARGET9'),
173 ('GENE57', 'SHARED_TARGET9'),
174 ('GENE58', 'SHARED_TARGET9'),
175 ('GENE59', 'SHARED_TARGET9'),
176 ('GENE60', 'SHARED_TARGET9'),
177 ('GENE63', 'SHARED_TARGET10'),
178 ('GENE64', 'SHARED_TARGET10'),
179 ('GENE66', 'SHARED_TARGET11'),
180 ]
181
182 # Mixed mappings for complex cases
183 mixed_mappings = [
184 ('GENE21', 'TARGET21'), # 1:1
185 ('GENE22', 'TARGET22A'), # 1:many
186 ('GENE22', 'TARGET22B'),
187 ('GENE23', 'SHARED_TARGET1'), # many:1 (shares with GENE3-5)
188 ('GENE31', 'SHARED_TARGET12'),
189 ('GENE32', 'SHARED_TARGET13'),
190 ('GENE33', 'SHARED_TARGET13'),
191 ('GENE34', 'TARGET34'),
192 ('GENE35', 'TARGET35'),
193 ('GENE36', 'TARGET36'),
194 ('GENE37', 'TARGET37'),
195 ('GENE38', 'TARGET38'),
196 ('GENE39', 'TARGET39'),
197 ('GENE62', 'TARGET62A'),
198 ('GENE62', 'TARGET62B'),
199 ('GENE67', 'TARGET67'),
200 ]
201
202 # Case sensitivity tests
203 case_mappings = [
204 ('Gene43', 'TARGET43'),
205 ('gene44', 'TARGET44'),
206 ]
207
208 # Real-world inspired mappings
209 real_mappings = [
210 ('ENSG001', 'HUMAN_GENE1'),
211 ('ENSG002', 'HUMAN_GENE2'),
212 ('ENSG003', 'HUMAN_GENE1'), # many:1
213 ('ENSG004', 'HUMAN_GENE2'), # many:1
214 ('TRANSPORTER1', 'SLC_FAMILY1'),
215 ('COFACTOR1', 'COFACTOR_A'),
216 ('COFACTOR2', 'COFACTOR_A'), # many:1
217 ('COFACTOR3', 'COFACTOR_B'),
218 ('SUBUNIT1', 'COMPLEX_SUBUNIT1'),
219 ('SUBUNIT2', 'COMPLEX_SUBUNIT2'),
220 ('SUBUNIT3', 'COMPLEX_ALTERNATIVE'),
221 ]
222
223 # Combine all mappings
224 all_mappings = one_to_one + one_to_many + many_to_one + mixed_mappings + case_mappings + real_mappings
225
226 for source, target in all_mappings:
227 mapping_data['hgnc_symbol'].append(source)
228 mapping_data['ensg'].append(target)
229
230 return pd.DataFrame(mapping_data)
231
232 def analyze_mapping_statistics(self, mapping_df: pd.DataFrame) -> Dict:
233 """Analyze mapping statistics"""
234 stats = {}
235
236 source_counts = mapping_df.groupby('hgnc_symbol')['ensg'].count()
237 target_counts = mapping_df.groupby('ensg')['hgnc_symbol'].count()
238
239 stats['total_mappings'] = len(mapping_df)
240 stats['unique_sources'] = len(source_counts)
241 stats['unique_targets'] = len(target_counts)
242
243 stats['one_to_one'] = (source_counts == 1).sum()
244 stats['one_to_many'] = (source_counts > 1).sum()
245 stats['many_to_one_targets'] = (target_counts > 1).sum()
246
247 stats['one_to_many_details'] = {}
248 for gene, count in source_counts[source_counts > 1].items():
249 targets = mapping_df[mapping_df['hgnc_symbol'] == gene]['ensg'].tolist()
250 stats['one_to_many_details'][gene] = targets
251
252 stats['many_to_one_details'] = {}
253 for target, count in target_counts[target_counts > 1].items():
254 sources = mapping_df[mapping_df['ensg'] == target]['hgnc_symbol'].tolist()
255 stats['many_to_one_details'][target] = sources
256
257 return stats
258
259 def predict_translation_issues(self, model: cobra.Model, mapping_df: pd.DataFrame) -> Dict:
260 """Predict which reactions will have translation issues"""
261 predictions = {}
262 mapping_dict = {}
263
264 # Build mapping dictionary
265 for _, row in mapping_df.iterrows():
266 source = row['hgnc_symbol']
267 target = row['ensg']
268 if source not in mapping_dict:
269 mapping_dict[source] = []
270 mapping_dict[source].append(target)
271
272 for rxn in model.reactions:
273 if not rxn.gene_reaction_rule or rxn.gene_reaction_rule.strip() == '':
274 continue
275
276 # Extract genes from GPR
277 token_pattern = r'\b[A-Za-z0-9:_.-]+\b'
278 tokens = re.findall(token_pattern, rxn.gene_reaction_rule)
279 logical_operators = {'and', 'or', 'AND', 'OR', '(', ')'}
280 genes = [t for t in tokens if t not in logical_operators]
281
282 issues = []
283 has_1_to_many = False
284 has_many_to_1 = False
285 has_unmapped = False
286
287 for gene in set(genes):
288 norm_gene = model_utils._normalize_gene_id(gene)
289 if norm_gene in mapping_dict:
290 targets = mapping_dict[norm_gene]
291 if len(targets) > 1:
292 has_1_to_many = True
293 issues.append(f"1:many - {gene} -> {targets}")
294 else:
295 has_unmapped = True
296 issues.append(f"unmapped - {gene}")
297
298 # Check for many:1 mappings
299 target_to_sources = {}
300 for gene in set(genes):
301 norm_gene = model_utils._normalize_gene_id(gene)
302 if norm_gene in mapping_dict:
303 for target in mapping_dict[norm_gene]:
304 if target not in target_to_sources:
305 target_to_sources[target] = []
306 target_to_sources[target].append(gene)
307
308 for target, sources in target_to_sources.items():
309 if len(sources) > 1:
310 has_many_to_1 = True
311 issues.append(f"many:1 - {sources} -> {target}")
312
313 if issues:
314 predictions[rxn.id] = {
315 'issues': issues,
316 'has_1_to_many': has_1_to_many,
317 'has_many_to_1': has_many_to_1,
318 'has_unmapped': has_unmapped,
319 'is_or_only': self._check_if_or_only(rxn.gene_reaction_rule),
320 'predicted_flattening': has_1_to_many or has_many_to_1 and self._check_if_or_only(rxn.gene_reaction_rule)
321 }
322
323 return predictions
324
325 def _check_if_or_only(self, gpr: str) -> bool:
326 """Check if GPR contains only OR operators (and parentheses)"""
327 if not gpr or gpr.strip() == '':
328 return False
329
330 # Remove gene names and whitespace, keep only logical operators
331 token_pattern = r'\b[A-Za-z0-9:_.-]+\b'
332 logic_only = re.sub(token_pattern, '', gpr)
333 logic_only = re.sub(r'\s+', ' ', logic_only.strip())
334
335 # Check for AND operators
336 and_pattern = r'\b(and|AND)\b'
337 return not bool(re.search(and_pattern, logic_only))
338
339 def run_comprehensive_test(self) -> Dict:
340 """Run the comprehensive translation test"""
341 print("="*80)
342 print("COMPREHENSIVE GPR TRANSLATION TEST SUITE")
343 print("="*80)
344
345 # Create test model and mapping
346 print("\n1. Creating test model and mapping...")
347 model = self.create_comprehensive_test_model()
348 mapping_df = self.create_comprehensive_mapping()
349
350 print(f" ✓ Created model with {len(model.reactions)} reactions")
351 print(f" ✓ Created mapping with {len(mapping_df)} entries")
352
353 # Analyze mapping statistics
354 print("\n2. Analyzing mapping statistics...")
355 mapping_stats = self.analyze_mapping_statistics(mapping_df)
356 print(f" ✓ Unique source genes: {mapping_stats['unique_sources']}")
357 print(f" ✓ Unique target genes: {mapping_stats['unique_targets']}")
358 print(f" ✓ 1:1 mappings: {mapping_stats['one_to_one']}")
359 print(f" ✓ 1:many mappings: {mapping_stats['one_to_many']}")
360 print(f" ✓ Many:1 target genes: {mapping_stats['many_to_one_targets']}")
361
362 # Predict translation issues
363 print("\n3. Predicting translation issues...")
364 predicted_issues = self.predict_translation_issues(model, mapping_df)
365 predicted_or_only = sum(1 for pred in predicted_issues.values() if pred['is_or_only'])
366 predicted_flattening = sum(1 for pred in predicted_issues.values() if pred['predicted_flattening'])
367
368 print(f" ✓ Reactions with predicted issues: {len(predicted_issues)}")
369 print(f" ✓ OR-only reactions: {predicted_or_only}")
370 print(f" ✓ Predicted for flattening: {predicted_flattening}")
371
372 # Display original GPRs
373 print("\n4. Original model GPRs:")
374 for rxn in sorted(model.reactions, key=lambda x: x.id):
375 status = "🔍" if rxn.id in predicted_issues else "✓"
376 or_only = "🔗" if predicted_issues.get(rxn.id, {}).get('is_or_only', False) else " "
377 print(f" {status}{or_only} {rxn.id:20} : {rxn.gene_reaction_rule}")
378
379 # Run translation
380 print("\n5. Running translation...")
381 try:
382 translated_model, translation_issues = model_utils.translate_model_genes(
383 model=model,
384 mapping_df=mapping_df,
385 target_nomenclature='ensg',
386 source_nomenclature='hgnc_symbol',
387 allow_many_to_one=True
388 )
389 print(" ✓ Translation completed successfully")
390 except Exception as e:
391 print(f" ❌ Translation failed: {e}")
392 import traceback
393 traceback.print_exc()
394 return {'success': False, 'error': str(e)}
395
396 # Display translated GPRs
397 print("\n6. Translated model GPRs:")
398 for rxn in sorted(translated_model.reactions, key=lambda x: x.id):
399 has_issues = "🚨" if rxn.id in translation_issues else "✓"
400 print(f" {has_issues} {rxn.id:20} : {rxn.gene_reaction_rule}")
401
402 # Analyze translation issues
403 print("\n7. Translation issues analysis:")
404 if translation_issues:
405 for rxn_id, issues_str in sorted(translation_issues.items()):
406 predicted = predicted_issues.get(rxn_id, {})
407 prediction_status = "✓ PREDICTED" if rxn_id in predicted_issues else "❓ UNEXPECTED"
408 print(f" 🚨 {rxn_id:20} ({prediction_status})")
409 # Split issues string by semicolon separator
410 if issues_str:
411 issues_list = [issue.strip() for issue in issues_str.split(';') if issue.strip()]
412 for issue in issues_list:
413 print(f" - {issue}")
414 else:
415 print(f" - No specific issues reported")
416 else:
417 print(" ✅ No translation issues detected")
418
419 # Compare predictions vs actual
420 print("\n8. Prediction accuracy:")
421 true_positive = set(predicted_issues.keys()) & set(translation_issues.keys())
422 false_positive = set(predicted_issues.keys()) - set(translation_issues.keys())
423 false_negative = set(translation_issues.keys()) - set(predicted_issues.keys())
424
425 print(f" ✓ Correctly predicted issues: {len(true_positive)}")
426 print(f" ⚠ False positives: {len(false_positive)}")
427 print(f" ❌ False negatives: {len(false_negative)}")
428
429 if false_positive:
430 print(" False positive reactions:")
431 for rxn_id in false_positive:
432 print(f" - {rxn_id}")
433
434 if false_negative:
435 print(" False negative reactions:")
436 for rxn_id in false_negative:
437 print(f" - {rxn_id}")
438
439 # Test specific functionality
440 print("\n9. Testing OR-only GPR flattening...")
441 flattening_tests = self.test_or_only_flattening(translated_model, translation_issues)
442
443 # Summary statistics
444 print("\n10. Summary:")
445 results = {
446 'success': True,
447 'model_reactions': len(model.reactions),
448 'mapping_entries': len(mapping_df),
449 'predicted_issues': len(predicted_issues),
450 'actual_issues': len(translation_issues),
451 'prediction_accuracy': {
452 'true_positive': len(true_positive),
453 'false_positive': len(false_positive),
454 'false_negative': len(false_negative),
455 'precision': len(true_positive) / len(predicted_issues) if predicted_issues else 0,
456 'recall': len(true_positive) / len(translation_issues) if translation_issues else 0,
457 },
458 'mapping_stats': mapping_stats,
459 'flattening_tests': flattening_tests,
460 'models': {
461 'original': model,
462 'translated': translated_model
463 },
464 'issues': {
465 'predicted': predicted_issues,
466 'actual': translation_issues
467 }
468 }
469
470 precision = results['prediction_accuracy']['precision']
471 recall = results['prediction_accuracy']['recall']
472 f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
473
474 print(f" 📊 Total reactions: {len(model.reactions)}")
475 print(f" 📊 Reactions with issues: {len(translation_issues)}")
476 print(f" 📊 Prediction precision: {precision:.2%}")
477 print(f" 📊 Prediction recall: {recall:.2%}")
478 print(f" 📊 Prediction F1-score: {f1:.2%}")
479 print(f" 📊 OR-only flattening tests: {flattening_tests['passed']}/{flattening_tests['total']}")
480
481 print("\n" + "="*80)
482 print("TEST SUITE COMPLETED")
483 print("="*80)
484
485 return results
486
487 def test_or_only_flattening(self, model: cobra.Model, translation_issues: Dict) -> Dict:
488 """Test the OR-only GPR flattening functionality"""
489 test_cases = [
490 # (original_gpr, expected_after_flattening, should_be_flattened)
491 ("SHARED_TARGET1 or SHARED_TARGET1", "SHARED_TARGET1", True),
492 ("(SHARED_TARGET2 or SHARED_TARGET2) or SHARED_TARGET2", "SHARED_TARGET2", True),
493 ("TARGET1 or TARGET2 or TARGET1", "TARGET1 or TARGET2", True),
494 ("(TARGET1 or TARGET2) and TARGET3", "(TARGET1 or TARGET2) and TARGET3", False), # Contains AND
495 ("TARGET1 and TARGET1", "TARGET1", True), # Should simplify AND duplicates too
496 ]
497
498 results = {'total': 0, 'passed': 0, 'failed': [], 'details': []}
499
500 print(" Testing OR-only flattening functionality:")
501
502 # Test the helper functions directly
503 for original, expected, should_flatten in test_cases:
504 results['total'] += 1
505
506 # Test _is_or_only_expression
507 is_or_only = model_utils._is_or_only_expression(original)
508
509 # Test _flatten_or_only_gpr if it should be OR-only
510 if should_flatten and 'and' not in original.lower():
511 flattened = model_utils._flatten_or_only_gpr(original)
512 passed = flattened == expected
513 else:
514 passed = not should_flatten or is_or_only == (not 'and' in original.lower())
515 flattened = original
516
517 status = "✓" if passed else "❌"
518 results['details'].append({
519 'original': original,
520 'expected': expected,
521 'flattened': flattened,
522 'is_or_only': is_or_only,
523 'should_flatten': should_flatten,
524 'passed': passed
525 })
526
527 if passed:
528 results['passed'] += 1
529 else:
530 results['failed'].append(f"{original} -> {flattened} (expected: {expected})")
531
532 print(f" {status} '{original}' -> '{flattened}' (OR-only: {is_or_only})")
533
534 # Test actual model reactions that should have been flattened
535 for rxn in model.reactions:
536 if rxn.id in translation_issues:
537 original_gpr = rxn.gene_reaction_rule
538 is_or_only = model_utils._is_or_only_expression(original_gpr)
539 if is_or_only:
540 print(f" 🔍 Real case: {rxn.id} has OR-only GPR: '{original_gpr}'")
541
542 return results
543
544 def run_individual_tests():
545 """Run individual component tests"""
546 print("\n" + "="*80)
547 print("INDIVIDUAL COMPONENT TESTS")
548 print("="*80)
549
550 # Test 1: OR-only detection
551 print("\n1. Testing OR-only detection...")
552 or_only_cases = [
553 ("GENE1 or GENE2", True),
554 ("(GENE1 or GENE2)", True),
555 ("GENE1 or GENE2 or GENE3", True),
556 ("(GENE1 or GENE2) or GENE3", True),
557 ("((GENE1 or GENE2) or GENE3) or GENE4", True),
558 ("GENE1 and GENE2", False),
559 ("GENE1 or (GENE2 and GENE3)", False),
560 ("(GENE1 or GENE2) and GENE3", False),
561 ("GENE1", False), # Single gene
562 ("", False), # Empty
563 ]
564
565 for gpr, expected in or_only_cases:
566 result = model_utils._is_or_only_expression(gpr)
567 status = "✓" if result == expected else "❌"
568 print(f" {status} '{gpr}' -> {result} (expected: {expected})")
569
570 # Test 2: GPR flattening
571 print("\n2. Testing GPR flattening...")
572 flattening_cases = [
573 ("GENE1 or GENE1", "GENE1"),
574 ("(GENE1 or GENE1) or GENE2", "GENE1 or GENE2"),
575 ("GENE1 or GENE2 or GENE1", "GENE1 or GENE2"),
576 ("(GENE1 or GENE2) or (GENE1 or GENE3)", "GENE1 or GENE2 or GENE3"),
577 ("((A or A) or B) or C", "A or B or C"),
578 ]
579
580 for original, expected in flattening_cases:
581 result = model_utils._flatten_or_only_gpr(original)
582 status = "✓" if result == expected else "❌"
583 print(f" {status} '{original}' -> '{result}' (expected: '{expected}')")
584
585 def main():
586 """Main test function"""
587 print("COBRAxy GPR Translation Comprehensive Test Suite")
588 print("=" * 80)
589
590 # Run individual component tests first
591 run_individual_tests()
592
593 # Run comprehensive test suite
594 tester = GPRTranslationTester()
595 results = tester.run_comprehensive_test()
596
597 # Save results for further analysis if needed
598 if results['success']:
599 print(f"\n✅ All tests completed successfully!")
600 print(f"📁 Test models and results available in results object")
601
602 # Optionally save to file
603 try:
604 import pickle
605 with open('/tmp/gpr_translation_test_results.pkl', 'wb') as f:
606 pickle.dump(results, f)
607 print(f"📁 Detailed results saved to /tmp/gpr_translation_test_results.pkl")
608 except:
609 pass
610 else:
611 print(f"\n❌ Tests failed: {results.get('error', 'Unknown error')}")
612 return False
613
614 return True
615
616 if __name__ == "__main__":
617 success = main()
618 sys.exit(0 if success else 1)