Mercurial > repos > iuc > amas_replicate
comparison check_interleaved.py @ 0:24431ccf6352 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas commit 158ec0e635067d354c425baf14b95cb616fd93c4
| author | iuc |
|---|---|
| date | Tue, 02 Dec 2025 09:26:59 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:24431ccf6352 |
|---|---|
| 1 """ | |
| 2 Helper script to check if AMAS input files are interleaved. | |
| 3 """ | |
| 4 import argparse | |
| 5 import re | |
| 6 import sys | |
| 7 | |
| 8 | |
def check_phylip_interleaved(filepath):
    """Check if PHYLIP file is interleaved.

    The first line must be the header ``ntax nchar``. Every non-blank line
    after it is counted; as soon as more than ``ntax`` such lines are seen
    the file is reported as interleaved. Blank lines (for example between
    the header and the first record, or between interleaved blocks) are
    ignored and do not count towards the total.

    Note that only the number of lines is examined, so a sequential file
    whose records wrap over several lines is reported as interleaved too.

    Args:
        filepath: Path to a UTF-8 encoded PHYLIP file.

    Returns:
        True if more than ``ntax`` non-blank lines follow the header,
        otherwise False.

    Raises:
        ValueError: If the file is empty, the header line is blank, or the
            header does not start with an integer taxon count.
    """
    with open(filepath, encoding='utf-8') as f:
        # A default of '' turns an empty file into a blank header instead of
        # letting StopIteration escape.
        header = next(f, '').split()
        if not header:
            raise ValueError(f"{filepath}: missing PHYLIP header line")
        try:
            ntax = int(header[0])
        except ValueError as err:
            raise ValueError(
                f"{filepath}: PHYLIP header must start with the taxon count, "
                f"got {header[0]!r}"
            ) from err

        data_lines = 0
        for line in f:
            if not line.strip():
                # Blank separators must not be mistaken for extra records.
                continue
            data_lines += 1
            if data_lines > ntax:
                return True

    return False
| 22 | |
| 23 | |
def check_nexus_interleaved(filepath):
    """Check if NEXUS file is interleaved.

    Lines are lower-cased and stripped before inspection, and blank lines
    are skipped. Only the first DATA or CHARACTERS block is examined:

    * a ``format`` line carrying the ``interleave`` keyword (bare or
      ``interleave=yes``) marks the file as interleaved; an explicit
      ``interleave=no`` does not;
    * otherwise the lines inside ``matrix`` are counted and compared with
      the ``ntax`` value from the block's ``dimensions`` line.

    Args:
        filepath: Path to a UTF-8 encoded NEXUS file.

    Returns:
        True if the file is judged interleaved, otherwise False. When
        ``ntax`` is not declared inside the data block, only the
        ``interleave`` keyword can produce True.
    """
    in_data_block = False
    in_matrix = False
    ntax = None
    seq_lines = 0

    with open(filepath, encoding='utf-8') as f:
        for raw_line in f:
            content = raw_line.strip().lower()

            if not content:
                continue

            if in_matrix:
                if content == 'end;':
                    # Exceeding ntax has already returned True below, so this
                    # only differs when fewer lines than taxa were seen.
                    return bool(ntax) and seq_lines != ntax

                # A lone ';' only terminates the matrix; it is not a record.
                if content != ';':
                    seq_lines += 1
                    if ntax and seq_lines > ntax:
                        return True
                continue

            if not in_data_block:
                # Ignore everything until "begin data" / "begin characters".
                if content.startswith('begin'):
                    words = content.split()
                    if len(words) > 1 and words[1].startswith(
                            ('data', 'characters')):
                        in_data_block = True
                continue

            if content.startswith('dimensions') and ntax is None:
                # NEXUS allows whitespace around '=', e.g. "ntax = 5".
                match = re.search(r'ntax\s*=\s*(\d+)', content)
                if match:
                    ntax = int(match.group(1))

            elif content.startswith('format'):
                # Accept "interleave" and "interleave=yes" but reject an
                # explicit "interleave=no", which declares a sequential file.
                if re.search(r'\binterleave\b(?!\s*=\s*no\b)', content):
                    return True

            elif content.startswith('matrix'):
                in_matrix = True

    return False
| 70 | |
| 71 | |
def check_fasta_interleaved(filepath):
    """Report the interleaved status of a FASTA file, which is always False.

    Args:
        filepath: Path of the FASTA file. It is accepted but never opened
            or inspected.

    Returns:
        False, unconditionally.
    """
    interleaved = False
    return interleaved
| 75 | |
| 76 | |
def main():
    """Detect the layout of the given input files and print the format name.

    Parses the command line (one or more input files plus a required
    ``--format`` of ``fasta``, ``phylip`` or ``nexus``), runs the matching
    interleave check on every file and prints ``<format>-int`` when the
    files are interleaved or ``<format>`` otherwise.

    Returns:
        0 after the format name has been printed.

    Raises:
        Exception: If some input files are interleaved and others are not.
    """
    parser = argparse.ArgumentParser(
        description='Check if AMAS input files are interleaved'
    )
    parser.add_argument('input_files', nargs='+', help='Input sequence files')
    parser.add_argument(
        '--format',
        required=True,
        choices=['fasta', 'phylip', 'nexus'],
        help='Input format',
    )
    args = parser.parse_args()

    # Any format without a dedicated checker falls back to the FASTA one.
    checkers = {
        'phylip': check_phylip_interleaved,
        'nexus': check_nexus_interleaved,
    }
    checker = checkers.get(args.format, check_fasta_interleaved)

    # Collapse the per-file results; more than one distinct value means the
    # inputs disagree with each other.
    outcomes = {checker(path) for path in args.input_files}
    if len(outcomes) > 1:
        raise Exception("Error: Input files are a mix of interleaved/sequential formats")

    (all_interleaved,) = outcomes
    print(f"{args.format}-int" if all_interleaved else args.format)

    return 0
| 109 | |
| 110 | |
# When executed as a script, use main()'s return value as the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
