comparison check_interleaved.py @ 0:24431ccf6352 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas commit 158ec0e635067d354c425baf14b95cb616fd93c4
author iuc
date Tue, 02 Dec 2025 09:26:59 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:24431ccf6352
1 """
2 Helper script to check if AMAS input files are interleaved.
3 """
4 import argparse
5 import re
6 import sys
7
8
def check_phylip_interleaved(filepath):
    """Check if PHYLIP file is interleaved.

    The first line is read as the ``ntax nchar`` header. The file is reported
    as interleaved when more than ``ntax`` non-blank lines follow the header,
    i.e. when data continues after one line per taxon has been seen.

    Blank lines are skipped and never counted, so a blank separator after the
    header or between records does not make a sequential file look
    interleaved.

    Args:
        filepath: Path to a UTF-8 encoded PHYLIP file.

    Returns:
        True if more than ``ntax`` non-blank lines follow the header,
        otherwise False.

    Raises:
        ValueError: If the header line is missing or empty, or if its first
            field is not an integer.
    """
    with open(filepath, encoding='utf-8') as f:
        # First line is header: ntax nchar. The default keeps an empty file
        # from leaking a bare StopIteration out of this function.
        header = next(f, '').split()
        if not header:
            raise ValueError(
                f"PHYLIP header line is missing or empty in {filepath}"
            )
        ntax = int(header[0])

        data_lines = 0
        for line in f:
            if not line.strip():
                continue
            # Count only lines that carry data; physical line numbers would
            # be inflated by blank separator lines.
            data_lines += 1
            if data_lines > ntax:
                return True

    return False
22
23
def check_nexus_interleaved(filepath):
    """Check if NEXUS file is interleaved.

    The file is scanned line by line, case-insensitively, with blank lines
    skipped. Nothing is inspected until a ``begin data`` or
    ``begin characters`` line has been seen. Inside that block:

    * the first ``dimensions`` line that carries ``ntax=<n>`` sets the
      taxon count (whitespace around ``=`` is accepted);
    * a ``format`` line containing the ``interleave`` keyword returns True
      immediately, unless the keyword is written as ``interleave=no``;
    * after the ``matrix`` line, every non-blank line other than a lone
      ``;`` is counted; exceeding ``ntax`` lines returns True.

    Args:
        filepath: Path to a UTF-8 encoded NEXUS file.

    Returns:
        True if the file is judged interleaved by the rules above. When the
        matrix is closed by an ``end;`` line, the result is whether the
        counted matrix lines differ from ``ntax`` (False if ``ntax`` was
        never found). False if the file ends without reaching a decision.
    """
    in_data_block = False
    in_matrix = False
    ntax = None
    seq_lines = 0

    with open(filepath, encoding='utf-8') as f:
        for line in f:
            content = line.strip().lower()

            if not content:
                continue

            if in_matrix:
                if content == 'end;':
                    return seq_lines != ntax if ntax else False

                # A lone ';' only terminates the matrix; it is not data.
                if content != ';':
                    seq_lines += 1
                    if ntax and seq_lines > ntax:
                        return True
                continue

            if not in_data_block:
                if content.startswith('begin'):
                    words = content.split()
                    if len(words) > 1 and words[1].startswith(
                            ('data', 'characters')):
                        in_data_block = True
                continue

            if content.startswith('dimensions') and ntax is None:
                # Allow "ntax = 5" as well as "ntax=5"; otherwise the
                # line-count fallback below is silently disabled.
                match = re.search(r'ntax\s*=\s*(\d+)', content)
                if match:
                    ntax = int(match.group(1))

            elif content.startswith('format'):
                # The bare keyword and "interleave=yes" mean interleaved;
                # "interleave=no" explicitly means sequential and must not
                # match, hence the negative lookahead.
                if re.search(r'\binterleave\b(?!\s*=\s*no\b)', content):
                    return True

            elif content.startswith('matrix'):
                in_matrix = True

    return False
70
71
def check_fasta_interleaved(filepath):
    """Report FASTA input as non-interleaved.

    The file is never opened; ``filepath`` is accepted only so that this
    function can be called the same way as the PHYLIP and NEXUS checks.

    Args:
        filepath: Path to a FASTA file (unused).

    Returns:
        Always False.
    """
    return False
75
76
def main():
    """Detect the layout of the input files and print the format name.

    Parses the command line (one or more input files plus a required
    ``--format`` of ``fasta``, ``phylip`` or ``nexus``), runs the matching
    interleave check on every file, and prints ``<format>-int`` when the
    files are interleaved or ``<format>`` when they are not.

    Returns:
        0 on success.

    Raises:
        Exception: If some input files are interleaved and others are not.
    """
    cli = argparse.ArgumentParser(
        description='Check if AMAS input files are interleaved'
    )
    cli.add_argument('input_files', nargs='+', help='Input sequence files')
    cli.add_argument(
        '--format',
        required=True,
        choices=['fasta', 'phylip', 'nexus'],
        help='Input format',
    )
    options = cli.parse_args()

    # Any format without a dedicated check falls back to the FASTA check.
    detectors = {
        'phylip': check_phylip_interleaved,
        'nexus': check_nexus_interleaved,
    }
    detect = detectors.get(options.format, check_fasta_interleaved)

    # A set collapses the per-file results to the distinct outcomes seen.
    outcomes = {detect(path) for path in options.input_files}
    if len(outcomes) > 1:
        raise Exception("Error: Input files are a mix of interleaved/sequential formats")

    # nargs='+' guarantees at least one file, so exactly one outcome remains.
    interleaved = outcomes.pop()
    if not interleaved:
        print(options.format)
    else:
        print(f"{options.format}-int")

    return 0
109
110
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())