diff check_interleaved.py @ 0:e2e756484892 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas commit 158ec0e635067d354c425baf14b95cb616fd93c4
author iuc
date Tue, 02 Dec 2025 09:28:02 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/check_interleaved.py	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,112 @@
+"""
+Helper script to check if AMAS input files are interleaved.
+"""
+import argparse
+import re
+import sys
+
+
+def check_phylip_interleaved(filepath):
+    """Check if PHYLIP file is interleaved."""
+    with open(filepath, encoding='utf-8') as f:
+        # First line is header: ntax nchar
+        header = next(f).strip().split()
+        ntax = int(header[0])
+
+        for idx, line in enumerate(f, 1):
+            if line.strip():
+                if idx > ntax:
+                    return True
+
+        return False
+
+
+def check_nexus_interleaved(filepath):
+    """Check if NEXUS file is interleaved."""
+    in_data_block = False
+    in_matrix = False
+    ntax = None
+    seq_lines = 0
+
+    with open(filepath, encoding='utf-8') as f:
+        for line in f:
+            content = line.strip().lower()
+
+            if not content:
+                continue
+
+            if in_matrix:
+                if content == 'end;':
+                    return seq_lines != ntax if ntax else False
+
+                if content != ';':
+                    seq_lines += 1
+                    if ntax and seq_lines > ntax:
+                        return True
+                continue
+
+            if not in_data_block:
+                if content.startswith('begin'):
+                    words = content.split()
+                    if len(words) > 1 and (
+                            words[1].startswith('data')
+                            or words[1].startswith('characters')):
+                        in_data_block = True
+                continue
+
+            if content.startswith('dimensions') and ntax is None:
+                match = re.search(r'ntax=(\d+)', content)
+                if match:
+                    ntax = int(match.group(1))
+
+            elif content.startswith('format'):
+                if re.search(r'\binterleave(?:;|=yes;?)?\b', content):
+                    return True
+
+            elif content.startswith('matrix'):
+                in_matrix = True
+
+    return False
+
+
+def check_fasta_interleaved(filepath):
+    """FASTA files are not interleaved."""
+    return False
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Check if AMAS input files are interleaved'
+    )
+    parser.add_argument('input_files', nargs='+', help='Input sequence files')
+    parser.add_argument('--format', required=True,
+                        choices=['fasta', 'phylip', 'nexus'],
+                        help='Input format')
+
+    args = parser.parse_args()
+
+    interleaved_status = []
+    for filepath in args.input_files:
+        if args.format == 'phylip':
+            is_interleaved = check_phylip_interleaved(filepath)
+        elif args.format == 'nexus':
+            is_interleaved = check_nexus_interleaved(filepath)
+        else:
+            is_interleaved = check_fasta_interleaved(filepath)
+
+        interleaved_status.append(is_interleaved)
+
+    interleaved_status = list(set(interleaved_status))
+    if len(interleaved_status) > 1:
+        raise Exception("Error: Input files are a mix of interleaved/sequential formats")
+
+    if interleaved_status[0]:
+        print(f"{args.format}-int")
+    else:
+        print(args.format)
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())