changeset 0:5acf23babd9a draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas commit 158ec0e635067d354c425baf14b95cb616fd93c4
author iuc
date Tue, 02 Dec 2025 09:27:37 +0000
parents
children
files amas_concat.xml check_interleaved.py macros.xml test-data/inputs/concat_1.fasta test-data/inputs/concat_2.fasta test-data/inputs/concat_int_1.nex test-data/inputs/concat_int_1.phylip test-data/inputs/concat_int_2.nex test-data/inputs/concat_int_2.phylip test-data/inputs/concat_int_3.nex test-data/inputs/concat_result.phylip test-data/inputs/fasta1.fas test-data/inputs/partitions_concat.nex test-data/inputs/partitions_concat_unspecified.txt test-data/inputs/remove_input.nex test-data/outputs/expected_concat.phylip test-data/outputs/expected_concat_fasta.fas test-data/outputs/expected_concat_int.nex test-data/outputs/expected_concat_int_multi.nex test-data/outputs/expected_partitions.nex test-data/outputs/expected_partitions_int.txt test-data/outputs/expected_partitions_int_multi.txt test-data/outputs/expected_partitions_raxml.txt test-data/outputs/expected_remove_filtered.int-nex test-data/outputs/expected_replicate1.nex test-data/outputs/expected_replicate2.nex test-data/outputs/expected_split_partition1.fas test-data/outputs/expected_split_partition2.fas test-data/outputs/expected_summary.txt test-data/outputs/expected_taxa_summary.txt
diffstat 30 files changed, 729 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/amas_concat.xml	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,144 @@
+<tool id="amas_concat" name="AMAS concat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>concatenate multiple alignments</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <xrefs>
+        <xref type="bio.tools">amas</xref>
+    </xrefs>
+
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+
+    <!--
+        Cheetah/shell template for the AMAS concat call. The token bodies come from macros.xml:
+        SNIFF_INPUT_FORMAT sets the Cheetah variable in_format, SYMLINK_INPUTS links each input
+        under a sanitised name, and CHECK_INTERLEAVED assigns the shell variable IN_FORMAT and
+        ends with '&&'.
+        CHECK_INTERLEAVED is expanded directly before the AMAS call so that its trailing '&&'
+        guards AMAS itself. With 'set -e' a failure on the left-hand side of '&&' does not abort
+        the script, so any command placed in between would let the job carry on with an empty
+        IN_FORMAT after the check has already failed.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        set -eu;
+
+        @SNIFF_INPUT_FORMAT@
+
+        @SYMLINK_INPUTS@
+
+        @CHECK_INTERLEAVED@
+
+        python -m amas.AMAS
+            concat
+            --concat-part partitions.txt
+            --concat-out concatenated.out
+            --part-format $part_format
+            --out-format $out_format
+            --in-files
+                @INPUT_FILENAMES@
+            --in-format "\${IN_FORMAT}"
+            --data-type $data_type
+            --cores "\${GALAXY_SLOTS:-1}"
+            $check_align
+    ]]></command>
+
+    <!-- Form fields in display order: the alignments to join, the output layout, the
+         partition-file flavour, the data type and the optional alignment check. -->
+    <inputs>
+        <param
+            name="input_files"
+            type="data"
+            format="fasta,phylip,nex"
+            multiple="true"
+            label="Sequences to concatenate"
+            help="Provide pre-aligned FASTA/PHYLIP/NEXUS files (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
+        <expand macro="output_format" label="Select output format for concatenated alignment" />
+        <param
+            name="part_format"
+            type="select"
+            label="Format of the partitions file"
+            help="A file defining how the concatenated alignment is split into separate gene/locus regions. Each line specifies a partition name and its position range (e.g., 'gene1 = 1-500' or 'DNA, gene1 = 1-500' for RAxML format).">
+            <option value="unspecified" selected="true">unspecified</option>
+            <option value="nexus">nexus</option>
+            <option value="raxml">raxml</option>
+        </param>
+        <expand macro="data_type" />
+        <expand macro="check_align" />
+    </inputs>
+
+    <!-- Both files are collected from the job working directory under the fixed names that
+         the command block passes to AMAS (concatenated.out and partitions.txt). -->
+    <outputs>
+        <!-- Concatenated alignment: the datatype follows the selected out_format; the
+             interleaved variants map onto the same phylip/nex datatypes as the sequential ones. -->
+        <data
+            name="output"
+            format="txt"
+            from_work_dir="concatenated.out"
+            label="${tool.name} on ${on_string}: Concatenated alignment">
+            <change_format>
+                <when input="out_format" value="fasta" format="fasta" />
+                <when input="out_format" value="phylip" format="phylip" />
+                <when input="out_format" value="phylip-int" format="phylip" />
+                <when input="out_format" value="nexus" format="nex" />
+                <when input="out_format" value="nexus-int" format="nex" />
+            </change_format>
+        </data>
+        <!-- Partition file: the unspecified and raxml flavours have no equivalent datatype
+             and therefore stay txt; only the nexus flavour is switched to nex. -->
+        <data
+            name="partitions_out"
+            format="txt"
+            from_work_dir="partitions.txt"
+            label="${tool.name} on ${on_string}: Partition file">
+            <change_format>
+                <when input="part_format" value="nexus" format="nex" />
+            </change_format>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- FASTA inputs, sequential PHYLIP output, NEXUS partition file -->
+        <test expect_num_outputs="2">
+            <param name="input_files" value="inputs/concat_1.fasta,inputs/concat_2.fasta" />
+            <param name="out_format" value="phylip" />
+            <param name="part_format" value="nexus" />
+            <param name="data_type" value="dna" />
+            <param name="check_align" value="false" />
+            <output name="output" ftype="phylip" file="outputs/expected_concat.phylip" />
+            <output name="partitions_out" ftype="nex" file="outputs/expected_partitions.nex" />
+        </test>
+        <!-- FASTA inputs, FASTA output, RAxML partition file (kept as txt) -->
+        <test expect_num_outputs="2">
+            <param name="input_files" value="inputs/concat_1.fasta,inputs/concat_2.fasta" />
+            <param name="out_format" value="fasta" />
+            <param name="part_format" value="raxml" />
+            <param name="data_type" value="dna" />
+            <param name="check_align" value="false" />
+            <output name="output" ftype="fasta" file="outputs/expected_concat_fasta.fas" />
+            <output name="partitions_out" ftype="txt" file="outputs/expected_partitions_raxml.txt" />
+        </test>
+        <!-- Interleave tests: amas starts interleaving when sequences have lines > 500 characters -->
+        <!-- Interleaved PHYLIP inputs, interleaved NEXUS output, unspecified partition file -->
+        <test expect_num_outputs="2">
+            <param name="input_files" value="inputs/concat_int_1.phylip,inputs/concat_int_2.phylip" />
+            <param name="out_format" value="nexus-int" />
+            <param name="part_format" value="unspecified" />
+            <param name="data_type" value="dna" />
+            <param name="check_align" value="false" />
+            <output name="output" ftype="nex" file="outputs/expected_concat_int.nex" />
+            <output name="partitions_out" ftype="txt" file="outputs/expected_partitions_int.txt" />
+        </test>
+        <!-- Nexus interleave tests for: 'INTERLEAVE', 'Interleave=yes;', no interleave flag -->
+        <test expect_num_outputs="2">
+            <param name="input_files" value="inputs/concat_int_1.nex,inputs/concat_int_2.nex,inputs/concat_int_3.nex" />
+            <param name="out_format" value="nexus-int" />
+            <param name="part_format" value="unspecified" />
+            <param name="data_type" value="dna" />
+            <param name="check_align" value="false" />
+            <output name="output" ftype="nex" file="outputs/expected_concat_int_multi.nex" />
+            <output name="partitions_out" ftype="txt" file="outputs/expected_partitions_int_multi.txt" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        **What it does**
+
+        AMAS Concat combines multiple sequence alignments into a single concatenated alignment, commonly used in phylogenomic analyses.
+
+        **Inputs**
+
+        - **Multiple alignment files**: Select 2 or more pre-aligned sequence files (FASTA, PHYLIP, or NEXUS format)
+        - **Input format**: Detected automatically from the datatype of the first input file (FASTA, PHYLIP, or NEXUS), so all inputs should share the same format
+        - **Partition format**: Specify how you want the partition file to be formatted (Unspecified, RAxML, NEXUS)
+        - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences
+        - **Output format**: Select the desired format for the concatenated alignment
+
+        **Outputs**
+
+        1. **Concatenated alignment**: A single file containing all input alignments joined end-to-end
+        2. **Partitions file**: Defines the boundaries of each original alignment within the concatenated file
+
+        @PARTITIONS_HELP@
+
+        **Use cases**
+
+        - **Multi-locus phylogenomics**: Combine hundreds of genes for species tree inference
+        - **Partitioned phylogenetic analysis**: Apply different evolutionary models to different genes using tools like RAxML or IQ-TREE
+        - **Supermatrix construction**: Create a dataset for concatenation-based phylogenetic methods
+        - **Increased phylogenetic signal**: Leverage information from multiple loci to resolve difficult nodes
+        - **Comparative analyses**: Prepare datasets for testing hypotheses across multiple genomic regions
+
+        @AMAS_SHARED_HELP@
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/check_interleaved.py	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,112 @@
+"""
+Helper script to check if AMAS input files are interleaved.
+"""
+import argparse
+import re
+import sys
+
+
+def check_phylip_interleaved(filepath):
+    """Report whether a PHYLIP alignment is written in interleaved layout.
+
+    The first line is read as the ``ntax nchar`` header. The file is treated
+    as interleaved as soon as more than ``ntax`` non-blank lines follow it,
+    so a sequential file whose sequences are wrapped over several lines is
+    reported as interleaved as well.
+
+    Args:
+        filepath: Path to the PHYLIP file.
+
+    Returns:
+        True if more than ``ntax`` non-blank lines follow the header,
+        otherwise False.
+
+    Raises:
+        ValueError: If the header line is missing or blank, or if its first
+            field is not an integer taxon count.
+    """
+    with open(filepath, encoding='utf-8') as f:
+        # First line is header: ntax nchar
+        header = next(f, '').split()
+        if not header:
+            raise ValueError(f"{filepath}: missing PHYLIP header line")
+        ntax = int(header[0])
+
+        # Count only non-blank lines: counting physical lines would let the
+        # blank separators of a sequential file push it past ntax.
+        data_lines = 0
+        for line in f:
+            if not line.strip():
+                continue
+            data_lines += 1
+            if data_lines > ntax:
+                return True
+
+    return False
+
+
+def check_nexus_interleaved(filepath):
+    """Report whether a NEXUS alignment is written in interleaved layout.
+
+    Lines are stripped and lower-cased, blank lines are skipped, and nothing
+    is inspected before the first ``begin data``/``begin characters``
+    statement. From there on:
+
+    * a ``format`` line carrying the ``interleave`` keyword (bare or
+      ``interleave=yes``) marks the file as interleaved, whereas an explicit
+      ``interleave=no`` does not;
+    * otherwise the rows after ``matrix`` are counted, and more rows than the
+      ``ntax`` value of the ``dimensions`` line also mark it as interleaved.
+
+    Args:
+        filepath: Path to the NEXUS file.
+
+    Returns:
+        True if the file is judged interleaved, otherwise False. False is
+        also returned when no data block or matrix is found, or when ``ntax``
+        could not be read and no ``interleave`` keyword is present.
+    """
+    in_data_block = False
+    in_matrix = False
+    ntax = None
+    seq_lines = 0
+
+    with open(filepath, encoding='utf-8') as f:
+        for line in f:
+            content = line.strip().lower()
+
+            if not content:
+                continue
+
+            if in_matrix:
+                if content == 'end;':
+                    # Only reachable with seq_lines <= ntax, so this is True
+                    # when fewer rows than ntax were seen before the block ended.
+                    return seq_lines != ntax if ntax else False
+
+                # A lone ';' terminates the matrix and is not a sequence row.
+                if content != ';':
+                    seq_lines += 1
+                    if ntax and seq_lines > ntax:
+                        return True
+                continue
+
+            if not in_data_block:
+                if content.startswith('begin'):
+                    words = content.split()
+                    if len(words) > 1 and (
+                            words[1].startswith('data')
+                            or words[1].startswith('characters')):
+                        in_data_block = True
+                continue
+
+            if content.startswith('dimensions') and ntax is None:
+                # Tolerate optional whitespace around '=' ("NTAX = 3").
+                match = re.search(r'ntax\s*=\s*(\d+)', content)
+                if match:
+                    ntax = int(match.group(1))
+
+            elif content.startswith('format'):
+                # The keyword alone or "interleave=yes" switches interleaving on;
+                # the lookahead keeps an explicit "interleave=no" from matching.
+                if re.search(r'\binterleave\b(?!\s*=\s*no\b)', content):
+                    return True
+
+            elif content.startswith('matrix'):
+                in_matrix = True
+
+    return False
+
+
+def check_fasta_interleaved(filepath):
+    """Return False unconditionally: FASTA input is never treated as interleaved.
+
+    ``filepath`` is accepted only so that the call shape matches the PHYLIP
+    and NEXUS checkers; the file is not opened.
+    """
+    return False
+
+
+def main():
+    """Command-line entry point: print the AMAS input-format name for the files.
+
+    Every positional file is run through the checker that belongs to
+    ``--format``. The format name is printed unchanged when no file is
+    interleaved and with an ``-int`` suffix when all of them are.
+
+    Returns:
+        0 on success (used as the process exit status).
+
+    Raises:
+        Exception: If the files are a mix of interleaved and sequential
+            layouts.
+    """
+    parser = argparse.ArgumentParser(
+        description='Check if AMAS input files are interleaved'
+    )
+    parser.add_argument('input_files', nargs='+', help='Input sequence files')
+    parser.add_argument('--format', required=True,
+                        choices=['fasta', 'phylip', 'nexus'],
+                        help='Input format')
+    args = parser.parse_args()
+
+    # Anything that is neither phylip nor nexus falls back to the FASTA checker.
+    checkers = {
+        'phylip': check_phylip_interleaved,
+        'nexus': check_nexus_interleaved,
+    }
+    detect = checkers.get(args.format, check_fasta_interleaved)
+
+    # All files are examined before deciding; the set collapses equal answers.
+    layouts = {detect(path) for path in args.input_files}
+    if len(layouts) > 1:
+        raise Exception("Error: Input files are a mix of interleaved/sequential formats")
+
+    # nargs='+' guarantees at least one file, so exactly one answer remains.
+    (interleaved,) = layouts
+    print(f"{args.format}-int" if interleaved else args.format)
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,173 @@
+<macros>
+    <!-- Version tokens. amas_concat.xml builds its tool version from the TOOL_VERSION and
+         VERSION_SUFFIX tokens and takes its profile from the PROFILE token; TOOL_VERSION
+         also pins the amas package in the requirements macro below. -->
+    <token name="@TOOL_VERSION@">1.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">25.0</token>
+
+    <!-- Package dependency: amas at the version given by the TOOL_VERSION token. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">amas</requirement>
+        </requirements>
+    </xml>
+
+    <!-- Version probe: imports amas in Python and prints its __version__ attribute. -->
+    <xml name="version_command">
+        <version_command>python -c "import amas; print(amas.__version__)"</version_command>
+    </xml>
+
+    <!-- Cheetah snippet: sets the template variable in_format from the extension of the
+         first dataset in input_files and renames the 'nex' extension to 'nexus'. Only the
+         first dataset is consulted. -->
+    <token name="@SNIFF_INPUT_FORMAT@"><![CDATA[
+        #set $in_format = $input_files[0].ext
+        #if $in_format == 'nex'
+            #set $in_format = 'nexus'
+        #end if
+    ]]></token>
+
+    <!-- Shell snippet: runs check_interleaved.py from the tool directory on every dataset
+         in input_files and stores what it prints (the format name, with an '-int' suffix
+         for interleaved files) in the shell variable IN_FORMAT. It reads the Cheetah
+         variable in_format, so that must be set beforehand. The snippet ends with '&&',
+         so whatever command follows it in the including template is chained to it. -->
+    <token name="@CHECK_INTERLEAVED@"><![CDATA[
+        ## Check if inputs are interleaved
+        IN_FORMAT=\$(python '$__tool_directory__/check_interleaved.py' 
+            #for $f in $input_files
+                '${f}'
+            #end for
+            --format '${in_format}') &&
+    ]]></token>
+
+    <!-- Shell snippet: for every dataset in input_files, creates a symlink in the working
+         directory named after its element identifier, with each character other than word
+         characters, '-', '_' and '.' replaced by '_'. It calls re.sub, so the including
+         command template has to import the re module. -->
+    <token name="@SYMLINK_INPUTS@"><![CDATA[
+        ## Create symlinks with original filename for consistent tests
+        #for $f in $input_files
+            #set $safename_input = re.sub('[^\w\-_\.]', '_', $f.element_identifier)
+            ln -s '${f}' '${safename_input}';
+        #end for
+    ]]></token>
+
+    <!-- Emits the single-quoted, sanitised name of every dataset in input_files. The
+         substitution is the same one the SYMLINK_INPUTS token uses, so the names match the
+         links it creates; like that token it needs the re module to be imported. -->
+    <token name="@INPUT_FILENAMES@"><![CDATA[
+        #for $f in $input_files
+            #set $safename_input = re.sub('[^\w\-_\.]', '_', $f.element_identifier)
+            '${safename_input}'
+        #end for
+    ]]></token>
+
+    <!-- Output-format selector. The label comes from the LABEL token (its default is
+         given by token_label); the declared NAME token is not referenced, so the
+         parameter is always called out_format. The "-int" values select the interleaved
+         layouts. -->
+    <xml name="output_format" token_name="out_format" token_label="Format of the output file">
+        <param
+            name="out_format"
+            type="select"
+            label="@LABEL@">
+            <option value="fasta">fasta</option>
+            <option value="phylip">phylip (sequential)</option>
+            <option value="phylip-int">phylip (interleaved)</option>
+            <option value="nexus">nexus (sequential)</option>
+            <option value="nexus-int">nexus (interleaved)</option>
+        </param>
+    </xml>
+
+    <!-- Selects whether the alignments hold protein (aa) or nucleotide (dna) sequences. -->
+    <xml name="data_type">
+        <param
+            name="data_type"
+            type="select"
+            label="Data type">
+            <option value="aa">Protein alignments</option>
+            <option value="dna">Nucleotide alignments</option>
+        </param>
+    </xml>
+
+    <!-- Optional switch, off by default: renders as the check-align flag when ticked and
+         as an empty string otherwise. -->
+    <xml name="check_align">
+        <param
+            argument="--check-align"
+            type="boolean"
+            checked="false"
+            truevalue="--check-align"
+            falsevalue=""
+            label="Check if input sequences are aligned" />
+    </xml>
+
+    <!-- One list collection per output datatype; the filter on out_format keeps only the
+         collection that matches the selected format, and every collection discovers files
+         with the same pattern. Galaxy doesn't currently detect whether PHYLIP or NEXUS
+         format is interleaved/sequential, so both variants share a datatype; if that is
+         implemented, update here and in the associated subcommands. -->
+    <xml name="collection_outputs" token_name="alignments" token_label="alignment files">
+        <collection
+            name="@NAME@_fasta"
+            type="list"
+            label="${tool.name} on ${on_string}: fasta">
+            <discover_datasets format="fasta" pattern="(?P&lt;name&gt;.+-out\..+)" />
+            <filter>out_format == "fasta"</filter>
+        </collection>
+
+        <collection
+            name="@NAME@_phylip"
+            type="list"
+            label="${tool.name} on ${on_string}: phylip">
+            <discover_datasets format="phylip" pattern="(?P&lt;name&gt;.+-out\..+)" />
+            <filter>out_format == "phylip" or out_format == "phylip-int"</filter>
+        </collection>
+
+        <collection
+            name="@NAME@_nexus"
+            type="list"
+            label="${tool.name} on ${on_string}: nexus">
+            <discover_datasets format="nex" pattern="(?P&lt;name&gt;.+-out\..+)" />
+            <filter>out_format == "nexus" or out_format == "nexus-int"</filter>
+        </collection>
+    </xml>
+
+    <token name="@PARTITIONS_HELP@"><![CDATA[
+        **What is a partitions file?**
+
+        The partitions file maps each gene/locus to its position in the concatenated alignment. This is essential for downstream phylogenetic analyses (e.g., RAxML, IQ-TREE) that can apply different evolutionary models to different partitions.
+
+        **Example:**
+
+        If you concatenate three genes::
+
+            gene1.fasta (500 bp)
+            gene2.fasta (700 bp)  
+            gene3.fasta (400 bp)
+
+        The partitions file (unspecified format) will contain::
+
+            gene1 = 1-500
+            gene2 = 501-1200
+            gene3 = 1201-1600
+
+        **Partition formats:**
+
+        - **Unspecified**
+        
+        ::
+
+            gene1 = 1-500
+            gene2 = 501-1200
+
+        - **RAxML**
+        
+        ::
+
+            DNA, gene1 = 1-500
+            DNA, gene2 = 501-1200
+
+        - **NEXUS**
+        
+        ::
+
+            #NEXUS
+
+            Begin sets;
+                charset gene1 = 1-500;
+                charset gene2 = 501-1200;
+            End;
+    ]]></token>
+
+    <token name="@AMAS_SHARED_HELP@"><![CDATA[
+        **Sequential vs Interleaved Phylip Format**
+
+        - **Sequential**: Each complete sequence is written in order, one after another. Easier for programmatic parsing.
+
+        ::
+
+            4 60
+            Seq1    ATGCATGCATATGCATGCATATGCATGCAT...
+            Seq2    ATGCATGCATATGCATGCATATGCATGCAT...
+            Seq3    ATGCATGCATATGCATGCATATGCATGCAT...
+            Seq4    ATGCATGCATATGCATGCATATGCATGCAT...
+
+        - **Interleaved**: Sequences are written in aligned blocks, making it easier to visually compare positions across sequences.
+
+        ::
+
+            4 60
+            Seq1    ATGCATGCATATGCATGCAT
+            Seq2    ATGCATGCATATGCATGCAT
+            Seq3    ATGCATGCATATGCATGCAT
+            Seq4    ATGCATGCATATGCATGCAT
+
+            Seq1    ATGCATGCAT...
+            Seq2    ATGCATGCAT...
+            Seq3    ATGCATGCAT...
+            Seq4    ATGCATGCAT...
+
+        **About AMAS**
+
+        AMAS (Alignment manipulation and summary statistics) is designed for modern phylogenomics workflows involving hundreds of taxa and thousands of loci.
+
+        Source code and manual: https://github.com/marekborowiec/AMAS
+    ]]></token>
+
+    <!-- Citation block shared through the "citations" macro; holds a single DOI entry. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.7717/peerj.1660</citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_1.fasta	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,6 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU10
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_2.fasta	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,6 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU10
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_int_1.nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,22 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=3 NCHAR=300;
+	FORMAT   INTERLEAVE   DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+Taxon_A    ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+Taxon_B    GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+Taxon_C    TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
+
+Taxon_A    ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+Taxon_B    GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+Taxon_C    TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
+
+Taxon_A    ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+Taxon_B    GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+Taxon_C    TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
+
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_int_1.phylip	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,12 @@
+3 300
+Taxon_A    ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+Taxon_B    GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+Taxon_C    TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
+
+ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
+
+ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_int_2.nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,19 @@
+#NEXUS
+
+Begin data;
+	Dimensions Nchar=300 Ntax=3;
+	Format  Datatype=DNA  Gap = - Missing = ? Interleave=yes;
+	matrix
+Taxon_A    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+Taxon_B    GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+Taxon_C    TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
+
+Taxon_A    GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+Taxon_B    CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+Taxon_C    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+Taxon_A    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+Taxon_B    GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+Taxon_C    TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
+;
+End;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_int_2.phylip	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,12 @@
+3 300
+Taxon_A    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+Taxon_B    GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+Taxon_C    TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
+
+GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_int_3.nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,16 @@
+#NEXUS
+
+begin data;
+	dimensions nchar=200 ntax=3;
+	format  datatype=DNA  gap = - missing = ?;
+	matrix
+Taxon_A    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+Taxon_B    GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+Taxon_C    TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
+
+Taxon_A    GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+Taxon_B    CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+Taxon_C    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+;
+end;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_result.phylip	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,4 @@
+3 200
+OTU1    ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU10   AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+OTU2    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/fasta1.fas	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,20 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU3
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU4
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU5
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU6
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU7
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU8
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU9
+ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU10
+ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/partitions_concat.nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,6 @@
+#NEXUS
+
+Begin sets;
+	charset p1_concat_1 = 1-100;
+	charset p2_concat_2 = 101-200;
+End;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/partitions_concat_unspecified.txt	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,2 @@
+p1_concat_1 = 1-100
+p2_concat_2 = 101-200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/remove_input.nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,20 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=10 NCHAR=100;
+	FORMAT DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+	OTU1    ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU10   ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU2    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU3    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU4    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU5    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU6    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU7    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU8    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU9    ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_concat.phylip	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,4 @@
+3 200
+OTU1    ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU10   AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+OTU2    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_concat_fasta.fas	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,12 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTC
+CAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU10
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTC
+CAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_concat_int.nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,18 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=3 NCHAR=600;
+	FORMAT   INTERLEAVE   DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+Taxon_A   ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+Taxon_B   GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+Taxon_C   TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
+
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_concat_int_multi.nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,18 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=3 NCHAR=800;
+	FORMAT   INTERLEAVE   DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+Taxon_A   ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+Taxon_B   GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+Taxon_C   TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_partitions.nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,6 @@
+#NEXUS
+
+Begin sets;
+	charset p1_concat_1 = 1-100;
+	charset p2_concat_2 = 101-200;
+End;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_partitions_int.txt	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,2 @@
+p1_concat_int_1 = 1-300
+p2_concat_int_2 = 301-600
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_partitions_int_multi.txt	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,3 @@
+p1_concat_int_1 = 1-300
+p2_concat_int_2 = 301-600
+p3_concat_int_3 = 601-800
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_partitions_raxml.txt	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,2 @@
+DNA, p1_concat_1 = 1-100
+DNA, p2_concat_2 = 101-200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_remove_filtered.int-nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,19 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=8 NCHAR=100;
+	FORMAT   INTERLEAVE   DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+OTU1   ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU2   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU3   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU4   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU5   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU6   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU7   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU8   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_replicate1.nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,20 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=10 NCHAR=100;
+	FORMAT DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+	OTU1    ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU10   ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU2    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU3    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU4    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU5    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU6    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU7    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU8    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU9    ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_replicate2.nex	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,20 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=10 NCHAR=100;
+	FORMAT DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+	OTU1    ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU10   ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU2    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU3    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU4    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU5    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU6    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU7    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU8    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU9    ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_split_partition1.fas	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,9 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCT
+>OTU10
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAA
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_split_partition2.fas	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,9 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCT
+>OTU10
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTTT
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_summary.txt	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,2 @@
+Alignment_name	No_of_taxa	Alignment_length	Total_matrix_cells	Undetermined_characters	Missing_percent	No_variable_sites	Proportion_variable_sites	Parsimony_informative_sites	Proportion_parsimony_informative	AT_content	GC_content	A	C	G	T	K	M	R	Y	S	W	B	V	H	D	X	N	O	-	?
+fasta1.fas	10	100	1000	1	0.1	2	0.02	1	0.01	0.543	0.457	262	297	160	280	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_taxa_summary.txt	Tue Dec 02 09:27:37 2025 +0000
@@ -0,0 +1,11 @@
+Alignment_name	Taxon_name	Sequence_length	Undetermined_characters	Missing_percent	AT_content	GC_content	A	C	G	T	K	M	R	Y	S	W	B	V	H	D	X	N	O	-	?
+fasta1.fas	OTU1	100	1	1.0	0.545	0.455	26	29	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1
+fasta1.fas	OTU10	100	0	0.0	0.55	0.45	27	29	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU2	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU3	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU4	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU5	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU6	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU7	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU8	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU9	100	0	0.0	0.55	0.45	27	29	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
\ No newline at end of file