changeset 0:e2e756484892 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas commit 158ec0e635067d354c425baf14b95cb616fd93c4
author iuc
date Tue, 02 Dec 2025 09:28:02 +0000
parents
children
files amas_split.xml check_interleaved.py macros.xml test-data/inputs/concat_1.fasta test-data/inputs/concat_2.fasta test-data/inputs/concat_int_1.nex test-data/inputs/concat_int_1.phylip test-data/inputs/concat_int_2.nex test-data/inputs/concat_int_2.phylip test-data/inputs/concat_int_3.nex test-data/inputs/concat_result.phylip test-data/inputs/fasta1.fas test-data/inputs/partitions_concat.nex test-data/inputs/partitions_concat_unspecified.txt test-data/inputs/remove_input.nex test-data/outputs/expected_concat.phylip test-data/outputs/expected_concat_fasta.fas test-data/outputs/expected_concat_int.nex test-data/outputs/expected_concat_int_multi.nex test-data/outputs/expected_partitions.nex test-data/outputs/expected_partitions_int.txt test-data/outputs/expected_partitions_int_multi.txt test-data/outputs/expected_partitions_raxml.txt test-data/outputs/expected_remove_filtered.int-nex test-data/outputs/expected_replicate1.nex test-data/outputs/expected_replicate2.nex test-data/outputs/expected_split_partition1.fas test-data/outputs/expected_split_partition2.fas test-data/outputs/expected_summary.txt test-data/outputs/expected_taxa_summary.txt
diffstat 30 files changed, 697 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/amas_split.xml	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,112 @@
+<tool id="amas_split" name="AMAS split" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>split multiple alignments</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <xrefs>
+        <xref type="bio.tools">amas</xref>
+    </xrefs>
+
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        set -eu;
+
+        ## Derive the AMAS input format from the Galaxy datatype ('nex' becomes 'nexus')
+        #set $in_format = $input_file.ext
+        #if $in_format == 'nex'
+            #set $in_format = 'nexus'
+        #end if
+
+        ## check_interleaved.py prints the format, with '-int' appended when interleaved
+        IN_FORMAT=\$(python '$__tool_directory__/check_interleaved.py'
+            '${input_file}'
+            --format '${in_format}') &&
+
+        ## Symlink the dataset under its sanitised element identifier, because the
+        ##  input filename becomes part of the output names checked by the tests
+        #set $safename_input = re.sub('[^\w\-_\.]', '_', $input_file.element_identifier)
+        ln -s '${input_file}' '${safename_input}';
+
+        python -m amas.AMAS
+        split
+        --split-by '$split_by'
+        $remove_empty
+        --out-format $out_format
+        --in-files '${safename_input}'
+        --in-format "\${IN_FORMAT}"
+        --data-type $data_type
+        --cores "\${GALAXY_SLOTS:-1}"
+        $check_align
+    ]]></command>
+
+    <inputs>
+        <param name="input_file" type="data" format="fasta,phylip,nex" label="Sequence to split" multiple="false" help="Provide pre-aligned FASTA/PHYLIP/NEXUS file (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
+        <expand macro="output_format" label="Select output format for split alignments" />
+        <!-- If amas updates split to handle NEXUS format include nex format here -->
+        <param name="split_by" type="data" format="txt" label="Partitions file for splitting. Note: needs to be a partitions file in the Unspecified format (See help section for more information)"
+               help="A file defining how to split the concatenated alignment into separate gene/locus regions. Each line specifies a partition name and its position range (e.g., 'gene1 = 1-500' for unspecified format). See the help section for more information about partitions." />
+        <param argument="--remove-empty" type="boolean" label="Remove taxa that are entirely missing within a partition" checked="false" truevalue="--remove-empty" falsevalue="" />
+        <expand macro="data_type" />
+        <expand macro="check_align" />
+    </inputs>
+
+    <outputs>
+       <expand macro="collection_outputs" name="split_alignments" />
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input_file" value="inputs/concat_result.phylip" />
+            <param name="split_by" value="inputs/partitions_concat_unspecified.txt" />
+            <param name="remove_empty" value="true" />
+            <param name="out_format" value="fasta" />
+            <param name="data_type" value="dna" />
+            <param name="check_align" value="false" />
+            <output_collection name="split_alignments_fasta" type="list">
+                <element name="concat_result_p1_concat_1-out.fas" file="outputs/expected_split_partition1.fas" ftype="fasta" />
+                <element name="concat_result_p2_concat_2-out.fas" file="outputs/expected_split_partition2.fas" ftype="fasta" />
+            </output_collection>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        **What it does**
+
+        AMAS Split divides a concatenated alignment back into separate gene/locus files using a partitions file. This is the reverse operation of AMAS Concat.
+
+        **Inputs**
+
+        - **Concatenated alignment**: A single alignment file containing multiple genes/loci joined end-to-end
+        - **Partitions file**: Defines the boundaries of each gene/locus (.txt file containing Unspecified formatting)
+        - **Input format**: Detected automatically from the datatype of the input dataset (FASTA, PHYLIP or NEXUS); no manual selection is needed
+        - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences
+        - **Output format**: Select the desired format for the split alignment files
+        - **Remove empty sequences**: Optionally exclude taxa with only gaps/missing data in a partition
+
+        **Outputs**
+
+        A collection of alignment files, one per partition/gene defined in your partitions file.
+
+        @PARTITIONS_HELP@
+
+        **IMPORTANT**: Partitions files in RAxML or NEXUS format (whether stored as .txt or .nex), including those produced by AMAS Concat, will not work; only the Unspecified format is accepted.
+
+        **Tip:** An example for your data can be generated using the AMAS concat tool.
+
+        **Use cases**
+
+        - Extract individual gene alignments from a concatenated dataset
+        - Analyze genes separately after joint phylogenetic analysis
+        - Apply gene-specific filtering or trimming
+        - Recover original locus alignments from published concatenated datasets
+
+        @AMAS_SHARED_HELP@
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/check_interleaved.py	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,112 @@
+"""
+Helper script to check if AMAS input files are interleaved.
+"""
+import argparse
+import re
+import sys
+
+
+def check_phylip_interleaved(filepath):
+    """Return True if a PHYLIP file has more non-blank rows than taxa.
+
+    The first non-blank line must be the ``ntax nchar`` header (ValueError if
+    ntax is not an integer); an empty file is reported as not interleaved.
+    """
+    with open(filepath, encoding='utf-8') as f:
+        # Drop blank lines before numbering so spacer lines cannot shift the
+        # row index and make a sequential file look interleaved.
+        rows = (line.split() for line in f if line.strip())
+        header = next(rows, None)
+        ntax = int(header[0]) if header else 0
+        return any(idx > ntax for idx, _ in enumerate(rows, 1))
+
+
+def check_nexus_interleaved(filepath):
+    """Return True if the DATA/CHARACTERS block of a NEXUS file is interleaved.
+
+    True when FORMAT has an INTERLEAVE flag other than ``interleave=no``, or
+    when the matrix row count differs from the DIMENSIONS NTAX (if one is found).
+    """
+    in_data_block = False
+    in_matrix = False
+    ntax = None
+    seq_lines = 0
+
+    with open(filepath, encoding='utf-8') as f:
+        for line in f:
+            content = line.strip().lower()
+            if not content:
+                continue
+
+            if in_matrix:
+                if content == 'end;':
+                    return seq_lines != ntax if ntax else False
+                if content != ';':  # a lone ';' only closes the matrix
+                    seq_lines += 1
+                    if ntax and seq_lines > ntax:
+                        return True
+                continue
+
+            if not in_data_block:  # wait for "begin data" / "begin characters"
+                words = content.split()
+                in_data_block = (
+                    content.startswith('begin') and len(words) > 1
+                    and words[1].startswith(('data', 'characters')))
+                continue
+
+            if content.startswith('dimensions') and ntax is None:
+                # Tolerate spaces around '=' ("ntax = 3").
+                match = re.search(r'ntax\s*=\s*(\d+)', content)
+                if match:
+                    ntax = int(match.group(1))
+            elif content.startswith('format'):
+                # Bare "interleave" and "interleave=yes" enable it; "=no" does not.
+                if re.search(r'\binterleave\b(?!\s*=\s*no\b)', content):
+                    return True
+            elif content.startswith('matrix'):
+                in_matrix = True
+
+    return False
+
+
+def check_fasta_interleaved(filepath):
+    """Always False: FASTA files are not interleaved, so the file is not read."""
+    return False
+
+
+def main():
+    """Print the detected input format for the given files and return 0.
+
+    Prints ``<format>-int`` if all input files are interleaved and
+    ``<format>`` if none are; raises Exception when the files disagree.
+    """
+    parser = argparse.ArgumentParser(
+        description='Check if AMAS input files are interleaved'
+    )
+    parser.add_argument('input_files', nargs='+', help='Input sequence files')
+    parser.add_argument('--format', required=True,
+                        choices=['fasta', 'phylip', 'nexus'],
+                        help='Input format')
+    args = parser.parse_args()
+
+    # Choose the detector once; any format other than phylip/nexus is fasta.
+    detectors = {'phylip': check_phylip_interleaved,
+                 'nexus': check_nexus_interleaved}
+    detect = detectors.get(args.format, check_fasta_interleaved)
+
+    # Keep only the distinct answers so a mixed batch shows up as two values.
+    outcomes = {detect(path) for path in args.input_files}
+    if len(outcomes) > 1:
+        raise Exception("Error: Input files are a mix of interleaved/sequential formats")
+
+    if outcomes.pop():
+        print(f"{args.format}-int")
+    else:
+        print(args.format)
+
+    return 0
+
+
+# Run as a script: hand main()'s return value to the shell as the exit status.
+if __name__ == '__main__':
+    sys.exit(main())
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,173 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">25.0</token>
+
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">amas</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="version_command">
+        <version_command>python -c "import amas; print(amas.__version__)"</version_command>
+    </xml>
+
+    <token name="@SNIFF_INPUT_FORMAT@"><![CDATA[
+        #set $in_format = $input_files[0].ext
+        #if $in_format == 'nex'
+            #set $in_format = 'nexus'
+        #end if
+    ]]></token>
+
+    <token name="@CHECK_INTERLEAVED@"><![CDATA[
+        ## Check if inputs are interleaved
+        IN_FORMAT=\$(python '$__tool_directory__/check_interleaved.py' 
+            #for $f in $input_files
+                '${f}'
+            #end for
+            --format '${in_format}') &&
+    ]]></token>
+
+    <token name="@SYMLINK_INPUTS@"><![CDATA[
+        ## Create symlinks with original filename for consistent tests
+        #for $f in $input_files
+            #set $safename_input = re.sub('[^\w\-_\.]', '_', $f.element_identifier)
+            ln -s '${f}' '${safename_input}';
+        #end for
+    ]]></token>
+
+    <token name="@INPUT_FILENAMES@"><![CDATA[
+        #for $f in $input_files
+            #set $safename_input = re.sub('[^\w\-_\.]', '_', $f.element_identifier)
+            '${safename_input}'
+        #end for
+    ]]></token>
+
+    <xml name="output_format" token_name="out_format" token_label="Format of the output file">
+        <param name="out_format" type="select" label="@LABEL@">
+            <option value="fasta">fasta</option>
+            <option value="phylip">phylip (sequential)</option>
+            <option value="phylip-int">phylip (interleaved)</option>
+            <option value="nexus">nexus (sequential)</option>
+            <option value="nexus-int">nexus (interleaved)</option>
+        </param>
+    </xml>
+
+    <xml name="data_type">
+        <param name="data_type" type="select" label="Data type">
+            <option value="aa">Protein alignments</option>
+            <option value="dna">Nucleotide alignments</option>
+        </param>
+    </xml>
+
+    <xml name="check_align">
+        <param argument="--check-align" type="boolean" label="Check if input sequences are aligned" checked="false" truevalue="--check-align" falsevalue="" />
+    </xml>
+
+    <!-- Galaxy does not currently distinguish interleaved from sequential PHYLIP/NEXUS datasets; if that is ever implemented, update the formats here and the associated handling in the subcommand tools -->
+    <xml name="collection_outputs" token_name="alignments" token_label="alignment files">
+        <collection name="@NAME@_fasta" type="list" label="${tool.name} on ${on_string}: fasta">
+            <discover_datasets pattern="(?P&lt;name&gt;.+-out\..+)" format="fasta" />
+            <filter>out_format == "fasta"</filter>
+        </collection>
+        
+        <collection name="@NAME@_phylip" type="list" label="${tool.name} on ${on_string}: phylip">
+            <discover_datasets pattern="(?P&lt;name&gt;.+-out\..+)" format="phylip" />
+            <filter>out_format == "phylip" or out_format == "phylip-int"</filter>
+        </collection>
+        
+        <collection name="@NAME@_nexus" type="list" label="${tool.name} on ${on_string}: nexus">
+            <discover_datasets pattern="(?P&lt;name&gt;.+-out\..+)" format="nex" />
+            <filter>out_format == "nexus" or out_format == "nexus-int"</filter>
+        </collection>
+    </xml>
+
+    <token name="@PARTITIONS_HELP@"><![CDATA[
+        **What is a partitions file?**
+
+        The partitions file maps each gene/locus to its position in the concatenated alignment. This is essential for downstream phylogenetic analyses (e.g., RAxML, IQ-TREE) that can apply different evolutionary models to different partitions.
+
+        **Example:**
+
+        If you concatenate three genes::
+
+            gene1.fasta (500 bp)
+            gene2.fasta (700 bp)  
+            gene3.fasta (400 bp)
+
+        The partitions file (unspecified format) will contain::
+
+            gene1 = 1-500
+            gene2 = 501-1200
+            gene3 = 1201-1600
+
+        **Partition formats:**
+
+        - **Unspecified**
+        
+        ::
+
+            gene1 = 1-500
+            gene2 = 501-1200
+
+        - **RAxML**
+        
+        ::
+
+            DNA, gene1 = 1-500
+            DNA, gene2 = 501-1200
+
+        - **NEXUS**
+        
+        ::
+
+            #NEXUS
+
+            Begin sets;
+                charset gene1 = 1-500;
+                charset gene2 = 501-1200;
+            End;
+    ]]></token>
+
+    <token name="@AMAS_SHARED_HELP@"><![CDATA[
+        **Sequential vs Interleaved Phylip Format**
+
+        - **Sequential**: Each complete sequence is written in order, one after another. Easier for programmatic parsing.
+
+        ::
+
+            4 60
+            Seq1    ATGCATGCATATGCATGCATATGCATGCAT...
+            Seq2    ATGCATGCATATGCATGCATATGCATGCAT...
+            Seq3    ATGCATGCATATGCATGCATATGCATGCAT...
+            Seq4    ATGCATGCATATGCATGCATATGCATGCAT...
+
+        - **Interleaved**: Sequences are written in aligned blocks, making it easier to visually compare positions across sequences.
+
+        ::
+
+            4 60
+            Seq1    ATGCATGCATATGCATGCAT
+            Seq2    ATGCATGCATATGCATGCAT
+            Seq3    ATGCATGCATATGCATGCAT
+            Seq4    ATGCATGCATATGCATGCAT
+
+            Seq1    ATGCATGCAT...
+            Seq2    ATGCATGCAT...
+            Seq3    ATGCATGCAT...
+            Seq4    ATGCATGCAT...
+
+        **About AMAS**
+
+        AMAS (Alignment manipulation and summary statistics) is designed for modern phylogenomics workflows involving hundreds of taxa and thousands of loci.
+
+        Source code and manual: https://github.com/marekborowiec/AMAS
+    ]]></token>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.7717/peerj.1660</citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_1.fasta	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,6 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU10
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_2.fasta	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,6 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU10
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_int_1.nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,22 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=3 NCHAR=300;
+	FORMAT   INTERLEAVE   DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+Taxon_A    ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+Taxon_B    GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+Taxon_C    TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
+
+Taxon_A    ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+Taxon_B    GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+Taxon_C    TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
+
+Taxon_A    ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+Taxon_B    GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+Taxon_C    TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
+
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_int_1.phylip	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,12 @@
+3 300
+Taxon_A    ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+Taxon_B    GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+Taxon_C    TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
+
+ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
+
+ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT
+GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT
+TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_int_2.nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,19 @@
+#NEXUS
+
+Begin data;
+	Dimensions Nchar=300 Ntax=3;
+	Format  Datatype=DNA  Gap = - Missing = ? Interleave=yes;
+	matrix
+Taxon_A    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+Taxon_B    GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+Taxon_C    TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
+
+Taxon_A    GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+Taxon_B    CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+Taxon_C    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+Taxon_A    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+Taxon_B    GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+Taxon_C    TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
+;
+End;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_int_2.phylip	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,12 @@
+3 300
+Taxon_A    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+Taxon_B    GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+Taxon_C    TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
+
+GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_int_3.nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,16 @@
+#NEXUS
+
+begin data;
+	dimensions nchar=200 ntax=3;
+	format  datatype=DNA  gap = - missing = ?;
+	matrix
+Taxon_A    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+Taxon_B    GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+Taxon_C    TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
+
+Taxon_A    GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+Taxon_B    CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+Taxon_C    AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+;
+end;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/concat_result.phylip	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,4 @@
+3 200
+OTU1    ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU10   AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+OTU2    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/fasta1.fas	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,20 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU3
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU4
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU5
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU6
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU7
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU8
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU9
+ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU10
+ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/partitions_concat.nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,6 @@
+#NEXUS
+
+Begin sets;
+	charset p1_concat_1 = 1-100;
+	charset p2_concat_2 = 101-200;
+End;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/partitions_concat_unspecified.txt	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,2 @@
+p1_concat_1 = 1-100
+p2_concat_2 = 101-200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/remove_input.nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,20 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=10 NCHAR=100;
+	FORMAT DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+	OTU1    ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU10   ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU2    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU3    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU4    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU5    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU6    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU7    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU8    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU9    ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_concat.phylip	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,4 @@
+3 200
+OTU1    ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU10   AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+OTU2    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_concat_fasta.fas	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,12 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTC
+CAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+>OTU10
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTC
+CAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_concat_int.nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,18 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=3 NCHAR=600;
+	FORMAT   INTERLEAVE   DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+Taxon_A   ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+Taxon_B   GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+Taxon_C   TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA
+TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG
+
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_concat_int_multi.nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,18 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=3 NCHAR=800;
+	FORMAT   INTERLEAVE   DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+Taxon_A   ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+Taxon_B   GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+Taxon_C   TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT
+GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT
+TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC
+
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_partitions.nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,6 @@
+#NEXUS
+
+Begin sets;
+	charset p1_concat_1 = 1-100;
+	charset p2_concat_2 = 101-200;
+End;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_partitions_int.txt	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,2 @@
+p1_concat_int_1 = 1-300
+p2_concat_int_2 = 301-600
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_partitions_int_multi.txt	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,3 @@
+p1_concat_int_1 = 1-300
+p2_concat_int_2 = 301-600
+p3_concat_int_3 = 601-800
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_partitions_raxml.txt	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,2 @@
+DNA, p1_concat_1 = 1-100
+DNA, p2_concat_2 = 101-200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_remove_filtered.int-nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,19 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=8 NCHAR=100;
+	FORMAT   INTERLEAVE   DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+OTU1   ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU2   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU3   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU4   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU5   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU6   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU7   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+OTU8   ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_replicate1.nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,20 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=10 NCHAR=100;
+	FORMAT DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+	OTU1    ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU10   ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU2    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU3    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU4    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU5    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU6    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU7    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU8    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU9    ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_replicate2.nex	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,20 @@
+#NEXUS
+
+BEGIN DATA;
+	DIMENSIONS  NTAX=10 NCHAR=100;
+	FORMAT DATATYPE=DNA  GAP = - MISSING = ?;
+	MATRIX
+	OTU1    ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU10   ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU2    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU3    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU4    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU5    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU6    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU7    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU8    ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+	OTU9    ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
+
+;
+
+END;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_split_partition1.fas	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,9 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCT
+>OTU10
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAA
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_split_partition2.fas	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,9 @@
+>OTU1
+?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCT
+>OTU10
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTTT
+>OTU2
+ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA
+ACTGCTTAGCTCCACTCGCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_summary.txt	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,2 @@
+Alignment_name	No_of_taxa	Alignment_length	Total_matrix_cells	Undetermined_characters	Missing_percent	No_variable_sites	Proportion_variable_sites	Parsimony_informative_sites	Proportion_parsimony_informative	AT_content	GC_content	A	C	G	T	K	M	R	Y	S	W	B	V	H	D	X	N	O	-	?
+fasta1.fas	10	100	1000	1	0.1	2	0.02	1	0.01	0.543	0.457	262	297	160	280	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_taxa_summary.txt	Tue Dec 02 09:28:02 2025 +0000
@@ -0,0 +1,11 @@
+Alignment_name	Taxon_name	Sequence_length	Undetermined_characters	Missing_percent	AT_content	GC_content	A	C	G	T	K	M	R	Y	S	W	B	V	H	D	X	N	O	-	?
+fasta1.fas	OTU1	100	1	1.0	0.545	0.455	26	29	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1
+fasta1.fas	OTU10	100	0	0.0	0.55	0.45	27	29	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU2	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU3	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU4	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU5	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU6	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU7	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU8	100	0	0.0	0.54	0.46	26	30	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+fasta1.fas	OTU9	100	0	0.0	0.55	0.45	27	29	16	28	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
\ No newline at end of file